This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] i386: Don't use frame pointer without stack access
On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Arjan van de Ven <arjan@linux.intel.com> writes:
>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>>> this optimization removes more than 730
>>>>
>>>> pushq %rbp
>>>> movq %rsp, %rbp
>>>> popq %rbp
>>>
>>> If you don't want the frame pointer, why are you compiling with
>>> -fno-omit-frame-pointer? Are you going to add
>>> -fforce-no-omit-frame-pointer or something similar so that people can
>>> actually get what they are asking for? This doesn't really make sense.
>>> It is perfectly fine to omit frame pointer by default, when it isn't
>>> required for something, but if the user asks for it, we shouldn't ignore his
>>> request.
>>>
>>
>>
>> wanting a framepointer is very nice and desired... ... but if the
>> optimizer/ins scheduler moves instructions outside of the frame'd
>> portion, (it does it for cases like below as well), the value is
>> already negative for these functions that don't have stack use.
>>
>> <MPIDU_Sched_are_pending@@Base>:
>> mov all_schedules@@Base-0x38460,%rax
>> push %rbp
>> mov %rsp,%rbp
>> pop %rbp
>> cmpq $0x0,(%rax)
>> setne %al
>> movzbl %al,%eax
>> retq
>
> Yeah, and it could be even weirder for big single-block functions.
> I think GCC has been doing this kind of scheduling of prologue and
> epilogue instructions for a while, so there hasn*t really been a
> guarantee which parts of the function will have a new FP and which
> will still have the old one.
>
> Also, with an arbitrarily-picked host compiler (GCC 6.3.1), shrink-wrapping
> kicks in when the following is compiled with -O3 -fno-omit-frame-pointer:
>
> void f (int *);
> void
> g (int *x)
> {
> for (int i = 0; i < 1000; ++i)
> x[i] += 1;
> if (x[0])
> {
> int temp;
> f (&temp);
> }
> }
>
> so only the block with the call to f sets up FP. The relatively
> long-running loop runs with the caller's FP.
>
> I hope we can go for a target-independent position that what HJ*s
> patch does is OK...
>
In light of this, I am resubmitting my patch. I added 3 more testcases
and also handle:
typedef int v8si __attribute__ ((vector_size (32)));
void
foo (v8si *out_start, v8si *out_end, v8si *regions)
{
v8si base = regions[3];
*out_start = base;
*out_end = base;
}
OK for trunk?
Thanks.
--
H.J.
From d18008a8052973ab8582a1662f42669a9d318d0d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 6 Aug 2017 06:24:36 -0700
Subject: [PATCH] i386: Don't use frame pointer without stack access
When there is no stack access, there is no need to use frame pointer
even if -fno-omit-frame-pointer is used.
gcc/
PR target/81736
* config/i386/i386.c (ix86_finalize_stack_realign_flags): Renamed
to ...
(ix86_finalize_stack_frame_flags): This. Also clear
frame_pointer_needed if -fno-omit-frame-pointer is used without
stack access.
(ix86_expand_prologue): Replace ix86_finalize_stack_realign_flags
with ix86_finalize_stack_frame_flags.
(ix86_expand_epilogue): Likewise.
(ix86_expand_split_stack_prologue): Likewise.
gcc/testsuite/
PR target/81736
* gcc.target/i386/pr81736-1.c: New test.
* gcc.target/i386/pr81736-2.c: Likewise.
* gcc.target/i386/pr81736-3.c: Likewise.
* gcc.target/i386/pr81736-4.c: Likewise.
* gcc.target/i386/pr81736-5.c: Likewise.
* gcc.target/i386/pr81736-6.c: Likewise.
* gcc.target/i386/pr81736-7.c: Likewise.
---
gcc/config/i386/i386.c | 23 ++++++++++++-----------
gcc/testsuite/gcc.target/i386/pr81736-1.c | 13 +++++++++++++
gcc/testsuite/gcc.target/i386/pr81736-2.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr81736-3.c | 11 +++++++++++
gcc/testsuite/gcc.target/i386/pr81736-4.c | 11 +++++++++++
gcc/testsuite/gcc.target/i386/pr81736-5.c | 20 ++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr81736-6.c | 16 ++++++++++++++++
gcc/testsuite/gcc.target/i386/pr81736-7.c | 13 +++++++++++++
8 files changed, 110 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-7.c
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dfef996e36c..ed51595298d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14116,10 +14116,11 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
-/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
- to be generated in correct form. */
+/* Finalize stack_realign_needed and frame_pointer_needed flags, which
+ will guide prologue/epilogue to be generated in correct form. */
+
static void
-ix86_finalize_stack_realign_flags (void)
+ix86_finalize_stack_frame_flags (void)
{
/* Check if stack realign is really needed after reload, and
stores result in cfun */
@@ -14142,13 +14143,13 @@ ix86_finalize_stack_realign_flags (void)
}
/* If the only reason for frame_pointer_needed is that we conservatively
- assumed stack realignment might be needed, but in the end nothing that
- needed the stack alignment had been spilled, clear frame_pointer_needed
- and say we don't need stack realignment. */
- if (stack_realign
+ assumed stack realignment might be needed or -fno-omit-frame-pointer
+ is used, but in the end nothing that needed the stack alignment had
+ been spilled nor stack access, clear frame_pointer_needed and say we
+ don't need stack realignment. */
+ if ((stack_realign || !flag_omit_frame_pointer)
&& frame_pointer_needed
&& crtl->is_leaf
- && flag_omit_frame_pointer
&& crtl->sp_is_unchanging
&& !ix86_current_function_calls_tls_descriptor
&& !crtl->accesses_prior_frames
@@ -14339,7 +14340,7 @@ ix86_expand_prologue (void)
if (ix86_function_naked (current_function_decl))
return;
- ix86_finalize_stack_realign_flags ();
+ ix86_finalize_stack_frame_flags ();
/* DRAP should not coexist with stack_realign_fp */
gcc_assert (!(crtl->drap_reg && stack_realign_fp));
@@ -15203,7 +15204,7 @@ ix86_expand_epilogue (int style)
return;
}
- ix86_finalize_stack_realign_flags ();
+ ix86_finalize_stack_frame_flags ();
frame = m->frame;
m->fs.sp_realigned = stack_realign_fp;
@@ -15738,7 +15739,7 @@ ix86_expand_split_stack_prologue (void)
gcc_assert (flag_split_stack && reload_completed);
- ix86_finalize_stack_realign_flags ();
+ ix86_finalize_stack_frame_flags ();
frame = cfun->machine->frame;
allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1.c b/gcc/testsuite/gcc.target/i386/pr81736-1.c
new file mode 100644
index 00000000000..92c7bc97a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int i;
+
+int
+foo (void)
+{
+ return i;
+}
+
+/* No need to use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-2.c b/gcc/testsuite/gcc.target/i386/pr81736-2.c
new file mode 100644
index 00000000000..a3720879937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (int i)
+{
+ return i;
+}
+
+/* No need to use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-3.c b/gcc/testsuite/gcc.target/i386/pr81736-3.c
new file mode 100644
index 00000000000..c3bde7dd933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+void
+foo (void)
+{
+ asm ("# " : : : "ebx");
+}
+
+/* Need to use a frame pointer. */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-4.c b/gcc/testsuite/gcc.target/i386/pr81736-4.c
new file mode 100644
index 00000000000..25f50016a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+foo (int i1, int i2, int i3, int i4, int i5, int i6, int i7)
+{
+ return i7;
+}
+
+/* Need to use a frame pointer. */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-5.c b/gcc/testsuite/gcc.target/i386/pr81736-5.c
new file mode 100644
index 00000000000..e1602cf25ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -mavx" } */
+
+typedef int v8si __attribute__ ((vector_size (32)));
+
+void
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (v8si *out_start, v8si *out_end, v8si *regions)
+{
+ v8si base = regions[3];
+ *out_start = base;
+ *out_end = base;
+}
+
+/* No need to use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
+/* Verify no dynamic realignment is performed. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-6.c b/gcc/testsuite/gcc.target/i386/pr81736-6.c
new file mode 100644
index 00000000000..6198574c8cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+struct foo
+{
+ int head;
+} a;
+
+int
+bar (void)
+{
+ return a.head != 0;
+}
+
+/* No need to use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-7.c b/gcc/testsuite/gcc.target/i386/pr81736-7.c
new file mode 100644
index 00000000000..f947886e642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int foo (void);
+
+int
+bar (void)
+{
+ return foo ();
+}
+
+/* No need to use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
--
2.13.4