This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RFC: [PATCH] Add __builtin_ia32_stack_top
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Uros Bizjak <ubizjak at gmail dot com>
- Date: Tue, 21 Jul 2015 14:45:39 -0700
- Subject: RFC: [PATCH] Add __builtin_ia32_stack_top
- Authentication-results: sourceware.org; auth=none
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
When __builtin_frame_address is used to retrieve the address of the
function stack frame, the frame pointer is always kept, which wastes one
register and 2 instructions. For x86-32, one less register means
significant negative impact on performance. This patch adds a new
builtin function, __builtin_ia32_stack_top, to x86 backend. It
returns the stack address when the function is called.
Any comments, feedbacks?
Thanks.
H.J.
---
gcc/
PR target/66960
* config/i386/i386.c (ix86_expand_prologue): Sorry if DRAP is
used and the stack address has been taken.
(ix86_builtins): Add IX86_BUILTIN_STACK_TOP.
(ix86_init_mmx_sse_builtins): Add __builtin_ia32_stack_top.
(ix86_expand_builtin): Handle IX86_BUILTIN_STACK_TOP.
* config/i386/i386.h (machine_function): Add stack_top_taken.
* doc/extend.texi: Document __builtin_ia32_stack_top.
gcc/testsuite/
PR target/66960
* gcc.target/i386/pr66960-1.c: New test.
* gcc.target/i386/pr66960-2.c: Likewise.
* gcc.target/i386/pr66960-3.c: Likewise.
* gcc.target/i386/pr66960-4.c: Likewise.
---
gcc/config/i386/i386.c | 28 ++++++++++++++++++++++++++
gcc/config/i386/i386.h | 4 ++++
gcc/doc/extend.texi | 3 +++
gcc/testsuite/gcc.target/i386/pr66960-1.c | 33 +++++++++++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr66960-2.c | 33 +++++++++++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr66960-3.c | 17 ++++++++++++++++
gcc/testsuite/gcc.target/i386/pr66960-4.c | 21 ++++++++++++++++++++
7 files changed, 139 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-4.c
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8b9baf8..a252473 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11613,6 +11613,12 @@ ix86_expand_prologue (void)
{
int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ /* Can't use DRAP if the stack address has been taken. */
+ if (cfun->machine->stack_top_taken)
+ sorry ("%<__builtin_ia32_stack_top%> not supported with stack"
+ " realignment. This may be worked around by adding"
+ " -maccumulate-outgoing-arg.");
+
/* Only need to push parameter pointer reg if it is caller saved. */
if (!call_used_regs[REGNO (crtl->drap_reg)])
{
@@ -30779,6 +30785,9 @@ enum ix86_builtins
IX86_BUILTIN_READ_FLAGS,
IX86_BUILTIN_WRITE_FLAGS,
+ /* Get the stack address when the function is called. */
+ IX86_BUILTIN_STACK_TOP,
+
IX86_BUILTIN_MAX
};
@@ -34391,6 +34400,10 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
+ /* Get the stack address when the function is called. */
+ def_builtin (0, "__builtin_ia32_stack_top",
+ PVOID_FTYPE_VOID, IX86_BUILTIN_STACK_TOP);
+
/* Add FMA4 multi-arg argument instructions */
for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
{
@@ -40325,6 +40338,21 @@ addcarryx:
emit_insn (gen_xabort (op0));
return 0;
+ case IX86_BUILTIN_STACK_TOP:
+ cfun->machine->stack_top_taken = true;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ /* After the prologue, stack top is at -WORD(AP) in the current
+ frame. */
+ emit_insn (gen_rtx_SET (target,
+ plus_constant (Pmode, arg_pointer_rtx,
+ -UNITS_PER_WORD)));
+ return target;
+
default:
break;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index ab668fe..a33a9eb 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2483,6 +2483,10 @@ struct GTY(()) machine_function {
/* If true, it is safe to not save/restore DRAP register. */
BOOL_BITFIELD no_drap_save_restore : 1;
+ /* If true, the stack address of the current function has been
+ taken. */
+ BOOL_BITFIELD stack_top_taken : 1;
+
/* If true, there is register available for argument passing. This
is used only in ix86_function_ok_for_sibcall by 32-bit to determine
if there is scratch register available for indirect sibcall. In
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b18d8fb..a41defc 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16661,6 +16661,9 @@ The following built-in function is always available.
@item void __builtin_ia32_pause (void)
Generates the @code{pause} machine instruction with a compiler memory
barrier.
+
+@item void *__builtin_ia32_stack_top (void)
+Retrieves the stack address when the function is called.
@end table
The following floating-point built-in functions are made available in the
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-1.c b/gcc/testsuite/gcc.target/i386/pr66960-1.c
new file mode 100644
index 0000000..74181cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fomit-frame-pointer" { target { lp64 } } } */
+/* { dg-options "-O2 -fomit-frame-pointer -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -fomit-frame-pointer -miamcu" { target { ia32 } } } */
+
+extern char **environ;
+extern void exit (int status);
+extern int main (long argc, char **argv, char **envp);
+
+void
+_start (void)
+{
+ void *argc_p = __builtin_ia32_stack_top ();
+ char **argv = (char **) (argc_p + sizeof (void *));
+ long argc = *(long *) argc_p;
+ int status;
+
+ environ = argv + argc + 1;
+
+ status = main (argc, argv, environ);
+
+ exit (status);
+}
+
+/* { dg-final { scan-assembler "movq\[ \t\]8\\(%rsp\\), %rdi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]16\\(%rsp\\), %rsi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]24\\(%rsp,%rdi,8\\), %rdx" { target lp64 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]8\\(%esp\\), %edi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]12\\(%rsp\\), %esi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%rsi,%rdi,4\\), %edx" { target x32 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]\\(%esp\\), %eax" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%esp\\), %edx" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]8\\(%esp,%eax,4\\), %ecx" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-2.c b/gcc/testsuite/gcc.target/i386/pr66960-2.c
new file mode 100644
index 0000000..8252c2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" { target { lp64 } } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -miamcu" { target { ia32 } } } */
+
+extern char **environ;
+extern void exit (int status);
+extern int main (long argc, char **argv, char **envp);
+
+void
+_start (void)
+{
+ void *argc_p = __builtin_ia32_stack_top ();
+ char **argv = (char **) (argc_p + sizeof (void *));
+ long argc = *(long *) argc_p;
+ int status;
+
+ environ = argv + argc + 1;
+
+ status = main (argc, argv, environ);
+
+ exit (status);
+}
+
+/* { dg-final { scan-assembler "movq\[ \t\]8\\(%rbp\\), %rdi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]16\\(%rbp\\), %rsi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]24\\(%rbp,%rdi,8\\), %rdx" { target lp64 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]8\\(%ebp\\), %edi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]12\\(%rbp\\), %esi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%rsi,%rdi,4\\), %edx" { target x32 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]4\\(%ebp\\), %eax" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]8\\(%ebp\\), %edx" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]12\\(%ebp,%eax,4\\), %ecx" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-3.c b/gcc/testsuite/gcc.target/i386/pr66960-3.c
new file mode 100644
index 0000000..1698003
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -mno-accumulate-outgoing-args" { target { lp64 } } } */
+/* { dg-options "-O2 -mno-accumulate-outgoing-args -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -mno-accumulate-outgoing-args -miamcu" { target { ia32 } } } */
+
+extern void abort (void);
+extern int check_int (int *i, int align);
+typedef int aligned __attribute__((aligned(64)));
+
+void *
+foo (void)
+{
+ aligned j;
+ if (check_int (&j, __alignof__(j)) != j)
+ abort ();
+ return __builtin_ia32_stack_top ();
+} /* { dg-message "sorry, unimplemented: .__builtin_ia32_stack_top. not supported" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-4.c b/gcc/testsuite/gcc.target/i386/pr66960-4.c
new file mode 100644
index 0000000..82a032c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-4.c
@@ -0,0 +1,21 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -maccumulate-outgoing-args" { target { lp64 } } } */
+/* { dg-options "-O2 -maccumulate-outgoing-args -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -maccumulate-outgoing-args -miamcu" { target { ia32 } } } */
+
+extern void abort (void);
+extern int check_int (int *i, int align);
+typedef int aligned __attribute__((aligned(64)));
+
+void *
+foo (void)
+{
+ aligned j;
+ if (check_int (&j, __alignof__(j)) != j)
+ abort ();
+ return __builtin_ia32_stack_top ();
+}
+
+/* { dg-final { scan-assembler "leaq\[ \t\]8\\(%rbp\\), %rax" { target lp64 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]8\\(%rbp\\), %eax" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%ebp\\), %eax" { target ia32 } } } */
--
2.4.3