This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s (PR middle-end/23848)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 4 Nov 2007 19:12:00 -0500
- Subject: [PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s (PR middle-end/23848)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch makes it possible to optimize out unnecessary
__builtin_stack_{save,restore} pairs, each of which often costs one call-saved
register (or stack slot) per level of VLA nesting. It applies to VLAs that go
out of scope at the end of the function (they then work just like normal
alloca) or when there are neither function calls nor inline asm between
leaving their scope and function exit (or another __builtin_stack_restore).
This is just a simplistic optimization, but it should always cover VLAs in the
function's outermost scope (which really are supposed to work like alloca) and
hit quite often in other cases as well.
At least for 4.3, I don't think we need to make this a whole new pass that
would try harder to optimize by walking possibly many BBs, checking whether
there could be function calls or inline asm between the stack restore and the
return (or another stack restore) and whether there are any back edges.
For example, foo2 from the attached pr23848-1.c compiles on x86_64 at -O2 to the following before this patch:
foo2:
pushq %rbp
movslq %edi,%rcx
movl %edi, %r8d
addq $30, %rcx
leal -1(%r8), %eax
movq %rsp, %rbp
andq $-16, %rcx
movq %rbx, -32(%rbp)
movq %r12, -24(%rbp)
cltq
movq %r13, -16(%rbp)
movq %r14, -8(%rbp)
subq $32, %rsp
movq %rsp, %r14
subq %rcx, %rsp
movq %rsp, %r13
leaq 15(%rsp), %rdi
subq %rcx, %rsp
movq %rsp, %r12
leaq 15(%rsp), %rsi
subq %rcx, %rsp
movq %rsp, %rbx
leaq 15(%rsp), %rdx
subq %rcx, %rsp
leaq 15(%rsp), %rcx
andq $-16, %rdi
andq $-16, %rsi
movb $0, (%rdi,%rax)
andq $-16, %rdx
andq $-16, %rcx
call bar2
movq %rbx, %rsp
movq %r12, %rsp
movq %r13, %rsp
movq %r14, %rsp
movq -32(%rbp), %rbx
movq -24(%rbp), %r12
movq -16(%rbp), %r13
movq -8(%rbp), %r14
leave
ret
while with the patch it is just:
foo2:
movslq %edi,%rcx
pushq %rbp
movl %edi, %r8d
addq $30, %rcx
leal -1(%r8), %eax
andq $-16, %rcx
movq %rsp, %rbp
subq %rcx, %rsp
cltq
leaq 15(%rsp), %rdi
subq %rcx, %rsp
leaq 15(%rsp), %rsi
subq %rcx, %rsp
leaq 15(%rsp), %rdx
subq %rcx, %rsp
andq $-16, %rdi
leaq 15(%rsp), %rcx
movb $0, (%rdi,%rax)
andq $-16, %rsi
andq $-16, %rdx
andq $-16, %rcx
call bar2
leave
ret
Bootstrapped/regtested on x86_64-linux, ok for trunk?
2007-11-05 Jakub Jelinek <jakub@redhat.com>
PR middle-end/23848
* tree-ssa-ccp.c (optimize_stack_restore): New function.
(execute_fold_all_builtins): Call optimize_stack_restore for
BUILT_IN_STACK_RESTORE.
* gcc.dg/tree-ssa/pr23848-1.c: New test.
* gcc.dg/tree-ssa/pr23848-2.c: New test.
* gcc.dg/tree-ssa/pr23848-3.c: New test.
* gcc.dg/tree-ssa/pr23848-4.c: New test.
--- gcc/tree-ssa-ccp.c.jj 2007-09-04 23:09:30.000000000 +0200
+++ gcc/tree-ssa-ccp.c 2007-11-04 22:51:10.000000000 +0100
@@ -2598,6 +2598,76 @@ fold_stmt_inplace (tree stmt)
return changed;
}
+/* Try to optimize out __builtin_stack_restore. Optimize it out
+ if there is another __builtin_stack_restore in the same basic
+ block and no calls or ASM_EXPRs are in between, or if this block's
+ only outgoing edge is to EXIT_BLOCK and there are no calls or
+ ASM_EXPRs after this __builtin_stack_restore. */
+
+static tree
+optimize_stack_restore (basic_block bb, tree call, block_stmt_iterator i)
+{
+ tree stack_save, stmt, callee;
+
+ if (TREE_CODE (call) != CALL_EXPR
+ || call_expr_nargs (call) != 1
+ || TREE_CODE (CALL_EXPR_ARG (call, 0)) != SSA_NAME
+ || !POINTER_TYPE_P (TREE_TYPE (CALL_EXPR_ARG (call, 0))))
+ return NULL_TREE;
+
+ for (bsi_next (&i); !bsi_end_p (i); bsi_next (&i))
+ {
+ tree call;
+
+ stmt = bsi_stmt (i);
+ if (TREE_CODE (stmt) == ASM_EXPR)
+ return NULL_TREE;
+ call = get_call_expr_in (stmt);
+ if (call == NULL)
+ continue;
+
+ callee = get_callee_fndecl (call);
+ if (!callee || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
+ return NULL_TREE;
+
+ if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE)
+ break;
+ }
+
+ if (bsi_end_p (i)
+ && (! single_succ_p (bb)
+ || single_succ_edge (bb)->dest != EXIT_BLOCK_PTR))
+ return NULL_TREE;
+
+ stack_save = SSA_NAME_DEF_STMT (CALL_EXPR_ARG (call, 0));
+ if (TREE_CODE (stack_save) != GIMPLE_MODIFY_STMT
+ || GIMPLE_STMT_OPERAND (stack_save, 0) != CALL_EXPR_ARG (call, 0)
+ || TREE_CODE (GIMPLE_STMT_OPERAND (stack_save, 1)) != CALL_EXPR
+ || tree_could_throw_p (stack_save)
+ || !has_single_use (CALL_EXPR_ARG (call, 0)))
+ return NULL_TREE;
+
+ callee = get_callee_fndecl (GIMPLE_STMT_OPERAND (stack_save, 1));
+ if (!callee
+ || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
+ || DECL_FUNCTION_CODE (callee) != BUILT_IN_STACK_SAVE
+ || call_expr_nargs (GIMPLE_STMT_OPERAND (stack_save, 1)) != 0)
+ return NULL_TREE;
+
+ stmt = stack_save;
+ push_stmt_changes (&stmt);
+ if (!set_rhs (&stmt,
+ build_int_cst (TREE_TYPE (CALL_EXPR_ARG (call, 0)), 0)))
+ {
+ discard_stmt_changes (&stmt);
+ return NULL_TREE;
+ }
+ gcc_assert (stmt == stack_save);
+ pop_stmt_changes (&stmt);
+
+ return integer_zero_node;
+}
+
/* Convert EXPR into a GIMPLE value suitable for substitution on the
RHS of an assignment. Insert the necessary statements before
iterator *SI_P.
@@ -2682,6 +2752,12 @@ execute_fold_all_builtins (void)
result = integer_zero_node;
break;
+ case BUILT_IN_STACK_RESTORE:
+ result = optimize_stack_restore (bb, *stmtp, i);
+ if (result)
+ break;
+ /* FALLTHRU */
+
default:
bsi_next (&i);
continue;
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c.jj 2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c 2007-11-04 23:03:30.000000000 +0100
@@ -0,0 +1,32 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar1 (char *, int);
+void foo1 (int size)
+{
+ char temp[size];
+ temp[size-1] = '\0';
+ bar1 (temp, size);
+}
+
+void bar2 (char *, char *, char *, char *, int);
+void foo2 (int size)
+{
+ char temp[size];
+ temp[size-1] = '\0';
+ {
+ char temp2[size];
+ {
+ char temp3[size];
+ {
+ char temp4[size];
+ bar2 (temp, temp2, temp3, temp4, size);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_stack_save" "optimized"} } */
+/* { dg-final { scan-tree-dump-not "__builtin_stack_restore" "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c.jj 2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c 2007-11-04 23:04:06.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (char *, char *, char *, char *, int);
+void foo (int size)
+{
+ char temp[size];
+ temp[size-1] = '\0';
+ {
+ char temp2[size];
+ {
+ char temp3[size];
+ {
+ char temp4[size];
+ bar (temp, temp2, temp3, temp4, size);
+ }
+ }
+ bar (temp, temp2, (char *) 0, (char *) 0, size);
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c.jj 2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c 2007-11-04 23:03:51.000000000 +0100
@@ -0,0 +1,28 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (int, char *, char *, char *, char *, int);
+void foo (int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ char temp[size];
+ temp[size-1] = '\0';
+ {
+ char temp2[size];
+ {
+ char temp3[size];
+ {
+ char temp4[size];
+ bar (i, temp, temp2, temp3, temp4, size);
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c.jj 2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c 2007-11-04 23:04:48.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (char *, char *, char *, char *, int);
+void foo (int size)
+{
+ char temp[size];
+ temp[size-1] = '\0';
+ {
+ char temp2[size];
+ {
+ char temp3[size];
+ {
+ char temp4[size];
+ bar (temp, temp2, temp3, temp4, size);
+ }
+ }
+ __asm __volatile ("" : : "r" (&temp[0]), "r" (&temp2[0]) : "memory");
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Jakub