[PATCH] Optimize out unnecessary __builtin_stack_{save,restore}s (PR middle-end/23848)


Hi!

This patch optimizes out unnecessary __builtin_stack_{save,restore} pairs,
which often means saving one call-saved register (or stack slot) per VLA
nesting level.  It applies to VLAs that only go out of scope at the end of
the function (where they behave just like normal alloca), or where there are
no function calls nor inline asm between leaving their scope and function
exit (or another __builtin_stack_restore).  This is just a simplistic
optimization, but it should always cover VLAs in the function's outermost
scope (which really are supposed to work like alloca) and hit quite often in
other cases as well.  At least for 4.3 I don't think we need to make this a
whole new pass that would try harder by walking possibly many basic blocks,
checking whether there could be function calls or inline asm between the
stack restore and the return (resp. another stack restore) and whether there
are no back edges in between.
Before this patch, foo2 compiled on x86_64 at -O2 is:
foo2:
        pushq   %rbp
        movslq  %edi,%rcx
        movl    %edi, %r8d
        addq    $30, %rcx
        leal    -1(%r8), %eax
        movq    %rsp, %rbp
        andq    $-16, %rcx
        movq    %rbx, -32(%rbp)
        movq    %r12, -24(%rbp)
        cltq
        movq    %r13, -16(%rbp)
        movq    %r14, -8(%rbp)
        subq    $32, %rsp
        movq    %rsp, %r14
        subq    %rcx, %rsp
        movq    %rsp, %r13
        leaq    15(%rsp), %rdi
        subq    %rcx, %rsp
        movq    %rsp, %r12
        leaq    15(%rsp), %rsi
        subq    %rcx, %rsp
        movq    %rsp, %rbx
        leaq    15(%rsp), %rdx
        subq    %rcx, %rsp
        leaq    15(%rsp), %rcx
        andq    $-16, %rdi
        andq    $-16, %rsi
        movb    $0, (%rdi,%rax)
        andq    $-16, %rdx
        andq    $-16, %rcx
        call    bar2
        movq    %rbx, %rsp
        movq    %r12, %rsp
        movq    %r13, %rsp
        movq    %r14, %rsp
        movq    -32(%rbp), %rbx
        movq    -24(%rbp), %r12
        movq    -16(%rbp), %r13
        movq    -8(%rbp), %r14
        leave
        ret
while with the patch it is just:
foo2:
        movslq  %edi,%rcx
        pushq   %rbp
        movl    %edi, %r8d
        addq    $30, %rcx
        leal    -1(%r8), %eax
        andq    $-16, %rcx
        movq    %rsp, %rbp
        subq    %rcx, %rsp
        cltq
        leaq    15(%rsp), %rdi
        subq    %rcx, %rsp
        leaq    15(%rsp), %rsi
        subq    %rcx, %rsp
        leaq    15(%rsp), %rdx
        subq    %rcx, %rsp
        andq    $-16, %rdi
        leaq    15(%rsp), %rcx
        movb    $0, (%rdi,%rax)
        andq    $-16, %rsi
        andq    $-16, %rdx
        andq    $-16, %rcx
        call    bar2
        leave
        ret
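
At the tree level the transformation is simple: the result of the
__builtin_stack_save feeding such a __builtin_stack_restore is replaced with
a null pointer constant and the restore call itself is folded away, so both
statements become dead.  Roughly (a hand-written illustration, not an actual
-fdump-tree-optimized dump; the SSA name sp_save_1 is made up):

  before:   sp_save_1 = __builtin_stack_save ();
            ...
            __builtin_stack_restore (sp_save_1);

  after:    sp_save_1 = 0;     /* now dead, removed by DCE */
            ...
            /* the restore call is folded away */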

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2007-11-05  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/23848
	* tree-ssa-ccp.c (optimize_stack_restore): New function.
	(execute_fold_all_builtins): Call optimize_stack_restore for
	BUILT_IN_STACK_RESTORE.

	* gcc.dg/tree-ssa/pr23848-1.c: New test.
	* gcc.dg/tree-ssa/pr23848-2.c: New test.
	* gcc.dg/tree-ssa/pr23848-3.c: New test.
	* gcc.dg/tree-ssa/pr23848-4.c: New test.

--- gcc/tree-ssa-ccp.c.jj	2007-09-04 23:09:30.000000000 +0200
+++ gcc/tree-ssa-ccp.c	2007-11-04 22:51:10.000000000 +0100
@@ -2598,6 +2598,76 @@ fold_stmt_inplace (tree stmt)
   return changed;
 }
 
+/* Try to optimize out __builtin_stack_restore.  Optimize it out
+   if there is another __builtin_stack_restore in the same basic
+   block and no calls or ASM_EXPRs are in between, or if this block's
+   only outgoing edge is to EXIT_BLOCK and there are no calls or
+   ASM_EXPRs after this __builtin_stack_restore.  */
+
+static tree
+optimize_stack_restore (basic_block bb, tree call, block_stmt_iterator i)
+{
+  tree stack_save, stmt, callee;
+
+  if (TREE_CODE (call) != CALL_EXPR
+      || call_expr_nargs (call) != 1
+      || TREE_CODE (CALL_EXPR_ARG (call, 0)) != SSA_NAME
+      || !POINTER_TYPE_P (TREE_TYPE (CALL_EXPR_ARG (call, 0))))
+    return NULL_TREE;
+
+  for (bsi_next (&i); !bsi_end_p (i); bsi_next (&i))
+    {
+      tree call;
+
+      stmt = bsi_stmt (i);
+      if (TREE_CODE (stmt) == ASM_EXPR)
+	return NULL_TREE;
+      call = get_call_expr_in (stmt);
+      if (call == NULL)
+	continue;
+
+      callee = get_callee_fndecl (call);
+      if (!callee || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
+	return NULL_TREE;
+
+      if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE)
+	break;
+    }
+
+  if (bsi_end_p (i)
+      && (! single_succ_p (bb)
+	  || single_succ_edge (bb)->dest != EXIT_BLOCK_PTR))
+    return NULL_TREE;
+
+  stack_save = SSA_NAME_DEF_STMT (CALL_EXPR_ARG (call, 0));
+  if (TREE_CODE (stack_save) != GIMPLE_MODIFY_STMT
+      || GIMPLE_STMT_OPERAND (stack_save, 0) != CALL_EXPR_ARG (call, 0)
+      || TREE_CODE (GIMPLE_STMT_OPERAND (stack_save, 1)) != CALL_EXPR
+      || tree_could_throw_p (stack_save)
+      || !has_single_use (CALL_EXPR_ARG (call, 0)))
+    return NULL_TREE;
+
+  callee = get_callee_fndecl (GIMPLE_STMT_OPERAND (stack_save, 1));
+  if (!callee
+      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
+      || DECL_FUNCTION_CODE (callee) != BUILT_IN_STACK_SAVE
+      || call_expr_nargs (GIMPLE_STMT_OPERAND (stack_save, 1)) != 0)
+    return NULL_TREE;
+
+  stmt = stack_save;
+  push_stmt_changes (&stmt);
+  if (!set_rhs (&stmt,
+		build_int_cst (TREE_TYPE (CALL_EXPR_ARG (call, 0)), 0)))
+    {
+      discard_stmt_changes (&stmt);
+      return NULL_TREE;
+    }
+  gcc_assert (stmt == stack_save);
+  pop_stmt_changes (&stmt);
+
+  return integer_zero_node;
+}
+
 /* Convert EXPR into a GIMPLE value suitable for substitution on the
    RHS of an assignment.  Insert the necessary statements before
    iterator *SI_P. 
@@ -2682,6 +2752,12 @@ execute_fold_all_builtins (void)
 		result = integer_zero_node;
 		break;
 
+	      case BUILT_IN_STACK_RESTORE:
+		result = optimize_stack_restore (bb, *stmtp, i);
+		if (result)
+		  break;
+		/* FALLTHRU */
+
 	      default:
 		bsi_next (&i);
 		continue;
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c	2007-11-04 23:03:30.000000000 +0100
@@ -0,0 +1,32 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar1 (char *, int);
+void foo1 (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  bar1 (temp, size);
+}
+
+void bar2 (char *, char *, char *, char *, int);
+void foo2 (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  {
+    char temp2[size];
+    {
+      char temp3[size];
+      {
+	char temp4[size];
+	bar2 (temp, temp2, temp3, temp4, size);
+      }
+    }
+  }
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_stack_save" "optimized"} } */
+/* { dg-final { scan-tree-dump-not "__builtin_stack_restore" "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c	2007-11-04 23:04:06.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (char *, char *, char *, char *, int);
+void foo (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  {
+    char temp2[size];
+    {
+      char temp3[size];
+      {
+	char temp4[size];
+	bar (temp, temp2, temp3, temp4, size);
+      }
+    }
+    bar (temp, temp2, (char *) 0, (char *) 0, size);
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c	2007-11-04 23:03:51.000000000 +0100
@@ -0,0 +1,28 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (int, char *, char *, char *, char *, int);
+void foo (int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      char temp[size];
+      temp[size-1] = '\0';
+      {
+	char temp2[size];
+	{
+	  char temp3[size];
+	  {
+	    char temp4[size];
+	    bar (i, temp, temp2, temp3, temp4, size);
+	  }
+	}
+      }
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c.jj	2007-11-04 22:53:03.000000000 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c	2007-11-04 23:04:48.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/23848 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar (char *, char *, char *, char *, int);
+void foo (int size)
+{
+  char temp[size];
+  temp[size-1] = '\0';
+  {
+    char temp2[size];
+    {
+      char temp3[size];
+      {
+	char temp4[size];
+	bar (temp, temp2, temp3, temp4, size);
+      }
+    }
+    __asm __volatile ("" : : "r" (&temp[0]), "r" (&temp2[0]) : "memory");
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

	Jakub
