This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH/RFC] Improve inlining heuristics for 4.0


Hi!

This is a simple attempt to improve the inlining heuristics for 4.0,
while keeping in mind that for 4.1 this will all be done very
differently.

For removing C++ abstraction with inlining the following two
optimizations help a lot:

1. Both the C and the C++ frontends create a temporary for the
   return expression, i.e. we always have something like
      tmp = whatever
      return tmp;
   estimate_num_insns sees two MODIFY_EXPRs for this, and as
   such, for a simple int foo(void) { return 0; }, we overestimate
   the size of the function by a factor of two.
   The fix is to simply ignore RETURN_EXPR in estimate_num_insns.

2. For code like
     inline int foo(void) { return 0; }
     int foo1(void) { return foo(); }
   we do not account for the cost of initializing a temporary
   for the result of the call to foo, which inlining removes.
   The fix is to subtract the cost of the MODIFY_EXPR (..., CALL_EXPR)
   in the caller if inlining the call.  We have a cost for this
   initialization in the inlined function anyway.

The second fix needs the fix for TARGET_EXPR handling in
estimate_num_insns.

So, for the numbers, a -O2 optimized tramp3d-v3 takes 1m58s before
and 49s after the patch.  This is nearly a 60% improvement in
runtime for such a simple patch.  Note that this brings us back
to 3.4 performance, which is 52s.

Would this be appropriate for 4.0?

Thanks,
Richard.


2005-Feb-24  Richard Guenther  <rguenth@gcc.gnu.org>

	* tree-inline.h: Define INSNS_PER_CALL.
	* tree-inline.c (estimate_num_insns): Use INSNS_PER_CALL
	where appropriate.  Do not count RETURN_EXPR.  Use move
	cost for the first operand of TARGET_EXPR, not for
	TARGET_EXPR itself.
	* cgraphunit.c: Remove define for INSNS_PER_CALL.
	(cgraph_estimate_size_after_inlining): Account for removed
	cost of initializing the result object.


Index: tree-inline.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-inline.h,v
retrieving revision 1.14
diff -u -r1.14 tree-inline.h
--- tree-inline.h	8 Nov 2004 22:40:09 -0000	1.14
+++ tree-inline.h	24 Feb 2005 15:10:03 -0000
@@ -38,4 +38,9 @@

 extern int flag_inline_trees;

+/* Instructions per call.  Used in estimate_num_insns and in the
+   inliner to account for removed calls.  */
+
+#define INSNS_PER_CALL 10
+
 #endif /* GCC_TREE_INLINE_H */
Index: tree-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-inline.c,v
retrieving revision 1.170
diff -u -r1.170 tree-inline.c
--- tree-inline.c	27 Jan 2005 14:36:17 -0000	1.170
+++ tree-inline.c	24 Feb 2005 15:10:04 -0000
@@ -1223,7 +1223,6 @@
     case TRY_FINALLY_EXPR:
     case LABEL_EXPR:
     case GOTO_EXPR:
-    case RETURN_EXPR:
     case EXIT_EXPR:
     case LOOP_EXPR:
     case PHI_NODE:
@@ -1240,6 +1239,10 @@
     case COMPLEX_CST:
     case VECTOR_CST:
     case STRING_CST:
+      /* FALLTHRU */
+
+    /* Return cost is accounted for by previous assignments to temporaries.  */
+    case RETURN_EXPR:
       *walk_subtrees = 0;
       return NULL;

@@ -1247,9 +1250,9 @@
        big arrays.  */
     case INIT_EXPR:
     case MODIFY_EXPR:
+    case TARGET_EXPR:
       x = TREE_OPERAND (x, 0);
       /* FALLTHRU */
-    case TARGET_EXPR:
     case CONSTRUCTOR:
       {
 	HOST_WIDE_INT size;
@@ -1257,7 +1260,7 @@
 	size = int_size_in_bytes (TREE_TYPE (x));

 	if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
-	  *count += 10;
+	  *count += INSNS_PER_CALL;
 	else
 	  *count += ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
       }
@@ -1363,7 +1366,7 @@
 	    default:
 	      break;
 	    }
-	*count += 10;
+	*count += INSNS_PER_CALL;
 	break;
       }
     default:
Index: cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.93
diff -u -r1.93 cgraphunit.c
--- cgraphunit.c	21 Feb 2005 14:39:46 -0000	1.93
+++ cgraphunit.c	24 Feb 2005 15:10:04 -0000
@@ -190,8 +190,6 @@
 #include "function.h"
 #include "tree-gimple.h"

-#define INSNS_PER_CALL 10
-
 static void cgraph_expand_all_functions (void);
 static void cgraph_mark_functions_to_output (void);
 static void cgraph_expand_function (struct cgraph_node *);
@@ -1031,7 +1029,17 @@
 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
 				     struct cgraph_node *what)
 {
-  return (what->global.insns - INSNS_PER_CALL) * times + to->global.insns;
+  int return_cost = 0;
+  tree result_decl = DECL_RESULT (what->decl);
+  if (! VOID_TYPE_P (TREE_TYPE (result_decl)))
+    {
+      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (result_decl));
+      if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
+        return_cost = INSNS_PER_CALL;
+      else
+        return_cost = ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
+    }
+  return (what->global.insns - INSNS_PER_CALL - return_cost) * times + to->global.insns;
 }

 /* Estimate the growth caused by inlining NODE into all callees.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]