
PR rtl-optimization/28071 (quadratic bottlenecks in inliner)


Hi,
this patch plugs two quadratic bottlenecks in the inliner, cutting it down
from 40% to 9% of -O1 compile time on the testcase (the remaining bottleneck
is split_block).
One problem is cgraph_edge walking all call edges; it is solved by adding a
hash table for functions with a large number of edges.  The other problem is
the quadratic checking of the maximal function growth parameter in the inline
heuristics.  This is a remainder of the old implementation, which always
inlined all calls from function A to function B, while now we treat each edge
independently.  cgraph_check_inline_limits, however, walks all edges each time
to figure out how many calls there are, and allows inlining only when inlining
all call sites is allowed.
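
To illustrate the first fix, here is a minimal self-contained sketch of the
lazy call-site hash, simplified from the cgraph.c hunk below: stmt, edge and
node are stand-in types, and it uses plain htab_create from libiberty rather
than the GC-allocated htab_create_ggc the patch uses; the 100 and 120
constants match the patch.

#include "hashtab.h"

struct stmt;                    /* stand-in for a call statement */
struct edge { struct stmt *call_stmt; struct edge *next_callee; };
struct node { struct edge *callees; htab_t call_site_hash; };

/* Hash an edge by the pointer identity of its call statement.  */
static hashval_t
edge_hash (const void *x)
{
  return htab_hash_pointer (((const struct edge *) x)->call_stmt);
}

/* An edge X matches the call statement Y it was hashed by.  */
static int
edge_eq (const void *x, const void *y)
{
  return ((const struct edge *) x)->call_stmt == y;
}

/* Return the edge of NODE calling STMT.  Walk the callee list, but once
   it proves long, build the hash so later lookups are O(1) instead of
   rescanning the whole list each time.  */
static struct edge *
lookup_edge (struct node *node, struct stmt *stmt)
{
  struct edge *e, *e2;
  int n = 0;

  if (node->call_site_hash)
    return (struct edge *) htab_find_with_hash (node->call_site_hash, stmt,
                                                htab_hash_pointer (stmt));

  for (e = node->callees; e; e = e->next_callee)
    {
      if (e->call_stmt == stmt)
        break;
      n++;
    }
  if (n > 100)
    {
      node->call_site_hash = htab_create (120, edge_hash, edge_eq, NULL);
      for (e2 = node->callees; e2; e2 = e2->next_callee)
        {
          void **slot = htab_find_slot_with_hash (node->call_site_hash,
                                                  e2->call_stmt,
                                                  htab_hash_pointer (e2->call_stmt),
                                                  INSERT);
          *slot = e2;
        }
    }
  return e;
}

The actual patch has the same shape, but additionally keeps the hash up to
date in cgraph_set_call_stmt, cgraph_create_edge and the edge/node removal
paths, so the table never goes stale.  The second fix needs no sketch: it is
just the new ONE_ONLY argument of cgraph_check_inline_limits in the
ipa-inline.c hunk, which skips the walk over all edges when the caller knows
that only a single call site is being inlined.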

While without profiling our algorithm is still not smart enough to
differentiate between call sites, the new behaviour is probably not worse
than the former one, and with profiling we can definitely inline the hot
edges while leaving the cold ones uninlined.

Bootstrapped/regtested on i686-linux.  Will commit it shortly, once I figure
out why SVN is preventing me from doing any commits at all.

Honza

	PR rtl-optimization/28071
	* tree-optimize.c (tree_rest_of_compilation): Do not remove edges
	twice.
	* tree-inline.c (copy_bb): Use cgraph_set_call_stmt.
	* ipa-inline.c (cgraph_check_inline_limits): Add one_only argument.
	(cgraph_decide_inlining, cgraph_decide_inlining_of_small_functions,
	cgraph_decide_inlining_incrementally): Update use of
	cgraph_check_inline_limits.
	* cgraph.c (edge_hash, edge_eq): New functions.
	(cgraph_edge, cgraph_set_call_stmt, cgraph_create_edge,
	cgraph_edge_remove_caller, cgraph_node_remove_callees,
	cgraph_remove_node): Maintain call site hash.
	* cgraph.h (struct cgraph_node): Add call_site_hash.
	(cgraph_set_call_stmt): New function.
Index: tree-optimize.c
===================================================================
--- tree-optimize.c	(revision 116257)
+++ tree-optimize.c	(working copy)
@@ -393,15 +393,14 @@ tree_rest_of_compilation (tree fndecl)
 	  timevar_pop (TV_INTEGRATION);
 	}
     }
-  /* We are not going to maintain the cgraph edges up to date.
-     Kill it so it won't confuse us.  */
-  while (node->callees)
+  /* In non-unit-at-a-time we must mark all referenced functions as
+     needed.  */
+  if (!flag_unit_at_a_time)
     {
-      /* In non-unit-at-a-time we must mark all referenced functions as needed.
-         */
-      if (node->callees->callee->analyzed && !flag_unit_at_a_time)
-        cgraph_mark_needed_node (node->callees->callee);
-      cgraph_remove_edge (node->callees);
+      struct cgraph_edge *e;
+      for (e = node->callees; e; e = e->next_callee)
+	if (e->callee->analyzed)
+          cgraph_mark_needed_node (e->callee);
     }
 
   /* We are not going to maintain the cgraph edges up to date.
Index: tree-inline.c
===================================================================
--- tree-inline.c	(revision 116257)
+++ tree-inline.c	(working copy)
@@ -737,14 +737,14 @@ copy_bb (copy_body_data *id, basic_block
 		    {
 		      edge = cgraph_edge (node, orig_stmt);
 		      gcc_assert (edge);
-		      edge->call_stmt = stmt;
+		      cgraph_set_call_stmt (edge, stmt);
 		    }
 		  /* FALLTHRU */
 
 		case CB_CGE_MOVE:
 		  edge = cgraph_edge (id->dst_node, orig_stmt);
 		  if (edge)
-		    edge->call_stmt = stmt;
+		    cgraph_set_call_stmt (edge, stmt);
 		  break;
 
 		default:
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 116257)
+++ ipa-inline.c	(working copy)
@@ -243,20 +243,27 @@ cgraph_estimate_growth (struct cgraph_no
 }
 
 /* Return false when inlining WHAT into TO is not good idea
-   as it would cause too large growth of function bodies.  */
+   as it would cause too large growth of function bodies.
+   When ONE_ONLY is true, assume that only one call site is
+   going to be inlined; otherwise figure out how many call
+   sites in TO call WHAT and verify that all of them can be
+   inlined.  */
 
 static bool
 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
-			    const char **reason)
+			    const char **reason, bool one_only)
 {
   int times = 0;
   struct cgraph_edge *e;
   int newsize;
   int limit;
 
-  for (e = to->callees; e; e = e->next_callee)
-    if (e->callee == what)
-      times++;
+  if (one_only)
+    times = 1;
+  else
+    for (e = to->callees; e; e = e->next_callee)
+      if (e->callee == what)
+	times++;
 
   if (to->global.inlined_to)
     to = to->global.inlined_to;
@@ -836,7 +843,7 @@ cgraph_decide_inlining_of_small_function
 	{
 	  struct cgraph_node *callee;
 	  if (!cgraph_check_inline_limits (edge->caller, edge->callee,
-					   &edge->inline_failed))
+					   &edge->inline_failed, true))
 	    {
 	      if (dump_file)
 		fprintf (dump_file, " Not inlining into %s:%s.\n",
@@ -1027,7 +1044,7 @@ cgraph_decide_inlining (void)
 		  old_insns = overall_insns;
 
 		  if (cgraph_check_inline_limits (node->callers->caller, node,
-					  	  NULL))
+					  	  NULL, false))
 		    {
 		      cgraph_mark_inline (node->callers);
 		      if (dump_file)
@@ -1099,7 +1116,8 @@ cgraph_decide_inlining_incrementally (st
 	  && (!early
 	      || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
 	          <= e->caller->global.insns))
-	  && cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
+	  && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
+					 false)
 	  && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
 	{
 	  if (cgraph_default_inline_p (e->callee, &failed_reason))
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 116257)
+++ cgraph.c	(working copy)
@@ -293,11 +293,32 @@ cgraph_node_for_asm (tree asmname)
   return NULL;
 }
 
+/* Returns a hash value for X (which really is a cgraph_edge).  */
+
+static hashval_t
+edge_hash (const void *x)
+{
+  return htab_hash_pointer (((struct cgraph_edge *) x)->call_stmt);
+}
+
+/* Return nonzero if the call statement of cgraph_edge X is Y.  */
+
+static int
+edge_eq (const void *x, const void *y)
+{
+  return ((struct cgraph_edge *) x)->call_stmt == y;
+}
+
 /* Return callgraph edge representing CALL_EXPR statement.  */
 struct cgraph_edge *
 cgraph_edge (struct cgraph_node *node, tree call_stmt)
 {
-  struct cgraph_edge *e;
+  struct cgraph_edge *e, *e2;
+  int n = 0;
+
+  if (node->call_site_hash)
+    return htab_find_with_hash (node->call_site_hash, call_stmt,
+				htab_hash_pointer (call_stmt));
 
   /* This loop may turn out to be performance problem.  In such case adding
      hashtables into call nodes with very many edges is probably best
@@ -305,11 +326,51 @@ cgraph_edge (struct cgraph_node *node, t
      because we want to make possible having multiple cgraph nodes representing
      different clones of the same body before the body is actually cloned.  */
   for (e = node->callees; e; e= e->next_callee)
-    if (e->call_stmt == call_stmt)
-      break;
+    {
+      if (e->call_stmt == call_stmt)
+	break;
+      n++;
+    }
+  if (n > 100)
+    {
+      node->call_site_hash = htab_create_ggc (120, edge_hash, edge_eq, NULL);
+      for (e2 = node->callees; e2; e2 = e2->next_callee)
+	{
+          void **slot;
+	  slot = htab_find_slot_with_hash (node->call_site_hash,
+					   e2->call_stmt,
+					   htab_hash_pointer (e2->call_stmt),
+					   INSERT);
+	  gcc_assert (!*slot);
+	  *slot = e2;
+	}
+    }
   return e;
 }
 
+/* Change the call_stmt of edge E to NEW_STMT.  */
+void
+cgraph_set_call_stmt (struct cgraph_edge *e, tree new_stmt)
+{
+  if (e->caller->call_site_hash)
+    {
+      htab_remove_elt_with_hash (e->caller->call_site_hash,
+				 e->call_stmt,
+				 htab_hash_pointer (e->call_stmt));
+    }
+  e->call_stmt = new_stmt;
+  if (e->caller->call_site_hash)
+    {
+      void **slot;
+      slot = htab_find_slot_with_hash (e->caller->call_site_hash,
+				       e->call_stmt,
+				       htab_hash_pointer
+				       (e->call_stmt), INSERT);
+      gcc_assert (!*slot);
+      *slot = e;
+    }
+}
+
 /* Create edge from CALLER to CALLEE in the cgraph.  */
 
 struct cgraph_edge *
@@ -353,6 +414,17 @@ cgraph_create_edge (struct cgraph_node *
   callee->callers = edge;
   edge->count = count;
   edge->loop_nest = nest;
+  if (caller->call_site_hash)
+    {
+      void **slot;
+      slot = htab_find_slot_with_hash (caller->call_site_hash,
+				       edge->call_stmt,
+				       htab_hash_pointer
+					 (edge->call_stmt),
+				       INSERT);
+      gcc_assert (!*slot);
+      *slot = edge;
+    }
   return edge;
 }
 
@@ -380,6 +452,10 @@ cgraph_edge_remove_caller (struct cgraph
     e->next_callee->prev_callee = e->prev_callee;
   if (!e->prev_callee)
     e->caller->callees = e->next_callee;
+  if (e->caller->call_site_hash)
+    htab_remove_elt_with_hash (e->caller->call_site_hash,
+			       e->call_stmt,
+			       htab_hash_pointer (e->call_stmt));
 }
 
 /* Remove the edge E in the cgraph.  */
@@ -425,6 +501,11 @@ cgraph_node_remove_callees (struct cgrap
   for (e = node->callees; e; e = e->next_callee)
     cgraph_edge_remove_callee (e);
   node->callees = NULL;
+  if (node->call_site_hash)
+    {
+      htab_delete (node->call_site_hash);
+      node->call_site_hash = NULL;
+    }
 }
 
 /* Remove all callers from the node.  */
@@ -521,6 +602,11 @@ cgraph_remove_node (struct cgraph_node *
       DECL_INITIAL (node->decl) = error_mark_node;
     }
   node->decl = NULL;
+  if (node->call_site_hash)
+    {
+      htab_delete (node->call_site_hash);
+      node->call_site_hash = NULL;
+    }
   cgraph_n_nodes--;
   /* Do not free the structure itself so the walk over chain can continue.  */
 }
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 116257)
+++ cgraph.h	(working copy)
@@ -133,6 +133,9 @@ struct cgraph_node GTY((chain_next ("%h.
   /* Pointer to a single unique cgraph node for this function.  If the
      function is to be output, this is the copy that will survive.  */
   struct cgraph_node *master_clone;
+  /* For functions with many call sites this holds a map from the call
+     expression to the edge, to speed up the cgraph_edge function.  */
+  htab_t GTY((param_is (struct cgraph_edge))) call_site_hash;
 
   PTR GTY ((skip)) aux;
 
@@ -266,6 +269,7 @@ struct cgraph_edge *cgraph_create_edge (
 struct cgraph_node *cgraph_node (tree);
 struct cgraph_node *cgraph_node_for_asm (tree asmname);
 struct cgraph_edge *cgraph_edge (struct cgraph_node *, tree);
+void cgraph_set_call_stmt (struct cgraph_edge *, tree);
 struct cgraph_local_info *cgraph_local_info (tree);
 struct cgraph_global_info *cgraph_global_info (tree);
 struct cgraph_rtl_info *cgraph_rtl_info (tree);

