[tree-profiling] inlining refinements

Jan Hubicka jh@suse.cz
Sat Jan 15 21:02:00 GMT 2005


Hi,
this patch kills some of hacks introduced by cfg inliner (primarily the
unnecesary current_basic_block level in cgraph_node) and adds heuristics
sensitive to loop depth.  This change is SPEC neutral, but it helps to
pooma3d testcase.  It is not that far as leafify trick, but it makes
tree-profiling outperform mainline where it originally failed.
The reason for SPEC neutrality is the fact that we almost never hit the
inline unit growth and the patch is not affecting the other limits (it
just changes the order of inlining candidates).
I wonder if we can get closer to reducing inlining unit growth without
too much slowdown and/or we can get some better scores by allowing
larger functions inlined within loop nest.

Bootstrapped/regtested i686-pc-gnu-linux.
Honza

2005-01-15  Jan Hubicka  <jh@suse.cz>
	* cgrpah.c (cgrpah_edge): Take bb argument instead of
	current_basic_block.
	(dump_cgraph_node): Dump loop nest.
	(cgraph_clone_edge): Maintain loop nest.
	(cgraph_clone_node): Likewise.
	* cgraph.h (cgraph_node): Remove current_basic_block field; add
	loop_nest.
	(cgraph_create_edge, cgraph_clone_edge, cgraph_clone_node): Modify
	prototype.
	* cgraphunit.c: Include cfgloop.h
	(current_basic_block): New static.
	(record_call): Update.
	(cgraph_analyze_function): Build loop tree.
	* ipa-inline.c (cgraph_clone_inlined_nodes): Maintain loop depth.
	(cgraph_edge_baddness): Consider nest.
	(cgraph_decide_recursive_inlining): Update call of cgraph_clone_node.
	* tree-inline.c (copy_body_r): Update call of cgraph_clone_edge.
	(expand_call_inline): Update call of cgraph_create_edge.
	* tree-optimize.c (tree_rest_of_compilation): Update call of edge clonning.
Index: cgraph.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraph.c,v
retrieving revision 1.4.4.18.2.23
diff -c -3 -p -r1.4.4.18.2.23 cgraph.c
*** cgraph.c	9 Jan 2005 14:16:51 -0000	1.4.4.18.2.23
--- cgraph.c	13 Jan 2005 00:10:51 -0000
*************** cgraph_edge (struct cgraph_node *node, t
*** 279,285 ****
  
  struct cgraph_edge *
  cgraph_create_edge (struct cgraph_node *caller, struct cgraph_node *callee,
! 		    tree call_expr)
  {
    struct cgraph_edge *edge = ggc_alloc (sizeof (struct cgraph_edge));
  #ifdef ENABLE_CHECKING
--- 279,285 ----
  
  struct cgraph_edge *
  cgraph_create_edge (struct cgraph_node *caller, struct cgraph_node *callee,
! 		    tree call_expr, gcov_type count, int nest)
  {
    struct cgraph_edge *edge = ggc_alloc (sizeof (struct cgraph_edge));
  #ifdef ENABLE_CHECKING
*************** cgraph_create_edge (struct cgraph_node *
*** 310,317 ****
    edge->next_callee = caller->callees;
    caller->callees = edge;
    callee->callers = edge;
!   edge->count = caller->current_basic_block
!     ? caller->current_basic_block->count : 0;
    return edge;
  }
  
--- 310,317 ----
    edge->next_callee = caller->callees;
    caller->callees = edge;
    callee->callers = edge;
!   edge->count = count;
!   edge->loop_nest = nest;
    return edge;
  }
  
*************** dump_cgraph_node (FILE *f, struct cgraph
*** 590,595 ****
--- 590,597 ----
        if (edge->count)
  	fprintf (f, "("HOST_WIDEST_INT_PRINT_DEC"x) ",
  		 (HOST_WIDEST_INT)edge->count);
+       if (edge->loop_nest)
+ 	fprintf (f, "(nested in %i loops) ", edge->loop_nest);
      }
  
    fprintf (f, "\n  calls: ");
*************** dump_cgraph_node (FILE *f, struct cgraph
*** 602,607 ****
--- 604,611 ----
        if (edge->count)
  	fprintf (f, "("HOST_WIDEST_INT_PRINT_DEC"x) ",
  		 (HOST_WIDEST_INT)edge->count);
+       if (edge->loop_nest)
+ 	fprintf (f, "(nested in %i loops) ", edge->loop_nest);
      }
    fprintf (f, "\n  cycle: ");
    n = node->next_cycle;
*************** cgraph_function_possibly_inlined_p (tree
*** 850,861 ****
  /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
  struct cgraph_edge *
  cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
! 		   tree call_expr, int count_scale)
  {
!   struct cgraph_edge *new = cgraph_create_edge (n, e->callee, call_expr);
  
    new->inline_failed = e->inline_failed;
-   new->count = e->count * count_scale / REG_BR_PROB_BASE;
    e->count -= new->count;
    return new;
  }
--- 854,868 ----
  /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
  struct cgraph_edge *
  cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
! 		   tree call_expr, int count_scale, int loop_nest)
  {
!   struct cgraph_edge *new;
! 
!   new = cgraph_create_edge (n, e->callee, call_expr,
!                             e->count * count_scale / REG_BR_PROB_BASE,
! 		            e->loop_nest + loop_nest);
  
    new->inline_failed = e->inline_failed;
    e->count -= new->count;
    return new;
  }
*************** cgraph_clone_edge (struct cgraph_edge *e
*** 863,869 ****
  /* Create node representing clone of N executed COUNT times.  Decrease
     the execution counts from original node too.  */
  struct cgraph_node *
! cgraph_clone_node (struct cgraph_node *n, gcov_type count)
  {
    struct cgraph_node *new = cgraph_create_node ();
    struct cgraph_edge *e;
--- 870,876 ----
  /* Create node representing clone of N executed COUNT times.  Decrease
     the execution counts from original node too.  */
  struct cgraph_node *
! cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest)
  {
    struct cgraph_node *new = cgraph_create_node ();
    struct cgraph_edge *e;
*************** cgraph_clone_node (struct cgraph_node *n
*** 890,896 ****
    n->count -= count;
  
    for (e = n->callees;e; e=e->next_callee)
!     cgraph_clone_edge (e, new, e->call_expr, count_scale);
  
    new->next_clone = n->next_clone;
    n->next_clone = new;
--- 897,903 ----
    n->count -= count;
  
    for (e = n->callees;e; e=e->next_callee)
!     cgraph_clone_edge (e, new, e->call_expr, count_scale, loop_nest);
  
    new->next_clone = n->next_clone;
    n->next_clone = new;
Index: cgraph.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraph.h,v
retrieving revision 1.1.4.16.2.22
diff -c -3 -p -r1.1.4.16.2.22 cgraph.h
*** cgraph.h	9 Jan 2005 14:16:51 -0000	1.1.4.16.2.22
--- cgraph.h	13 Jan 2005 00:10:51 -0000
*************** struct cgraph_node GTY((chain_next ("%h.
*** 159,166 ****
    bool analyzed;
    /* Set when function is scheduled to be assembled.  */
    bool output;
-   /* Used only while constructing the callgraph.  */
-   basic_block current_basic_block;
  };
  
  struct cgraph_edge GTY((chain_next ("%h.next_caller")))
--- 159,164 ----
*************** struct cgraph_edge GTY((chain_next ("%h.
*** 176,181 ****
--- 174,181 ----
    const char *inline_failed;
    /* Expected number of executions: calculated in profile.c.  */
    gcov_type count;
+   /* Depth of loop nest, 1 means no loop nest.  */
+   int loop_nest;
  };
  
  /* The cgraph_varpool data structure.
*************** void cgraph_remove_edge (struct cgraph_e
*** 229,235 ****
  void cgraph_remove_node (struct cgraph_node *);
  struct cgraph_edge *cgraph_create_edge (struct cgraph_node *,
  					struct cgraph_node *,
! 				        tree);
  struct cgraph_node *cgraph_node (tree);
  struct cgraph_node *cgraph_node_for_asm (tree asmname);
  struct cgraph_edge *cgraph_edge (struct cgraph_node *, tree);
--- 229,235 ----
  void cgraph_remove_node (struct cgraph_node *);
  struct cgraph_edge *cgraph_create_edge (struct cgraph_node *,
  					struct cgraph_node *,
! 				        tree, gcov_type, int);
  struct cgraph_node *cgraph_node (tree);
  struct cgraph_node *cgraph_node_for_asm (tree asmname);
  struct cgraph_edge *cgraph_edge (struct cgraph_node *, tree);
*************** struct cgraph_local_info *cgraph_local_i
*** 238,245 ****
  struct cgraph_global_info *cgraph_global_info (tree);
  struct cgraph_rtl_info *cgraph_rtl_info (tree);
  const char * cgraph_node_name (struct cgraph_node *);
! struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *, struct cgraph_node *, tree, int);
! struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type);
  
  struct cgraph_varpool_node *cgraph_varpool_node (tree);
  struct cgraph_varpool_node *cgraph_varpool_node_for_asm (tree asmname);
--- 238,245 ----
  struct cgraph_global_info *cgraph_global_info (tree);
  struct cgraph_rtl_info *cgraph_rtl_info (tree);
  const char * cgraph_node_name (struct cgraph_node *);
! struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *, struct cgraph_node *, tree, int, int);
! struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int);
  
  struct cgraph_varpool_node *cgraph_varpool_node (tree);
  struct cgraph_varpool_node *cgraph_varpool_node_for_asm (tree asmname);
Index: cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.1.4.35.2.38
diff -c -3 -p -r1.1.4.35.2.38 cgraphunit.c
*** cgraphunit.c	15 Dec 2004 22:17:50 -0000	1.1.4.35.2.38
--- cgraphunit.c	13 Jan 2005 00:10:51 -0000
*************** Software Foundation, 59 Temple Place - S
*** 167,172 ****
--- 167,173 ----
  #include "tree-gimple.h"
  #include "output.h"
  #include "tree-pass.h"
+ #include "cfgloop.h"
  
  static void cgraph_expand_all_functions (void);
  static void cgraph_mark_functions_to_output (void);
*************** cgraph_lower_function (struct cgraph_nod
*** 408,413 ****
--- 409,417 ----
    node->lowered = true;
  }
  
+ /* Used only while constructing the callgraph.  */
+ static basic_block current_basic_block;
+ 
  /* Walk tree and record all calls.  Called via walk_tree.  */
  static tree
  record_call_1 (tree *tp, int *walk_subtrees, void *data)
*************** record_call_1 (tree *tp, int *walk_subtr
*** 445,451 ****
  	tree decl = get_callee_fndecl (*tp);
  	if (decl && TREE_CODE (decl) == FUNCTION_DECL)
  	  {
! 	    cgraph_create_edge (data, cgraph_node (decl), *tp);
  
  	    /* When we see a function call, we don't want to look at the
  	       function reference in the ADDR_EXPR that is hanging from
--- 449,457 ----
  	tree decl = get_callee_fndecl (*tp);
  	if (decl && TREE_CODE (decl) == FUNCTION_DECL)
  	  {
! 	    cgraph_create_edge (data, cgraph_node (decl), *tp,
! 			        current_basic_block->count,
! 				current_basic_block->loop_depth);
  
  	    /* When we see a function call, we don't want to look at the
  	       function reference in the ADDR_EXPR that is hanging from
*************** cgraph_create_edges (struct cgraph_node 
*** 501,506 ****
--- 507,513 ----
    /* The nodes we're interested in are never shared, so walk
       the tree ignoring duplicates.  */
    visited_nodes = pointer_set_create ();
+   current_basic_block = NULL;
  
    if (TREE_CODE (body) == FUNCTION_DECL)
      {
*************** cgraph_create_edges (struct cgraph_node 
*** 512,521 ****
  	 enclosing basic-blocks in the call edges.  */
        FOR_EACH_BB_FN (this_block, this_cfun)
  	{
! 	  node->current_basic_block = this_block;
  	  walk_tree (&this_block->stmt_list, record_call_1, node, visited_nodes);
  	}
!       node->current_basic_block = (basic_block)0;
  
        /* Walk over any private statics that may take addresses of functions.  */
        if (TREE_CODE (DECL_INITIAL (body)) == BLOCK)
--- 519,528 ----
  	 enclosing basic-blocks in the call edges.  */
        FOR_EACH_BB_FN (this_block, this_cfun)
  	{
! 	  current_basic_block = this_block;
  	  walk_tree (&this_block->stmt_list, record_call_1, node, visited_nodes);
  	}
!       current_basic_block = NULL;
  
        /* Walk over any private statics that may take addresses of functions.  */
        if (TREE_CODE (DECL_INITIAL (body)) == BLOCK)
*************** static void
*** 777,782 ****
--- 784,790 ----
  cgraph_analyze_function (struct cgraph_node *node)
  {
    tree decl = node->decl;
+   struct loops loops;
  
    timevar_push (TV_IPA_ANALYSIS);
    push_cfun (DECL_STRUCT_FUNCTION (decl));
*************** cgraph_analyze_function (struct cgraph_n
*** 788,795 ****
  
    node->count = ENTRY_BLOCK_PTR->count;
  
!   /* First kill forward declaration so reverse inlining works properly.  */
    cgraph_create_edges (node, decl);
  
    /* Only optimization we do in non-unit-at-a-time mode is inlining.  We don't
       use the passmanager then and instead call it directly.  Since we probably
--- 796,807 ----
  
    node->count = ENTRY_BLOCK_PTR->count;
  
!   if (optimize)
!     flow_loops_find (&loops, LOOP_TREE);
    cgraph_create_edges (node, decl);
+   if (optimize)
+     flow_loops_free (&loops);
+   free_dominance_info (CDI_DOMINATORS);
  
    /* Only optimization we do in non-unit-at-a-time mode is inlining.  We don't
       use the passmanager then and instead call it directly.  Since we probably
*************** cgraph_optimize (void)
*** 1170,1176 ****
      if (node->analyzed)
        ipa_analyze_function (node);
    for (vnode = cgraph_varpool_nodes_queue; vnode; vnode = vnode->next_needed)
!     ipa_analyze_variable (vnode);
  
    if (flag_ipa_cp && flag_ipa_no_cloning)
      ipcp_driver ();
--- 1182,1189 ----
      if (node->analyzed)
        ipa_analyze_function (node);
    for (vnode = cgraph_varpool_nodes_queue; vnode; vnode = vnode->next_needed)
!     if (!vnode->non_ipa)
!       ipa_analyze_variable (vnode);
  
    if (flag_ipa_cp && flag_ipa_no_cloning)
      ipcp_driver ();
*************** cgraph_optimize (void)
*** 1202,1208 ****
  #endif
    
    cgraph_mark_functions_to_output ();
-   
    cgraph_expand_all_functions ();
  
    cgraph_varpool_assemble_pending_decls ();
--- 1215,1220 ----
Index: ipa-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/ipa-inline.c,v
retrieving revision 1.1.2.5
diff -c -3 -p -r1.1.2.5 ipa-inline.c
*** ipa-inline.c	8 Dec 2004 22:49:14 -0000	1.1.2.5
--- ipa-inline.c	13 Jan 2005 00:10:51 -0000
*************** cgraph_clone_inlined_nodes (struct cgrap
*** 126,132 ****
      }
     else if (duplicate)
      {
!       n = cgraph_clone_node (e->callee, e->count);
        cgraph_redirect_edge_callee (e, n);
      }
  
--- 126,132 ----
      }
     else if (duplicate)
      {
!       n = cgraph_clone_node (e->callee, e->count, e->loop_nest);
        cgraph_redirect_edge_callee (e, n);
      }
  
*************** cgraph_edge_badness (struct cgraph_edge 
*** 349,361 ****
      }
    else
    {
!     int growth = cgraph_estimate_growth (edge->callee);
  
      /* Make recursive inlining happen always after other inlining is done.  */
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
!       return growth + 1;
      else
!       return growth;
    }
  }
  
--- 349,364 ----
      }
    else
    {
!     int nest = MIN (edge->loop_nest, 8);
!     int badness = cgraph_estimate_growth (edge->callee) * 256;
! 		    
!     badness >>= nest;
  
      /* Make recursive inlining happen always after other inlining is done.  */
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
!       return badness + 1;
      else
!       return badness;
    }
  }
  
*************** cgraph_decide_recursive_inlining (struct
*** 472,478 ****
  	     cgraph_node_name (node));
  
    /* We need original clone to copy around.  */
!   master_clone = cgraph_clone_node (node, 0);
    master_clone->needed = true;
    for (e = master_clone->callees; e; e = e->next_callee)
      if (!e->inline_failed)
--- 476,482 ----
  	     cgraph_node_name (node));
  
    /* We need original clone to copy around.  */
!   master_clone = cgraph_clone_node (node, 0, 1);
    master_clone->needed = true;
    for (e = master_clone->callees; e; e = e->next_callee)
      if (!e->inline_failed)
Index: tree-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-inline.c,v
retrieving revision 1.26.2.83.2.26
diff -c -3 -p -r1.26.2.83.2.26 tree-inline.c
*** tree-inline.c	9 Jan 2005 14:17:46 -0000	1.26.2.83.2.26
--- tree-inline.c	13 Jan 2005 00:10:52 -0000
*************** copy_body_r (tree *tp, int *walk_subtree
*** 720,726 ****
  	      edge = cgraph_edge (id->current_node, old_node);
  
  	      if (edge)
! 	        cgraph_clone_edge (edge, id->node, *tp, REG_BR_PROB_BASE);
  	    }
  	}
        else if (TREE_CODE (*tp) == RESX_EXPR)
--- 720,726 ----
  	      edge = cgraph_edge (id->current_node, old_node);
  
  	      if (edge)
! 	        cgraph_clone_edge (edge, id->node, *tp, REG_BR_PROB_BASE, 1);
  	    }
  	}
        else if (TREE_CODE (*tp) == RESX_EXPR)
*************** expand_call_inline (tree *tp, int *walk_
*** 2097,2103 ****
           constant propagating arguments.  In all other cases we hit a bug
           (incorrect node sharing is most common reason for missing edges.  */
        gcc_assert (dest->needed || !flag_unit_at_a_time);
!       cgraph_create_edge (id->node, dest, t)->inline_failed
  	= N_("originally indirect function call not considered for inlining");
        goto egress;
      }
--- 2097,2104 ----
           constant propagating arguments.  In all other cases we hit a bug
           (incorrect node sharing is most common reason for missing edges.  */
        gcc_assert (dest->needed || !flag_unit_at_a_time);
!       cgraph_create_edge (id->node, dest, t, id->oic_basic_block->count,
! 		          id->oic_basic_block->loop_depth)->inline_failed
  	= N_("originally indirect function call not considered for inlining");
        goto egress;
      }
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 1.1.4.122.2.31
diff -c -3 -p -r1.1.4.122.2.31 tree-optimize.c
*** tree-optimize.c	9 Jan 2005 14:17:49 -0000	1.1.4.122.2.31
--- tree-optimize.c	13 Jan 2005 00:10:52 -0000
*************** tree_rest_of_compilation (tree fndecl)
*** 796,802 ****
  	  struct cgraph_edge *e;
  
  	  node = cgraph_node (current_function_decl);
! 	  saved_node = cgraph_clone_node (node, node->count);
  	  for (e = saved_node->callees; e; e = e->next_callee)
  	    if (!e->inline_failed)
  	      cgraph_clone_inlined_nodes (e, true);
--- 796,802 ----
  	  struct cgraph_edge *e;
  
  	  node = cgraph_node (current_function_decl);
! 	  saved_node = cgraph_clone_node (node, node->count, 1);
  	  for (e = saved_node->callees; e; e = e->next_callee)
  	    if (!e->inline_failed)
  	      cgraph_clone_inlined_nodes (e, true);



More information about the Gcc-patches mailing list