This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Recursive inlining fixes


Hi,
this patch updates recursive inlining logic to use profile  (I did that
once already but the patch apparently got lost, so sorry for comming
late with this).   Basically the functions are now inlined in frequency
order, profile is updated more or less correctly (see FIXME comment in
the patch) and there is new min-inline-recursive-probability to trottle
down inlining on unlikely calls.  To my surprise this parameter seems to
be resulting in best code when set to pretty low value, perhaps this
will change when we finally get the tail recursin interference right.

I've bootstrapped/regtested i686-pc-gnu-linux and will commit it tomorrow if
there will be no comments.  (I am also constructing sane testcase for
this)

Honza

2005-07-28  Jan Hubicka  <jh@suse.cz>
	* cgraph.c (cgraph_clone_edge): New UPDATE_ORIGINAL argument.
	(cgraph_clone_node): Likewise.
	* cgraph.h (cgraph_clone_edge): Update prototype.
	(cgraph_clone_node): Likewise.
	* ipa-inline.c (cgraph_clone_inlined_nodes): Update call of
	cgraph_clone_node.
	(lookup_recursive_calls): Consider profile.
	(cgraph_decide_recursive_inlining): Fix updating; use new
	probability argument; use profile.
	* params.def (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY): New.
	* tree-inline.c (copy_bb): Update clal of clone_edge.
	* tree-optimize.c (tree_rest_of_compilation): UPdate cal of clone_node.

	* invoke.texi (min-inline-recursive-probability): Document.
Index: cgraph.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraph.c,v
retrieving revision 1.81
diff -c -3 -p -r1.81 cgraph.c
*** cgraph.c	5 Jul 2005 16:19:43 -0000	1.81
--- cgraph.c	25 Jul 2005 21:24:58 -0000
*************** cgraph_function_possibly_inlined_p (tree
*** 884,890 ****
  /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
  struct cgraph_edge *
  cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
! 		   tree call_stmt, int count_scale, int loop_nest)
  {
    struct cgraph_edge *new;
  
--- 884,891 ----
  /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
  struct cgraph_edge *
  cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
! 		   tree call_stmt, int count_scale, int loop_nest,
! 		   bool update_original)
  {
    struct cgraph_edge *new;
  
*************** cgraph_clone_edge (struct cgraph_edge *e
*** 893,906 ****
  		            e->loop_nest + loop_nest);
  
    new->inline_failed = e->inline_failed;
!   e->count -= new->count;
    return new;
  }
  
  /* Create node representing clone of N executed COUNT times.  Decrease
!    the execution counts from original node too.  */
  struct cgraph_node *
! cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest)
  {
    struct cgraph_node *new = cgraph_create_node ();
    struct cgraph_edge *e;
--- 894,913 ----
  		            e->loop_nest + loop_nest);
  
    new->inline_failed = e->inline_failed;
!   if (update_original)
!     e->count -= new->count;
    return new;
  }
  
  /* Create node representing clone of N executed COUNT times.  Decrease
!    the execution counts from original node too. 
! 
!    When UPDATE_ORIGINAL is true, the counts are subtracted from the original
!    function's profile to reflect the fact that part of execution is handled
!    by node.  */
  struct cgraph_node *
! cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest,
! 		   bool update_original)
  {
    struct cgraph_node *new = cgraph_create_node ();
    struct cgraph_edge *e;
*************** cgraph_clone_node (struct cgraph_node *n
*** 923,932 ****
      count_scale = new->count * REG_BR_PROB_BASE / n->count;
    else
      count_scale = 0;
!   n->count -= count;
  
    for (e = n->callees;e; e=e->next_callee)
!     cgraph_clone_edge (e, new, e->call_stmt, count_scale, loop_nest);
  
    new->next_clone = n->next_clone;
    new->prev_clone = n;
--- 930,941 ----
      count_scale = new->count * REG_BR_PROB_BASE / n->count;
    else
      count_scale = 0;
!   if (update_original)
!     n->count -= count;
  
    for (e = n->callees;e; e=e->next_callee)
!     cgraph_clone_edge (e, new, e->call_stmt, count_scale, loop_nest,
! 		       update_original);
  
    new->next_clone = n->next_clone;
    new->prev_clone = n;
Index: cgraph.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraph.h,v
retrieving revision 1.61
diff -c -3 -p -r1.61 cgraph.h
*** cgraph.h	16 Jul 2005 18:56:51 -0000	1.61
--- cgraph.h	25 Jul 2005 21:24:58 -0000
*************** struct cgraph_local_info *cgraph_local_i
*** 240,247 ****
  struct cgraph_global_info *cgraph_global_info (tree);
  struct cgraph_rtl_info *cgraph_rtl_info (tree);
  const char * cgraph_node_name (struct cgraph_node *);
! struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *, struct cgraph_node *, tree, int, int);
! struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int);
  
  struct cgraph_varpool_node *cgraph_varpool_node (tree);
  struct cgraph_varpool_node *cgraph_varpool_node_for_asm (tree asmname);
--- 240,250 ----
  struct cgraph_global_info *cgraph_global_info (tree);
  struct cgraph_rtl_info *cgraph_rtl_info (tree);
  const char * cgraph_node_name (struct cgraph_node *);
! struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
! 				        struct cgraph_node *,
! 				        tree, int, int, bool);
! struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type,
! 					int, bool);
  
  struct cgraph_varpool_node *cgraph_varpool_node (tree);
  struct cgraph_varpool_node *cgraph_varpool_node_for_asm (tree asmname);
Index: ipa-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ipa-inline.c,v
retrieving revision 2.12
diff -c -3 -p -r2.12 ipa-inline.c
*** ipa-inline.c	15 Jul 2005 09:31:14 -0000	2.12
--- ipa-inline.c	25 Jul 2005 21:25:00 -0000
*************** cgraph_clone_inlined_nodes (struct cgrap
*** 131,137 ****
      }
     else if (duplicate)
      {
!       n = cgraph_clone_node (e->callee, e->count, e->loop_nest);
        cgraph_redirect_edge_callee (e, n);
      }
  
--- 131,137 ----
      }
     else if (duplicate)
      {
!       n = cgraph_clone_node (e->callee, e->count, e->loop_nest, true);
        cgraph_redirect_edge_callee (e, n);
      }
  
*************** lookup_recursive_calls (struct cgraph_no
*** 429,438 ****
    for (e = where->callees; e; e = e->next_callee)
      if (e->callee == node)
        {
! 	/* FIXME: Once counts and frequencies are available we should drive the
! 	   order by these.  For now force the order to be simple queue since
! 	   we get order dependent on recursion depth for free by this.  */
!         fibheap_insert (heap, priority++, e);
        }
    for (e = where->callees; e; e = e->next_callee)
      if (!e->inline_failed)
--- 429,441 ----
    for (e = where->callees; e; e = e->next_callee)
      if (e->callee == node)
        {
! 	/* When profile feedback is available, prioritize by expected number
! 	   of calls.  Without profile feedback we maintain simple queue
! 	   to order candidates via recursive depths.  */
!         fibheap_insert (heap,
! 			!max_count ? priority++
! 		        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
! 		        e);
        }
    for (e = where->callees; e; e = e->next_callee)
      if (!e->inline_failed)
*************** cgraph_decide_recursive_inlining (struct
*** 506,511 ****
--- 509,515 ----
  {
    int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
    int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
+   int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
    fibheap_t heap;
    struct cgraph_edge *e;
    struct cgraph_node *master_clone;
*************** cgraph_decide_recursive_inlining (struct
*** 536,542 ****
  	     cgraph_node_name (node));
  
    /* We need original clone to copy around.  */
!   master_clone = cgraph_clone_node (node, 0, 1);
    master_clone->needed = true;
    for (e = master_clone->callees; e; e = e->next_callee)
      if (!e->inline_failed)
--- 540,546 ----
  	     cgraph_node_name (node));
  
    /* We need original clone to copy around.  */
!   master_clone = cgraph_clone_node (node, node->count, 1, false);
    master_clone->needed = true;
    for (e = master_clone->callees; e; e = e->next_callee)
      if (!e->inline_failed)
*************** cgraph_decide_recursive_inlining (struct
*** 544,570 ****
  
    /* Do the inlining and update list of recursive call during process.  */
    while (!fibheap_empty (heap)
! 	 && cgraph_estimate_size_after_inlining (1, node, master_clone) <= limit)
      {
        struct cgraph_edge *curr = fibheap_extract_min (heap);
!       struct cgraph_node *node;
  
!       depth = 0;
!       for (node = curr->caller;
! 	   node; node = node->global.inlined_to)
  	if (node->decl == curr->callee->decl)
  	  depth++;
        if (depth > max_depth)
! 	continue;
  
        if (dump_file)
! 	fprintf (dump_file, 
! 		 "   Inlining call of depth %i\n", depth);
        cgraph_redirect_edge_callee (curr, master_clone);
        cgraph_mark_inline_edge (curr);
        lookup_recursive_calls (node, curr->callee, heap);
        n++;
      }
  
    fibheap_delete (heap);
    if (dump_file)
--- 548,607 ----
  
    /* Do the inlining and update list of recursive call during process.  */
    while (!fibheap_empty (heap)
! 	 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
! 	     <= limit))
      {
        struct cgraph_edge *curr = fibheap_extract_min (heap);
!       struct cgraph_node *cnode;
  
!       depth = 1;
!       for (cnode = curr->caller;
! 	   cnode->global.inlined_to; cnode = cnode->callers->caller)
  	if (node->decl == curr->callee->decl)
  	  depth++;
        if (depth > max_depth)
! 	{
!           if (dump_file)
! 	    fprintf (dump_file, 
! 		     "   maxmal depth reached\n");
! 	  continue;
! 	}
! 
!       if (max_count)
! 	{
!           if (!cgraph_maybe_hot_edge_p (curr))
! 	    {
! 	      if (dump_file)
! 		fprintf (dump_file, "   Not inlining cold call\n");
! 	      continue;
! 	    }
!           if (curr->count * 100 / node->count < probability)
! 	    {
! 	      if (dump_file)
! 		fprintf (dump_file, 
! 			 "   Probability of edge is too small\n");
! 	      continue;
! 	    }
! 	}
  
        if (dump_file)
! 	{
! 	  fprintf (dump_file, 
! 		   "   Inlining call of depth %i", depth);
! 	  if (node->count)
! 	    {
! 	      fprintf (dump_file, " called approx. %.2f times per call",
! 		       (double)curr->count / node->count);
! 	    }
! 	  fprintf (dump_file, "\n");
! 	}
        cgraph_redirect_edge_callee (curr, master_clone);
        cgraph_mark_inline_edge (curr);
        lookup_recursive_calls (node, curr->callee, heap);
        n++;
      }
+   if (!fibheap_empty (heap) && dump_file)
+     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
  
    fibheap_delete (heap);
    if (dump_file)
*************** cgraph_decide_recursive_inlining (struct
*** 580,585 ****
--- 617,626 ----
      if (node->global.inlined_to == master_clone)
        cgraph_remove_node (node);
    cgraph_remove_node (master_clone);
+   /* FIXME: Recursive inlining actually reduces number of calls of the
+      function.  At this place we should probably walk the function and
+      inline clones and compensate the counts accordingly.  This probably
+      doesn't matter much in practice.  */
    return true;
  }
  
Index: params.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/params.def,v
retrieving revision 1.62
diff -c -3 -p -r1.62 params.def
*** params.def	2 Jul 2005 13:18:25 -0000	1.62
--- params.def	25 Jul 2005 21:25:01 -0000
*************** DEFPARAM (PARAM_MAX_INLINE_RECURSIVE_DEP
*** 111,116 ****
--- 111,121 ----
  	  "The maximum depth of recursive inlining for non-inline functions",
  	  8, 0, 0)
  
+ DEFPARAM (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY,
+ 	  "min-inline-recursive-probability",
+ 	  "Inline recursively only when the probability of call being executed exceeds the parameter",
+ 	  10, 0, 0)
+ 
  /* Limit the number of expansions created by the variable expansion
     optimization to avoid register pressure.  */
  DEFPARAM (PARAM_MAX_VARIABLE_EXPANSIONS,
Index: tree-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-inline.c,v
retrieving revision 1.206
diff -c -3 -p -r1.206 tree-inline.c
*** tree-inline.c	22 Jul 2005 18:09:34 -0000	1.206
--- tree-inline.c	25 Jul 2005 21:25:03 -0000
*************** copy_bb (inline_data *id, basic_block bb
*** 751,757 ****
  		  edge = cgraph_edge (id->current_node, orig_stmt);
  		  if (edge)
  		    cgraph_clone_edge (edge, id->node, stmt,
! 				       REG_BR_PROB_BASE, 1);
  		}
  	    }
  	  /* If you think we can abort here, you are wrong.
--- 751,757 ----
  		  edge = cgraph_edge (id->current_node, orig_stmt);
  		  if (edge)
  		    cgraph_clone_edge (edge, id->node, stmt,
! 				       REG_BR_PROB_BASE, 1, true);
  		}
  	    }
  	  /* If you think we can abort here, you are wrong.
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.120
diff -c -3 -p -r2.120 tree-optimize.c
*** tree-optimize.c	17 Jul 2005 17:13:51 -0000	2.120
--- tree-optimize.c	25 Jul 2005 21:25:04 -0000
*************** tree_rest_of_compilation (tree fndecl)
*** 372,378 ****
  	{
  	  struct cgraph_edge *e;
  
! 	  saved_node = cgraph_clone_node (node, node->count, 1);
  	  for (e = saved_node->callees; e; e = e->next_callee)
  	    if (!e->inline_failed)
  	      cgraph_clone_inlined_nodes (e, true);
--- 372,378 ----
  	{
  	  struct cgraph_edge *e;
  
! 	  saved_node = cgraph_clone_node (node, node->count, 1, false);
  	  for (e = saved_node->callees; e; e = e->next_callee)
  	    if (!e->inline_failed)
  	      cgraph_clone_inlined_nodes (e, true);
Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/invoke.texi,v
retrieving revision 1.654
diff -c -3 -p -r1.654 invoke.texi
*** doc/invoke.texi	23 Jul 2005 08:36:41 -0000	1.654
--- doc/invoke.texi	25 Jul 2005 21:25:23 -0000
*************** happens only when @option{-finline-funct
*** 5787,5792 ****
--- 5787,5804 ----
  enabled and @option{--param max-inline-recursive-depth-auto} is used.  The
  default value is 450.
  
+ @item min-inline-recursive-probability
+ Recursive inlining is profitable only for function having deep recursion
+ in average and can hurt for function having little recursion depth by
+ increasing the prologue size or complexity of function body to other
+ optimizers.
+ 
+ When profile feedback is available (see @option{-fprofile-generate}) the actual
+ recursion depth can be guessed from probability that function will recurse via
+ given call expression.  This parameter limits inlining only to call expression
+ whose probability exceeds given threshold (in percents).  The default value is
+ 10.
+ 
  @item inline-call-cost
  Specify cost of call instruction relative to simple arithmetics operations
  (having cost of 1).  Increasing this cost disqualifies inlining of non-leaf


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]