[tree-ssa, will commit] Recursive inlining

Jan Hubicka jh@suse.cz
Sat Dec 13 23:42:00 GMT 2003


Hi,
this patch implements recursive inlining.  The heuristics itself needs work,
but all bits to perform inlining and non-transitive inlining plans are in place
now.  The speedups on simple recusive functions are quite considerable about
140%, such as hanoi solver, ackerman's function computation and similar.

I would like to restrict the heuristics to not inline tail calls and to limit
inlining inside loops as too deep loop nest prevent usual loop optimizations
from happening but for that we need CFG before inlining first.

I also verified that it does not increase compilation time of Gerald's testcase.
(it has not recursive inline candidates :)

Bootstrapped/regtested i686-pc-gnu, will commit it tomorrow unless someone complains.

Honza

2003-12-13  Jan Hubicka  <jh@suse.cz>
	* cgraphunit.c (cgraph_expand_function):  Release function body when no
	longer needed.
	(lookup_recursive_calls): New function.
	(cgraph_decide_recursive_inlining): Likewise.
	(cgraph_decide_inlining_of_small_functions): Do recursive inlining.
	* tree-inline.c: Include function.h
	(copy_body): Choose saved body for recursive inlining.
	(initialize_inlined_parameters): Likewise.
	(expand_call_inline): Do not verify nodes when recursivly inlining,
	insert ret_label into decl map.
	* params.def (PARAM_MAX_INLINE_INSNS_RECURSIVE,
	PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO,
	PARAM_MAX_INLINE_RECURSIVE_DEPTH,
	PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO): New argument.
	* invoke.texi (max-inline-insns-recursive, max-inline-recursive-depth):
	Document.
	* Makefile.in (tree-inline.o): Include function.h.
*** cgraphunit.c.norec	Sat Dec 13 19:47:26 2003
--- cgraphunit.c	Sat Dec 13 21:26:50 2003
*************** cgraph_expand_function (struct cgraph_no
*** 623,628 ****
--- 623,631 ----
      abort ();
  
    current_function_decl = NULL;
+   if (DECL_SAVED_TREE (node->decl)
+       && !cgraph_preserve_function_body_p (node->decl))
+     DECL_SAVED_TREE (node->decl) = NULL;
  }
  
  /* Fill array order with all nodes with output flag set in the reverse
*************** update_callee_keys (fibheap_t heap, stru
*** 911,916 ****
--- 914,1023 ----
        update_callee_keys (heap, heap_node, e->callee);
  }
  
+ /* Enqueue all recursive calls from NODE into queue linked via aux pointers
+    in between FIRST and LAST.  WHERE is used for bookkeeping while looking
+    int calls inlined within NODE.  */
+ static void
+ lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
+ 			struct cgraph_edge **first, struct cgraph_edge **last)
+ {
+   struct cgraph_edge *e;
+   for (e = where->callees; e; e = e->next_callee)
+     if (e->callee == node)
+       {
+ 	if (!*first)
+ 	  *first = e;
+ 	else
+ 	  (*last)->aux = e;
+ 	*last = e;
+       }
+   for (e = where->callees; e; e = e->next_callee)
+     if (e->inline_call)
+       lookup_recursive_calls (node, e->callee, first, last);
+ }
+ 
+ /* Decide on recursive inlining: in the case function has recursive calls,
+    inline until body size reaches given argument.  */
+ static void
+ cgraph_decide_recursive_inlining (struct cgraph_node *node)
+ {
+   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
+   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
+   struct cgraph_edge *first_call = NULL, *last_call = NULL;
+   struct cgraph_edge *last_in_current_depth;
+   struct cgraph_edge *e;
+   struct cgraph_node *master_clone;
+   int depth = 0;
+   int n = 0;
+ 
+   if (DECL_DECLARED_INLINE_P (node->decl))
+     {
+       limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
+       max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
+     }
+ 
+   /* Make sure that function is small enought to be considered for inlining.  */
+   if (!max_depth
+       || cgraph_estimate_size_after_inlining (1, node, node)  >= limit)
+     return;
+   lookup_recursive_calls (node, node, &first_call, &last_call);
+   if (!first_call)
+     return;
+ 
+   if (cgraph_dump_file)
+     fprintf (cgraph_dump_file, 
+ 	     "\nPerforming recursive inlining on %s\n",
+ 	     cgraph_node_name (node));
+ 
+   /* We need original clone to copy around.  */
+   master_clone = cgraph_clone_node (node);
+   master_clone->needed = true;
+   for (e = master_clone->callees; e; e = e->next_callee)
+     if (e->inline_call)
+       cgraph_clone_inlined_nodes (e, true);
+ 
+   /* Do the inlining and update list of recursive call during process.  */
+   last_in_current_depth = last_call;
+   while (first_call
+ 	 && cgraph_estimate_size_after_inlining (1, node, master_clone) <= limit)
+     {
+       struct cgraph_edge *curr = first_call;
+ 
+       first_call = first_call->aux;
+       curr->aux = NULL;
+ 
+       cgraph_redirect_edge_callee (curr, master_clone);
+       cgraph_mark_inline_edge (curr);
+       lookup_recursive_calls (node, curr->callee, &first_call, &last_call);
+ 
+       if (last_in_current_depth
+ 	  && ++depth >= max_depth)
+ 	break;
+       n++;
+     }
+ 
+   /* Cleanup queue pointers.  */
+   while (first_call)
+     {
+       struct cgraph_edge *next = first_call->aux;
+       first_call->aux = NULL;
+       first_call = next;
+     }
+   if (cgraph_dump_file)
+     fprintf (cgraph_dump_file, 
+ 	     "\n   Inlined %i times, body grown from %i to %i insns\n", n,
+ 	     master_clone->global.insns, node->global.insns);
+ 
+   /* Remove master clone we used for inlining.  We rely that clones inlined
+      into master clone gets queued just before master clone so we don't
+      need recursion.  */
+   for (node = cgraph_nodes; node != master_clone;
+        node = node->next)
+     if (node->global.inlined_to == master_clone)
+       cgraph_remove_node (node);
+   cgraph_remove_node (master_clone);
+ }
+ 
  /* We use greedy algorithm for inlining of small functions:
     All inline candidates are put into prioritized heap based on estimated
     growth of the overall number of instructions and then update the estimates.
*************** cgraph_decide_inlining_of_small_function
*** 1001,1006 ****
--- 1108,1115 ----
  	    }
  	}
  
+       cgraph_decide_recursive_inlining (node);
+ 
        /* Similarly all functions called by the function we just inlined
           are now called more times; update keys.  */
        update_callee_keys (heap, heap_node, node);
*** tree-inline.c.norec	Sat Dec 13 19:47:04 2003
--- tree-inline.c	Sat Dec 13 20:57:08 2003
*************** Boston, MA 02111-1307, USA.  */
*** 40,45 ****
--- 40,46 ----
  #include "cgraph.h"
  #include "intl.h"
  #include "tree-mudflap.h"
+ #include "function.h"
  
  
  /* I'm not real happy about this, but we need to handle gimple and
*************** static tree
*** 679,686 ****
  copy_body (inline_data *id)
  {
    tree body;
  
!   body = DECL_SAVED_TREE (VARRAY_TOP_TREE (id->fns));
    walk_tree (&body, copy_body_r, id, NULL);
  
    return body;
--- 680,692 ----
  copy_body (inline_data *id)
  {
    tree body;
+   tree fndecl = VARRAY_TOP_TREE (id->fns);
  
!   if (fndecl == current_function_decl
!       && cfun->saved_tree)
!     body = cfun->saved_tree;
!   else
!     body = DECL_SAVED_TREE (fndecl);
    walk_tree (&body, copy_body_r, id, NULL);
  
    return body;
*************** initialize_inlined_parameters (inline_da
*** 701,708 ****
    int argnum = 0;
  
    /* Figure out what the parameters are.  */
!   parms = 
! DECL_ARGUMENTS (fn);
  
    /* Start with no initializations whatsoever.  */
    init_stmts = NULL_TREE;
--- 707,715 ----
    int argnum = 0;
  
    /* Figure out what the parameters are.  */
!   parms = DECL_ARGUMENTS (fn);
!   if (fn == current_function_decl)
!     parms = cfun->saved_args;
  
    /* Start with no initializations whatsoever.  */
    init_stmts = NULL_TREE;
*************** expand_call_inline (tree *tp, int *walk_
*** 1476,1482 ****
      }
  
  #ifdef ENABLE_CHECKING
!   verify_cgraph_node (edge->callee);
  #endif
  
    if (! (*lang_hooks.tree_inlining.start_inlining) (fn))
--- 1483,1490 ----
      }
  
  #ifdef ENABLE_CHECKING
!   if (edge->callee->decl != id->node->decl)
!     verify_cgraph_node (edge->callee);
  #endif
  
    if (! (*lang_hooks.tree_inlining.start_inlining) (fn))
*************** expand_call_inline (tree *tp, int *walk_
*** 1549,1554 ****
--- 1557,1563 ----
    id->ret_label = build_decl (LABEL_DECL, NULL_TREE, NULL_TREE);
    DECL_ARTIFICIAL (id->ret_label) = 1;
    DECL_CONTEXT (id->ret_label) = VARRAY_TREE (id->fns, 0);
+   insert_decl_map (id, id->ret_label, id->ret_label);
  
    if (! DECL_INITIAL (fn)
        || TREE_CODE (DECL_INITIAL (fn)) != BLOCK)
Index: params.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/params.def,v
retrieving revision 1.15.2.14
diff -c -3 -p -r1.15.2.14 params.def
*** params.def	13 Nov 2003 02:37:56 -0000	1.15.2.14
--- params.def	13 Dec 2003 19:42:54 -0000
*************** DEFPARAM (PARAM_MAX_INLINE_INSNS_AUTO,
*** 65,70 ****
--- 65,90 ----
  	  "The maximum number of instructions when automatically inlining",
  	  150)
  
+ DEFPARAM (PARAM_MAX_INLINE_INSNS_RECURSIVE,
+ 	  "max-inline-insns-recursive",
+ 	  "The maximum number of instructions inline function can grow to via recursive inlining",
+ 	  500)
+ 
+ DEFPARAM (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO,
+ 	  "max-inline-insns-recursive-auto",
+ 	  "The maximum number of instructions non-inline function can grow to via recursive inlining",
+ 	  500)
+ 
+ DEFPARAM (PARAM_MAX_INLINE_RECURSIVE_DEPTH,
+ 	  "max-inline-recursive-depth",
+ 	  "The maximum depth of recursive inlining for inline functions",
+ 	  8)
+ 
+ DEFPARAM (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO,
+ 	  "max-inline-recursive-depth-auto",
+ 	  "The maximum depth of recursive inlining for non-inline functions",
+ 	  8)
+ 
  /* For languages that (still) use the RTL inliner, we can specify
     limits for the RTL inliner separately.
     The parameter here defines the maximum number of RTL instructions
Index: invoke.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/invoke.texi,v
retrieving revision 1.152.2.62
diff -c -3 -p -r1.152.2.62 invoke.texi
*** invoke.texi	6 Dec 2003 12:31:28 -0000	1.152.2.62
--- invoke.texi	13 Dec 2003 23:15:29 -0000
*************** Specifies maximal overall growth of the 
*** 4963,4968 ****
--- 4963,4989 ----
  This parameter is ignored when @option{-funit-at-a-time} is not used.
  The default value is 150.
  
+ @item max-inline-insns-recursive
+ @itemx max-inline-insns-recursive-auto
+ Specifies maximum number of instructions out-of-line copy of self recursive inline
+ function can grow into by performing recursive inlining.
+ 
+ For functions declared inline @option{--param max-inline-insns-recursive} is
+ taken into acount.  For function not declared inline, recursive inlining
+ happens only when @option{-finline-functions} (included in @option{-O3}) is
+ enabled and @option{--param max-inline-insns-recursive-auto} is used.  The
+ default value is 500.
+ 
+ @item max-inline-recursive-depth
+ @itemx max-inline-recursive-depth-auto
+ Specifies maximum recursion depth used by the recursive inlining.
+ 
+ For functions declared inline @option{--param max-inline-recursive-depth} is
+ taken into acount.  For function not declared inline, recursive inlining
+ happens only when @option{-finline-functions} (included in @option{-O3}) is
+ enabled and @option{--param max-inline-recursive-depth-auto} is used.  The
+ default value is 500.
+ 
  @item max-inline-insns-rtl
  For languages that use the RTL inliner (this happens at a later stage
  than tree inlining), you can set the maximum allowable size (counted 
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.903.2.154
diff -c -3 -p -r1.903.2.154 Makefile.in
*** Makefile.in	13 Dec 2003 19:39:47 -0000	1.903.2.154
--- Makefile.in	13 Dec 2003 23:24:13 -0000
*************** tree-dump.o: tree-dump.c $(CONFIG_H) $(S
*** 1534,1540 ****
  tree-inline.o : tree-inline.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
     $(RTL_H) $(EXPR_H) flags.h $(PARAMS_H) input.h insn-config.h $(INTEGRATE_H) \
     $(VARRAY_H) $(HASHTAB_H) $(SPLAY_TREE_H) toplev.h langhooks.h \
!    tree-inline.h cgraph.h $(TREE_SIMPLE_H)
  print-tree.o : print-tree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
     $(GGC_H) langhooks.h real.h
  stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
--- 1534,1540 ----
  tree-inline.o : tree-inline.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
     $(RTL_H) $(EXPR_H) flags.h $(PARAMS_H) input.h insn-config.h $(INTEGRATE_H) \
     $(VARRAY_H) $(HASHTAB_H) $(SPLAY_TREE_H) toplev.h langhooks.h \
!    tree-inline.h cgraph.h $(TREE_SIMPLE_H) function.h
  print-tree.o : print-tree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
     $(GGC_H) langhooks.h real.h
  stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \



More information about the Gcc-patches mailing list