This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] Early inlining pass


Hi,
this patch adds early inlining pass that inline the "obvious" stuff (ie
functions having body smaller than the call construct and always inline
functions).  This is followed by cleanup_cfg and true inlining pass.

This helps in scenarios:
 1) With tree profiling we no longer have counter per each inlined call.
    This makes surprising difference in tramp3d where we have few hounders
    dummy call for each usefull instruction executed, so execution times with
    -fprofile-generate -ftree-based-profiling goes down from 10minutes to 6
    seconds, RTL profiling is at 4.5 seconds.
 2) Inlining of body we already inlined into is slightly cheaper than inlining
    inlining the body and it's callers all the time.  This seems to account by
    up to 11% compile time difference for Gerald's testcase (but inlining
    decisions are affected too so it is dificult to judge, performance and code
    size however remains (almost) unchanged.  It also seems to account in SPEC
    build and GCC bootstrap as bootstrap time seems improved by about 30-100
    seconds (even if this close to noise factor).
 3) We have better chances that pre-inline optimization would be usefull for
    C++.  I experimented with this on tree-profiling branch and while it does
    resonable (ie measurable) job for SPEC2000, it is hardly usefull for C++
    as the tiny functions are almost unoptimizable in isolation.

And now the problems - I am not sure how to interface properly to the IPA pm.
At the moment I have pass early_local_passes that is IPA pass and the local
passes (profiling and cleanup_cfg currently) are run as it's subpasses. I am
letting passmanger to switch from IPA to non-IPA by iterating over all
functions whenever there is subpass of any IPA pass.  I am not sure if this is
way we want to go (perhaps we might add some IPA property to make this
prettier)

Other problem is that now tree profiling pass needs to happen either in
lowering or in early optimization depending if IPA is run at all.  This is now done
by adding new early_tree_profiling pass so we have two sets of command line options
to get the same dump, that is not prettiest either.

And finally I am not sure if the idea sounds like good way to go, but I am
overall quite surprised that it is somewhat effective even for "common" code,
not only tramp3d it was originally designed as a quick hack for, so I don't
seem to come with good reasons against..

It was bootstrapped/regtested i686-pc-gnu-linux, Comments?

2005-05-29  Jan Hubicka  <jh@suse.cz>
	* cgraph.c (cgraph_remove_node): Kill dead extern inlines only
	when having whole CFG built.
	* cgraph.h (cgraph_decide_inlining_incrementally): Add support for
	early inlining
	* cgraphunit.c (cgraph_finalize_function): Update call of
	cgraph_decide_inlining.
	(rebuild_cgraph_edges): New function.
	(pass_rebuild_cgraph_edges): New pass.
	* common.opt (fearly-inlining): New switch.
	* ipa-inline.c: Include ggc.h
	(cgraph_decide_inlining_incrementally):  Support early inlining.
	(cgraph_early_inlining): New function.
	(cgraph_gate_early_inlining): New gate.
	(pass_early_ipa_inline): New pass.
	* tree-optimize.c (pass_early_local_passes): New pass.
	(pass_cleanup_cfg): New pass.
	(init_tree_optimization_passes): Add all the new passes.
	(execute_pass_list): Deal with IPA to local switching.
	(ipa_passes): Kill cfun.
	* tree-passes.h (pass_early_tree_profile, pass_rebuild_cgraph_edges,
	pass_early_ipa_inline): New passes.
	* tree-profile.c (do_early_tree_profiling): New gate.
	(pass_early_tree_profile): New pass.
Index: cgraph.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraph.c,v
retrieving revision 1.74
diff -c -3 -p -r1.74 cgraph.c
*** cgraph.c	27 May 2005 21:17:48 -0000	1.74
--- cgraph.c	31 May 2005 22:57:39 -0000
*************** cgraph_remove_node (struct cgraph_node *
*** 460,466 ****
      {
        struct cgraph_node *n = *slot;
        if (!n->next_clone && !n->global.inlined_to
! 	  && (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl)))
  	kill_body = true;
      }
  
--- 460,467 ----
      {
        struct cgraph_node *n = *slot;
        if (!n->next_clone && !n->global.inlined_to
! 	  && (cgraph_global_info_ready
! 	      && (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl))))
  	kill_body = true;
      }
  
Index: cgraph.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraph.h,v
retrieving revision 1.55
diff -c -3 -p -r1.55 cgraph.h
*** cgraph.h	27 May 2005 21:17:49 -0000	1.55
--- cgraph.h	31 May 2005 22:57:39 -0000
*************** bool cgraph_remove_unreachable_nodes (bo
*** 247,253 ****
  int cgraph_postorder (struct cgraph_node **);
  
  /* In ipa-inline.c  */
! void cgraph_decide_inlining_incrementally (struct cgraph_node *);
  void cgraph_clone_inlined_nodes (struct cgraph_edge *, bool);
  void cgraph_mark_inline_edge (struct cgraph_edge *);
  bool cgraph_default_inline_p (struct cgraph_node *);
--- 247,253 ----
  int cgraph_postorder (struct cgraph_node **);
  
  /* In ipa-inline.c  */
! bool cgraph_decide_inlining_incrementally (struct cgraph_node *, bool);
  void cgraph_clone_inlined_nodes (struct cgraph_edge *, bool);
  void cgraph_mark_inline_edge (struct cgraph_edge *);
  bool cgraph_default_inline_p (struct cgraph_node *);
Index: cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.110
diff -c -3 -p -r1.110 cgraphunit.c
*** cgraphunit.c	29 May 2005 19:38:31 -0000	1.110
--- cgraphunit.c	31 May 2005 22:57:39 -0000
*************** cgraph_finalize_function (tree decl, boo
*** 416,422 ****
    if (!flag_unit_at_a_time)
      {
        cgraph_analyze_function (node);
!       cgraph_decide_inlining_incrementally (node);
      }
  
    if (decide_is_function_needed (node, decl))
--- 416,422 ----
    if (!flag_unit_at_a_time)
      {
        cgraph_analyze_function (node);
!       cgraph_decide_inlining_incrementally (node, false);
      }
  
    if (decide_is_function_needed (node, decl))
*************** cgraph_create_edges (struct cgraph_node 
*** 559,564 ****
--- 559,608 ----
    visited_nodes = NULL;
  }
  
+ /* Rebuild call edges from current function after a passes not aware
+    of cgraph updating.  */
+ static void
+ rebuild_cgraph_edges (void)
+ {
+   basic_block bb;
+   struct cgraph_node *node = cgraph_node (current_function_decl);
+   block_stmt_iterator bsi;
+ 
+   while (node->callees)
+     cgraph_remove_edge (node->callees);
+ 
+   node->count = ENTRY_BLOCK_PTR->count;
+ 
+   FOR_EACH_BB (bb)
+     for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+       {
+ 	tree stmt = bsi_stmt (bsi);
+ 	tree call = get_call_expr_in (stmt);
+ 	tree decl;
+ 
+ 	if (call && (decl = get_callee_fndecl (call)))
+ 	  cgraph_create_edge (node, cgraph_node (decl), stmt,
+ 			      bb->count,
+ 			      bb->loop_depth);
+       }
+ }
+ 
+ struct tree_opt_pass pass_rebuild_cgraph_edges =
+ {
+   NULL,					/* name */
+   NULL,					/* gate */
+   rebuild_cgraph_edges,			/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   0,					/* tv_id */
+   PROP_cfg,				/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   0,					/* todo_flags_finish */
+   0					/* letter */
+ };
  
  /* Verify cgraph nodes of given cgraph node.  */
  void
Index: common.opt
===================================================================
RCS file: /cvs/gcc/gcc/gcc/common.opt,v
retrieving revision 1.71
diff -c -3 -p -r1.71 common.opt
*** common.opt	25 May 2005 04:16:38 -0000	1.71
--- common.opt	31 May 2005 22:57:39 -0000
*************** finline-functions
*** 473,478 ****
--- 473,482 ----
  Common Report Var(flag_inline_functions)
  Integrate simple functions into their callers
  
+ fearly-inlining
+ Common Report Var(flag_early_inlining) Init(1)
+ Perform early inlining
+ 
  finline-limit-
  Common RejectNegative Joined UInteger
  
Index: ipa-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ipa-inline.c,v
retrieving revision 2.7
diff -c -3 -p -r2.7 ipa-inline.c
*** ipa-inline.c	29 May 2005 19:38:32 -0000	2.7
--- ipa-inline.c	31 May 2005 22:57:39 -0000
*************** Software Foundation, 59 Temple Place - S
*** 79,84 ****
--- 79,85 ----
  #include "intl.h"
  #include "tree-pass.h"
  #include "coverage.h"
+ #include "ggc.h"
  
  /* Statistics we collect about inlining algorithm.  */
  static int ncalls_inlined;
*************** cgraph_decide_inlining (void)
*** 876,885 ****
  /* Decide on the inlining.  We do so in the topological order to avoid
     expenses on updating data structures.  */
  
! void
! cgraph_decide_inlining_incrementally (struct cgraph_node *node)
  {
    struct cgraph_edge *e;
  
    /* First of all look for always inline functions.  */
    for (e = node->callees; e; e = e->next_callee)
--- 877,887 ----
  /* Decide on the inlining.  We do so in the topological order to avoid
     expenses on updating data structures.  */
  
! bool
! cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
  {
    struct cgraph_edge *e;
+   bool inlined = false;
  
    /* First of all look for always inline functions.  */
    for (e = node->callees; e; e = e->next_callee)
*************** cgraph_decide_inlining_incrementally (st
*** 889,895 ****
  	/* ??? It is possible that renaming variable removed the function body
  	   in duplicate_decls. See gcc.c-torture/compile/20011119-2.c  */
  	&& DECL_SAVED_TREE (e->callee->decl))
!       cgraph_mark_inline (e);
  
    /* Now do the automatic inlining.  */
    if (!flag_really_no_inline)
--- 891,903 ----
  	/* ??? It is possible that renaming variable removed the function body
  	   in duplicate_decls. See gcc.c-torture/compile/20011119-2.c  */
  	&& DECL_SAVED_TREE (e->callee->decl))
!       {
!         if (dump_file && early)
!           fprintf (dump_file, "  Early inlining %s into %s\n",
! 		   cgraph_node_name (e->callee), cgraph_node_name (node));
! 	cgraph_mark_inline (e);
! 	inlined = true;
!       }
  
    /* Now do the automatic inlining.  */
    if (!flag_really_no_inline)
*************** cgraph_decide_inlining_incrementally (st
*** 898,912 ****
  	  && e->inline_failed
  	  && !e->callee->local.disregard_inline_limits
  	  && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
  	  && cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
  	  && DECL_SAVED_TREE (e->callee->decl))
  	{
  	  if (cgraph_default_inline_p (e->callee))
! 	    cgraph_mark_inline (e);
! 	  else
  	    e->inline_failed
  	      = N_("--param max-inline-insns-single limit reached");
  	}
  }
  
  /* When inlining shall be performed.  */
--- 906,941 ----
  	  && e->inline_failed
  	  && !e->callee->local.disregard_inline_limits
  	  && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
+ 	  && (!early
+ 	      || (cgraph_estimate_size_after_inlining (1, e->caller, node)
+ 	          <= e->caller->global.insns))
  	  && cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
  	  && DECL_SAVED_TREE (e->callee->decl))
  	{
  	  if (cgraph_default_inline_p (e->callee))
! 	    {
! 	      if (dump_file && early)
!                 fprintf (dump_file, "  Early inlining %s into %s\n",
! 			 cgraph_node_name (e->callee), cgraph_node_name (node));
! 	      cgraph_mark_inline (e);
! 	      inlined = true;
! 	    }
! 	  else if (!early)
  	    e->inline_failed
  	      = N_("--param max-inline-insns-single limit reached");
  	}
+   if (early && inlined)
+     {
+       push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+       tree_register_cfg_hooks ();
+       current_function_decl = node->decl;
+       optimize_inline_calls (current_function_decl);
+       node->local.self_insns = node->global.insns;
+       current_function_decl = NULL;
+       pop_cfun ();
+       ggc_collect ();
+     }
+   return inlined;
  }
  
  /* When inlining shall be performed.  */
*************** struct tree_opt_pass pass_ipa_inline = 
*** 926,932 ****
    0,					/* static_pass_number */
    TV_INTEGRATION,			/* tv_id */
    0,	                                /* properties_required */
!   PROP_trees,				/* properties_provided */
    0,					/* properties_destroyed */
    0,					/* todo_flags_start */
    TODO_dump_cgraph | TODO_dump_func,	/* todo_flags_finish */
--- 955,1009 ----
    0,					/* static_pass_number */
    TV_INTEGRATION,			/* tv_id */
    0,	                                /* properties_required */
!   PROP_cfg,				/* properties_provided */
!   0,					/* properties_destroyed */
!   0,					/* todo_flags_start */
!   TODO_dump_cgraph | TODO_dump_func,	/* todo_flags_finish */
!   0					/* letter */
! };
! 
! /* Do inlining of small functions.  Doing so early helps profiling and other
!    passes to be somewhat more effective and avoids some code duplication in
!    later real inlining pass for testcases with very many function calls.  */
! static void
! cgraph_early_inlining (void)
! {
!   struct cgraph_node *node;
!   int nnodes;
!   struct cgraph_node **order =
!     xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
!   int i;
! 
!   nnodes = cgraph_postorder (order);
!   for (i = nnodes - 1; i >= 0; i--)
!     {
!       node = order[i];
!       if (node->analyzed && node->local.inlinable
! 	  && (node->needed || node->reachable)
! 	  && node->callers)
! 	cgraph_decide_inlining_incrementally (node, true);
!     }
!   free (order);
! }
! 
! /* When inlining shall be performed.  */
! static bool
! cgraph_gate_early_inlining (void)
! {
!   return flag_inline_trees && flag_early_inlining;
! }
! 
! struct tree_opt_pass pass_early_ipa_inline = 
! {
!   "einline",	 			/* name */
!   cgraph_gate_early_inlining,		/* gate */
!   cgraph_early_inlining,		/* execute */
!   NULL,					/* sub */
!   NULL,					/* next */
!   0,					/* static_pass_number */
!   TV_INTEGRATION,			/* tv_id */
!   0,	                                /* properties_required */
!   PROP_cfg,				/* properties_provided */
    0,					/* properties_destroyed */
    0,					/* todo_flags_start */
    TODO_dump_cgraph | TODO_dump_func,	/* todo_flags_finish */
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.103
diff -c -3 -p -r2.103 tree-optimize.c
*** tree-optimize.c	29 May 2005 13:14:42 -0000	2.103
--- tree-optimize.c	31 May 2005 22:57:39 -0000
*************** int dump_flags;
*** 56,62 ****
  bool in_gimple_form;
  
  /* The root of the compilation pass tree, once constructed.  */
! static struct tree_opt_pass *all_passes, *all_ipa_passes, * all_lowering_passes;
  
  /* Gate: execute, or not, all of the non-trivial optimizations.  */
  
--- 56,62 ----
  bool in_gimple_form;
  
  /* The root of the compilation pass tree, once constructed.  */
! static struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes;
  
  /* Gate: execute, or not, all of the non-trivial optimizations.  */
  
*************** static struct tree_opt_pass pass_all_opt
*** 85,90 ****
--- 85,136 ----
    0					/* letter */
  };
  
+ static struct tree_opt_pass pass_early_local_passes =
+ {
+   NULL,					/* name */
+   gate_all_optimizations,		/* gate */
+   NULL,					/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   0,					/* tv_id */
+   0,					/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   0,					/* todo_flags_finish */
+   0					/* letter */
+ };
+ 
+ /* Pass: cleanup the CFG just before expanding trees to RTL.
+    This is just a round of label cleanups and case node grouping
+    because after the tree optimizers have run such cleanups may
+    be necessary.  */
+ 
+ static void 
+ execute_cleanup_cfg_pre_ipa (void)
+ {
+   cleanup_tree_cfg ();
+ }
+ 
+ static struct tree_opt_pass pass_cleanup_cfg =
+ {
+   "cleanup_cfg",			/* name */
+   NULL,					/* gate */
+   execute_cleanup_cfg_pre_ipa,		/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   0,					/* tv_id */
+   PROP_cfg,				/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   TODO_dump_func,					/* todo_flags_finish */
+   0					/* letter */
+ };
+ 
+ 
  /* Pass: cleanup the CFG just before expanding trees to RTL.
     This is just a round of label cleanups and case node grouping
     because after the tree optimizers have run such cleanups may
*************** init_tree_optimization_passes (void)
*** 363,368 ****
--- 409,416 ----
  #define NEXT_PASS(PASS)  (p = next_pass_1 (p, &PASS))
    /* Intraprocedural optimization passes.  */
    p = &all_ipa_passes;
+   NEXT_PASS (pass_early_ipa_inline);
+   NEXT_PASS (pass_early_local_passes);
    NEXT_PASS (pass_ipa_inline);
    *p = NULL;
  
*************** init_tree_optimization_passes (void)
*** 377,383 ****
--- 425,437 ----
    NEXT_PASS (pass_build_cfg); 
    NEXT_PASS (pass_pre_expand);
    NEXT_PASS (pass_warn_function_return);
+   NEXT_PASS (pass_early_tree_profile);
+   *p = NULL;
+ 
+   p = &pass_early_local_passes.sub;
    NEXT_PASS (pass_tree_profile);
+   NEXT_PASS (pass_cleanup_cfg);
+   NEXT_PASS (pass_rebuild_cgraph_edges);
    *p = NULL;
  
    p = &all_passes;
*************** execute_pass_list (struct tree_opt_pass 
*** 658,664 ****
    do
      {
        if (execute_one_pass (pass) && pass->sub)
! 	execute_pass_list (pass->sub);
        pass = pass->next;
      }
    while (pass);
--- 712,738 ----
    do
      {
        if (execute_one_pass (pass) && pass->sub)
! 	{
! 	  if (cfun)
! 	    execute_pass_list (pass->sub);
! 	  else
! 	    {
! 	      struct cgraph_node *node;
! 	      tree_register_cfg_hooks ();
! 	      for (node = cgraph_nodes; node; node = node->next)
! 		if (node->analyzed)
! 		  {
! 		    push_cfun (DECL_STRUCT_FUNCTION (node->decl));
! 		    current_function_decl = node->decl;
! 		    execute_pass_list (pass->sub);
! 		    free_dominance_info (CDI_DOMINATORS);
! 		    free_dominance_info (CDI_POST_DOMINATORS);
! 		    current_function_decl = NULL;
! 		    pop_cfun ();
! 		    ggc_collect ();
! 		  }
! 	    }
! 	}
        pass = pass->next;
      }
    while (pass);
*************** tree_lowering_passes (tree fn)
*** 685,690 ****
--- 759,765 ----
  void
  ipa_passes (void)
  {
+   cfun = NULL;
    bitmap_obstack_initialize (NULL);
    execute_pass_list (all_ipa_passes);
    bitmap_obstack_release (NULL);
Index: tree-pass.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-pass.h,v
retrieving revision 2.38
diff -c -3 -p -r2.38 tree-pass.h
*** tree-pass.h	26 May 2005 18:14:47 -0000	2.38
--- tree-pass.h	31 May 2005 22:57:39 -0000
*************** extern struct tree_opt_pass pass_lower_c
*** 164,169 ****
--- 164,170 ----
  extern struct tree_opt_pass pass_lower_eh;
  extern struct tree_opt_pass pass_build_cfg;
  extern struct tree_opt_pass pass_tree_profile;
+ extern struct tree_opt_pass pass_early_tree_profile;
  extern struct tree_opt_pass pass_referenced_vars;
  extern struct tree_opt_pass pass_sra;
  extern struct tree_opt_pass pass_tail_recursion;
*************** extern struct tree_opt_pass pass_store_c
*** 220,226 ****
--- 221,229 ----
  extern struct tree_opt_pass pass_vrp;
  extern struct tree_opt_pass pass_create_structure_vars;
  extern struct tree_opt_pass pass_uncprop;
+ extern struct tree_opt_pass pass_rebuild_cgraph_edges;
  
  extern struct tree_opt_pass pass_ipa_inline;
+ extern struct tree_opt_pass pass_early_ipa_inline;
  
  #endif /* GCC_TREE_PASS_H */
Index: tree-profile.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-profile.c,v
retrieving revision 2.12
diff -c -3 -p -r2.12 tree-profile.c
*** tree-profile.c	24 May 2005 20:19:12 -0000	2.12
--- tree-profile.c	31 May 2005 22:57:39 -0000
*************** struct tree_opt_pass pass_tree_profile =
*** 273,278 ****
--- 273,305 ----
    0					/* letter */
  };
  
+ /* Return 1 if tree-based profiling is in effect, else 0.
+    If it is, set up hooks for tree-based profiling.
+    Gate for pass_tree_profile.  */
+ 
+ static bool
+ do_early_tree_profiling (void)
+ {
+   return (do_tree_profiling () && (!flag_unit_at_a_time || !optimize));
+ }
+ 
+ struct tree_opt_pass pass_early_tree_profile = 
+ {
+   "early_tree_profile",			/* name */
+   do_early_tree_profiling,		/* gate */
+   tree_profiling,			/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   TV_BRANCH_PROB,			/* tv_id */
+   PROP_gimple_leh | PROP_cfg,		/* properties_required */
+   PROP_gimple_leh | PROP_cfg,		/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   TODO_verify_stmts,			/* todo_flags_finish */
+   0					/* letter */
+ };
+ 
  struct profile_hooks tree_profile_hooks =
  {
    tree_init_edge_profiler,      /* init_edge_profiler */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]