This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Whole program optimization and functions-only-called-once.


> Jan Hubicka wrote:
>
>> -fno-ipa-cp should work around your problem for time being.
>
> Indeed it did. Some figures:

Thanks for the confirmation!

> Considering invlo4 size 1462.
>  Called once from lowpass 2293 insns.
>  Not inlined because --param large-function-growth limit reached.
>
> Considering invlo2 size 933.
>  Called once from lowpass 2293 insns.
>  Not inlined because --param large-function-growth limit reached.
>
> where the largest callee *does* get inlined, while two smaller ones  
> don't (I agree with Jan that this would have been solved by training the  
> inliner with profiling data, because only invlo4 gets called).

Using profiling data does not really make the inliner bypass
large-function-growth.  We can experiment with tweaking large-function-growth.
So far I haven't seen any testcase where this limit results in a runtime
regression.

This is the patch I intend to commit after re-testing on x86_64-linux after
some last-minute changes.

	* cgraph.c (cgraph_release_function_body): Update use of
	ipa_transforms_to_apply.
	(cgraph_remove_node): Remove ipa_transforms_to_apply.
	* cgraph.h (struct cgraph_node): Add ipa_transforms_to_apply.
	* cgraphunit.c (save_inline_function_body): Clear ipa_transforms for
	copied body.
	(cgraph_materialize_clone): Remove original if dead.
	* lto-streamer-in.c (lto_read_body): Remove FIXME and
	ipa_transforms_to_apply hack.
	* function.h (struct function): Add ipa_transforms_to_apply.
	* ipa.c (cgraph_remove_unreachable_nodes): Handle dead clone originals.
	* tree-inline.c (copy_bb): Update sanity check.
	(initialize_cfun): Do not copy ipa_transforms_to_apply.
	(expand_call_inline): Remove dead clone originals.
	(tree_function_versioning): Merge transformation queues.
	* passes.c (add_ipa_transform_pass): Remove.
	(execute_one_ipa_transform_pass): Update ipa_transforms_to_apply
	tracking.
	(execute_all_ipa_transforms): Update.
	(execute_one_pass): Update.

	* lto.c (read_cgraph_and_symbols): Set also ipa_transforms_to_apply.
Index: cgraph.c
===================================================================
*** cgraph.c	(revision 154198)
--- cgraph.c	(working copy)
*************** cgraph_release_function_body (struct cgr
*** 1132,1138 ****
        pop_cfun();
        gimple_set_body (node->decl, NULL);
        VEC_free (ipa_opt_pass, heap,
!       		DECL_STRUCT_FUNCTION (node->decl)->ipa_transforms_to_apply);
        /* Struct function hangs a lot of data that would leak if we didn't
           removed all pointers to it.   */
        ggc_free (DECL_STRUCT_FUNCTION (node->decl));
--- 1132,1138 ----
        pop_cfun();
        gimple_set_body (node->decl, NULL);
        VEC_free (ipa_opt_pass, heap,
!       		node->ipa_transforms_to_apply);
        /* Struct function hangs a lot of data that would leak if we didn't
           removed all pointers to it.   */
        ggc_free (DECL_STRUCT_FUNCTION (node->decl));
*************** cgraph_remove_node (struct cgraph_node *
*** 1159,1164 ****
--- 1159,1166 ----
    cgraph_call_node_removal_hooks (node);
    cgraph_node_remove_callers (node);
    cgraph_node_remove_callees (node);
+   VEC_free (ipa_opt_pass, heap,
+             node->ipa_transforms_to_apply);
  
    /* Incremental inlining access removed nodes stored in the postorder list.
       */
Index: cgraph.h
===================================================================
*** cgraph.h	(revision 154198)
--- cgraph.h	(working copy)
*************** struct GTY((chain_next ("%h.next"), chai
*** 190,195 ****
--- 190,200 ----
  
    PTR GTY ((skip)) aux;
  
+   /* Interprocedural passes scheduled to have their transform functions
+      applied next time we execute local pass on them.  We maintain it
+      per-function in order to allow IPA passes to introduce new functions.  */
+   VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply;
+ 
    struct cgraph_local_info local;
    struct cgraph_global_info global;
    struct cgraph_rtl_info rtl;
*************** struct GTY((chain_next ("%h.next"), chai
*** 206,221 ****
       number of cfg nodes with -fprofile-generate and -fprofile-use */
    int pid;
  
!   /* Set when function must be output - it is externally visible
!      or its address is taken.  */
    unsigned needed : 1;
!   /* Set when function has address taken.  */
    unsigned address_taken : 1;
    /* Set when decl is an abstract function pointed to by the
       ABSTRACT_DECL_ORIGIN of a reachable function.  */
    unsigned abstract_and_needed : 1;
    /* Set when function is reachable by call from other function
!      that is either reachable or needed.  */
    unsigned reachable : 1;
    /* Set once the function is lowered (i.e. its CFG is built).  */
    unsigned lowered : 1;
--- 211,234 ----
       number of cfg nodes with -fprofile-generate and -fprofile-use */
    int pid;
  
!   /* Set when function must be output for some reason.  The primary
!      use of this flag is to mark functions needed to be output for
!      non-standard reason.  Functions that are externally visible
!      or reachable from functions needed to be output are marked
!      by specialized flags.  */
    unsigned needed : 1;
!   /* Set when function has address taken.
!      In the current implementation it implies the needed flag.  */
    unsigned address_taken : 1;
    /* Set when decl is an abstract function pointed to by the
       ABSTRACT_DECL_ORIGIN of a reachable function.  */
    unsigned abstract_and_needed : 1;
    /* Set when function is reachable by call from other function
!      that is either reachable or needed.  
!      This flag is computed at original cgraph construction and then
!      updated in cgraph_remove_unreachable_nodes.  Note that after
!      cgraph_remove_unreachable_nodes cgraph still can contain unreachable
!      nodes when they are needed for virtual clone instantiation.  */
    unsigned reachable : 1;
    /* Set once the function is lowered (i.e. its CFG is built).  */
    unsigned lowered : 1;
Index: cgraphunit.c
===================================================================
*** cgraphunit.c	(revision 154198)
--- cgraphunit.c	(working copy)
*************** save_inline_function_body (struct cgraph
*** 1777,1784 ****
    TREE_PUBLIC (first_clone->decl) = 0;
    DECL_COMDAT (first_clone->decl) = 0;
    VEC_free (ipa_opt_pass, heap,
!             DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply);
!   DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply = NULL;
  
  #ifdef ENABLE_CHECKING
    verify_cgraph_node (first_clone);
--- 1777,1784 ----
    TREE_PUBLIC (first_clone->decl) = 0;
    DECL_COMDAT (first_clone->decl) = 0;
    VEC_free (ipa_opt_pass, heap,
!             first_clone->ipa_transforms_to_apply);
!   first_clone->ipa_transforms_to_apply = NULL;
  
  #ifdef ENABLE_CHECKING
    verify_cgraph_node (first_clone);
*************** cgraph_materialize_clone (struct cgraph_
*** 1810,1815 ****
--- 1810,1817 ----
      node->clone_of->clones = node->next_sibling_clone;
    node->next_sibling_clone = NULL;
    node->prev_sibling_clone = NULL;
+   if (!node->clone_of->analyzed && !node->clone_of->clones)
+     cgraph_remove_node (node->clone_of);
    node->clone_of = NULL;
    bitmap_obstack_release (NULL);
  }
Index: lto-streamer-in.c
===================================================================
*** lto-streamer-in.c	(revision 154198)
--- lto-streamer-in.c	(working copy)
*************** lto_read_body (struct lto_file_decl_data
*** 1476,1490 ****
        /* Restore decl state */
        file_data->current_decl_state = file_data->global_decl_state;
  
-       /* FIXME: ipa_transforms_to_apply holds list of passes that have optimization
-          summaries computed and needs to apply changes.  At the moment WHOPR only
-          supports inlining, so we can push it here by hand.  In future we need to stream
-          this field into ltrans compilation.  This will also need to move the field
- 	 from struct function into cgraph node where it belongs.  */
-       if (flag_ltrans && !cgraph_node (fn_decl)->global.inlined_to)
- 	 VEC_safe_push (ipa_opt_pass, heap,
- 			cfun->ipa_transforms_to_apply,
- 			(ipa_opt_pass)&pass_ipa_inline);
        pop_cfun ();
      }
    else 
--- 1476,1481 ----
Index: function.h
===================================================================
*** function.h	(revision 154198)
--- function.h	(working copy)
*************** struct GTY(()) function {
*** 522,532 ****
    unsigned int curr_properties;
    unsigned int last_verified;
  
-   /* Interprocedural passes scheduled to have their transform functions
-      applied next time we execute local pass on them.  We maintain it
-      per-function in order to allow IPA passes to introduce new functions.  */
-   VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply;
- 
    /* Non-null if the function does something that would prevent it from
       being copied; this applies to both versioning and inlining.  Set to
       a string describing the reason for failure.  */
--- 522,527 ----
Index: ipa.c
===================================================================
*** ipa.c	(revision 154198)
--- ipa.c	(working copy)
*************** bool
*** 121,126 ****
--- 121,127 ----
  cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file)
  {
    struct cgraph_node *first = (struct cgraph_node *) (void *) 1;
+   struct cgraph_node *processed = (struct cgraph_node *) (void *) 2;
    struct cgraph_node *node, *next;
    bool changed = false;
  
*************** cgraph_remove_unreachable_nodes (bool be
*** 142,150 ****
          gcc_assert (!node->global.inlined_to);
  	node->aux = first;
  	first = node;
        }
      else
!       gcc_assert (!node->aux);
  
    /* Perform reachability analysis.  As a special case do not consider
       extern inline functions not inlined as live because we won't output
--- 143,155 ----
          gcc_assert (!node->global.inlined_to);
  	node->aux = first;
  	first = node;
+ 	node->reachable = true;
        }
      else
!       {
!         gcc_assert (!node->aux);
! 	node->reachable = false;
!       }
  
    /* Perform reachability analysis.  As a special case do not consider
       extern inline functions not inlined as live because we won't output
*************** cgraph_remove_unreachable_nodes (bool be
*** 154,170 ****
        struct cgraph_edge *e;
        node = first;
        first = (struct cgraph_node *) first->aux;
  
!       for (e = node->callees; e; e = e->next_callee)
! 	if (!e->callee->aux
! 	    && node->analyzed
! 	    && (!e->inline_failed || !e->callee->analyzed
! 		|| (!DECL_EXTERNAL (e->callee->decl))
!                 || before_inlining_p))
! 	  {
! 	    e->callee->aux = first;
! 	    first = e->callee;
! 	  }
        while (node->clone_of && !node->clone_of->aux && !gimple_has_body_p (node->decl))
          {
  	  node = node->clone_of;
--- 159,184 ----
        struct cgraph_edge *e;
        node = first;
        first = (struct cgraph_node *) first->aux;
+       node->aux = processed;
  
!       if (node->reachable)
!         for (e = node->callees; e; e = e->next_callee)
! 	  if (!e->callee->reachable
! 	      && node->analyzed
! 	      && (!e->inline_failed || !e->callee->analyzed
! 		  || (!DECL_EXTERNAL (e->callee->decl))
!                   || before_inlining_p))
! 	    {
! 	      bool prev_reachable = e->callee->reachable;
! 	      e->callee->reachable |= node->reachable;
! 	      if (!e->callee->aux
! 	          || (e->callee->aux == processed
! 		      && prev_reachable != e->callee->reachable))
! 	        {
! 	          e->callee->aux = first;
! 	          first = e->callee;
! 	        }
! 	    }
        while (node->clone_of && !node->clone_of->aux && !gimple_has_body_p (node->decl))
          {
  	  node = node->clone_of;
*************** cgraph_remove_unreachable_nodes (bool be
*** 184,196 ****
    for (node = cgraph_nodes; node; node = next)
      {
        next = node->next;
        if (!node->aux)
  	{
            node->global.inlined_to = NULL;
  	  if (file)
  	    fprintf (file, " %s", cgraph_node_name (node));
! 	  if (!node->analyzed || !DECL_EXTERNAL (node->decl)
! 	      || before_inlining_p)
  	    cgraph_remove_node (node);
  	  else
  	    {
--- 198,215 ----
    for (node = cgraph_nodes; node; node = next)
      {
        next = node->next;
+       if (node->aux && !node->reachable)
+         {
+ 	  cgraph_node_remove_callees (node);
+ 	  node->analyzed = false;
+ 	  node->local.inlinable = false;
+ 	}
        if (!node->aux)
  	{
            node->global.inlined_to = NULL;
  	  if (file)
  	    fprintf (file, " %s", cgraph_node_name (node));
! 	  if (!node->analyzed || !DECL_EXTERNAL (node->decl) || before_inlining_p)
  	    cgraph_remove_node (node);
  	  else
  	    {
*************** cgraph_remove_unreachable_nodes (bool be
*** 219,224 ****
--- 238,249 ----
  		      node->analyzed = false;
  		      node->local.inlinable = false;
  		    }
+ 		  if (node->prev_sibling_clone)
+ 		    node->prev_sibling_clone->next_sibling_clone = node->next_sibling_clone;
+ 		  else if (node->clone_of)
+ 		    node->clone_of->clones = node->next_sibling_clone;
+ 		  if (node->next_sibling_clone)
+ 		    node->next_sibling_clone->prev_sibling_clone = node->prev_sibling_clone;
  		}
  	      else
  		cgraph_remove_node (node);
Index: lto/lto.c
===================================================================
*** lto/lto.c	(revision 154198)
--- lto/lto.c	(working copy)
*************** read_cgraph_and_symbols (unsigned nfiles
*** 1826,1834 ****
       phase. */
    if (flag_ltrans)
      for (node = cgraph_nodes; node; node = node->next)
!       if (!node->global.inlined_to
! 	  && cgraph_decide_is_function_needed (node, node->decl))
!         cgraph_mark_needed_node (node);
  
    timevar_push (TV_IPA_LTO_DECL_IO);
  
--- 1826,1844 ----
       phase. */
    if (flag_ltrans)
      for (node = cgraph_nodes; node; node = node->next)
!       {
!         if (!node->global.inlined_to
! 	    && cgraph_decide_is_function_needed (node, node->decl))
!           cgraph_mark_needed_node (node);
! 	/* FIXME: ipa_transforms_to_apply holds list of passes that have optimization
! 	   summaries computed and needs to apply changes.  At the moment WHOPR only
! 	   supports inlining, so we can push it here by hand.  In future we need to stream
! 	   this field into ltrans compilation.  */
! 	if (node->analyzed)
! 	  VEC_safe_push (ipa_opt_pass, heap,
! 			 node->ipa_transforms_to_apply,
! 			 (ipa_opt_pass)&pass_ipa_inline);
!       }
  
    timevar_push (TV_IPA_LTO_DECL_IO);
  
Index: tree-inline.c
===================================================================
*** tree-inline.c	(revision 154198)
--- tree-inline.c	(working copy)
*************** copy_bb (copy_body_data *id, basic_block
*** 1665,1674 ****
  
  		  /* We have missing edge in the callgraph.  This can happen
  		     when previous inlining turned an indirect call into a
! 		     direct call by constant propagating arguments.  In all
  		     other cases we hit a bug (incorrect node sharing is the
  		     most common reason for missing edges).  */
! 		  gcc_assert (dest->needed || !dest->analyzed);
  		  if (id->transform_call_graph_edges == CB_CGE_MOVE_CLONES)
  		    cgraph_create_edge_including_clones
  		      (id->dst_node, dest, stmt, bb->count,
--- 1665,1676 ----
  
  		  /* We have missing edge in the callgraph.  This can happen
  		     when previous inlining turned an indirect call into a
! 		     direct call by constant propagating arguments or we are
! 		     producing a dead clone (for further cloning).  In all
  		     other cases we hit a bug (incorrect node sharing is the
  		     most common reason for missing edges).  */
! 		  gcc_assert (dest->needed || !dest->analyzed
! 		  	      || !id->src_node->analyzed);
  		  if (id->transform_call_graph_edges == CB_CGE_MOVE_CLONES)
  		    cgraph_create_edge_including_clones
  		      (id->dst_node, dest, stmt, bb->count,
*************** initialize_cfun (tree new_fndecl, tree c
*** 1983,1991 ****
    cfun->function_end_locus = src_cfun->function_end_locus;
    cfun->curr_properties = src_cfun->curr_properties;
    cfun->last_verified = src_cfun->last_verified;
-   if (src_cfun->ipa_transforms_to_apply)
-     cfun->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
- 					      src_cfun->ipa_transforms_to_apply);
    cfun->va_list_gpr_size = src_cfun->va_list_gpr_size;
    cfun->va_list_fpr_size = src_cfun->va_list_fpr_size;
    cfun->function_frequency = src_cfun->function_frequency;
--- 1985,1990 ----
*************** expand_call_inline (basic_block bb, gimp
*** 3822,3827 ****
--- 3821,3830 ----
    (*debug_hooks->outlining_inline_function) (cg_edge->callee->decl);
  
    /* Update callgraph if needed.  */
+   if (cg_edge->callee->clone_of
+       && !cg_edge->callee->clone_of->next_sibling_clone
+       && !cg_edge->callee->analyzed)
+     cgraph_remove_node (cg_edge->callee);
    cgraph_remove_node (cg_edge->callee);
  
    id->block = NULL_TREE;
*************** tree_function_versioning (tree old_decl,
*** 4848,4853 ****
--- 4851,4869 ----
    id.src_node = old_version_node;
    id.dst_node = new_version_node;
    id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
+   if (id.src_node->ipa_transforms_to_apply)
+     {
+       VEC(ipa_opt_pass,heap) * old_transforms_to_apply = id.dst_node->ipa_transforms_to_apply;
+       unsigned int i;
+ 
+       id.dst_node->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
+ 					               id.src_node->ipa_transforms_to_apply);
+       for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
+         VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
+ 		       VEC_index (ipa_opt_pass,
+ 		       		  old_transforms_to_apply,
+ 				  i));
+     }
    
    id.copy_decl = copy_decl_no_change;
    id.transform_call_graph_edges
Index: passes.c
===================================================================
*** passes.c	(revision 154198)
--- passes.c	(working copy)
*************** update_properties_after_pass (void *data
*** 1376,1390 ****
  		           & ~pass->properties_destroyed;
  }
  
- /* Schedule IPA transform pass DATA for CFUN.  */
- 
- static void
- add_ipa_transform_pass (void *data)
- {
-   struct ipa_opt_pass_d *ipa_pass = (struct ipa_opt_pass_d *) data;
-   VEC_safe_push (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply, ipa_pass);
- }
- 
  /* Execute summary generation for all of the passes in IPA_PASS.  */
  
  void
--- 1376,1381 ----
*************** execute_one_ipa_transform_pass (struct c
*** 1464,1482 ****
  void
  execute_all_ipa_transforms (void)
  {
!   if (cfun && cfun->ipa_transforms_to_apply)
      {
        unsigned int i;
-       struct cgraph_node *node = cgraph_node (current_function_decl);
  
!       for (i = 0; i < VEC_length (ipa_opt_pass, cfun->ipa_transforms_to_apply);
  	   i++)
  	execute_one_ipa_transform_pass (node,
  					VEC_index (ipa_opt_pass,
! 						   cfun->ipa_transforms_to_apply,
  						   i));
!       VEC_free (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply);
!       cfun->ipa_transforms_to_apply = NULL;
      }
  }
  
--- 1455,1476 ----
  void
  execute_all_ipa_transforms (void)
  {
!   struct cgraph_node *node;
!   if (!cfun)
!     return;
!   node = cgraph_node (current_function_decl);
!   if (node->ipa_transforms_to_apply)
      {
        unsigned int i;
  
!       for (i = 0; i < VEC_length (ipa_opt_pass, node->ipa_transforms_to_apply);
  	   i++)
  	execute_one_ipa_transform_pass (node,
  					VEC_index (ipa_opt_pass,
! 						   node->ipa_transforms_to_apply,
  						   i));
!       VEC_free (ipa_opt_pass, heap, node->ipa_transforms_to_apply);
!       node->ipa_transforms_to_apply = NULL;
      }
  }
  
*************** execute_one_pass (struct opt_pass *pass)
*** 1551,1557 ****
    execute_todo (todo_after | pass->todo_flags_finish);
    verify_interpass_invariants ();
    if (pass->type == IPA_PASS)
!     do_per_function (add_ipa_transform_pass, pass);
  
    if (!current_function_decl)
      cgraph_process_new_functions ();
--- 1545,1557 ----
    execute_todo (todo_after | pass->todo_flags_finish);
    verify_interpass_invariants ();
    if (pass->type == IPA_PASS)
!     {
!       struct cgraph_node *node;
!       for (node = cgraph_nodes; node; node = node->next)
!         if (node->analyzed)
!           VEC_safe_push (ipa_opt_pass, heap, node->ipa_transforms_to_apply,
! 			 (struct ipa_opt_pass_d *)pass);
!     }
  
    if (!current_function_decl)
      cgraph_process_new_functions ();


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]