Speedup flatteing of very large inline trees

Jan Hubicka hubicka@ucw.cz
Fri Aug 10 07:55:00 GMT 2012


Hi,
the testcase causes huge inline recursion tree via flatten attribute. In this
case we run into quadratic time issue because we try to update overall
size/time of the function after each inlining decision.
This patch breaks out the part of inline_merge_summary that only recomputes the
overall size (and is O(n) where N is number of functions inlined together from the
rest and makes flatten_function to call it only once it is done with all the dirty work.

It is possible to solve this more effectively by making inline_merge_summary to incrementally
update the values, but it is fragile so unless I am really forced to it, I will avoid it.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

	PR middle-end/54146
	* ipa-inline-transform.c (inline_call): Add UPDATE_OVERALL_SUMMARY
	parameter; honnor it.
	* ipa-inline.c (recursive_inlining): Update call
	of inline_call.
	(inline_small_functions): Likewise.
	(ipa_inline): Likewise.
	(inline_always_inline_functions): Likewise.
	(early_inline_small_functions): Likewise.
	(flatten_function): Do separate update of summary info.
	* ipa-inline.h (inline_update_overall_summary): Declare.
	(inline_call): Update.
	* ipa-inline-analysis.c (inline_merge_summary): Break out
	updating code to ...
	(inline_update_overall_summary): Likewise.
Index: ipa-inline-transform.c
===================================================================
*** ipa-inline-transform.c	(revision 190281)
--- ipa-inline-transform.c	(working copy)
*************** clone_inlined_nodes (struct cgraph_edge 
*** 193,205 ****
  /* Mark edge E as inlined and update callgraph accordingly.  UPDATE_ORIGINAL
     specify whether profile of original function should be updated.  If any new
     indirect edges are discovered in the process, add them to NEW_EDGES, unless
!    it is NULL.  Return true iff any new callgraph edges were discovered as a
     result of inlining.  */
  
  bool
  inline_call (struct cgraph_edge *e, bool update_original,
  	     VEC (cgraph_edge_p, heap) **new_edges,
! 	     int *overall_size)
  {
    int old_size = 0, new_size = 0;
    struct cgraph_node *to = NULL;
--- 193,209 ----
  /* Mark edge E as inlined and update callgraph accordingly.  UPDATE_ORIGINAL
     specify whether profile of original function should be updated.  If any new
     indirect edges are discovered in the process, add them to NEW_EDGES, unless
!    it is NULL. If UPDATE_OVERALL_SUMMARY is false, do not bother to recompute overall
!    size of caller after inlining. Caller is required to eventually do it via
!    inline_update_overall_summary.
! 
!    Return true iff any new callgraph edges were discovered as a
     result of inlining.  */
  
  bool
  inline_call (struct cgraph_edge *e, bool update_original,
  	     VEC (cgraph_edge_p, heap) **new_edges,
! 	     int *overall_size, bool update_overall_summary)
  {
    int old_size = 0, new_size = 0;
    struct cgraph_node *to = NULL;
*************** inline_call (struct cgraph_edge *e, bool
*** 244,249 ****
--- 248,255 ----
  
    old_size = inline_summary (to)->size;
    inline_merge_summary (e);
+   if (update_overall_summary)
+    inline_update_overall_summary (to);
    new_size = inline_summary (to)->size;
    if (overall_size)
      *overall_size += new_size - old_size;
Index: ipa-inline.c
===================================================================
*** ipa-inline.c	(revision 190281)
--- ipa-inline.c	(working copy)
*************** recursive_inlining (struct cgraph_edge *
*** 1209,1215 ****
  	}
  
        cgraph_redirect_edge_callee (curr, master_clone);
!       inline_call (curr, false, new_edges, &overall_size);
        lookup_recursive_calls (node, curr->callee, heap);
        n++;
      }
--- 1209,1215 ----
  	}
  
        cgraph_redirect_edge_callee (curr, master_clone);
!       inline_call (curr, false, new_edges, &overall_size, true);
        lookup_recursive_calls (node, curr->callee, heap);
        n++;
      }
*************** inline_small_functions (void)
*** 1480,1486 ****
  	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
  
  	  gcc_checking_assert (!callee->global.inlined_to);
! 	  inline_call (edge, true, &new_indirect_edges, &overall_size);
  	  if (flag_indirect_inlining)
  	    add_new_edges_to_heap (heap, new_indirect_edges);
  
--- 1480,1486 ----
  	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
  
  	  gcc_checking_assert (!callee->global.inlined_to);
! 	  inline_call (edge, true, &new_indirect_edges, &overall_size, true);
  	  if (flag_indirect_inlining)
  	    add_new_edges_to_heap (heap, new_indirect_edges);
  
*************** flatten_function (struct cgraph_node *no
*** 1602,1608 ****
  		 xstrdup (cgraph_node_name (callee)),
  		 xstrdup (cgraph_node_name (e->caller)));
        orig_callee = callee;
!       inline_call (e, true, NULL, NULL);
        if (e->callee != orig_callee)
  	orig_callee->symbol.aux = (void *) node;
        flatten_function (e->callee, early);
--- 1602,1608 ----
  		 xstrdup (cgraph_node_name (callee)),
  		 xstrdup (cgraph_node_name (e->caller)));
        orig_callee = callee;
!       inline_call (e, true, NULL, NULL, false);
        if (e->callee != orig_callee)
  	orig_callee->symbol.aux = (void *) node;
        flatten_function (e->callee, early);
*************** flatten_function (struct cgraph_node *no
*** 1611,1616 ****
--- 1611,1618 ----
      }
  
    node->symbol.aux = NULL;
+   if (!node->global.inlined_to)
+     inline_update_overall_summary (node);
  }
  
  /* Decide on the inlining.  We do so in the topological order to avoid
*************** ipa_inline (void)
*** 1710,1716 ****
  			       inline_summary (node->callers->caller)->size);
  		    }
  
! 		  inline_call (node->callers, true, NULL, NULL);
  		  if (dump_file)
  		    fprintf (dump_file,
  			     " Inlined into %s which now has %i size\n",
--- 1712,1718 ----
  			       inline_summary (node->callers->caller)->size);
  		    }
  
! 		  inline_call (node->callers, true, NULL, NULL, true);
  		  if (dump_file)
  		    fprintf (dump_file,
  			     " Inlined into %s which now has %i size\n",
*************** inline_always_inline_functions (struct c
*** 1768,1776 ****
  	fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
  		 xstrdup (cgraph_node_name (e->callee)),
  		 xstrdup (cgraph_node_name (e->caller)));
!       inline_call (e, true, NULL, NULL);
        inlined = true;
      }
  
    return inlined;
  }
--- 1770,1780 ----
  	fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
  		 xstrdup (cgraph_node_name (e->callee)),
  		 xstrdup (cgraph_node_name (e->caller)));
!       inline_call (e, true, NULL, NULL, false);
        inlined = true;
      }
+   if (inlined)
+     inline_update_overall_summary (node);
  
    return inlined;
  }
*************** early_inline_small_functions (struct cgr
*** 1818,1824 ****
  	fprintf (dump_file, " Inlining %s into %s.\n",
  		 xstrdup (cgraph_node_name (callee)),
  		 xstrdup (cgraph_node_name (e->caller)));
!       inline_call (e, true, NULL, NULL);
        inlined = true;
      }
  
--- 1822,1828 ----
  	fprintf (dump_file, " Inlining %s into %s.\n",
  		 xstrdup (cgraph_node_name (callee)),
  		 xstrdup (cgraph_node_name (e->caller)));
!       inline_call (e, true, NULL, NULL, true);
        inlined = true;
      }
  
Index: ipa-inline.h
===================================================================
*** ipa-inline.h	(revision 190281)
--- ipa-inline.h	(working copy)
*************** void estimate_ipcp_clone_size_and_time (
*** 173,178 ****
--- 173,179 ----
  					int *, int *);
  int do_estimate_growth (struct cgraph_node *);
  void inline_merge_summary (struct cgraph_edge *edge);
+ void inline_update_overall_summary (struct cgraph_node *node);
  int do_estimate_edge_growth (struct cgraph_edge *edge);
  int do_estimate_edge_time (struct cgraph_edge *edge);
  void initialize_growth_caches (void);
*************** void free_growth_caches (void);
*** 180,186 ****
  void compute_inline_parameters (struct cgraph_node *, bool);
  
  /* In ipa-inline-transform.c  */
! bool inline_call (struct cgraph_edge *, bool, VEC (cgraph_edge_p, heap) **, int *);
  unsigned int inline_transform (struct cgraph_node *);
  void clone_inlined_nodes (struct cgraph_edge *e, bool, bool, int *);
  
--- 181,187 ----
  void compute_inline_parameters (struct cgraph_node *, bool);
  
  /* In ipa-inline-transform.c  */
! bool inline_call (struct cgraph_edge *, bool, VEC (cgraph_edge_p, heap) **, int *, bool);
  unsigned int inline_transform (struct cgraph_node *);
  void clone_inlined_nodes (struct cgraph_edge *e, bool, bool, int *);
  
Index: ipa-inline-analysis.c
===================================================================
*** ipa-inline-analysis.c	(revision 190281)
--- ipa-inline-analysis.c	(working copy)
*************** inline_merge_summary (struct cgraph_edge
*** 2680,2692 ****
      }
    remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
  			clause, &toplev_predicate);
-   info->size = 0;
-   info->time = 0;
-   for (i = 0; VEC_iterate (size_time_entry, info->entry, i, e); i++)
-     info->size += e->size, info->time += e->time;
-   estimate_calls_size_and_time (to, &info->size, &info->time,
- 				~(clause_t)(1 << predicate_false_condition),
- 				NULL, NULL);
  
    inline_update_callee_summaries (edge->callee,
  				  inline_edge_summary (edge)->loop_depth);
--- 2680,2685 ----
*************** inline_merge_summary (struct cgraph_edge
*** 2696,2707 ****
    /* Similarly remove param summaries.  */
    VEC_free (inline_param_summary_t, heap, es->param);
    VEC_free (int, heap, operand_map);
  
    info->time = (info->time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE;
    info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
  }
  
- 
  /* Estimate the time cost for the caller when inlining EDGE.
     Only to be called via estimate_edge_time, that handles the
     caching mechanism.
--- 2689,2717 ----
    /* Similarly remove param summaries.  */
    VEC_free (inline_param_summary_t, heap, es->param);
    VEC_free (int, heap, operand_map);
+ }
+ 
+ /* For performance reasons inline_merge_summary is not updating overall size
+    and time.  Recompute it.  */
+ 
+ void
+ inline_update_overall_summary (struct cgraph_node *node)
+ {
+   struct inline_summary *info = inline_summary (node);
+   size_time_entry *e;
+   int i;
  
+   info->size = 0;
+   info->time = 0;
+   for (i = 0; VEC_iterate (size_time_entry, info->entry, i, e); i++)
+     info->size += e->size, info->time += e->time;
+   estimate_calls_size_and_time (node, &info->size, &info->time,
+ 				~(clause_t)(1 << predicate_false_condition),
+ 				NULL, NULL);
    info->time = (info->time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE;
    info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
  }
  
  /* Estimate the time cost for the caller when inlining EDGE.
     Only to be called via estimate_edge_time, that handles the
     caching mechanism.



More information about the Gcc-patches mailing list