[PATCH] Time profiler - phase 2
Jan Hubicka
hubicka@ucw.cz
Sat Nov 16 12:59:00 GMT 2013
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index c566a85..1562098 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,15 @@
> +2013-11-13 Martin Liska <marxin.liska@gmail.com>
> + Jan Hubicka <jh@suse.cz>
> +
> + * cgraphunit.c (node_cmp): New function.
> + (expand_all_functions): Function ordering added.
> + * common.opt: New profile based function reordering flag introduced.
> + * coverage.c (get_coverage_counts): Wrong profile handled.
> + * ipa.c (cgraph_externally_visible_p): New late flag introduced.
> + * lto-partition.c: Support for time profile added.
> + * lto.c: Likewise.
> + * value-prof.c: Histogram instrumentation switch added.
> +
> 2013-11-13 Vladimir Makarov <vmakarov@redhat.com>
>
> PR rtl-optimization/59036
> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> index 4765e6a..7cdd9a4 100644
> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -1821,6 +1821,17 @@ expand_function (struct cgraph_node *node)
> ipa_remove_all_references (&node->ref_list);
> }
>
> +/* Node comparer that is responsible for the order that corresponds
> + to time when a function was launched for the first time. */
> +
> +static int
> +node_cmp (const void *pa, const void *pb)
> +{
> + const struct cgraph_node *a = *(const struct cgraph_node * const *) pa;
> + const struct cgraph_node *b = *(const struct cgraph_node * const *) pb;
> +
> + return b->tp_first_run - a->tp_first_run;
Please stabilize this by using node->order when tp_first_run is equivalent.
Later we ought to use better heuristic here, but order may be good enough to
start with.
> diff --git a/gcc/ipa.c b/gcc/ipa.c
> index a11b1c7..d92a332 100644
> --- a/gcc/ipa.c
> +++ b/gcc/ipa.c
> @@ -761,10 +761,14 @@ cgraph_externally_visible_p (struct cgraph_node *node,
> This improves code quality and we know we will duplicate them at most twice
> (in the case that we are not using plugin and link with object file
> implementing same COMDAT) */
> - if ((in_lto_p || whole_program)
> - && DECL_COMDAT (node->decl)
> - && comdat_can_be_unshared_p (node))
> - return false;
> + if ((in_lto_p || whole_program || profile_arc_flag)
> + && DECL_COMDAT (node->decl)
> + && comdat_can_be_unshared_p (node))
> + {
> + gcc_checking_assert (cgraph_function_body_availability (node)
> + > AVAIL_OVERWRITABLE);
> + return false;
> + }
>
> /* When doing link time optimizations, hidden symbols become local. */
> if (in_lto_p
> @@ -932,7 +936,7 @@ function_and_variable_visibility (bool whole_program)
> }
> gcc_assert ((!DECL_WEAK (node->decl)
> && !DECL_COMDAT (node->decl))
> - || TREE_PUBLIC (node->decl)
> + || TREE_PUBLIC (node->decl)
> || node->weakref
> || DECL_EXTERNAL (node->decl));
> if (cgraph_externally_visible_p (node, whole_program))
> @@ -949,7 +953,7 @@ function_and_variable_visibility (bool whole_program)
> && node->definition && !node->weakref
> && !DECL_EXTERNAL (node->decl))
> {
> - gcc_assert (whole_program || in_lto_p
> + gcc_assert (whole_program || in_lto_p || profile_arc_flag
> || !TREE_PUBLIC (node->decl));
> node->unique_name = ((node->resolution == LDPR_PREVAILING_DEF_IRONLY
> || node->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP)
These changes are unrelated, please remove them.
> @@ -395,6 +397,20 @@ node_cmp (const void *pa, const void *pb)
> {
> const struct cgraph_node *a = *(const struct cgraph_node * const *) pa;
> const struct cgraph_node *b = *(const struct cgraph_node * const *) pb;
> +
> + /* Profile reorder flag enables function reordering based on first execution
> + of a function. All functions with profile are placed in ascending
> + order at the beginning. */
> +
> + if (flag_profile_reorder_functions)
&& a->tp_first_run != b->tp_first_run
> + {
> + if (a->tp_first_run && b->tp_first_run)
> + return a->tp_first_run - b->tp_first_run;
> +
> + if (a->tp_first_run || b->tp_first_run)
> + return b->tp_first_run - a->tp_first_run;
Drop a comment explaining the logic here ;)
> @@ -449,7 +465,7 @@ void
> lto_balanced_map (void)
> {
> int n_nodes = 0;
> - int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
> + int n_varpool_nodes = 0, varpool_pos = 0;
> struct cgraph_node **order = XNEWVEC (struct cgraph_node *, cgraph_max_uid);
> struct varpool_node **varpool_order = NULL;
> int i;
> @@ -481,10 +497,13 @@ lto_balanced_map (void)
> get better about minimizing the function bounday, but until that
> things works smoother if we order in source order. */
> qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp);
> +
> + if (cgraph_dump_file)
> + for(i = 0; i < n_nodes; i++)
> + fprintf (cgraph_dump_file, "Balanced map symbol order:%s:%u\n", cgraph_node_asm_name (order[i]), order[i]->tp_first_run);
> +
> if (!flag_toplevel_reorder)
> {
> - qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp);
> -
> FOR_EACH_VARIABLE (vnode)
> if (get_symbol_class (vnode) == SYMBOL_PARTITION)
> n_varpool_nodes++;
Good catch (the redundant qsort)
> @@ -683,7 +702,6 @@ lto_balanced_map (void)
> best_i = i;
> best_n_nodes = lto_symtab_encoder_size (partition->encoder);
> best_total_size = total_size;
> - best_varpool_pos = varpool_pos;
> }
> if (cgraph_dump_file)
> fprintf (cgraph_dump_file, "Step %i: added %s/%i, size %i, cost %i/%i "
> @@ -701,7 +719,6 @@ lto_balanced_map (void)
> fprintf (cgraph_dump_file, "Unwinding %i insertions to step %i\n",
> i - best_i, best_i);
> undo_partition (partition, best_n_nodes);
> - varpool_pos = best_varpool_pos;
> }
> i = best_i;
> /* When we are finished, avoid creating empty partition. */
This actually reverts a bugfix, so please remove the two changes above.
> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> index 62856d0..2401c76 100644
> --- a/gcc/lto/lto.c
> +++ b/gcc/lto/lto.c
> @@ -2453,9 +2453,12 @@ lto_wpa_write_files (void)
> /* Sort partitions by size so small ones are compiled last.
> FIXME: Even when not reordering we may want to output one list for parallel make
> and other for final link command. */
> - ltrans_partitions.qsort (flag_toplevel_reorder
> +
> + if (!flag_profile_reorder_functions || !flag_profile_use)
> + ltrans_partitions.qsort (flag_toplevel_reorder
> ? cmp_partitions_size
> : cmp_partitions_order);
> +
Add a FIXME explaining that we ought to produce two list - one for Make parallelizm and one for final link time.
lets work on this incrementally.
> diff --git a/gcc/varasm.c b/gcc/varasm.c
> index 2226912..758367f 100644
> --- a/gcc/varasm.c
> +++ b/gcc/varasm.c
> @@ -548,7 +548,14 @@ default_function_section (tree decl, enum node_frequency freq,
> unlikely executed (this happens especially with function splitting
> where we can split away unnecessary parts of static constructors. */
> if (startup && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
> - return get_named_text_section (decl, ".text.startup", NULL);
> + {
> + /* If we do have a profile or(and) LTO phase is executed, we do not need
> + these ELF section. */
> + if (!in_lto_p || !flag_profile_values)
> + return get_named_text_section (decl, ".text.startup", NULL);
> + else
> + return NULL;
> + }
>
> /* Similarly for exit. */
> if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
> @@ -560,7 +567,10 @@ default_function_section (tree decl, enum node_frequency freq,
> case NODE_FREQUENCY_UNLIKELY_EXECUTED:
> return get_named_text_section (decl, ".text.unlikely", NULL);
> case NODE_FREQUENCY_HOT:
> - return get_named_text_section (decl, ".text.hot", NULL);
> + /* If we do have a profile or(and) LTO phase is executed, we do not need
> + these ELF section. */
> + if (!in_lto_p || !flag_profile_values)
> + return get_named_text_section (decl, ".text.hot", NULL);
> default:
> return NULL;
> }
Lets handle this in separate patch.
The patch is missing documentation in doc/invoke.texi. You need to document
the new command line option and update documentation of -fprofile-use. Please
add it and send an update patch.
Also Teresa recently sent patch for resetting profiles of functions where we clearly missed the profile.
Please add there a code filling in tp_first_run from largest first_run of the callers with non-0 count.
Thanks,
Honza
More information about the Gcc-patches
mailing list