This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Time profiler - phase 2
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: Martin Liška <marxin dot liska at gmail dot com>
- Cc: Jan Hubicka <hubicka at ucw dot cz>, gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Mon, 16 Dec 2013 00:21:04 +0100
- Subject: Re: [PATCH] Time profiler - phase 2
- Authentication-results: sourceware.org; auth=none
- References: <20131117233727 dot GD2268 at kam dot mff dot cuni dot cz> <CAObPJ3OmMabfRtMFEFrUWddcYxnZ7VGeGE0CL=6bkQzdxF8vow at mail dot gmail dot com> <20131118101656 dot GA28536 at kam dot mff dot cuni dot cz> <CAObPJ3M7maO8FmJHMpKFv-sqr=ch=gUWRDKdBzyunmAYDfzpHw at mail dot gmail dot com> <CAObPJ3OKRZhb8UYEtvq3RPt3Xw_QzGEpx_vFjFGibtiLXZWSVQ at mail dot gmail dot com> <20131205133454 dot GA15832 at kam dot mff dot cuni dot cz> <20131205133845 dot GC15832 at kam dot mff dot cuni dot cz> <CAObPJ3MfLHJ0T-5puiLCKtagaySnP8AefLJjpZmCryCxCEhYnA at mail dot gmail dot com> <20131205213205 dot GA11011 at kam dot mff dot cuni dot cz> <CAObPJ3OBCKZthHZQFM-jMQjiNvD-xYXqSJKD7-f+1ptHtGyNLg at mail dot gmail dot com>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 93e857df..d5a0ac8 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,14 @@
> +2013-12-15 Martin Liska <marxin.liska@gmail.com>
> + Jan Hubicka <jh@suse.cz>
> +
> + * cgraphunit.c (node_cmp): New function.
> + (expand_all_functions): Function ordering added.
> + * common.opt: New profile based function reordering flag introduced.
> + * lto-partition.c: Support for time profile added.
> + * lto.c: Likewise.
> + * predict.c (handle_missing_profiles): Time profile handled in
> + missing profiles.
> +
OK, thanks, with the changes bellow!
(I tought this patch was already in! Also please be careful about
applying the changes - it seems that in the previous commit you
M
omitted some)
> @@ -1842,11 +1859,14 @@ expand_function (struct cgraph_node *node)
> to use subsections to make the output functions appear in top-down
> order). */
>
> +
Bogus whitespace
> static void
> expand_all_functions (void)
> {
> struct cgraph_node *node;
> struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
> +
> + unsigned int expanded_func_count = 0, profiled_func_count = 0;
> int order_pos, new_order_pos = 0;
> int i;
>
> @@ -1859,20 +1879,39 @@ expand_all_functions (void)
> if (order[i]->process)
> order[new_order_pos++] = order[i];
>
> + if (flag_profile_reorder_functions)
> + qsort (order, new_order_pos, sizeof (struct cgraph_node *), node_cmp);
> +
> for (i = new_order_pos - 1; i >= 0; i--)
> {
> node = order[i];
> +
> if (node->process)
> {
> + expanded_func_count++;
> + if(node->tp_first_run)
> + profiled_func_count++;
> +
> + if (cgraph_dump_file)
> + fprintf (cgraph_dump_file, "Time profile order in expand_all_functions:%s:%d\n", node->asm_name (), node->tp_first_run);
> +
> node->process = 0;
> expand_function (node);
> }
> }
> +
> + if (in_lto_p && dump_file)
> + fprintf (dump_file, "Expanded functions with time profile (%s):%u/%u\n",
> + main_input_filename, profiled_func_count, expanded_func_count);
> +
> + if (cgraph_dump_file && flag_profile_reorder_functions && in_lto_p)
> + fprintf (cgraph_dump_file, "Expanded functions with time profile:%u/%u\n",
> + profiled_func_count, expanded_func_count);
Make the dumps unconditoinal, I do not see why they should be in_lto_p here.
> @@ -689,7 +713,6 @@ lto_balanced_map (void)
> best_i = i;
> best_n_nodes = lto_symtab_encoder_size (partition->encoder);
> best_total_size = total_size;
> - best_varpool_pos = varpool_pos;
> }
> if (cgraph_dump_file)
> fprintf (cgraph_dump_file, "Step %i: added %s/%i, size %i, cost %i/%i "
> @@ -707,7 +730,6 @@ lto_balanced_map (void)
> fprintf (cgraph_dump_file, "Unwinding %i insertions to step %i\n",
> i - best_i, best_i);
> undo_partition (partition, best_n_nodes);
> - varpool_pos = best_varpool_pos;
> }
> i = best_i;
> /* When we are finished, avoid creating empty partition. */
I already asked you to remove these changes - they revert earlier fix.
> diff --git a/gcc/predict.c b/gcc/predict.c
> index a5ad34f..1826a06 100644
> --- a/gcc/predict.c
> +++ b/gcc/predict.c
> @@ -2839,12 +2839,24 @@ handle_missing_profiles (void)
> {
> struct cgraph_edge *e;
> gcov_type call_count = 0;
> + gcov_type max_tp_first_run = 0;
> struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
>
> if (node->count)
> continue;
> for (e = node->callers; e; e = e->next_caller)
> + {
> call_count += e->count;
> +
> + if (e->caller->tp_first_run > max_tp_first_run)
> + max_tp_first_run = e->caller->tp_first_run;
> + }
> +
> + /* If time profile is missing, let assign the maximum that comes from
> + caller functions. */
> + if (!node->tp_first_run && max_tp_first_run)
> + node->tp_first_run = max_tp_first_run + 1;
> +
I believe you also need minizming node->tp_first_run in ipa_merge_profiles.
> if (call_count
> && fn && fn->cfg
> && (call_count * unlikely_count_fraction >= profile_info->runs))
> diff --git a/gcc/varasm.c b/gcc/varasm.c
> index 5c5025a..f34946c 100644
> --- a/gcc/varasm.c
> +++ b/gcc/varasm.c
> @@ -552,7 +552,14 @@ default_function_section (tree decl, enum node_frequency freq,
> unlikely executed (this happens especially with function splitting
> where we can split away unnecessary parts of static constructors. */
> if (startup && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
> - return get_named_text_section (decl, ".text.startup", NULL);
> + {
> + /* If we do have a profile or(and) LTO phase is executed, we do not need
> + these ELF section. */
> + if (!in_lto_p || !flag_profile_values)
> + return get_named_text_section (decl, ".text.startup", NULL);
> + else
> + return NULL;
> + }
>
> /* Similarly for exit. */
> if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
> @@ -564,7 +571,10 @@ default_function_section (tree decl, enum node_frequency freq,
> case NODE_FREQUENCY_UNLIKELY_EXECUTED:
> return get_named_text_section (decl, ".text.unlikely", NULL);
> case NODE_FREQUENCY_HOT:
> - return get_named_text_section (decl, ".text.hot", NULL);
> + /* If we do have a profile or(and) LTO phase is executed, we do not need
> + these ELF section. */
> + if (!in_lto_p || !flag_profile_values)
> + return get_named_text_section (decl, ".text.hot", NULL);
> default:
> return NULL;
Please duplicate these changes into config/darwin.c that has identical code in it.
OK with those changes.
Honza