[PATCH PR77536] Generate correct profiling information for vectorized loop
Jan Hubicka
hubicka@ucw.cz
Wed Feb 22 14:59:00 GMT 2017
> Hi Honza,
OK,
thanks!
Honza
> There is the 3rd version patch fixing mentioned issues. Is it OK?
>
> Thanks,
> bin
> diff --git a/gcc/testsuite/gcc.dg/vect/pr79347.c b/gcc/testsuite/gcc.dg/vect/pr79347.c
> index 586c638..6825420 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr79347.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr79347.c
> @@ -10,4 +10,4 @@ void n(void)
> a[i]++;
> }
>
> -/* { dg-final { scan-tree-dump-times "Invalid sum of " 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum of " "vect" } } */
> diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
> index 43df29c..22c832a 100644
> --- a/gcc/tree-ssa-loop-manip.c
> +++ b/gcc/tree-ssa-loop-manip.c
> @@ -1093,6 +1093,33 @@ scale_dominated_blocks_in_loop (struct loop *loop, basic_block bb,
> }
> }
>
> +/* Return estimated niter for LOOP after unrolling by FACTOR times. */
> +
> +gcov_type
> +niter_for_unrolled_loop (struct loop *loop, unsigned factor)
> +{
> + gcc_assert (factor != 0);
> + bool profile_p = false;
> + gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
> + gcov_type new_est_niter = est_niter / factor;
> +
> + /* Without profile feedback, loops for which we do not know a better estimate
> + are assumed to roll 10 times. When we unroll such loop, it appears to
> + roll too little, and it may even seem to be cold. To avoid this, we
> + ensure that the created loop appears to roll at least 5 times (but at
> + most as many times as before unrolling). Don't do adjustment if profile
> + feedback is present. */
> + if (new_est_niter < 5 && !profile_p)
> + {
> + if (est_niter < 5)
> + new_est_niter = est_niter;
> + else
> + new_est_niter = 5;
> + }
> +
> + return new_est_niter;
> +}
> +
> /* Unroll LOOP FACTOR times. DESC describes number of iterations of LOOP.
> EXIT is the exit of the loop to that DESC corresponds.
>
> @@ -1170,12 +1197,12 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
> gimple_stmt_iterator bsi;
> use_operand_p op;
> bool ok;
> - unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
> - unsigned new_est_niter, i, prob;
> + unsigned i, prob, prob_entry, scale_unrolled, scale_rest;
> + gcov_type freq_e, freq_h;
> + gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
> unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
> auto_vec<edge> to_remove;
>
> - est_niter = expected_loop_iterations (loop);
> determine_exit_conditions (loop, desc, factor,
> &enter_main_cond, &exit_base, &exit_step,
> &exit_cmp, &exit_bound);
> @@ -1207,22 +1234,6 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
> gcc_assert (new_loop != NULL);
> update_ssa (TODO_update_ssa);
>
> - /* Determine the probability of the exit edge of the unrolled loop. */
> - new_est_niter = est_niter / factor;
> -
> - /* Without profile feedback, loops for that we do not know a better estimate
> - are assumed to roll 10 times. When we unroll such loop, it appears to
> - roll too little, and it may even seem to be cold. To avoid this, we
> - ensure that the created loop appears to roll at least 5 times (but at
> - most as many times as before unrolling). */
> - if (new_est_niter < 5)
> - {
> - if (est_niter < 5)
> - new_est_niter = est_niter;
> - else
> - new_est_niter = 5;
> - }
> -
> /* Prepare the cfg and update the phi nodes. Move the loop exit to the
> loop latch (and make its condition dummy, for the moment). */
> rest = loop_preheader_edge (new_loop)->src;
> @@ -1326,10 +1337,25 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
> /* Ensure that the frequencies in the loop match the new estimated
> number of iterations, and change the probability of the new
> exit edge. */
> - freq_h = loop->header->frequency;
> - freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
> +
> + freq_h = loop->header->count;
> + freq_e = (loop_preheader_edge (loop))->count;
> + /* Use frequency only if counts are zero. */
> + if (freq_h == 0 && freq_e == 0)
> + {
> + freq_h = loop->header->frequency;
> + freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
> + }
> if (freq_h != 0)
> - scale_loop_frequencies (loop, freq_e * (new_est_niter + 1), freq_h);
> + {
> + gcov_type scale;
> + /* Avoid dropping loop body profile counter to 0 because of zero count
> + in loop's preheader. */
> + freq_e = MAX (freq_e, 1);
> + /* This should not overflow. */
> + scale = GCOV_COMPUTE_SCALE (freq_e * (new_est_niter + 1), freq_h);
> + scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
> + }
>
> exit_bb = single_pred (loop->latch);
> new_exit = find_edge (exit_bb, rest);
> diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h
> index 1e7531f..a139050 100644
> --- a/gcc/tree-ssa-loop-manip.h
> +++ b/gcc/tree-ssa-loop-manip.h
> @@ -48,6 +48,7 @@ extern bool gimple_duplicate_loop_to_header_edge (struct loop *, edge,
> int);
> extern bool can_unroll_loop_p (struct loop *loop, unsigned factor,
> struct tree_niter_desc *niter);
> +extern gcov_type niter_for_unrolled_loop (struct loop *, unsigned);
> extern void tree_transform_and_unroll_loop (struct loop *, unsigned,
> edge, struct tree_niter_desc *,
> transform_callback, void *);
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index c5a1627..6bbf816 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -6718,6 +6718,50 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
> return false;
> }
>
> +/* Scale profiling counters by estimation for LOOP which is vectorized
> + by factor VF. */
> +
> +static void
> +scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
> +{
> + edge preheader = loop_preheader_edge (loop);
> + /* Reduce loop iterations by the vectorization factor. */
> + gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
> + gcov_type freq_h = loop->header->count, freq_e = preheader->count;
> +
> + /* Use frequency only if counts are zero. */
> + if (freq_h == 0 && freq_e == 0)
> + {
> + freq_h = loop->header->frequency;
> + freq_e = EDGE_FREQUENCY (preheader);
> + }
> + if (freq_h != 0)
> + {
> + gcov_type scale;
> +
> + /* Avoid dropping loop body profile counter to 0 because of zero count
> + in loop's preheader. */
> + freq_e = MAX (freq_e, 1);
> + /* This should not overflow. */
> + scale = GCOV_COMPUTE_SCALE (freq_e * (new_est_niter + 1), freq_h);
> + scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
> + }
> +
> + basic_block exit_bb = single_pred (loop->latch);
> + edge exit_e = single_exit (loop);
> + exit_e->count = loop_preheader_edge (loop)->count;
> + exit_e->probability = REG_BR_PROB_BASE / (new_est_niter + 1);
> +
> + edge exit_l = single_pred_edge (loop->latch);
> + int prob = exit_l->probability;
> + exit_l->probability = REG_BR_PROB_BASE - exit_e->probability;
> + exit_l->count = exit_bb->count - exit_e->count;
> + if (exit_l->count < 0)
> + exit_l->count = 0;
> + if (prob > 0)
> + scale_bbs_frequencies_int (&loop->latch, 1, exit_l->probability, prob);
> +}
> +
> /* Function vect_transform_loop.
>
> The analysis phase has determined that the loop is vectorizable.
> @@ -6743,16 +6787,10 @@ vect_transform_loop (loop_vec_info loop_vinfo)
> bool transform_pattern_stmt = false;
> bool check_profitability = false;
> int th;
> - /* Record number of iterations before we started tampering with the profile. */
> - gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
>
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
>
> - /* If profile is inprecise, we have chance to fix it up. */
> - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
> - expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
> -
> /* Use the more conservative vectorization threshold. If the number
> of iterations is constant assume the cost check has been performed
> by our caller. If the threshold makes all loops profitable that
> @@ -7068,9 +7106,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
>
> slpeel_make_loop_iterate_ntimes (loop, niters_vector);
>
> - /* Reduce loop iterations by the vectorization factor. */
> - scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vf),
> - expected_iterations / vf);
> + scale_profile_for_vect_loop (loop, vf);
> +
> /* The minimum number of iterations performed by the epilogue. This
> is 1 when peeling for gaps because we always need a final scalar
> iteration. */
More information about the Gcc-patches
mailing list