This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH GCC]Improve candidate selecting in IVOPT
- From: Sebastian Pop <sebpop at gmail dot com>
- To: Bin Cheng <bin dot cheng at arm dot com>
- Cc: gcc-patches at gcc dot gnu dot org, ook at ucw dot cz
- Date: Tue, 30 Sep 2014 22:31:11 +0000
- Subject: Re: [PATCH GCC]Improve candidate selecting in IVOPT
- Authentication-results: sourceware.org; auth=none
- References: <000401cfdc95$3358d010$9a0a7030$ at arm dot com>
Bin Cheng wrote:
> Hi,
> As analyzed in PR62178, IVOPT can't find the optimal iv set for that case.
> The problem with current heuristic algorithm is it only replaces candidate
> with ones not in current solution one by one, starting from small solution.
> This patch adds another heuristic which starts from assigning the best
> candidate for each iv use, then replaces candidate with ones in the current
> solution.
> Before this patch, there are two runs of find_optimal_set_1 to find the
> optimal iv sets, we name them as set_a and set_b. After this patch we will
> have set_c. At last, IVOPT chooses the best one from set_a/set_b/set_c. To
> prove that this patch is necessary, I collected instrumental data for gcc
> bootstrap, spec2k, eembc and can confirm for some cases only the newly added
> heuristic can find the optimal iv set. The number of these cases in which
> set_c is the optimal one is on the same level of set_b.
> As for the compilation time, the newly added function actually is one
> iteration of previous selection algorithm, it should be much faster than
> previous process.
>
> I also added one target dependent test case.
> Bootstrap and test on x86_64, test on aarch64. Any comments?
I verified that the patch fixes the performance regression on intmm. I have
seen improvements to other benchmarks, and very small degradations that could
very well be noise.
Thanks for fixing this perf issue!
Sebastian
>
> 2014-09-30 Bin Cheng <bin.cheng@arm.com>
>
> PR tree-optimization/62178
> * tree-ssa-loop-ivopts.c (enum sel_type): New.
> (iv_ca_add_use): Add parameter RELATED_P and find the best cand
> for iv use if it's true.
> (try_add_cand_for, get_initial_solution): Change paramter ORIGINALP
> to SELECT_TYPE and handle it.
> (find_optimal_iv_set_1): Ditto.
> (try_prune_iv_set, find_optimal_iv_set_2): New functions.
> (find_optimal_iv_set): Call find_optimal_iv_set_2 and choose the
> best candidate set.
>
> gcc/testsuite/ChangeLog
> 2014-09-30 Bin Cheng <bin.cheng@arm.com>
>
> PR tree-optimization/62178
> * gcc.target/aarch64/pr62178.c: New test.
> Index: gcc/testsuite/gcc.target/aarch64/pr62178.c
> ===================================================================
> --- gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0)
> +++ gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0)
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
> +
> +void Intmm (int run) {
> + int i, j, k;
> +
> + for ( i = 1; i <= 30; i++ )
> + for ( j = 1; j <= 30; j++ ) {
> + r[i][j] = 0;
> + for(k = 1; k <= 30; k++ )
> + r[i][j] += a[i][k]*b[k][j];
> + }
> +}
> +
> +/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */
> Index: gcc/tree-ssa-loop-ivopts.c
> ===================================================================
> --- gcc/tree-ssa-loop-ivopts.c (revision 215113)
> +++ gcc/tree-ssa-loop-ivopts.c (working copy)
> @@ -254,6 +254,14 @@ struct iv_inv_expr_ent
> hashval_t hash;
> };
>
> +/* Types used to start selecting the candidate for each IV use. */
> +enum sel_type
> +{
> + SEL_ORIGINAL, /* Start selecting from original cands. */
> + SEL_IMPORTANT, /* Start selecting from important cands. */
> + SEL_RELATED /* Start selecting from related cands. */
> +};
> +
> /* The data used by the induction variable optimizations. */
>
> typedef struct iv_use *iv_use_p;
> @@ -5417,22 +5425,51 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_
> }
>
> /* Extend set IVS by expressing USE by some of the candidates in it
> - if possible. Consider all important candidates if candidates in
> - set IVS don't give any result. */
> + if possible. If RELATED_P is FALSE, consider all important
> + candidates if candidates in set IVS don't give any result;
> + otherwise, try to find the best one from related or all candidates,
> + depending on consider_all_candidates. */
>
> static void
> iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
> - struct iv_use *use)
> + struct iv_use *use, bool related_p)
> {
> struct cost_pair *best_cp = NULL, *cp;
> bitmap_iterator bi;
> unsigned i;
> struct iv_cand *cand;
>
> - gcc_assert (ivs->upto >= use->id);
> + gcc_assert (ivs->upto == use->id);
> ivs->upto++;
> ivs->bad_uses++;
>
> + if (related_p)
> + {
> + if (data->consider_all_candidates)
> + {
> + for (i = 0; i < n_iv_cands (data); i++)
> + {
> + cand = iv_cand (data, i);
> + cp = get_use_iv_cost (data, use, cand);
> + if (cheaper_cost_pair (cp, best_cp))
> + best_cp = cp;
> + }
> + }
> + else
> + {
> + EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
> + {
> + cand = iv_cand (data, i);
> + cp = get_use_iv_cost (data, use, cand);
> + if (cheaper_cost_pair (cp, best_cp))
> + best_cp = cp;
> + }
> + }
> +
> + iv_ca_set_cp (data, ivs, use, best_cp);
> + return;
> + }
> +
> EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
> {
> cand = iv_cand (data, i);
> @@ -5440,7 +5477,7 @@ iv_ca_add_use (struct ivopts_data *data, struct iv
> if (cheaper_cost_pair (cp, best_cp))
> best_cp = cp;
> }
> -
> +
> if (best_cp == NULL)
> {
> EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
> @@ -5869,13 +5906,15 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c
> }
>
> /* Tries to extend the sets IVS in the best possible way in order
> - to express the USE. If ORIGINALP is true, prefer candidates from
> - the original set of IVs, otherwise favor important candidates not
> - based on any memory object. */
> + to express the USE. If SELECT_TYPE is SEL_ORIGINAL, prefer
> + candidates from the original set of IVs; if it's SEL_IMPORTANT,
> + favor important candidates not based on any memory object;
> + if it's SEL_RELATED, assign the best candidate to each use if
> + possible. */
>
> static bool
> try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
> - struct iv_use *use, bool originalp)
> + struct iv_use *use, enum sel_type select_type)
> {
> comp_cost best_cost, act_cost;
> unsigned i;
> @@ -5883,8 +5922,9 @@ try_add_cand_for (struct ivopts_data *data, struct
> struct iv_cand *cand;
> struct iv_ca_delta *best_delta = NULL, *act_delta;
> struct cost_pair *cp;
> + bool originalp = (select_type == SEL_ORIGINAL);
>
> - iv_ca_add_use (data, ivs, use);
> + iv_ca_add_use (data, ivs, use, select_type == SEL_RELATED);
> best_cost = iv_ca_cost (ivs);
> cp = iv_ca_cand_for_use (ivs, use);
> if (cp)
> @@ -5892,7 +5932,16 @@ try_add_cand_for (struct ivopts_data *data, struct
> best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
> iv_ca_set_no_cp (data, ivs, use);
> }
> + if (select_type == SEL_RELATED)
> + {
> + /* We should have selected the best candidate if possible. */
> + gcc_assert (cp != NULL || infinite_cost_p (best_cost));
> + iv_ca_delta_commit (data, ivs, best_delta, true);
> + iv_ca_delta_free (&best_delta);
>
> + return !infinite_cost_p (best_cost);
> + }
> +
> /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
> first try important candidates not based on any memory object. Only if
> this fails, try the specific ones. Rationale -- in loops with many
> @@ -5983,16 +6032,17 @@ try_add_cand_for (struct ivopts_data *data, struct
> return !infinite_cost_p (best_cost);
> }
>
> -/* Finds an initial assignment of candidates to uses. */
> +/* Finds an initial assignment of candidates to uses according to
> + SELECT_TYPE. */
>
> static struct iv_ca *
> -get_initial_solution (struct ivopts_data *data, bool originalp)
> +get_initial_solution (struct ivopts_data *data, enum sel_type select_type)
> {
> struct iv_ca *ivs = iv_ca_new (data);
> unsigned i;
>
> for (i = 0; i < n_iv_uses (data); i++)
> - if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
> + if (!try_add_cand_for (data, ivs, iv_use (data, i), select_type))
> {
> iv_ca_free (&ivs);
> return NULL;
> @@ -6059,17 +6109,43 @@ try_improve_iv_set (struct ivopts_data *data, stru
> return true;
> }
>
> +/* Tries to prune set of induction variables IVS. */
> +
> +static bool
> +try_prune_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
> +{
> + comp_cost new_cost, old_cost;
> + struct iv_ca_delta *delta = NULL;
> +
> + if (iv_ca_n_cands (ivs) > ALWAYS_PRUNE_CAND_SET_BOUND)
> + return false;
> +
> + old_cost = iv_ca_cost (ivs);
> + /* Try pruning the candidates from the set. */
> + new_cost = iv_ca_prune (data, ivs, NULL, &delta);
> +
> + /* Nothing more we can do. */
> + if (!delta
> + || compare_costs (new_cost, old_cost) >= 0)
> + return false;
> +
> + iv_ca_delta_commit (data, ivs, delta, true);
> + gcc_assert (compare_costs (new_cost, iv_ca_cost (ivs)) == 0);
> + iv_ca_delta_free (&delta);
> + return true;
> +}
> +
> /* Attempts to find the optimal set of induction variables. We do simple
> greedy heuristic -- we try to replace at most one candidate in the selected
> solution and remove the unused ivs while this improves the cost. */
>
> static struct iv_ca *
> -find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
> +find_optimal_iv_set_1 (struct ivopts_data *data, enum sel_type select_type)
> {
> struct iv_ca *set;
>
> /* Get the initial solution. */
> - set = get_initial_solution (data, originalp);
> + set = get_initial_solution (data, select_type);
> if (!set)
> {
> if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -6095,25 +6171,72 @@ static struct iv_ca *
> return set;
> }
>
> +/* Attempts to find the optimal set of induction variables. Different
> + with find_optimal_iv_set_1 -- this function uses greedy heuristic
> + that firstly assigns the best candidate to each use, then tries to
> + prune the solution. */
> +
> static struct iv_ca *
> +find_optimal_iv_set_2 (struct ivopts_data *data, enum sel_type select_type)
> +{
> + struct iv_ca *set;
> +
> + /* Get the initial solution. */
> + set = get_initial_solution (data, select_type);
> + if (!set)
> + {
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
> + return NULL;
> + }
> +
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + fprintf (dump_file, "Initial set of candidates:\n");
> + iv_ca_dump (data, dump_file, set);
> + }
> +
> + while (try_prune_iv_set (data, set))
> + {
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + fprintf (dump_file, "Pruned to:\n");
> + iv_ca_dump (data, dump_file, set);
> + }
> + }
> +
> + return set;
> +}
> +
> +/* Find the optimal IV set by using two different heuristic strategies.
> + The first strategy starts with small IV solution, tries to replace at
> + most one candidate with others not in the current solution at one
> + time. The second strategy starts with large IV set, tries to replace
> + candidates with others in the current solution. */
> +
> +static struct iv_ca *
> find_optimal_iv_set (struct ivopts_data *data)
> {
> unsigned i;
> - struct iv_ca *set, *origset;
> + struct iv_ca *set, *origset, *pruneset;
> struct iv_use *use;
> - comp_cost cost, origcost;
> + comp_cost cost, origcost, prunecost;
>
> - /* Determine the cost based on a strategy that starts with original IVs,
> - and try again using a strategy that prefers candidates not based
> - on any IVs. */
> - origset = find_optimal_iv_set_1 (data, true);
> - set = find_optimal_iv_set_1 (data, false);
> + /* Try to find optimal IV set using the first strategy and starting
> + with original IVS. */
> + origset = find_optimal_iv_set_1 (data, SEL_ORIGINAL);
> + /* Try to find optimal IV set using the first strategy and starting
> + with important IVS. */
> + set = find_optimal_iv_set_1 (data, SEL_IMPORTANT);
> + /* Try to find optimal IV set using the second strategy. */
> + pruneset = find_optimal_iv_set_2 (data, SEL_RELATED);
>
> - if (!origset && !set)
> + if (!origset && !set && !pruneset)
> return NULL;
>
> origcost = origset ? iv_ca_cost (origset) : infinite_cost;
> cost = set ? iv_ca_cost (set) : infinite_cost;
> + prunecost = pruneset ? iv_ca_cost (pruneset) : infinite_cost;
>
> if (dump_file && (dump_flags & TDF_DETAILS))
> {
> @@ -6121,9 +6244,12 @@ find_optimal_iv_set (struct ivopts_data *data)
> origcost.cost, origcost.complexity);
> fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
> cost.cost, cost.complexity);
> + fprintf (dump_file, "Pruned cost %d (complexity %d)\n\n",
> + prunecost.cost, prunecost.complexity);
> }
>
> - /* Choose the one with the best cost. */
> + /* Choose the one with the best cost among the three candidates. */
> +
> if (compare_costs (origcost, cost) <= 0)
> {
> if (set)
> @@ -6133,6 +6259,15 @@ find_optimal_iv_set (struct ivopts_data *data)
> else if (origset)
> iv_ca_free (&origset);
>
> + if (compare_costs (prunecost, cost) < 0)
> + {
> + if (set)
> + iv_ca_free (&set);
> + set = pruneset;
> + }
> + else if (pruneset)
> + iv_ca_free (&pruneset);
> +
> for (i = 0; i < n_iv_uses (data); i++)
> {
> use = iv_use (data, i);