[PATCH GCC]Improve candidate selecting in IVOPT

Bin.Cheng amker.cheng@gmail.com
Wed Oct 8 03:37:00 GMT 2014


Ping.  Any review comments?

Thanks,
bin

On Wed, Oct 1, 2014 at 6:31 AM, Sebastian Pop <sebpop@gmail.com> wrote:
> Bin Cheng wrote:
>> Hi,
>> As analyzed in PR62178, IVOPT can't find the optimal iv set for that case.
>> The problem with current heuristic algorithm is it only replaces candidate
>> with ones not in current solution one by one, starting from small solution.
>> This patch adds another heuristic which starts from assigning the best
>> candidate for each iv use, then replaces candidate with ones in the current
>> solution.
>> Before this patch, there are two runs of find_optimal_set_1 to find the
>> optimal iv sets, we name them as set_a and set_b.  After this patch we will
>> have set_c.  At last, IVOPT chooses the best one from set_a/set_b/set_c.  To
>> prove that this patch is necessary, I collected instrumental data for gcc
>> bootstrap, spec2k, eembc and can confirm for some cases only the newly added
>> heuristic can find the optimal iv set.  The number of these cases in which
>> set_c is the optimal one is on the same level of set_b.
>> As for the compilation time, the newly added function actually is one
>> iteration of previous selection algorithm, it should be much faster than
>> previous process.
>>
>> I also added one target dependent test case.
>> Bootstrap and test on x86_64, test on aarch64.  Any comments?
>
> I verified that the patch fixes the performance regression on intmm.  I have
> seen improvements to other benchmarks, and very small degradations that could
> very well be noise.
>
> Thanks for fixing this perf issue!
> Sebastian
>
>>
>> 2014-09-30  Bin Cheng  <bin.cheng@arm.com>
>>
>>       PR tree-optimization/62178
>>       * tree-ssa-loop-ivopts.c (enum sel_type): New.
>>       (iv_ca_add_use): Add parameter RELATED_P and find the best cand
>>       for iv use if it's true.
>>       (try_add_cand_for, get_initial_solution): Change paramter ORIGINALP
>>       to SELECT_TYPE and handle it.
>>       (find_optimal_iv_set_1): Ditto.
>>       (try_prune_iv_set, find_optimal_iv_set_2): New functions.
>>       (find_optimal_iv_set): Call find_optimal_iv_set_2 and choose the
>>       best candidate set.
>>
>> gcc/testsuite/ChangeLog
>> 2014-09-30  Bin Cheng  <bin.cheng@arm.com>
>>
>>       PR tree-optimization/62178
>>       * gcc.target/aarch64/pr62178.c: New test.
>
>> Index: gcc/testsuite/gcc.target/aarch64/pr62178.c
>> ===================================================================
>> --- gcc/testsuite/gcc.target/aarch64/pr62178.c        (revision 0)
>> +++ gcc/testsuite/gcc.target/aarch64/pr62178.c        (revision 0)
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
>> +
>> +void Intmm (int run) {
>> +  int i, j, k;
>> +
>> +  for ( i = 1; i <= 30; i++ )
>> +    for ( j = 1; j <= 30; j++ ) {
>> +      r[i][j] = 0;
>> +      for(k = 1; k <= 30; k++ )
>> +        r[i][j] += a[i][k]*b[k][j];
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */
>> Index: gcc/tree-ssa-loop-ivopts.c
>> ===================================================================
>> --- gcc/tree-ssa-loop-ivopts.c        (revision 215113)
>> +++ gcc/tree-ssa-loop-ivopts.c        (working copy)
>> @@ -254,6 +254,14 @@ struct iv_inv_expr_ent
>>    hashval_t hash;
>>  };
>>
>> +/* Types used to start selecting the candidate for each IV use.  */
>> +enum sel_type
>> +{
>> +  SEL_ORIGINAL,              /* Start selecting from original cands.  */
>> +  SEL_IMPORTANT,     /* Start selecting from important cands.  */
>> +  SEL_RELATED                /* Start selecting from related cands.  */
>> +};
>> +
>>  /* The data used by the induction variable optimizations.  */
>>
>>  typedef struct iv_use *iv_use_p;
>> @@ -5417,22 +5425,51 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_
>>  }
>>
>>  /* Extend set IVS by expressing USE by some of the candidates in it
>> -   if possible.  Consider all important candidates if candidates in
>> -   set IVS don't give any result.  */
>> +   if possible.  If RELATED_P is FALSE, consider all important
>> +   candidates if candidates in set IVS don't give any result;
>> +   otherwise, try to find the best one from related or all candidates,
>> +   depending on consider_all_candidates.  */
>>
>>  static void
>>  iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
>> -            struct iv_use *use)
>> +            struct iv_use *use, bool related_p)
>>  {
>>    struct cost_pair *best_cp = NULL, *cp;
>>    bitmap_iterator bi;
>>    unsigned i;
>>    struct iv_cand *cand;
>>
>> -  gcc_assert (ivs->upto >= use->id);
>> +  gcc_assert (ivs->upto == use->id);
>>    ivs->upto++;
>>    ivs->bad_uses++;
>>
>> +  if (related_p)
>> +    {
>> +      if (data->consider_all_candidates)
>> +     {
>> +       for (i = 0; i < n_iv_cands (data); i++)
>> +         {
>> +           cand = iv_cand (data, i);
>> +           cp = get_use_iv_cost (data, use, cand);
>> +           if (cheaper_cost_pair (cp, best_cp))
>> +             best_cp = cp;
>> +         }
>> +     }
>> +      else
>> +     {
>> +       EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
>> +         {
>> +           cand = iv_cand (data, i);
>> +           cp = get_use_iv_cost (data, use, cand);
>> +           if (cheaper_cost_pair (cp, best_cp))
>> +             best_cp = cp;
>> +            }
>> +     }
>> +
>> +      iv_ca_set_cp (data, ivs, use, best_cp);
>> +      return;
>> +    }
>> +
>>    EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
>>      {
>>        cand = iv_cand (data, i);
>> @@ -5440,7 +5477,7 @@ iv_ca_add_use (struct ivopts_data *data, struct iv
>>        if (cheaper_cost_pair (cp, best_cp))
>>       best_cp = cp;
>>      }
>> -
>> +
>>    if (best_cp == NULL)
>>      {
>>        EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
>> @@ -5869,13 +5906,15 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c
>>  }
>>
>>  /* Tries to extend the sets IVS in the best possible way in order
>> -   to express the USE.  If ORIGINALP is true, prefer candidates from
>> -   the original set of IVs, otherwise favor important candidates not
>> -   based on any memory object.  */
>> +   to express the USE.  If SELECT_TYPE is SEL_ORIGINAL, prefer
>> +   candidates from the original set of IVs; if it's SEL_IMPORTANT,
>> +   favor important candidates not based on any memory object;
>> +   if it's SEL_RELATED, assign the best candidate to each use if
>> +   possible.  */
>>
>>  static bool
>>  try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
>> -               struct iv_use *use, bool originalp)
>> +               struct iv_use *use, enum sel_type select_type)
>>  {
>>    comp_cost best_cost, act_cost;
>>    unsigned i;
>> @@ -5883,8 +5922,9 @@ try_add_cand_for (struct ivopts_data *data, struct
>>    struct iv_cand *cand;
>>    struct iv_ca_delta *best_delta = NULL, *act_delta;
>>    struct cost_pair *cp;
>> +  bool originalp = (select_type == SEL_ORIGINAL);
>>
>> -  iv_ca_add_use (data, ivs, use);
>> +  iv_ca_add_use (data, ivs, use, select_type == SEL_RELATED);
>>    best_cost = iv_ca_cost (ivs);
>>    cp = iv_ca_cand_for_use (ivs, use);
>>    if (cp)
>> @@ -5892,7 +5932,16 @@ try_add_cand_for (struct ivopts_data *data, struct
>>        best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
>>        iv_ca_set_no_cp (data, ivs, use);
>>      }
>> +  if (select_type == SEL_RELATED)
>> +    {
>> +      /* We should have selected the best candidate if possible.  */
>> +      gcc_assert (cp != NULL || infinite_cost_p (best_cost));
>> +      iv_ca_delta_commit (data, ivs, best_delta, true);
>> +      iv_ca_delta_free (&best_delta);
>>
>> +      return !infinite_cost_p (best_cost);
>> +    }
>> +
>>    /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
>>       first try important candidates not based on any memory object.  Only if
>>       this fails, try the specific ones.  Rationale -- in loops with many
>> @@ -5983,16 +6032,17 @@ try_add_cand_for (struct ivopts_data *data, struct
>>    return !infinite_cost_p (best_cost);
>>  }
>>
>> -/* Finds an initial assignment of candidates to uses.  */
>> +/* Finds an initial assignment of candidates to uses according to
>> +   SELECT_TYPE.  */
>>
>>  static struct iv_ca *
>> -get_initial_solution (struct ivopts_data *data, bool originalp)
>> +get_initial_solution (struct ivopts_data *data, enum sel_type select_type)
>>  {
>>    struct iv_ca *ivs = iv_ca_new (data);
>>    unsigned i;
>>
>>    for (i = 0; i < n_iv_uses (data); i++)
>> -    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
>> +    if (!try_add_cand_for (data, ivs, iv_use (data, i), select_type))
>>        {
>>       iv_ca_free (&ivs);
>>       return NULL;
>> @@ -6059,17 +6109,43 @@ try_improve_iv_set (struct ivopts_data *data, stru
>>    return true;
>>  }
>>
>> +/* Tries to prune set of induction variables IVS.  */
>> +
>> +static bool
>> +try_prune_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
>> +{
>> +  comp_cost new_cost, old_cost;
>> +  struct iv_ca_delta *delta = NULL;
>> +
>> +  if (iv_ca_n_cands (ivs) > ALWAYS_PRUNE_CAND_SET_BOUND)
>> +    return false;
>> +
>> +  old_cost = iv_ca_cost (ivs);
>> +  /* Try pruning the candidates from the set.  */
>> +  new_cost = iv_ca_prune (data, ivs, NULL, &delta);
>> +
>> +  /* Nothing more we can do.  */
>> +  if (!delta
>> +      || compare_costs (new_cost, old_cost) >= 0)
>> +    return false;
>> +
>> +  iv_ca_delta_commit (data, ivs, delta, true);
>> +  gcc_assert (compare_costs (new_cost, iv_ca_cost (ivs)) == 0);
>> +  iv_ca_delta_free (&delta);
>> +  return true;
>> +}
>> +
>>  /* Attempts to find the optimal set of induction variables.  We do simple
>>     greedy heuristic -- we try to replace at most one candidate in the selected
>>     solution and remove the unused ivs while this improves the cost.  */
>>
>>  static struct iv_ca *
>> -find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
>> +find_optimal_iv_set_1 (struct ivopts_data *data, enum sel_type select_type)
>>  {
>>    struct iv_ca *set;
>>
>>    /* Get the initial solution.  */
>> -  set = get_initial_solution (data, originalp);
>> +  set = get_initial_solution (data, select_type);
>>    if (!set)
>>      {
>>        if (dump_file && (dump_flags & TDF_DETAILS))
>> @@ -6095,25 +6171,72 @@ static struct iv_ca *
>>    return set;
>>  }
>>
>> +/* Attempts to find the optimal set of induction variables.  Different
>> +   with find_optimal_iv_set_1 -- this function uses greedy heuristic
>> +   that firstly assigns the best candidate to each use, then tries to
>> +   prune the solution.  */
>> +
>>  static struct iv_ca *
>> +find_optimal_iv_set_2 (struct ivopts_data *data, enum sel_type select_type)
>> +{
>> +  struct iv_ca *set;
>> +
>> +  /* Get the initial solution.  */
>> +  set = get_initial_solution (data, select_type);
>> +  if (!set)
>> +    {
>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>> +     fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
>> +      return NULL;
>> +    }
>> +
>> +  if (dump_file && (dump_flags & TDF_DETAILS))
>> +    {
>> +      fprintf (dump_file, "Initial set of candidates:\n");
>> +      iv_ca_dump (data, dump_file, set);
>> +    }
>> +
>> +  while (try_prune_iv_set (data, set))
>> +    {
>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>> +     {
>> +       fprintf (dump_file, "Pruned to:\n");
>> +       iv_ca_dump (data, dump_file, set);
>> +     }
>> +    }
>> +
>> +  return set;
>> +}
>> +
>> +/* Find the optimal IV set by using two different heuristic strategies.
>> +   The first strategy starts with small IV solution, tries to replace at
>> +   most one candidate with others not in the current solution at one
>> +   time.  The second strategy starts with large IV set, tries to replace
>> +   candidates with others in the current solution.  */
>> +
>> +static struct iv_ca *
>>  find_optimal_iv_set (struct ivopts_data *data)
>>  {
>>    unsigned i;
>> -  struct iv_ca *set, *origset;
>> +  struct iv_ca *set, *origset, *pruneset;
>>    struct iv_use *use;
>> -  comp_cost cost, origcost;
>> +  comp_cost cost, origcost, prunecost;
>>
>> -  /* Determine the cost based on a strategy that starts with original IVs,
>> -     and try again using a strategy that prefers candidates not based
>> -     on any IVs.  */
>> -  origset = find_optimal_iv_set_1 (data, true);
>> -  set = find_optimal_iv_set_1 (data, false);
>> +  /* Try to find optimal IV set using the first strategy and starting
>> +     with original IVS.  */
>> +  origset = find_optimal_iv_set_1 (data, SEL_ORIGINAL);
>> +  /* Try to find optimal IV set using the first strategy and starting
>> +     with important IVS.  */
>> +  set = find_optimal_iv_set_1 (data, SEL_IMPORTANT);
>> +  /* Try to find optimal IV set using the second strategy.  */
>> +  pruneset = find_optimal_iv_set_2 (data, SEL_RELATED);
>>
>> -  if (!origset && !set)
>> +  if (!origset && !set && !pruneset)
>>      return NULL;
>>
>>    origcost = origset ? iv_ca_cost (origset) : infinite_cost;
>>    cost = set ? iv_ca_cost (set) : infinite_cost;
>> +  prunecost = pruneset ? iv_ca_cost (pruneset) : infinite_cost;
>>
>>    if (dump_file && (dump_flags & TDF_DETAILS))
>>      {
>> @@ -6121,9 +6244,12 @@ find_optimal_iv_set (struct ivopts_data *data)
>>              origcost.cost, origcost.complexity);
>>        fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
>>              cost.cost, cost.complexity);
>> +      fprintf (dump_file, "Pruned cost %d (complexity %d)\n\n",
>> +            prunecost.cost, prunecost.complexity);
>>      }
>>
>> -  /* Choose the one with the best cost.  */
>> +  /* Choose the one with the best cost among the three candidates.  */
>> +
>>    if (compare_costs (origcost, cost) <= 0)
>>      {
>>        if (set)
>> @@ -6133,6 +6259,15 @@ find_optimal_iv_set (struct ivopts_data *data)
>>    else if (origset)
>>      iv_ca_free (&origset);
>>
>> +  if (compare_costs (prunecost, cost) < 0)
>> +    {
>> +      if (set)
>> +     iv_ca_free (&set);
>> +      set = pruneset;
>> +    }
>> +  else if (pruneset)
>> +    iv_ca_free (&pruneset);
>> +
>>    for (i = 0; i < n_iv_uses (data); i++)
>>      {
>>        use = iv_use (data, i);
>



More information about the Gcc-patches mailing list