This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH GCC]Improve candidate selecting in IVOPT


Bin Cheng wrote:
> Hi,
> As analyzed in PR62178, IVOPT can't find the optimal iv set for that case.
> The problem with current heuristic algorithm is it only replaces candidate
> with ones not in current solution one by one, starting from small solution.
> This patch adds another heuristic which starts from assigning the best
> candidate for each iv use, then replaces candidate with ones in the current
> solution.
> Before this patch, there are two runs of find_optimal_set_1 to find the
> optimal iv sets, we name them as set_a and set_b.  After this patch we will
> have set_c.  At last, IVOPT chooses the best one from set_a/set_b/set_c.  To
> prove that this patch is necessary, I collected instrumental data for gcc
> bootstrap, spec2k, eembc and can confirm for some cases only the newly added
> heuristic can find the optimal iv set.  The number of these cases in which
> set_c is the optimal one is on the same level of set_b.
> As for the compilation time, the newly added function actually is one
> iteration of previous selection algorithm, it should be much faster than
> previous process.
> 
> I also added one target dependent test case.
> Bootstrap and test on x86_64, test on aarch64.  Any comments?

I verified that the patch fixes the performance regression on intmm.  I have
seen improvements to other benchmarks, and very small degradations that could
very well be noise.

Thanks for fixing this perf issue!
Sebastian

> 
> 2014-09-30  Bin Cheng  <bin.cheng@arm.com>
> 
> 	PR tree-optimization/62178
> 	* tree-ssa-loop-ivopts.c (enum sel_type): New.
> 	(iv_ca_add_use): Add parameter RELATED_P and find the best cand
> 	for iv use if it's true.
> 	(try_add_cand_for, get_initial_solution): Change paramter ORIGINALP
> 	to SELECT_TYPE and handle it.
> 	(find_optimal_iv_set_1): Ditto.
> 	(try_prune_iv_set, find_optimal_iv_set_2): New functions.
> 	(find_optimal_iv_set): Call find_optimal_iv_set_2 and choose the
> 	best candidate set.
> 
> gcc/testsuite/ChangeLog
> 2014-09-30  Bin Cheng  <bin.cheng@arm.com>
> 
> 	PR tree-optimization/62178
> 	* gcc.target/aarch64/pr62178.c: New test.

> Index: gcc/testsuite/gcc.target/aarch64/pr62178.c
> ===================================================================
> --- gcc/testsuite/gcc.target/aarch64/pr62178.c	(revision 0)
> +++ gcc/testsuite/gcc.target/aarch64/pr62178.c	(revision 0)
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
> +
> +void Intmm (int run) {
> +  int i, j, k;
> +
> +  for ( i = 1; i <= 30; i++ )
> +    for ( j = 1; j <= 30; j++ ) {
> +      r[i][j] = 0;
> +      for(k = 1; k <= 30; k++ )
> +        r[i][j] += a[i][k]*b[k][j];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */
> Index: gcc/tree-ssa-loop-ivopts.c
> ===================================================================
> --- gcc/tree-ssa-loop-ivopts.c	(revision 215113)
> +++ gcc/tree-ssa-loop-ivopts.c	(working copy)
> @@ -254,6 +254,14 @@ struct iv_inv_expr_ent
>    hashval_t hash;
>  };
>  
> +/* Types used to start selecting the candidate for each IV use.  */
> +enum sel_type
> +{
> +  SEL_ORIGINAL,		/* Start selecting from original cands.  */
> +  SEL_IMPORTANT,	/* Start selecting from important cands.  */
> +  SEL_RELATED		/* Start selecting from related cands.  */
> +};
> +
>  /* The data used by the induction variable optimizations.  */
>  
>  typedef struct iv_use *iv_use_p;
> @@ -5417,22 +5425,51 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_
>  }
>  
>  /* Extend set IVS by expressing USE by some of the candidates in it
> -   if possible.  Consider all important candidates if candidates in
> -   set IVS don't give any result.  */
> +   if possible.  If RELATED_P is FALSE, consider all important
> +   candidates if candidates in set IVS don't give any result;
> +   otherwise, try to find the best one from related or all candidates,
> +   depending on consider_all_candidates.  */
>  
>  static void
>  iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
> -	       struct iv_use *use)
> +	       struct iv_use *use, bool related_p)
>  {
>    struct cost_pair *best_cp = NULL, *cp;
>    bitmap_iterator bi;
>    unsigned i;
>    struct iv_cand *cand;
>  
> -  gcc_assert (ivs->upto >= use->id);
> +  gcc_assert (ivs->upto == use->id);
>    ivs->upto++;
>    ivs->bad_uses++;
>  
> +  if (related_p)
> +    {
> +      if (data->consider_all_candidates)
> +	{
> +	  for (i = 0; i < n_iv_cands (data); i++)
> +	    {
> +	      cand = iv_cand (data, i);
> +	      cp = get_use_iv_cost (data, use, cand);
> +	      if (cheaper_cost_pair (cp, best_cp))
> +		best_cp = cp;
> +	    }
> +	}
> +      else
> +	{
> +	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
> +	    {
> +	      cand = iv_cand (data, i);
> +	      cp = get_use_iv_cost (data, use, cand);
> +	      if (cheaper_cost_pair (cp, best_cp))
> +		best_cp = cp;
> +            }
> +	}
> +
> +      iv_ca_set_cp (data, ivs, use, best_cp);
> +      return;
> +    }
> +
>    EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
>      {
>        cand = iv_cand (data, i);
> @@ -5440,7 +5477,7 @@ iv_ca_add_use (struct ivopts_data *data, struct iv
>        if (cheaper_cost_pair (cp, best_cp))
>  	best_cp = cp;
>      }
> -   
> +
>    if (best_cp == NULL)
>      {
>        EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
> @@ -5869,13 +5906,15 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c
>  }
>  
>  /* Tries to extend the sets IVS in the best possible way in order
> -   to express the USE.  If ORIGINALP is true, prefer candidates from
> -   the original set of IVs, otherwise favor important candidates not
> -   based on any memory object.  */
> +   to express the USE.  If SELECT_TYPE is SEL_ORIGINAL, prefer
> +   candidates from the original set of IVs; if it's SEL_IMPORTANT,
> +   favor important candidates not based on any memory object;
> +   if it's SEL_RELATED, assign the best candidate to each use if
> +   possible.  */
>  
>  static bool
>  try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
> -		  struct iv_use *use, bool originalp)
> +		  struct iv_use *use, enum sel_type select_type)
>  {
>    comp_cost best_cost, act_cost;
>    unsigned i;
> @@ -5883,8 +5922,9 @@ try_add_cand_for (struct ivopts_data *data, struct
>    struct iv_cand *cand;
>    struct iv_ca_delta *best_delta = NULL, *act_delta;
>    struct cost_pair *cp;
> +  bool originalp = (select_type == SEL_ORIGINAL);
>  
> -  iv_ca_add_use (data, ivs, use);
> +  iv_ca_add_use (data, ivs, use, select_type == SEL_RELATED);
>    best_cost = iv_ca_cost (ivs);
>    cp = iv_ca_cand_for_use (ivs, use);
>    if (cp)
> @@ -5892,7 +5932,16 @@ try_add_cand_for (struct ivopts_data *data, struct
>        best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
>        iv_ca_set_no_cp (data, ivs, use);
>      }
> +  if (select_type == SEL_RELATED)
> +    {
> +      /* We should have selected the best candidate if possible.  */
> +      gcc_assert (cp != NULL || infinite_cost_p (best_cost));
> +      iv_ca_delta_commit (data, ivs, best_delta, true);
> +      iv_ca_delta_free (&best_delta);
>  
> +      return !infinite_cost_p (best_cost);
> +    }
> +
>    /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
>       first try important candidates not based on any memory object.  Only if
>       this fails, try the specific ones.  Rationale -- in loops with many
> @@ -5983,16 +6032,17 @@ try_add_cand_for (struct ivopts_data *data, struct
>    return !infinite_cost_p (best_cost);
>  }
>  
> -/* Finds an initial assignment of candidates to uses.  */
> +/* Finds an initial assignment of candidates to uses according to
> +   SELECT_TYPE.  */
>  
>  static struct iv_ca *
> -get_initial_solution (struct ivopts_data *data, bool originalp)
> +get_initial_solution (struct ivopts_data *data, enum sel_type select_type)
>  {
>    struct iv_ca *ivs = iv_ca_new (data);
>    unsigned i;
>  
>    for (i = 0; i < n_iv_uses (data); i++)
> -    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
> +    if (!try_add_cand_for (data, ivs, iv_use (data, i), select_type))
>        {
>  	iv_ca_free (&ivs);
>  	return NULL;
> @@ -6059,17 +6109,43 @@ try_improve_iv_set (struct ivopts_data *data, stru
>    return true;
>  }
>  
> +/* Tries to prune set of induction variables IVS.  */
> +
> +static bool
> +try_prune_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
> +{
> +  comp_cost new_cost, old_cost;
> +  struct iv_ca_delta *delta = NULL;
> +
> +  if (iv_ca_n_cands (ivs) > ALWAYS_PRUNE_CAND_SET_BOUND)
> +    return false;
> +
> +  old_cost = iv_ca_cost (ivs);
> +  /* Try pruning the candidates from the set.  */
> +  new_cost = iv_ca_prune (data, ivs, NULL, &delta);
> +
> +  /* Nothing more we can do.  */
> +  if (!delta
> +      || compare_costs (new_cost, old_cost) >= 0)
> +    return false;
> +
> +  iv_ca_delta_commit (data, ivs, delta, true);
> +  gcc_assert (compare_costs (new_cost, iv_ca_cost (ivs)) == 0);
> +  iv_ca_delta_free (&delta);
> +  return true;
> +}
> +
>  /* Attempts to find the optimal set of induction variables.  We do simple
>     greedy heuristic -- we try to replace at most one candidate in the selected
>     solution and remove the unused ivs while this improves the cost.  */
>  
>  static struct iv_ca *
> -find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
> +find_optimal_iv_set_1 (struct ivopts_data *data, enum sel_type select_type)
>  {
>    struct iv_ca *set;
>  
>    /* Get the initial solution.  */
> -  set = get_initial_solution (data, originalp);
> +  set = get_initial_solution (data, select_type);
>    if (!set)
>      {
>        if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -6095,25 +6171,72 @@ static struct iv_ca *
>    return set;
>  }
>  
> +/* Attempts to find the optimal set of induction variables.  Different
> +   with find_optimal_iv_set_1 -- this function uses greedy heuristic
> +   that firstly assigns the best candidate to each use, then tries to
> +   prune the solution.  */
> +
>  static struct iv_ca *
> +find_optimal_iv_set_2 (struct ivopts_data *data, enum sel_type select_type)
> +{
> +  struct iv_ca *set;
> +
> +  /* Get the initial solution.  */
> +  set = get_initial_solution (data, select_type);
> +  if (!set)
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
> +      return NULL;
> +    }
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Initial set of candidates:\n");
> +      iv_ca_dump (data, dump_file, set);
> +    }
> +
> +  while (try_prune_iv_set (data, set))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +	{
> +	  fprintf (dump_file, "Pruned to:\n");
> +	  iv_ca_dump (data, dump_file, set);
> +	}
> +    }
> +
> +  return set;
> +}
> +
> +/* Find the optimal IV set by using two different heuristic strategies.
> +   The first strategy starts with small IV solution, tries to replace at
> +   most one candidate with others not in the current solution at one
> +   time.  The second strategy starts with large IV set, tries to replace
> +   candidates with others in the current solution.  */
> +
> +static struct iv_ca *
>  find_optimal_iv_set (struct ivopts_data *data)
>  {
>    unsigned i;
> -  struct iv_ca *set, *origset;
> +  struct iv_ca *set, *origset, *pruneset;
>    struct iv_use *use;
> -  comp_cost cost, origcost;
> +  comp_cost cost, origcost, prunecost;
>  
> -  /* Determine the cost based on a strategy that starts with original IVs,
> -     and try again using a strategy that prefers candidates not based
> -     on any IVs.  */
> -  origset = find_optimal_iv_set_1 (data, true);
> -  set = find_optimal_iv_set_1 (data, false);
> +  /* Try to find optimal IV set using the first strategy and starting
> +     with original IVS.  */
> +  origset = find_optimal_iv_set_1 (data, SEL_ORIGINAL);
> +  /* Try to find optimal IV set using the first strategy and starting
> +     with important IVS.  */
> +  set = find_optimal_iv_set_1 (data, SEL_IMPORTANT);
> +  /* Try to find optimal IV set using the second strategy.  */
> +  pruneset = find_optimal_iv_set_2 (data, SEL_RELATED);
>  
> -  if (!origset && !set)
> +  if (!origset && !set && !pruneset)
>      return NULL;
>  
>    origcost = origset ? iv_ca_cost (origset) : infinite_cost;
>    cost = set ? iv_ca_cost (set) : infinite_cost;
> +  prunecost = pruneset ? iv_ca_cost (pruneset) : infinite_cost;
>  
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      {
> @@ -6121,9 +6244,12 @@ find_optimal_iv_set (struct ivopts_data *data)
>  	       origcost.cost, origcost.complexity);
>        fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
>  	       cost.cost, cost.complexity);
> +      fprintf (dump_file, "Pruned cost %d (complexity %d)\n\n",
> +	       prunecost.cost, prunecost.complexity);
>      }
>  
> -  /* Choose the one with the best cost.  */
> +  /* Choose the one with the best cost among the three candidates.  */
> +
>    if (compare_costs (origcost, cost) <= 0)
>      {
>        if (set)
> @@ -6133,6 +6259,15 @@ find_optimal_iv_set (struct ivopts_data *data)
>    else if (origset)
>      iv_ca_free (&origset);
>  
> +  if (compare_costs (prunecost, cost) < 0)
> +    {
> +      if (set)
> +	iv_ca_free (&set);
> +      set = pruneset;
> +    }
> +  else if (pruneset)
> +    iv_ca_free (&pruneset);
> +
>    for (i = 0; i < n_iv_uses (data); i++)
>      {
>        use = iv_use (data, i);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]