Index: gcc/testsuite/gcc.target/aarch64/pr62178.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0) +++ gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0) @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1]; + +void Intmm (int run) { + int i, j, k; + + for ( i = 1; i <= 30; i++ ) + for ( j = 1; j <= 30; j++ ) { + r[i][j] = 0; + for(k = 1; k <= 30; k++ ) + r[i][j] += a[i][k]*b[k][j]; + } +} + +/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */ Index: gcc/tree-ssa-loop-ivopts.c =================================================================== --- gcc/tree-ssa-loop-ivopts.c (revision 215113) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -254,6 +254,14 @@ struct iv_inv_expr_ent hashval_t hash; }; +/* Types used to start selecting the candidate for each IV use. */ +enum sel_type +{ + SEL_ORIGINAL, /* Start selecting from original cands. */ + SEL_IMPORTANT, /* Start selecting from important cands. */ + SEL_RELATED /* Start selecting from related cands. */ +}; + /* The data used by the induction variable optimizations. */ typedef struct iv_use *iv_use_p; @@ -5417,22 +5425,51 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ } /* Extend set IVS by expressing USE by some of the candidates in it - if possible. Consider all important candidates if candidates in - set IVS don't give any result. */ + if possible. If RELATED_P is FALSE, consider all important + candidates if candidates in set IVS don't give any result; + otherwise, try to find the best one from related or all candidates, + depending on consider_all_candidates. */ static void iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, - struct iv_use *use) + struct iv_use *use, bool related_p) { struct cost_pair *best_cp = NULL, *cp; bitmap_iterator bi; unsigned i; struct iv_cand *cand; - gcc_assert (ivs->upto >= use->id); + gcc_assert (ivs->upto == use->id); ivs->upto++; ivs->bad_uses++; + if (related_p) + { + if (data->consider_all_candidates) + { + for (i = 0; i < n_iv_cands (data); i++) + { + cand = iv_cand (data, i); + cp = get_use_iv_cost (data, use, cand); + if (cheaper_cost_pair (cp, best_cp)) + best_cp = cp; + } + } + else + { + EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi) + { + cand = iv_cand (data, i); + cp = get_use_iv_cost (data, use, cand); + if (cheaper_cost_pair (cp, best_cp)) + best_cp = cp; + } + } + + iv_ca_set_cp (data, ivs, use, best_cp); + return; + } + EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) { cand = iv_cand (data, i); @@ -5440,7 +5477,7 @@ iv_ca_add_use (struct ivopts_data *data, struct iv if (cheaper_cost_pair (cp, best_cp)) best_cp = cp; } - + if (best_cp == NULL) { EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) @@ -5869,13 +5906,15 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c } /* Tries to extend the sets IVS in the best possible way in order - to express the USE. If ORIGINALP is true, prefer candidates from - the original set of IVs, otherwise favor important candidates not - based on any memory object. */ + to express the USE. If SELECT_TYPE is SEL_ORIGINAL, prefer + candidates from the original set of IVs; if it's SEL_IMPORTANT, + favor important candidates not based on any memory object; + if it's SEL_RELATED, assign the best candidate to each use if + possible. */ static bool try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, - struct iv_use *use, bool originalp) + struct iv_use *use, enum sel_type select_type) { comp_cost best_cost, act_cost; unsigned i; @@ -5883,8 +5922,9 @@ try_add_cand_for (struct ivopts_data *data, struct struct iv_cand *cand; struct iv_ca_delta *best_delta = NULL, *act_delta; struct cost_pair *cp; + bool originalp = (select_type == SEL_ORIGINAL); - iv_ca_add_use (data, ivs, use); + iv_ca_add_use (data, ivs, use, select_type == SEL_RELATED); best_cost = iv_ca_cost (ivs); cp = iv_ca_cand_for_use (ivs, use); if (cp) @@ -5892,7 +5932,16 @@ try_add_cand_for (struct ivopts_data *data, struct best_delta = iv_ca_delta_add (use, NULL, cp, NULL); iv_ca_set_no_cp (data, ivs, use); } + if (select_type == SEL_RELATED) + { + /* We should have selected the best candidate if possible. */ + gcc_assert (cp != NULL || infinite_cost_p (best_cost)); + iv_ca_delta_commit (data, ivs, best_delta, true); + iv_ca_delta_free (&best_delta); + return !infinite_cost_p (best_cost); + } + /* If ORIGINALP is true, try to find the original IV for the use. Otherwise first try important candidates not based on any memory object. Only if this fails, try the specific ones. Rationale -- in loops with many @@ -5983,16 +6032,17 @@ try_add_cand_for (struct ivopts_data *data, struct return !infinite_cost_p (best_cost); } -/* Finds an initial assignment of candidates to uses. */ +/* Finds an initial assignment of candidates to uses according to + SELECT_TYPE. */ static struct iv_ca * -get_initial_solution (struct ivopts_data *data, bool originalp) +get_initial_solution (struct ivopts_data *data, enum sel_type select_type) { struct iv_ca *ivs = iv_ca_new (data); unsigned i; for (i = 0; i < n_iv_uses (data); i++) - if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) + if (!try_add_cand_for (data, ivs, iv_use (data, i), select_type)) { iv_ca_free (&ivs); return NULL; @@ -6059,17 +6109,43 @@ try_improve_iv_set (struct ivopts_data *data, stru return true; } +/* Tries to prune set of induction variables IVS. */ + +static bool +try_prune_iv_set (struct ivopts_data *data, struct iv_ca *ivs) +{ + comp_cost new_cost, old_cost; + struct iv_ca_delta *delta = NULL; + + if (iv_ca_n_cands (ivs) > ALWAYS_PRUNE_CAND_SET_BOUND) + return false; + + old_cost = iv_ca_cost (ivs); + /* Try pruning the candidates from the set. */ + new_cost = iv_ca_prune (data, ivs, NULL, &delta); + + /* Nothing more we can do. */ + if (!delta + || compare_costs (new_cost, old_cost) >= 0) + return false; + + iv_ca_delta_commit (data, ivs, delta, true); + gcc_assert (compare_costs (new_cost, iv_ca_cost (ivs)) == 0); + iv_ca_delta_free (&delta); + return true; +} + /* Attempts to find the optimal set of induction variables. We do simple greedy heuristic -- we try to replace at most one candidate in the selected solution and remove the unused ivs while this improves the cost. */ static struct iv_ca * -find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) +find_optimal_iv_set_1 (struct ivopts_data *data, enum sel_type select_type) { struct iv_ca *set; /* Get the initial solution. */ - set = get_initial_solution (data, originalp); + set = get_initial_solution (data, select_type); if (!set) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -6095,25 +6171,72 @@ static struct iv_ca * return set; } +/* Attempts to find the optimal set of induction variables. Different + with find_optimal_iv_set_1 -- this function uses greedy heuristic + that firstly assigns the best candidate to each use, then tries to + prune the solution. */ + static struct iv_ca * +find_optimal_iv_set_2 (struct ivopts_data *data, enum sel_type select_type) +{ + struct iv_ca *set; + + /* Get the initial solution. */ + set = get_initial_solution (data, select_type); + if (!set) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); + return NULL; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Initial set of candidates:\n"); + iv_ca_dump (data, dump_file, set); + } + + while (try_prune_iv_set (data, set)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Pruned to:\n"); + iv_ca_dump (data, dump_file, set); + } + } + + return set; +} + +/* Find the optimal IV set by using two different heuristic strategies. + The first strategy starts with small IV solution, tries to replace at + most one candidate with others not in the current solution at one + time. The second strategy starts with large IV set, tries to replace + candidates with others in the current solution. */ + +static struct iv_ca * find_optimal_iv_set (struct ivopts_data *data) { unsigned i; - struct iv_ca *set, *origset; + struct iv_ca *set, *origset, *pruneset; struct iv_use *use; - comp_cost cost, origcost; + comp_cost cost, origcost, prunecost; - /* Determine the cost based on a strategy that starts with original IVs, - and try again using a strategy that prefers candidates not based - on any IVs. */ - origset = find_optimal_iv_set_1 (data, true); - set = find_optimal_iv_set_1 (data, false); + /* Try to find optimal IV set using the first strategy and starting + with original IVS. */ + origset = find_optimal_iv_set_1 (data, SEL_ORIGINAL); + /* Try to find optimal IV set using the first strategy and starting + with important IVS. */ + set = find_optimal_iv_set_1 (data, SEL_IMPORTANT); + /* Try to find optimal IV set using the second strategy. */ + pruneset = find_optimal_iv_set_2 (data, SEL_RELATED); - if (!origset && !set) + if (!origset && !set && !pruneset) return NULL; origcost = origset ? iv_ca_cost (origset) : infinite_cost; cost = set ? iv_ca_cost (set) : infinite_cost; + prunecost = pruneset ? iv_ca_cost (pruneset) : infinite_cost; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -6121,9 +6244,12 @@ find_optimal_iv_set (struct ivopts_data *data) origcost.cost, origcost.complexity); fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity); + fprintf (dump_file, "Pruned cost %d (complexity %d)\n\n", + prunecost.cost, prunecost.complexity); } - /* Choose the one with the best cost. */ + /* Choose the one with the best cost among the three candidates. */ + if (compare_costs (origcost, cost) <= 0) { if (set) @@ -6133,6 +6259,15 @@ find_optimal_iv_set (struct ivopts_data *data) else if (origset) iv_ca_free (&origset); + if (compare_costs (prunecost, cost) < 0) + { + if (set) + iv_ca_free (&set); + set = pruneset; + } + else if (pruneset) + iv_ca_free (&pruneset); + for (i = 0; i < n_iv_uses (data); i++) { use = iv_use (data, i);