[PATCH] Split vector loop analysis into main and epilogue analysis
Richard Sandiford
richard.sandiford@arm.com
Fri Nov 5 11:55:06 GMT 2021
Richard Biener <rguenther@suse.de> writes:
> As discussed this splits the analysis loop into two, first settling
> on a vector mode used for the main loop and only then analyzing
> the epilogue of that for possible vectorization. That makes it
> easier to put in support for unrolled main loops.
>
> On the way I've realized some cleanup opportunities, namely caching
> n_stmts in vec_info_shared (it's computed by dataref analysis)
> avoiding passing that around and setting/clearing loop->aux
> during analysis - try_vectorize_loop_1 will ultimately set it
> on those we vectorize.
>
> This also gets rid of the previously introduced callback in
> vect_analyze_loop_1 in favor of making that advance the mode iterator.
> I'm now pushing VOIDmode explicitly into the vector_modes array
> which makes the re-start on the epilogue side a bit more
> straight-forward. Note that this will now use auto-detection of the
> vector mode in case the main loop used it and we want to try
> LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P and the first mode from
> the target array if not. I've added a comment that says we may
> want to make sure we don't try vectorizing the epilogue with a
> bigger vector size than the main loop but the situation isn't
> very likely to appear in practice I guess (and it was also present
> before this change).
>
> In principle this change should not change vectorization decisions
> but the way we handled re-analyzing epilogues as main loops makes
> me only 99% sure that it does.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
Comments inline.
>
> OK?
>
> Thanks,
> Richard.
>
> 2021-11-05 Richard Biener <rguenther@suse.de>
>
> * tree-vectorizer.h (vec_info_shared::n_stmts): Add.
> (LOOP_VINFO_N_STMTS): Likewise.
> (vec_info_for_bb): Remove unused function.
> * tree-vectorizer.c (vec_info_shared::vec_info_shared):
> Initialize n_stmts member.
> * tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
> (vect_create_loop_vinfo): Do not set loop->aux.
> (vect_analyze_loop_2): Do not get n_stmts as argument,
> instead use LOOP_VINFO_N_STMTS. Set LOOP_VINFO_VECTORIZABLE_P
> here.
> (vect_analyze_loop_1): Remove callback, get the mode iterator
> and autodetected_vector_mode as argument, advancing the
> iterator and initializing autodetected_vector_mode here.
> (vect_analyze_loop): Split analysis loop into two, first
> processing main loops only and then epilogues.
> ---
> gcc/tree-vect-loop.c | 415 +++++++++++++++++++++---------------------
> gcc/tree-vectorizer.c | 3 +-
> gcc/tree-vectorizer.h | 10 +-
> 3 files changed, 212 insertions(+), 216 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 13a53436729..abf87f99d6d 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3. If not see
> <http://www.gnu.org/licenses/>. */
>
> #define INCLUDE_ALGORITHM
> -#define INCLUDE_FUNCTIONAL
> #include "config.h"
> #include "system.h"
> #include "coretypes.h"
> @@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
> = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
> }
>
> - gcc_assert (!loop->aux);
> - loop->aux = loop_vinfo;
> return loop_vinfo;
> }
>
> @@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
> for it. The different analyses will record information in the
> loop_vec_info struct. */
> static opt_result
> -vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
> +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
> {
> opt_result ok = opt_result::success ();
> int res;
> @@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
> opt_result res
> = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
> &LOOP_VINFO_DATAREFS (loop_vinfo),
> - n_stmts);
> + &LOOP_VINFO_N_STMTS (loop_vinfo));
> if (!res)
> {
> if (dump_enabled_p ())
> @@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
> poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>
> /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
> - ok = vect_analyze_slp (loop_vinfo, *n_stmts);
> + ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
> if (!ok)
> return ok;
>
> @@ -2641,6 +2638,7 @@ start_over:
> LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
>
> /* Ok to vectorize! */
> + LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
> return opt_result::success ();
>
> again:
> @@ -2891,46 +2889,70 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
> return true;
> }
>
> -/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
> - not NULL. Process the analyzed loop with PROCESS even if analysis
> - failed. Sets *N_STMTS and FATAL according to the analysis.
> +/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is
> + not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
> + MODE_I to the next mode useful to analyze.
> Return the loop_vinfo on success and wrapped null on failure. */
>
> static opt_loop_vec_info
> vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
> const vect_loop_form_info *loop_form_info,
> - machine_mode vector_mode, loop_vec_info main_loop_vinfo,
> - unsigned int *n_stmts, bool &fatal,
> - std::function<void(loop_vec_info)> process = nullptr)
> + loop_vec_info main_loop_vinfo,
> + const vector_modes &vector_modes, unsigned &mode_i,
> + machine_mode &autodetected_vector_mode,
> + bool &fatal)
> {
> loop_vec_info loop_vinfo
> = vect_create_loop_vinfo (loop, shared, loop_form_info);
> - loop_vinfo->vector_mode = vector_mode;
> -
> if (main_loop_vinfo)
> LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_vinfo;
>
> + machine_mode vector_mode = vector_modes[mode_i];
> + loop_vinfo->vector_mode = vector_mode;
> +
> /* Run the main analysis. */
> - fatal = false;
> - opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, n_stmts);
> - loop->aux = NULL;
> + opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal);
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "***** Analysis %s with vector mode %s\n",
> + res ? "succeeded" : " failed",
> + GET_MODE_NAME (loop_vinfo->vector_mode));
>
> - /* Process info before we destroy loop_vinfo upon analysis failure
> - when there was no fatal failure. */
> - if (!fatal && process)
> - process (loop_vinfo);
> + /* Remember the autodetected vector mode. */
> + if (vector_mode == VOIDmode)
> + autodetected_vector_mode = loop_vinfo->vector_mode;
>
> - if (dump_enabled_p ())
> + /* Advance mode_i, first skipping modes that would result in the
> + same analysis result. */
> + while (mode_i + 1 < vector_modes.length ()
> + && vect_chooses_same_modes_p (loop_vinfo,
> + vector_modes[mode_i + 1]))
> {
> - if (res)
> + if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Analysis succeeded with vector mode %s\n",
> - GET_MODE_NAME (loop_vinfo->vector_mode));
> - else
> + "***** The result for vector mode %s would"
> + " be the same\n",
> + GET_MODE_NAME (vector_modes[mode_i + 1]));
> + mode_i += 1;
> + }
> + if (mode_i + 1 < vector_modes.length ()
> + && VECTOR_MODE_P (autodetected_vector_mode)
> + && (related_vector_mode (vector_modes[mode_i + 1],
> + GET_MODE_INNER (autodetected_vector_mode))
> + == autodetected_vector_mode)
> + && (related_vector_mode (autodetected_vector_mode,
> + GET_MODE_INNER (vector_modes[mode_i + 1]))
> + == vector_modes[mode_i + 1]))
> + {
> + if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Analysis failed with vector mode %s\n",
> - GET_MODE_NAME (loop_vinfo->vector_mode));
> + "***** Skipping vector mode %s, which would"
> + " repeat the analysis for %s\n",
> + GET_MODE_NAME (vector_modes[mode_i + 1]),
> + GET_MODE_NAME (autodetected_vector_mode));
> + mode_i += 1;
> }
> + mode_i++;
>
> if (!res)
> {
> @@ -2940,7 +2962,6 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
> return opt_loop_vec_info::propagate_failure (res);
> }
>
> - LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
> return opt_loop_vec_info::success (loop_vinfo);
> }
>
> @@ -2952,14 +2973,6 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
> opt_loop_vec_info
> vect_analyze_loop (class loop *loop, vec_info_shared *shared)
> {
> - auto_vector_modes vector_modes;
> -
> - /* Autodetect first vector size we try. */
> - unsigned int autovec_flags
> - = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
> - loop->simdlen != 0);
> - unsigned int mode_i = 0;
> -
> DUMP_VECT_SCOPE ("analyze_loop_nest");
>
> if (loop_outer (loop)
> @@ -2985,70 +2998,59 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
> return opt_loop_vec_info::propagate_failure (res);
> }
>
> - unsigned n_stmts = 0;
> - machine_mode autodetected_vector_mode = VOIDmode;
> - opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
> - machine_mode next_vector_mode = VOIDmode;
> - poly_uint64 lowest_th = 0;
> - bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
> - && !unlimited_cost_model (loop));
> + /* When pick_lowest_cost_p is true, we should in principle iterate
> + over all the loop_vec_infos that LOOP_VINFO could replace and
> + try to vectorize LOOP_VINFO under the same conditions.
> + E.g. when trying to replace an epilogue loop, we should vectorize
> + LOOP_VINFO as an epilogue loop with the same VF limit. When trying
> + to replace the main loop, we should vectorize LOOP_VINFO as a main
> + loop too.
>
> - bool vect_epilogues = false;
> - unsigned HOST_WIDE_INT simdlen = loop->simdlen;
> - while (1)
> - {
> - /* When pick_lowest_cost_p is true, we should in principle iterate
> - over all the loop_vec_infos that LOOP_VINFO could replace and
> - try to vectorize LOOP_VINFO under the same conditions.
> - E.g. when trying to replace an epilogue loop, we should vectorize
> - LOOP_VINFO as an epilogue loop with the same VF limit. When trying
> - to replace the main loop, we should vectorize LOOP_VINFO as a main
> - loop too.
> + However, autovectorize_vector_modes is usually sorted as follows:
>
> - However, autovectorize_vector_modes is usually sorted as follows:
> + - Modes that naturally produce lower VFs usually follow modes that
> + naturally produce higher VFs.
>
> - - Modes that naturally produce lower VFs usually follow modes that
> - naturally produce higher VFs.
> + - When modes naturally produce the same VF, maskable modes
> + usually follow unmaskable ones, so that the maskable mode
> + can be used to vectorize the epilogue of the unmaskable mode.
>
> - - When modes naturally produce the same VF, maskable modes
> - usually follow unmaskable ones, so that the maskable mode
> - can be used to vectorize the epilogue of the unmaskable mode.
> + This order is preferred because it leads to the maximum
> + epilogue vectorization opportunities. Targets should only use
> + a different order if they want to make wide modes available while
> + disparaging them relative to earlier, smaller modes. The assumption
> + in that case is that the wider modes are more expensive in some
> + way that isn't reflected directly in the costs.
>
> - This order is preferred because it leads to the maximum
> - epilogue vectorization opportunities. Targets should only use
> - a different order if they want to make wide modes available while
> - disparaging them relative to earlier, smaller modes. The assumption
> - in that case is that the wider modes are more expensive in some
> - way that isn't reflected directly in the costs.
> + There should therefore be few interesting cases in which
> + LOOP_VINFO fails when treated as an epilogue loop, succeeds when
> + treated as a standalone loop, and ends up being genuinely cheaper
> + than FIRST_LOOP_VINFO. */
I think the patch obsoletes this big comment, which was trying to explain
why we *didn't* try to vectorise as a main loop separately from an
epilogue loop. (It was already on shaky ground, as previously discussed.)
>
> - There should therefore be few interesting cases in which
> - LOOP_VINFO fails when treated as an epilogue loop, succeeds when
> - treated as a standalone loop, and ends up being genuinely cheaper
> - than FIRST_LOOP_VINFO. */
> + auto_vector_modes vector_modes;
> + /* Autodetect first vector size we try. */
> + vector_modes.safe_push (VOIDmode);
> + unsigned int autovec_flags
> + = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
> + loop->simdlen != 0);
> + bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
> + && !unlimited_cost_model (loop));
> + machine_mode autodetected_vector_mode = VOIDmode;
> + opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
> + unsigned int mode_i = 0;
> + unsigned int first_loop_i = 0;
> + unsigned int first_loop_next_i = 0;
> + unsigned HOST_WIDE_INT simdlen = loop->simdlen;
>
> + /* First determine the main loop vectorization mode. */
> + while (1)
> + {
> + unsigned int loop_vinfo_i = mode_i;
> bool fatal;
> - auto cb = [&] (loop_vec_info loop_vinfo)
> - {
> - if (mode_i == 0)
> - autodetected_vector_mode = loop_vinfo->vector_mode;
> - while (mode_i < vector_modes.length ()
> - && vect_chooses_same_modes_p (loop_vinfo,
> - vector_modes[mode_i]))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "***** The result for vector mode %s would"
> - " be the same\n",
> - GET_MODE_NAME (vector_modes[mode_i]));
> - mode_i += 1;
> - }
> - };
> opt_loop_vec_info loop_vinfo
> = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> - next_vector_mode,
> - vect_epilogues
> - ? (loop_vec_info)first_loop_vinfo : NULL,
> - &n_stmts, fatal, cb);
> + NULL, vector_modes, mode_i,
> + autodetected_vector_mode, fatal);
> if (fatal)
> break;
>
> @@ -3061,10 +3063,107 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
> {
> delete first_loop_vinfo;
> first_loop_vinfo = opt_loop_vec_info::success (NULL);
> - LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
> simdlen = 0;
> }
> else if (pick_lowest_cost_p && first_loop_vinfo)
> + {
> + /* Keep trying to roll back vectorization attempts while the
> + loop_vec_infos they produced were worse than this one. */
> + if (vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
> + {
> + delete first_loop_vinfo;
> + first_loop_vinfo = opt_loop_vec_info::success (NULL);
> + }
The comment no longer really describes the code here. We're just
making a straight comparison between two main loop vinfos (which is
a good thing). I think it might be easier to follow if the joust condition
was part of the “else if”, so that it's more obviously a sibling of the
simdlen comparison.
> + }
> + if (first_loop_vinfo == NULL)
> + {
> + first_loop_vinfo = loop_vinfo;
> + first_loop_i = loop_vinfo_i;
> + first_loop_next_i = mode_i;
> + }
> + else
> + {
> + delete loop_vinfo;
> + loop_vinfo = opt_loop_vec_info::success (NULL);
> + }
> +
> + /* Commit to first_loop_vinfo if we have no reason to try
> + alternatives. */
> + if (!simdlen && !pick_lowest_cost_p)
> + break;
> + }
> + if (mode_i == vector_modes.length ()
> + || autodetected_vector_mode == VOIDmode)
> + break;
> +
> + /* Try the next biggest vector size. */
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "***** Re-trying analysis with vector mode %s\n",
> + GET_MODE_NAME (vector_modes[mode_i]));
> + }
> + if (!first_loop_vinfo)
> + return opt_loop_vec_info::propagate_failure (res);
> +
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "***** Choosing vector mode %s\n",
> + GET_MODE_NAME (first_loop_vinfo->vector_mode));
> +
> + /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> + enabled, SIMDUID is not set, it is the innermost loop and we have
> + either already found the loop's SIMDLEN or there was no SIMDLEN to
> + begin with.
> + TODO: Enable epilogue vectorization for loops with SIMDUID set. */
> + bool vect_epilogues = (!simdlen
> + && loop->inner == NULL
> + && param_vect_epilogues_nomask
> + && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
> + && !loop->simduid);
> + if (!vect_epilogues)
> + return first_loop_vinfo;
> +
> + /* Now analyze first_loop_vinfo for epilogue vectorization. */
> + poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
> +
> + /* Handle the case that the original loop can use partial
> + vectorization, but want to only adopt it for the epilogue.
> + The retry should be in the same mode as original. */
> + if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
> + {
> + gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (first_loop_vinfo)
> + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (first_loop_vinfo));
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "***** Re-trying analysis with same vector mode"
> + " %s for epilogue with partial vectors.\n",
> + GET_MODE_NAME (first_loop_vinfo->vector_mode));
> + mode_i = first_loop_i;
> + }
> + else
> + {
> + mode_i = first_loop_next_i;
> + if (mode_i == vector_modes.length ())
> + return first_loop_vinfo;
> + }
It's an interesting question whether we should continue doing this,
or whether we should consider all epilogue alternatives even for
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P. Perhaps this reorg makes
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P redundant.
Obviously doesn't affect this patch though.
Looks great to me otherwise FWIW.
Thanks,
Richard
> +
> + /* ??? If first_loop_vinfo was using VOIDmode then we probably
> + want to instead search for the corresponding mode in vector_modes[]. */
> +
> + while (1)
> + {
> + bool fatal;
> + opt_loop_vec_info loop_vinfo
> + = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> + first_loop_vinfo,
> + vector_modes, mode_i,
> + autodetected_vector_mode, fatal);
> + if (fatal)
> + break;
> +
> + if (loop_vinfo)
> + {
> + if (pick_lowest_cost_p)
> {
> /* Keep trying to roll back vectorization attempts while the
> loop_vec_infos they produced were worse than this one. */
> @@ -3075,59 +3174,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
> gcc_assert (vect_epilogues);
> delete vinfos.pop ();
> }
> - if (vinfos.is_empty ()
> - && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
> - {
> - if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
> - {
> - delete first_loop_vinfo;
> - first_loop_vinfo = opt_loop_vec_info::success (NULL);
> - }
> - else
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Reanalyzing as a main loop "
> - "with vector mode %s\n",
> - GET_MODE_NAME
> - (loop_vinfo->vector_mode));
> - opt_loop_vec_info main_loop_vinfo
> - = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> - loop_vinfo->vector_mode,
> - NULL, &n_stmts, fatal);
> - if (main_loop_vinfo
> - && vect_joust_loop_vinfos (main_loop_vinfo,
> - first_loop_vinfo))
> - {
> - delete first_loop_vinfo;
> - first_loop_vinfo = opt_loop_vec_info::success (NULL);
> - delete loop_vinfo;
> - loop_vinfo
> - = opt_loop_vec_info::success (main_loop_vinfo);
> - }
> - else
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "***** No longer preferring vector"
> - " mode %s after reanalyzing the "
> - " loop as a main loop\n",
> - GET_MODE_NAME
> - (loop_vinfo->vector_mode));
> - delete main_loop_vinfo;
> - }
> - }
> - }
> }
> -
> - if (first_loop_vinfo == NULL)
> - {
> - first_loop_vinfo = loop_vinfo;
> - lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
> - }
> - else if (vect_epilogues
> - /* For now only allow one epilogue loop. */
> - && first_loop_vinfo->epilogue_vinfos.is_empty ())
> + /* For now only allow one epilogue loop. */
> + if (first_loop_vinfo->epilogue_vinfos.is_empty ())
> {
> first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
> poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
> @@ -3144,86 +3193,34 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
> loop_vinfo = opt_loop_vec_info::success (NULL);
> }
>
> - /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> - enabled, SIMDUID is not set, it is the innermost loop and we have
> - either already found the loop's SIMDLEN or there was no SIMDLEN to
> - begin with.
> - TODO: Enable epilogue vectorization for loops with SIMDUID set. */
> - vect_epilogues = (!simdlen
> - && loop->inner == NULL
> - && param_vect_epilogues_nomask
> - && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
> - && !loop->simduid
> - /* For now only allow one epilogue loop, but allow
> - pick_lowest_cost_p to replace it. */
> - && (first_loop_vinfo->epilogue_vinfos.is_empty ()
> - || pick_lowest_cost_p));
> -
> - /* Commit to first_loop_vinfo if we have no reason to try
> - alternatives. */
> - if (!simdlen && !vect_epilogues && !pick_lowest_cost_p)
> + /* For now only allow one epilogue loop, but allow
> + pick_lowest_cost_p to replace it, so commit to the
> + first epilogue if we have no reason to try alternatives. */
> + if (!pick_lowest_cost_p)
> break;
> }
>
> - /* Handle the case that the original loop can use partial
> - vectorization, but want to only adopt it for the epilogue.
> - The retry should be in the same mode as original. */
> - if (vect_epilogues
> - && loop_vinfo
> - && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo))
> - {
> - gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> - && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Re-trying analysis with same vector mode"
> - " %s for epilogue with partial vectors.\n",
> - GET_MODE_NAME (loop_vinfo->vector_mode));
> - continue;
> - }
> -
> - if (mode_i < vector_modes.length ()
> - && VECTOR_MODE_P (autodetected_vector_mode)
> - && (related_vector_mode (vector_modes[mode_i],
> - GET_MODE_INNER (autodetected_vector_mode))
> - == autodetected_vector_mode)
> - && (related_vector_mode (autodetected_vector_mode,
> - GET_MODE_INNER (vector_modes[mode_i]))
> - == vector_modes[mode_i]))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Skipping vector mode %s, which would"
> - " repeat the analysis for %s\n",
> - GET_MODE_NAME (vector_modes[mode_i]),
> - GET_MODE_NAME (autodetected_vector_mode));
> - mode_i += 1;
> - }
> -
> - if (mode_i == vector_modes.length ()
> - || autodetected_vector_mode == VOIDmode)
> + if (mode_i == vector_modes.length ())
> break;
>
> /* Try the next biggest vector size. */
> - next_vector_mode = vector_modes[mode_i++];
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Re-trying analysis with vector mode %s\n",
> - GET_MODE_NAME (next_vector_mode));
> + "***** Re-trying epilogue analysis with vector "
> + "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
> }
>
> - if (first_loop_vinfo)
> + if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
> {
> - loop->aux = (loop_vec_info) first_loop_vinfo;
> + LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> - "***** Choosing vector mode %s\n",
> - GET_MODE_NAME (first_loop_vinfo->vector_mode));
> - LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
> - return first_loop_vinfo;
> + "***** Choosing epilogue vector mode %s\n",
> + GET_MODE_NAME
> + (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
> }
>
> - return opt_loop_vec_info::propagate_failure (res);
> + return first_loop_vinfo;
> }
>
> /* Return true if there is an in-order reduction function for CODE, storing
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 4c9ab8124b5..a2e13acb6d2 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -475,7 +475,8 @@ vec_info::~vec_info ()
> }
>
> vec_info_shared::vec_info_shared ()
> - : datarefs (vNULL),
> + : n_stmts (0),
> + datarefs (vNULL),
> datarefs_copy (vNULL),
> ddrs (vNULL)
> {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 3f069e71296..7d3d3935c95 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -350,6 +350,9 @@ public:
> void save_datarefs();
> void check_datarefs();
>
> + /* The number of scalar stmts. */
> + unsigned n_stmts;
> +
> /* All data references. Freed by free_data_refs, so not an auto_vec. */
> vec<data_reference_p> datarefs;
> vec<data_reference> datarefs_copy;
> @@ -822,6 +825,7 @@ public:
> #define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
> #define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
> #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
> +#define LOOP_VINFO_N_STMTS(L) (L)->shared->n_stmts
> #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
> #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
> #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
> @@ -928,12 +932,6 @@ public:
> #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
> #define BB_VINFO_DDRS(B) (B)->shared->ddrs
>
> -static inline bb_vec_info
> -vec_info_for_bb (basic_block bb)
> -{
> - return (bb_vec_info) bb->aux;
> -}
> -
> /*-----------------------------------------------------------------*/
> /* Info on vectorized defs. */
> /*-----------------------------------------------------------------*/
More information about the Gcc-patches
mailing list