[PATCH] Split vector loop analysis into main and epilogue analysis

Richard Sandiford richard.sandiford@arm.com
Fri Nov 5 11:55:06 GMT 2021


Richard Biener <rguenther@suse.de> writes:
> As discussed this splits the analysis loop into two, first settling
> on a vector mode used for the main loop and only then analyzing
> the epilogue of that for possible vectorization.  That makes it
> easier to put in support for unrolled main loops.
>
> On the way I've realized some cleanup opportunities, namely caching
> n_stmts in vec_info_shared (it's computed by dataref analysis)
> avoiding passing that around, and not setting/clearing loop->aux
> during analysis - try_vectorize_loop_1 will ultimately set it
> on those we vectorize.
>
> This also gets rid of the previously introduced callback in
> vect_analyze_loop_1 in favor of making that advance the mode iterator.
> I'm now pushing VOIDmode explicitly into the vector_modes array
> which makes the re-start on the epilogue side a bit more
> straight-forward.  Note that this will now use auto-detection of the
> vector mode in case the main loop used it and we want to try
> LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P and the first mode from
> the target array if not.  I've added a comment that says we may
> want to make sure we don't try vectorizing the epilogue with a
> bigger vector size than the main loop but the situation isn't
> very likely to appear in practice I guess (and it was also present
> before this change).
>
> In principle this change should not change vectorization decisions
> but the way we handled re-analyzing epilogues as main loops makes
> me only 99% sure that it doesn't.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.

Comments inline.

>
> OK?
>
> Thanks,
> Richard.
>
> 2021-11-05  Richard Biener  <rguenther@suse.de>
>
> 	* tree-vectorizer.h (vec_info_shared::n_stmts): Add.
> 	(LOOP_VINFO_N_STMTS): Likewise.
> 	(vec_info_for_bb): Remove unused function.
> 	* tree-vectorizer.c (vec_info_shared::vec_info_shared):
> 	Initialize n_stmts member.
> 	* tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
> 	(vect_create_loop_vinfo): Do not set loop->aux.
> 	(vect_analyze_loop_2): Do not get n_stmts as argument,
> 	instead use LOOP_VINFO_N_STMTS.  Set LOOP_VINFO_VECTORIZABLE_P
> 	here.
> 	(vect_analyze_loop_1): Remove callback, get the mode iterator
> 	and autodetected_vector_mode as argument, advancing the
> 	iterator and initializing autodetected_vector_mode here.
> 	(vect_analyze_loop): Split analysis loop into two, first
> 	processing main loops only and then epilogues.
> ---
>  gcc/tree-vect-loop.c  | 415 +++++++++++++++++++++---------------------
>  gcc/tree-vectorizer.c |   3 +-
>  gcc/tree-vectorizer.h |  10 +-
>  3 files changed, 212 insertions(+), 216 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 13a53436729..abf87f99d6d 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3.  If not see
>  <http://www.gnu.org/licenses/>.  */
>  
>  #define INCLUDE_ALGORITHM
> -#define INCLUDE_FUNCTIONAL
>  #include "config.h"
>  #include "system.h"
>  #include "coretypes.h"
> @@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
>  	  = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
>      }
>  
> -  gcc_assert (!loop->aux);
> -  loop->aux = loop_vinfo;
>    return loop_vinfo;
>  }
>  
> @@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
>     for it.  The different analyses will record information in the
>     loop_vec_info struct.  */
>  static opt_result
> -vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
> +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
>  {
>    opt_result ok = opt_result::success ();
>    int res;
> @@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
>        opt_result res
>  	= vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
>  				     &LOOP_VINFO_DATAREFS (loop_vinfo),
> -				     n_stmts);
> +				     &LOOP_VINFO_N_STMTS (loop_vinfo));
>        if (!res)
>  	{
>  	  if (dump_enabled_p ())
> @@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
>    poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>  
>    /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
> -  ok = vect_analyze_slp (loop_vinfo, *n_stmts);
> +  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
>    if (!ok)
>      return ok;
>  
> @@ -2641,6 +2638,7 @@ start_over:
>  			LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
>  
>    /* Ok to vectorize!  */
> +  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
>    return opt_result::success ();
>  
>  again:
> @@ -2891,46 +2889,70 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
>    return true;
>  }
>  
> -/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
> -   not NULL.  Process the analyzed loop with PROCESS even if analysis
> -   failed.  Sets *N_STMTS and FATAL according to the analysis.
> +/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is
> +   not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
> +   MODE_I to the next mode useful to analyze.
>     Return the loop_vinfo on success and wrapped null on failure.  */
>  
>  static opt_loop_vec_info
>  vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
>  		     const vect_loop_form_info *loop_form_info,
> -		     machine_mode vector_mode, loop_vec_info main_loop_vinfo,
> -		     unsigned int *n_stmts, bool &fatal,
> -		     std::function<void(loop_vec_info)> process = nullptr)
> +		     loop_vec_info main_loop_vinfo,
> +		     const vector_modes &vector_modes, unsigned &mode_i,
> +		     machine_mode &autodetected_vector_mode,
> +		     bool &fatal)
>  {
>    loop_vec_info loop_vinfo
>      = vect_create_loop_vinfo (loop, shared, loop_form_info);
> -  loop_vinfo->vector_mode = vector_mode;
> -
>    if (main_loop_vinfo)
>      LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_vinfo;
>  
> +  machine_mode vector_mode = vector_modes[mode_i];
> +  loop_vinfo->vector_mode = vector_mode;
> +
>    /* Run the main analysis.  */
> -  fatal = false;
> -  opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, n_stmts);
> -  loop->aux = NULL;
> +  opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal);
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +		     "***** Analysis %s with vector mode %s\n",
> +		     res ? "succeeded" : " failed",
> +		     GET_MODE_NAME (loop_vinfo->vector_mode));
>  
> -  /* Process info before we destroy loop_vinfo upon analysis failure
> -     when there was no fatal failure.  */
> -  if (!fatal && process)
> -    process (loop_vinfo);
> +  /* Remember the autodetected vector mode.  */
> +  if (vector_mode == VOIDmode)
> +    autodetected_vector_mode = loop_vinfo->vector_mode;
>  
> -  if (dump_enabled_p ())
> +  /* Advance mode_i, first skipping modes that would result in the
> +     same analysis result.  */
> +  while (mode_i + 1 < vector_modes.length ()
> +	 && vect_chooses_same_modes_p (loop_vinfo,
> +				       vector_modes[mode_i + 1]))
>      {
> -      if (res)
> +      if (dump_enabled_p ())
>  	dump_printf_loc (MSG_NOTE, vect_location,
> -			 "***** Analysis succeeded with vector mode %s\n",
> -			 GET_MODE_NAME (loop_vinfo->vector_mode));
> -      else
> +			 "***** The result for vector mode %s would"
> +			 " be the same\n",
> +			 GET_MODE_NAME (vector_modes[mode_i + 1]));
> +      mode_i += 1;
> +    }
> +  if (mode_i + 1 < vector_modes.length ()
> +      && VECTOR_MODE_P (autodetected_vector_mode)
> +      && (related_vector_mode (vector_modes[mode_i + 1],
> +			       GET_MODE_INNER (autodetected_vector_mode))
> +	  == autodetected_vector_mode)
> +      && (related_vector_mode (autodetected_vector_mode,
> +			       GET_MODE_INNER (vector_modes[mode_i + 1]))
> +	  == vector_modes[mode_i + 1]))
> +    {
> +      if (dump_enabled_p ())
>  	dump_printf_loc (MSG_NOTE, vect_location,
> -			 "***** Analysis failed with vector mode %s\n",
> -			 GET_MODE_NAME (loop_vinfo->vector_mode));
> +			 "***** Skipping vector mode %s, which would"
> +			 " repeat the analysis for %s\n",
> +			 GET_MODE_NAME (vector_modes[mode_i + 1]),
> +			 GET_MODE_NAME (autodetected_vector_mode));
> +      mode_i += 1;
>      }
> +  mode_i++;
>  
>    if (!res)
>      {
> @@ -2940,7 +2962,6 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
>        return opt_loop_vec_info::propagate_failure (res);
>      }
>  
> -  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
>    return opt_loop_vec_info::success (loop_vinfo);
>  }
>  
> @@ -2952,14 +2973,6 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
>  opt_loop_vec_info
>  vect_analyze_loop (class loop *loop, vec_info_shared *shared)
>  {
> -  auto_vector_modes vector_modes;
> -
> -  /* Autodetect first vector size we try.  */
> -  unsigned int autovec_flags
> -    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
> -						    loop->simdlen != 0);
> -  unsigned int mode_i = 0;
> -
>    DUMP_VECT_SCOPE ("analyze_loop_nest");
>  
>    if (loop_outer (loop)
> @@ -2985,70 +2998,59 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
>        return opt_loop_vec_info::propagate_failure (res);
>      }
>  
> -  unsigned n_stmts = 0;
> -  machine_mode autodetected_vector_mode = VOIDmode;
> -  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -  machine_mode next_vector_mode = VOIDmode;
> -  poly_uint64 lowest_th = 0;
> -  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
> -			     && !unlimited_cost_model (loop));
> +  /* When pick_lowest_cost_p is true, we should in principle iterate
> +     over all the loop_vec_infos that LOOP_VINFO could replace and
> +     try to vectorize LOOP_VINFO under the same conditions.
> +     E.g. when trying to replace an epilogue loop, we should vectorize
> +     LOOP_VINFO as an epilogue loop with the same VF limit.  When trying
> +     to replace the main loop, we should vectorize LOOP_VINFO as a main
> +     loop too.
>  
> -  bool vect_epilogues = false;
> -  unsigned HOST_WIDE_INT simdlen = loop->simdlen;
> -  while (1)
> -    {
> -      /* When pick_lowest_cost_p is true, we should in principle iterate
> -	 over all the loop_vec_infos that LOOP_VINFO could replace and
> -	 try to vectorize LOOP_VINFO under the same conditions.
> -	 E.g. when trying to replace an epilogue loop, we should vectorize
> -	 LOOP_VINFO as an epilogue loop with the same VF limit.  When trying
> -	 to replace the main loop, we should vectorize LOOP_VINFO as a main
> -	 loop too.
> +     However, autovectorize_vector_modes is usually sorted as follows:
>  
> -	 However, autovectorize_vector_modes is usually sorted as follows:
> +     - Modes that naturally produce lower VFs usually follow modes that
> +     naturally produce higher VFs.
>  
> -	 - Modes that naturally produce lower VFs usually follow modes that
> -	   naturally produce higher VFs.
> +     - When modes naturally produce the same VF, maskable modes
> +     usually follow unmaskable ones, so that the maskable mode
> +     can be used to vectorize the epilogue of the unmaskable mode.
>  
> -	 - When modes naturally produce the same VF, maskable modes
> -	   usually follow unmaskable ones, so that the maskable mode
> -	   can be used to vectorize the epilogue of the unmaskable mode.
> +     This order is preferred because it leads to the maximum
> +     epilogue vectorization opportunities.  Targets should only use
> +     a different order if they want to make wide modes available while
> +     disparaging them relative to earlier, smaller modes.  The assumption
> +     in that case is that the wider modes are more expensive in some
> +     way that isn't reflected directly in the costs.
>  
> -	 This order is preferred because it leads to the maximum
> -	 epilogue vectorization opportunities.  Targets should only use
> -	 a different order if they want to make wide modes available while
> -	 disparaging them relative to earlier, smaller modes.  The assumption
> -	 in that case is that the wider modes are more expensive in some
> -	 way that isn't reflected directly in the costs.
> +     There should therefore be few interesting cases in which
> +     LOOP_VINFO fails when treated as an epilogue loop, succeeds when
> +     treated as a standalone loop, and ends up being genuinely cheaper
> +     than FIRST_LOOP_VINFO.  */

I think the patch obsoletes this big comment, which was trying to explain
why we *didn't* try to vectorise as a main loop separately from an
epilogue loop.  (It was already on shaky ground, as previously discussed.)

>  
> -	 There should therefore be few interesting cases in which
> -	 LOOP_VINFO fails when treated as an epilogue loop, succeeds when
> -	 treated as a standalone loop, and ends up being genuinely cheaper
> -	 than FIRST_LOOP_VINFO.  */
> +  auto_vector_modes vector_modes;
> +  /* Autodetect first vector size we try.  */
> +  vector_modes.safe_push (VOIDmode);
> +  unsigned int autovec_flags
> +    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
> +						    loop->simdlen != 0);
> +  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
> +			     && !unlimited_cost_model (loop));
> +  machine_mode autodetected_vector_mode = VOIDmode;
> +  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
> +  unsigned int mode_i = 0;
> +  unsigned int first_loop_i = 0;
> +  unsigned int first_loop_next_i = 0;
> +  unsigned HOST_WIDE_INT simdlen = loop->simdlen;
>  
> +  /* First determine the main loop vectorization mode.  */
> +  while (1)
> +    {
> +      unsigned int loop_vinfo_i = mode_i;
>        bool fatal;
> -      auto cb = [&] (loop_vec_info loop_vinfo)
> -	{
> -	  if (mode_i == 0)
> -	    autodetected_vector_mode = loop_vinfo->vector_mode;
> -	  while (mode_i < vector_modes.length ()
> -		 && vect_chooses_same_modes_p (loop_vinfo,
> -					       vector_modes[mode_i]))
> -	    {
> -	      if (dump_enabled_p ())
> -		dump_printf_loc (MSG_NOTE, vect_location,
> -				 "***** The result for vector mode %s would"
> -				 " be the same\n",
> -				 GET_MODE_NAME (vector_modes[mode_i]));
> -	      mode_i += 1;
> -	    }
> -	};
>        opt_loop_vec_info loop_vinfo
>  	= vect_analyze_loop_1 (loop, shared, &loop_form_info,
> -			       next_vector_mode,
> -			       vect_epilogues
> -			       ? (loop_vec_info)first_loop_vinfo : NULL,
> -			       &n_stmts, fatal, cb);
> +			       NULL, vector_modes, mode_i,
> +			       autodetected_vector_mode, fatal);
>        if (fatal)
>  	break;
>  
> @@ -3061,10 +3063,107 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
>  	    {
>  	      delete first_loop_vinfo;
>  	      first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -	      LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
>  	      simdlen = 0;
>  	    }
>  	  else if (pick_lowest_cost_p && first_loop_vinfo)
> +	    {
> +	      /* Keep trying to roll back vectorization attempts while the
> +		 loop_vec_infos they produced were worse than this one.  */
> +	      if (vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
> +		{
> +		  delete first_loop_vinfo;
> +		  first_loop_vinfo = opt_loop_vec_info::success (NULL);
> +		}

The comment no longer really describes the code here.  We're just
making a straight comparison between two main loop vinfos (which is
a good thing).  Think it might be easier to follow if the joust condition
was part of the “else if”, so that it's more obviously a sibling of the
simdlen comparison.

> +	    }
> +	  if (first_loop_vinfo == NULL)
> +	    {
> +	      first_loop_vinfo = loop_vinfo;
> +	      first_loop_i = loop_vinfo_i;
> +	      first_loop_next_i = mode_i;
> +	    }
> +	  else
> +	    {
> +	      delete loop_vinfo;
> +	      loop_vinfo = opt_loop_vec_info::success (NULL);
> +	    }
> +
> +	  /* Commit to first_loop_vinfo if we have no reason to try
> +	     alternatives.  */
> +	  if (!simdlen && !pick_lowest_cost_p)
> +	    break;
> +	}
> +      if (mode_i == vector_modes.length ()
> +	  || autodetected_vector_mode == VOIDmode)
> +	break;
> +
> +      /* Try the next biggest vector size.  */
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "***** Re-trying analysis with vector mode %s\n",
> +			 GET_MODE_NAME (vector_modes[mode_i]));
> +    }
> +  if (!first_loop_vinfo)
> +    return opt_loop_vec_info::propagate_failure (res);
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +		     "***** Choosing vector mode %s\n",
> +		     GET_MODE_NAME (first_loop_vinfo->vector_mode));
> +
> +  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> +     enabled, SIMDUID is not set, it is the innermost loop and we have
> +     either already found the loop's SIMDLEN or there was no SIMDLEN to
> +     begin with.
> +     TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
> +  bool vect_epilogues = (!simdlen
> +			 && loop->inner == NULL
> +			 && param_vect_epilogues_nomask
> +			 && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
> +			 && !loop->simduid);
> +  if (!vect_epilogues)
> +    return first_loop_vinfo;
> +
> +  /* Now analyze first_loop_vinfo for epilogue vectorization.  */
> +  poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
> +
> +  /* Handle the case that the original loop can use partial
> +     vectorization, but want to only adopt it for the epilogue.
> +     The retry should be in the same mode as original.  */
> +  if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
> +    {
> +      gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (first_loop_vinfo)
> +		  && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (first_loop_vinfo));
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "***** Re-trying analysis with same vector mode"
> +			 " %s for epilogue with partial vectors.\n",
> +			 GET_MODE_NAME (first_loop_vinfo->vector_mode));
> +      mode_i = first_loop_i;
> +    }
> +  else
> +    {
> +      mode_i = first_loop_next_i;
> +      if (mode_i == vector_modes.length ())
> +	return first_loop_vinfo;
> +    }

It's an interesting question whether we should continue doing this,
or whether we should consider all epilogue alternatives even for
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P.  Perhaps this reorg makes
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P redundant.

Obviously doesn't affect this patch though.

Looks great to me otherwise FWIW.

Thanks,
Richard

> +
> +  /* ???  If first_loop_vinfo was using VOIDmode then we probably
> +     want to instead search for the corresponding mode in vector_modes[].  */
> +
> +  while (1)
> +    {
> +      bool fatal;
> +      opt_loop_vec_info loop_vinfo
> +	= vect_analyze_loop_1 (loop, shared, &loop_form_info,
> +			       first_loop_vinfo,
> +			       vector_modes, mode_i,
> +			       autodetected_vector_mode, fatal);
> +      if (fatal)
> +	break;
> +
> +      if (loop_vinfo)
> +	{
> +	  if (pick_lowest_cost_p)
>  	    {
>  	      /* Keep trying to roll back vectorization attempts while the
>  		 loop_vec_infos they produced were worse than this one.  */
> @@ -3075,59 +3174,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
>  		  gcc_assert (vect_epilogues);
>  		  delete vinfos.pop ();
>  		}
> -	      if (vinfos.is_empty ()
> -		  && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
> -		{
> -		  if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
> -		    {
> -		      delete first_loop_vinfo;
> -		      first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -		    }
> -		  else
> -		    {
> -		      if (dump_enabled_p ())
> -			dump_printf_loc (MSG_NOTE, vect_location,
> -					 "***** Reanalyzing as a main loop "
> -					 "with vector mode %s\n",
> -					 GET_MODE_NAME
> -					   (loop_vinfo->vector_mode));
> -		      opt_loop_vec_info main_loop_vinfo
> -			= vect_analyze_loop_1 (loop, shared, &loop_form_info,
> -					       loop_vinfo->vector_mode,
> -					       NULL, &n_stmts, fatal);
> -		      if (main_loop_vinfo
> -			  && vect_joust_loop_vinfos (main_loop_vinfo,
> -						     first_loop_vinfo))
> -			{
> -			  delete first_loop_vinfo;
> -			  first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -			  delete loop_vinfo;
> -			  loop_vinfo
> -			    = opt_loop_vec_info::success (main_loop_vinfo);
> -			}
> -		      else
> -			{
> -			  if (dump_enabled_p ())
> -			    dump_printf_loc (MSG_NOTE, vect_location,
> -					     "***** No longer preferring vector"
> -					     " mode %s after reanalyzing the "
> -					     " loop as a main loop\n",
> -					     GET_MODE_NAME
> -					       (loop_vinfo->vector_mode));
> -			  delete main_loop_vinfo;
> -			}
> -		    }
> -		}
>  	    }
> -
> -	  if (first_loop_vinfo == NULL)
> -	    {
> -	      first_loop_vinfo = loop_vinfo;
> -	      lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
> -	    }
> -	  else if (vect_epilogues
> -		   /* For now only allow one epilogue loop.  */
> -		   && first_loop_vinfo->epilogue_vinfos.is_empty ())
> +	  /* For now only allow one epilogue loop.  */
> +	  if (first_loop_vinfo->epilogue_vinfos.is_empty ())
>  	    {
>  	      first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
>  	      poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
> @@ -3144,86 +3193,34 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
>  	      loop_vinfo = opt_loop_vec_info::success (NULL);
>  	    }
>  
> -	  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> -	     enabled, SIMDUID is not set, it is the innermost loop and we have
> -	     either already found the loop's SIMDLEN or there was no SIMDLEN to
> -	     begin with.
> -	     TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
> -	  vect_epilogues = (!simdlen
> -			    && loop->inner == NULL
> -			    && param_vect_epilogues_nomask
> -			    && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
> -			    && !loop->simduid
> -			    /* For now only allow one epilogue loop, but allow
> -			       pick_lowest_cost_p to replace it.  */
> -			    && (first_loop_vinfo->epilogue_vinfos.is_empty ()
> -				|| pick_lowest_cost_p));
> -
> -	  /* Commit to first_loop_vinfo if we have no reason to try
> -	     alternatives.  */
> -	  if (!simdlen && !vect_epilogues && !pick_lowest_cost_p)
> +	  /* For now only allow one epilogue loop, but allow
> +	     pick_lowest_cost_p to replace it, so commit to the
> +	     first epilogue if we have no reason to try alternatives.  */
> +	  if (!pick_lowest_cost_p)
>  	    break;
>  	}
>  
> -      /* Handle the case that the original loop can use partial
> -	 vectorization, but want to only adopt it for the epilogue.
> -	 The retry should be in the same mode as original.  */
> -      if (vect_epilogues
> -	  && loop_vinfo
> -	  && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo))
> -	{
> -	  gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> -		      && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
> -	  if (dump_enabled_p ())
> -	    dump_printf_loc (MSG_NOTE, vect_location,
> -			     "***** Re-trying analysis with same vector mode"
> -			     " %s for epilogue with partial vectors.\n",
> -			     GET_MODE_NAME (loop_vinfo->vector_mode));
> -	  continue;
> -	}
> -
> -      if (mode_i < vector_modes.length ()
> -	  && VECTOR_MODE_P (autodetected_vector_mode)
> -	  && (related_vector_mode (vector_modes[mode_i],
> -				   GET_MODE_INNER (autodetected_vector_mode))
> -	      == autodetected_vector_mode)
> -	  && (related_vector_mode (autodetected_vector_mode,
> -				   GET_MODE_INNER (vector_modes[mode_i]))
> -	      == vector_modes[mode_i]))
> -	{
> -	  if (dump_enabled_p ())
> -	    dump_printf_loc (MSG_NOTE, vect_location,
> -			     "***** Skipping vector mode %s, which would"
> -			     " repeat the analysis for %s\n",
> -			     GET_MODE_NAME (vector_modes[mode_i]),
> -			     GET_MODE_NAME (autodetected_vector_mode));
> -	  mode_i += 1;
> -	}
> -
> -      if (mode_i == vector_modes.length ()
> -	  || autodetected_vector_mode == VOIDmode)
> +      if (mode_i == vector_modes.length ())
>  	break;
>  
>        /* Try the next biggest vector size.  */
> -      next_vector_mode = vector_modes[mode_i++];
>        if (dump_enabled_p ())
>  	dump_printf_loc (MSG_NOTE, vect_location,
> -			 "***** Re-trying analysis with vector mode %s\n",
> -			 GET_MODE_NAME (next_vector_mode));
> +			 "***** Re-trying epilogue analysis with vector "
> +			 "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
>      }
>  
> -  if (first_loop_vinfo)
> +  if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
>      {
> -      loop->aux = (loop_vec_info) first_loop_vinfo;
> +      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
>        if (dump_enabled_p ())
>  	dump_printf_loc (MSG_NOTE, vect_location,
> -			 "***** Choosing vector mode %s\n",
> -			 GET_MODE_NAME (first_loop_vinfo->vector_mode));
> -      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
> -      return first_loop_vinfo;
> +			 "***** Choosing epilogue vector mode %s\n",
> +			 GET_MODE_NAME
> +			   (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
>      }
>  
> -  return opt_loop_vec_info::propagate_failure (res);
> +  return first_loop_vinfo;
>  }
>  
>  /* Return true if there is an in-order reduction function for CODE, storing
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 4c9ab8124b5..a2e13acb6d2 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -475,7 +475,8 @@ vec_info::~vec_info ()
>  }
>  
>  vec_info_shared::vec_info_shared ()
> -  : datarefs (vNULL),
> +  : n_stmts (0),
> +    datarefs (vNULL),
>      datarefs_copy (vNULL),
>      ddrs (vNULL)
>  {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 3f069e71296..7d3d3935c95 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -350,6 +350,9 @@ public:
>    void save_datarefs();
>    void check_datarefs();
>  
> +  /* The number of scalar stmts.  */
> +  unsigned n_stmts;
> +
>    /* All data references.  Freed by free_data_refs, so not an auto_vec.  */
>    vec<data_reference_p> datarefs;
>    vec<data_reference> datarefs_copy;
> @@ -822,6 +825,7 @@ public:
>  #define LOOP_VINFO_RGROUP_COMPARE_TYPE(L)  (L)->rgroup_compare_type
>  #define LOOP_VINFO_RGROUP_IV_TYPE(L)       (L)->rgroup_iv_type
>  #define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
> +#define LOOP_VINFO_N_STMTS(L)		   (L)->shared->n_stmts
>  #define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
>  #define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
>  #define LOOP_VINFO_DDRS(L)                 (L)->shared->ddrs
> @@ -928,12 +932,6 @@ public:
>  #define BB_VINFO_DATAREFS(B)         (B)->shared->datarefs
>  #define BB_VINFO_DDRS(B)             (B)->shared->ddrs
>  
> -static inline bb_vec_info
> -vec_info_for_bb (basic_block bb)
> -{
> -  return (bb_vec_info) bb->aux;
> -}
> -
>  /*-----------------------------------------------------------------*/
>  /* Info on vectorized defs.                                        */
>  /*-----------------------------------------------------------------*/


More information about the Gcc-patches mailing list