This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][2/3] Fix PR59058


This removes the broken function number_of_exit_cond_executions from
tree-scalar-evolution.c and re-implements it inside its now single
user, vect_get_loop_niters (still unfixed).  It also re-shuffles the
vectorizer niter code some more to make the final fix (using the
number of latch executions throughout) easier.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2013-11-21  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/59058
	* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
	* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
	* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
	(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
	(NITERS_KNOWN_P): Fold into ...
	(LOOP_VINFO_NITERS_KNOWN_P): ... this.
	(LOOP_VINFO_PEELING_FOR_NITER): Add.
	* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
	Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
	(vect_do_peeling_for_alignment): Re-use precomputed niter
	instead of re-emitting it.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
	Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
	* tree-vect-loop.c (vect_get_loop_niters): Use
	number_of_latch_executions.
	(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
	(vect_analyze_loop_form): Simplify.
	(vect_analyze_loop_operations): Move epilogue peeling code ...
	(vect_analyze_loop_2): ... here and adjust it to compute
	LOOP_VINFO_PEELING_FOR_NITER.
	(vect_estimate_min_profitable_iters): Use
	LOOP_VINFO_PEELING_FOR_ALIGNMENT.
	(vect_build_loop_niters): Emit on the preheader.
	(vect_generate_tmps_on_preheader): Likewise.
	(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
	of recomputing it.  Adjust.

Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c.orig	2013-11-21 14:58:43.061653802 +0100
--- gcc/tree-vect-loop-manip.c	2013-11-21 14:58:51.151747654 +0100
*************** vect_gen_niters_for_prolog_loop (loop_ve
*** 1736,1751 ****
  
    pe = loop_preheader_edge (loop);
  
!   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
      {
!       int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
  
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "known peeling = %d.\n", npeel);
  
        iters = build_int_cst (niters_type, npeel);
!       *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      }
    else
      {
--- 1736,1751 ----
  
    pe = loop_preheader_edge (loop);
  
!   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
      {
!       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "known peeling = %d.\n", npeel);
  
        iters = build_int_cst (niters_type, npeel);
!       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
      }
    else
      {
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1876,1882 ****
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    tree niters_of_prolog_loop;
-   tree n_iters;
    tree wide_prolog_niters;
    struct loop *new_loop;
    int max_iter;
--- 1876,1881 ----
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1918,1926 ****
                 "loop to %d\n", max_iter);
  
    /* Update number of times loop executes.  */
-   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
    LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
! 		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
  
    if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
      wide_prolog_niters = niters_of_prolog_loop;
--- 1917,1924 ----
                 "loop to %d\n", max_iter);
  
    /* Update number of times loop executes.  */
    LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
! 		TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
  
    if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
      wide_prolog_niters = niters_of_prolog_loop;
Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h.orig	2013-11-21 14:58:43.062653811 +0100
--- gcc/tree-vectorizer.h	2013-11-21 14:58:51.153747678 +0100
*************** typedef struct _loop_vec_info {
*** 361,367 ****
  #define LOOP_VINFO_DATAREFS(L)             (L)->datarefs
  #define LOOP_VINFO_DDRS(L)                 (L)->ddrs
  #define LOOP_VINFO_INT_NITERS(L)           (TREE_INT_CST_LOW ((L)->num_iters))
! #define LOOP_PEELING_FOR_ALIGNMENT(L)      (L)->peeling_for_alignment
  #define LOOP_VINFO_UNALIGNED_DR(L)         (L)->unaligned_dr
  #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
  #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
--- 361,367 ----
  #define LOOP_VINFO_DATAREFS(L)             (L)->datarefs
  #define LOOP_VINFO_DDRS(L)                 (L)->ddrs
  #define LOOP_VINFO_INT_NITERS(L)           (TREE_INT_CST_LOW ((L)->num_iters))
! #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
  #define LOOP_VINFO_UNALIGNED_DR(L)         (L)->unaligned_dr
  #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
  #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
*************** typedef struct _loop_vec_info {
*** 375,392 ****
  #define LOOP_VINFO_TARGET_COST_DATA(L)     (L)->target_cost_data
  #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
  #define LOOP_VINFO_OPERANDS_SWAPPED(L)     (L)->operands_swapped
  
  #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
! (L)->may_misalign_stmts.length () > 0
  #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)     \
! (L)->may_alias_ddrs.length () > 0
! 
! #define NITERS_KNOWN_P(n)                     \
! (tree_fits_shwi_p ((n))                        \
! && tree_to_shwi ((n)) > 0)
  
  #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
! NITERS_KNOWN_P ((L)->num_iters)
  
  static inline loop_vec_info
  loop_vec_info_for_loop (struct loop *loop)
--- 375,389 ----
  #define LOOP_VINFO_TARGET_COST_DATA(L)     (L)->target_cost_data
  #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
  #define LOOP_VINFO_OPERANDS_SWAPPED(L)     (L)->operands_swapped
+ #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
  
  #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
!   (L)->may_misalign_stmts.length () > 0
  #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)     \
!   (L)->may_alias_ddrs.length () > 0
  
  #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
!   (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
  
  static inline loop_vec_info
  loop_vec_info_for_loop (struct loop *loop)
Index: gcc/tree-scalar-evolution.c
===================================================================
*** gcc/tree-scalar-evolution.c.orig	2013-11-21 14:48:17.710397038 +0100
--- gcc/tree-scalar-evolution.c	2013-11-21 14:58:51.148747619 +0100
*************** number_of_latch_executions (struct loop
*** 2910,2943 ****
    loop->nb_iterations = res;
    return res;
  }
- 
- /* Returns the number of executions of the exit condition of LOOP,
-    i.e., the number by one higher than number_of_latch_executions.
-    Note that unlike number_of_latch_executions, this number does
-    not necessarily fit in the unsigned variant of the type of
-    the control variable -- if the number of iterations is a constant,
-    we return chrec_dont_know if adding one to number_of_latch_executions
-    overflows; however, in case the number of iterations is symbolic
-    expression, the caller is responsible for dealing with this
-    the possible overflow.  */
- 
- tree
- number_of_exit_cond_executions (struct loop *loop)
- {
-   tree ret = number_of_latch_executions (loop);
-   tree type = chrec_type (ret);
- 
-   if (chrec_contains_undetermined (ret))
-     return ret;
- 
-   ret = chrec_fold_plus (type, ret, build_int_cst (type, 1));
-   if (TREE_CODE (ret) == INTEGER_CST
-       && TREE_OVERFLOW (ret))
-     return chrec_dont_know;
- 
-   return ret;
- }
- 
  
  
  /* Counters for the stats.  */
--- 2910,2915 ----
Index: gcc/tree-scalar-evolution.h
===================================================================
*** gcc/tree-scalar-evolution.h.orig	2013-11-21 14:48:17.709397027 +0100
--- gcc/tree-scalar-evolution.h	2013-11-21 14:58:51.148747619 +0100
*************** along with GCC; see the file COPYING3.
*** 22,28 ****
  #define GCC_TREE_SCALAR_EVOLUTION_H
  
  extern tree number_of_latch_executions (struct loop *);
- extern tree number_of_exit_cond_executions (struct loop *);
  extern gimple get_loop_exit_condition (const struct loop *);
  
  extern void scev_initialize (void);
--- 22,27 ----
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig	2013-11-21 14:48:17.709397027 +0100
--- gcc/tree-vect-data-refs.c	2013-11-21 14:58:51.150747642 +0100
*************** vect_enhance_data_refs_alignment (loop_v
*** 1735,1743 ****
  
            LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
            if (npeel)
!             LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
            else
!             LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
  	  SET_DR_MISALIGNMENT (dr0, 0);
  	  if (dump_enabled_p ())
              {
--- 1735,1744 ----
  
            LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
            if (npeel)
!             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
            else
!             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
! 	      = DR_MISALIGNMENT (dr0);
  	  SET_DR_MISALIGNMENT (dr0, 0);
  	  if (dump_enabled_p ())
              {
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c.orig	2013-11-21 14:48:17.709397027 +0100
--- gcc/tree-vect-loop.c	2013-11-21 14:58:51.152747666 +0100
*************** vect_analyze_scalar_cycles (loop_vec_inf
*** 771,781 ****
      vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
  }
  
  /* Function vect_get_loop_niters.
  
!    Determine how many iterations the loop is executed.
!    If an expression that represents the number of iterations
!    can be constructed, place it in NUMBER_OF_ITERATIONS.
     Return the loop exit condition.  */
  
  static gimple
--- 771,782 ----
      vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
  }
  
+ 
  /* Function vect_get_loop_niters.
  
!    Determine how many iterations the loop is executed and place it
!    in NUMBER_OF_ITERATIONS.
! 
     Return the loop exit condition.  */
  
  static gimple
*************** vect_get_loop_niters (struct loop *loop,
*** 786,805 ****
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
  		     "=== get_loop_niters ===\n");
-   niters = number_of_exit_cond_executions (loop);
  
!   if (niters != NULL_TREE
!       && niters != chrec_dont_know)
!     {
!       *number_of_iterations = niters;
! 
!       if (dump_enabled_p ())
!         {
!           dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
!           dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
!           dump_printf (MSG_NOTE, "\n");
!         }
!     }
  
    return get_loop_exit_condition (loop);
  }
--- 787,802 ----
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
  		     "=== get_loop_niters ===\n");
  
!   niters = number_of_latch_executions (loop);
!   /* We want the number of loop header executions which is the number
!      of latch executions plus one.
!      ???  For UINT_MAX latch executions this number overflows to zero
!      for loops like do { n++; } while (n != 0);  */
!   if (niters && !chrec_contains_undetermined (niters))
!     niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
! 			  build_int_cst (TREE_TYPE (niters), 1));
!   *number_of_iterations = niters;
  
    return get_loop_exit_condition (loop);
  }
*************** new_loop_vec_info (struct loop *loop)
*** 907,913 ****
    LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
    LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
    LOOP_VINFO_VECTORIZABLE_P (res) = 0;
!   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
    LOOP_VINFO_VECT_FACTOR (res) = 0;
    LOOP_VINFO_LOOP_NEST (res).create (3);
    LOOP_VINFO_DATAREFS (res).create (10);
--- 904,910 ----
    LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
    LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
    LOOP_VINFO_VECTORIZABLE_P (res) = 0;
!   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
    LOOP_VINFO_VECT_FACTOR (res) = 0;
    LOOP_VINFO_LOOP_NEST (res).create (3);
    LOOP_VINFO_DATAREFS (res).create (10);
*************** new_loop_vec_info (struct loop *loop)
*** 924,929 ****
--- 921,927 ----
    LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
    LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
    LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
+   LOOP_VINFO_PEELING_FOR_NITER (res) = false;
    LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
  
    return res;
*************** vect_analyze_loop_form (struct loop *loo
*** 1091,1102 ****
          }
  
        if (empty_block_p (loop->header))
!     {
!           if (dump_enabled_p ())
!             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  			     "not vectorized: empty loop.\n");
!       return NULL;
!     }
      }
    else
      {
--- 1089,1100 ----
          }
  
        if (empty_block_p (loop->header))
! 	{
! 	  if (dump_enabled_p ())
! 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  			     "not vectorized: empty loop.\n");
! 	  return NULL;
! 	}
      }
    else
      {
*************** vect_analyze_loop_form (struct loop *loo
*** 1243,1249 ****
        return NULL;
      }
  
!   if (!number_of_iterations)
      {
        if (dump_enabled_p ())
  	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 1241,1248 ----
        return NULL;
      }
  
!   if (!number_of_iterations
!       || chrec_contains_undetermined (number_of_iterations))
      {
        if (dump_enabled_p ())
  	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*************** vect_analyze_loop_form (struct loop *loo
*** 1254,1270 ****
        return NULL;
      }
  
!   if (chrec_contains_undetermined (number_of_iterations))
      {
        if (dump_enabled_p ())
! 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! 			     "Infinite number of iterations.\n");
        if (inner_loop_vinfo)
! 	destroy_loop_vec_info (inner_loop_vinfo, true);
        return NULL;
      }
  
!   if (!NITERS_KNOWN_P (number_of_iterations))
      {
        if (dump_enabled_p ())
          {
--- 1253,1273 ----
        return NULL;
      }
  
!   if (integer_zerop (number_of_iterations))
      {
        if (dump_enabled_p ())
! 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! 			 "not vectorized: number of iterations = 0.\n");
        if (inner_loop_vinfo)
!         destroy_loop_vec_info (inner_loop_vinfo, true);
        return NULL;
      }
  
!   loop_vinfo = new_loop_vec_info (loop);
!   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
!   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
! 
!   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
      {
        if (dump_enabled_p ())
          {
*************** vect_analyze_loop_form (struct loop *loo
*** 1274,1292 ****
            dump_printf (MSG_NOTE, "\n");
          }
      }
-   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
-     {
-       if (dump_enabled_p ())
- 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			 "not vectorized: number of iterations = 0.\n");
-       if (inner_loop_vinfo)
-         destroy_loop_vec_info (inner_loop_vinfo, true);
-       return NULL;
-     }
- 
-   loop_vinfo = new_loop_vec_info (loop);
-   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
-   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
  
    STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
  
--- 1277,1282 ----
*************** vect_analyze_loop_operations (loop_vec_i
*** 1588,1610 ****
        return false;
      }
  
-   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-       || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- 	  < exact_log2 (vectorization_factor)))
-     {
-       if (dump_enabled_p ())
-         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
-       if (!vect_can_advance_ivs_p (loop_vinfo)
- 	  || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
-         {
-           if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			     "not vectorized: can't create required "
- 			     "epilog loop\n");
-           return false;
-         }
-     }
- 
    return true;
  }
  
--- 1578,1583 ----
*************** vect_analyze_loop_2 (loop_vec_info loop_
*** 1760,1765 ****
--- 1733,1772 ----
        return false;
      }
  
+   /* Decide whether we need to create an epilogue loop to handle
+      remaining scalar iterations.  */
+   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+     {
+       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ 		   - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ 	  < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ 	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+     }
+   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ 	   || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ 	       < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+ 
+   /* If an epilogue loop is required make sure we can create one.  */
+   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
+     {
+       if (dump_enabled_p ())
+         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
+       if (!vect_can_advance_ivs_p (loop_vinfo)
+ 	  || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+ 					   single_exit (LOOP_VINFO_LOOP
+ 							 (loop_vinfo))))
+         {
+           if (dump_enabled_p ())
+ 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ 			     "not vectorized: can't create required "
+ 			     "epilog loop\n");
+           return false;
+         }
+     }
+ 
    return true;
  }
  
*************** vect_estimate_min_profitable_iters (loop
*** 2689,2695 ****
    int scalar_single_iter_cost = 0;
    int scalar_outside_cost = 0;
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
!   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
    void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  
    /* Cost model disabled.  */
--- 2696,2702 ----
    int scalar_single_iter_cost = 0;
    int scalar_outside_cost = 0;
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
!   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  
    /* Cost model disabled.  */
*************** vect_estimate_min_profitable_iters (loop
*** 2880,2886 ****
        else
  	{
  	  /* Cost model check occurs at prologue generation.  */
! 	  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
  	    scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
  	      + vect_get_stmt_cost (cond_branch_not_taken); 
  	  /* Cost model check occurs at epilogue generation.  */
--- 2887,2893 ----
        else
  	{
  	  /* Cost model check occurs at prologue generation.  */
! 	  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
  	    scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
  	      + vect_get_stmt_cost (cond_branch_not_taken); 
  	  /* Cost model check occurs at epilogue generation.  */
*************** vect_loop_kill_debug_uses (struct loop *
*** 5574,5620 ****
  
  
  /* This function builds ni_name = number of iterations.  Statements
!    are queued onto SEQ.  */
  
  static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq)
  {
-   tree ni_name, var;
-   gimple_seq stmts = NULL;
    tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  
!   var = create_tmp_var (TREE_TYPE (ni), "niters");
!   ni_name = force_gimple_operand (ni, &stmts, false, var);
! 
!   if (stmts)
!     gimple_seq_add_seq (seq, stmts);
  
!   return ni_name;
  }
  
  
  /* This function generates the following statements:
  
!  ni_name = number of iterations loop executes
!  ratio = ni_name / vf
!  ratio_mult_vf_name = ratio * vf
  
!  and places them in COND_EXPR_STMT_LIST.  */
  
  static void
  vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
  				 tree ni_name,
  				 tree *ratio_mult_vf_name_ptr,
! 				 tree *ratio_name_ptr,
! 				 gimple_seq *cond_expr_stmt_list)
  {
-   gimple_seq stmts;
    tree ni_minus_gap_name;
    tree var;
    tree ratio_name;
    tree ratio_mult_vf_name;
    tree ni = LOOP_VINFO_NITERS (loop_vinfo);
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    tree log_vf;
  
    log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
--- 5581,5631 ----
  
  
  /* This function builds ni_name = number of iterations.  Statements
!    are emitted on the loop preheader edge.  */
  
  static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo)
  {
    tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
+   if (TREE_CODE (ni) == INTEGER_CST)
+     return ni;
+   else
+     {
+       tree ni_name, var;
+       gimple_seq stmts = NULL;
+       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  
!       var = create_tmp_var (TREE_TYPE (ni), "niters");
!       ni_name = force_gimple_operand (ni, &stmts, false, var);
!       if (stmts)
! 	gsi_insert_seq_on_edge_immediate (pe, stmts);
  
!       return ni_name;
!     }
  }
  
  
  /* This function generates the following statements:
  
!    ni_name = number of iterations loop executes
!    ratio = ni_name / vf
!    ratio_mult_vf_name = ratio * vf
  
!    and places them on the loop preheader edge.  */
  
  static void
  vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
  				 tree ni_name,
  				 tree *ratio_mult_vf_name_ptr,
! 				 tree *ratio_name_ptr)
  {
    tree ni_minus_gap_name;
    tree var;
    tree ratio_name;
    tree ratio_mult_vf_name;
    tree ni = LOOP_VINFO_NITERS (loop_vinfo);
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    tree log_vf;
  
    log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5630,5640 ****
        if (!is_gimple_val (ni_minus_gap_name))
  	{
  	  var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
! 
!           stmts = NULL;
            ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
  						    true, var);
! 	  gimple_seq_add_seq (cond_expr_stmt_list, stmts);
          }
      }
    else
--- 5641,5650 ----
        if (!is_gimple_val (ni_minus_gap_name))
  	{
  	  var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
!           gimple stmts = NULL;
            ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
  						    true, var);
! 	  gsi_insert_seq_on_edge_immediate (pe, stmts);
          }
      }
    else
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5647,5656 ****
    if (!is_gimple_val (ratio_name))
      {
        var = create_tmp_var (TREE_TYPE (ni), "bnd");
! 
!       stmts = NULL;
        ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
!       gimple_seq_add_seq (cond_expr_stmt_list, stmts);
      }
    *ratio_name_ptr = ratio_name;
  
--- 5657,5665 ----
    if (!is_gimple_val (ratio_name))
      {
        var = create_tmp_var (TREE_TYPE (ni), "bnd");
!       gimple stmts = NULL;
        ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
!       gsi_insert_seq_on_edge_immediate (pe, stmts);
      }
    *ratio_name_ptr = ratio_name;
  
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5663,5673 ****
        if (!is_gimple_val (ratio_mult_vf_name))
  	{
  	  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
! 
! 	  stmts = NULL;
  	  ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
  						     true, var);
! 	  gimple_seq_add_seq (cond_expr_stmt_list, stmts);
  	}
        *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
      }
--- 5672,5681 ----
        if (!is_gimple_val (ratio_mult_vf_name))
  	{
  	  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
! 	  gimple stmts = NULL;
  	  ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
  						     true, var);
! 	  gsi_insert_seq_on_edge_immediate (pe, stmts);
  	}
        *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
      }
*************** vect_transform_loop (loop_vec_info loop_
*** 5739,5758 ****
        check_profitability = false;
      }
  
    /* Peel the loop if there are data refs with unknown alignment.
!      Only one data ref with unknown store is allowed.
!      This clobbers LOOP_VINFO_NITERS but retains the original
!      in LOOP_VINFO_NITERS_UNCHANGED.  So we cannot avoid re-computing
!      niters.  */
  
!   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
      {
-       gimple_seq stmts = NULL;
-       tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
-       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
        vect_do_peeling_for_alignment (loop_vinfo, ni_name,
  				     th, check_profitability);
        check_profitability = false;
      }
  
    /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
--- 5747,5766 ----
        check_profitability = false;
      }
  
+   tree ni_name = vect_build_loop_niters (loop_vinfo);
+   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
+ 
    /* Peel the loop if there are data refs with unknown alignment.
!      Only one data ref with unknown store is allowed.  */
  
!   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
      {
        vect_do_peeling_for_alignment (loop_vinfo, ni_name,
  				     th, check_profitability);
        check_profitability = false;
+       /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
+ 	 be re-computed.  */
+       ni_name = NULL_TREE;
      }
  
    /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
*************** vect_transform_loop (loop_vec_info loop_
*** 5763,5778 ****
       will remain scalar and will compute the remaining (n%VF) iterations.
       (VF is the vectorization factor).  */
  
!   if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
!       < exact_log2 (vectorization_factor)
        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
      {
!       tree ni_name, ratio_mult_vf;
!       gimple_seq stmts = NULL;
!       ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
        vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
! 				       &ratio, &stmts);
!       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
        vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
  				      th, check_profitability);
      }
--- 5771,5784 ----
       will remain scalar and will compute the remaining (n%VF) iterations.
       (VF is the vectorization factor).  */
  
!   if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
      {
!       tree ratio_mult_vf;
!       if (!ni_name)
! 	ni_name = vect_build_loop_niters (loop_vinfo);
        vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
! 				       &ratio);
        vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
  				      th, check_profitability);
      }
*************** vect_transform_loop (loop_vec_info loop_
*** 5781,5792 ****
  		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
    else
      {
!       tree ni_name;
!       gimple_seq stmts = NULL;
!       ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
!       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL,
! 				       &ratio, &stmts);
!       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      }
  
    /* 1) Make sure the loop header has exactly two entries
--- 5787,5795 ----
  		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
    else
      {
!       if (!ni_name)
! 	ni_name = vect_build_loop_niters (loop_vinfo);
!       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
      }
  
    /* 1) Make sure the loop header has exactly two entries


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]