This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][2/3] Fix PR59058
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 21 Nov 2013 15:06:12 +0100 (CET)
- Subject: [PATCH][2/3] Fix PR59058
- Authentication-results: sourceware.org; auth=none
This removes the broken function from tree-scalar-evolution.c and
re-implements it (still unfixed) inside what is now its single user. It
also re-shuffles the vectorizer niter code some more to make
the final fix (use # of latch executions throughout) easier.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
Richard.
2013-11-21 Richard Biener <rguenther@suse.de>
PR tree-optimization/59058
* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
(NITERS_KNOWN_P): Fold into ...
(LOOP_VINFO_NITERS_KNOWN_P): ... this.
(LOOP_VINFO_PEELING_FOR_NITER): Add.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Re-use precomputed niter
instead of re-emitting it.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
* tree-vect-loop.c (vect_get_loop_niters): Use
number_of_latch_executions.
(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
(vect_analyze_loop_form): Simplify.
(vect_analyze_loop_operations): Move epilogue peeling code ...
(vect_analyze_loop_2): ... here and adjust it to compute
LOOP_VINFO_PEELING_FOR_NITER.
(vect_estimate_min_profitable_iters): Use
LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_build_loop_niters): Emit on the preheader.
(vect_generate_tmps_on_preheader): Likewise.
(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
of recomputing it. Adjust.
Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c.orig 2013-11-21 14:58:43.061653802 +0100
--- gcc/tree-vect-loop-manip.c 2013-11-21 14:58:51.151747654 +0100
*************** vect_gen_niters_for_prolog_loop (loop_ve
*** 1736,1751 ****
pe = loop_preheader_edge (loop);
! if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
! int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"known peeling = %d.\n", npeel);
iters = build_int_cst (niters_type, npeel);
! *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
}
else
{
--- 1736,1751 ----
pe = loop_preheader_edge (loop);
! if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
! int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"known peeling = %d.\n", npeel);
iters = build_int_cst (niters_type, npeel);
! *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
}
else
{
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1876,1882 ****
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop;
- tree n_iters;
tree wide_prolog_niters;
struct loop *new_loop;
int max_iter;
--- 1876,1881 ----
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1918,1926 ****
"loop to %d\n", max_iter);
/* Update number of times loop executes. */
- n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
! TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
wide_prolog_niters = niters_of_prolog_loop;
--- 1917,1924 ----
"loop to %d\n", max_iter);
/* Update number of times loop executes. */
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
! TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
wide_prolog_niters = niters_of_prolog_loop;
Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h.orig 2013-11-21 14:58:43.062653811 +0100
--- gcc/tree-vectorizer.h 2013-11-21 14:58:51.153747678 +0100
*************** typedef struct _loop_vec_info {
*** 361,367 ****
#define LOOP_VINFO_DATAREFS(L) (L)->datarefs
#define LOOP_VINFO_DDRS(L) (L)->ddrs
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
! #define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
--- 361,367 ----
#define LOOP_VINFO_DATAREFS(L) (L)->datarefs
#define LOOP_VINFO_DDRS(L) (L)->ddrs
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
! #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
*************** typedef struct _loop_vec_info {
*** 375,392 ****
#define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
! (L)->may_misalign_stmts.length () > 0
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
! (L)->may_alias_ddrs.length () > 0
!
! #define NITERS_KNOWN_P(n) \
! (tree_fits_shwi_p ((n)) \
! && tree_to_shwi ((n)) > 0)
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
! NITERS_KNOWN_P ((L)->num_iters)
static inline loop_vec_info
loop_vec_info_for_loop (struct loop *loop)
--- 375,389 ----
#define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
+ #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
! (L)->may_misalign_stmts.length () > 0
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
! (L)->may_alias_ddrs.length () > 0
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
! (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
static inline loop_vec_info
loop_vec_info_for_loop (struct loop *loop)
Index: gcc/tree-scalar-evolution.c
===================================================================
*** gcc/tree-scalar-evolution.c.orig 2013-11-21 14:48:17.710397038 +0100
--- gcc/tree-scalar-evolution.c 2013-11-21 14:58:51.148747619 +0100
*************** number_of_latch_executions (struct loop
*** 2910,2943 ****
loop->nb_iterations = res;
return res;
}
-
- /* Returns the number of executions of the exit condition of LOOP,
- i.e., the number by one higher than number_of_latch_executions.
- Note that unlike number_of_latch_executions, this number does
- not necessarily fit in the unsigned variant of the type of
- the control variable -- if the number of iterations is a constant,
- we return chrec_dont_know if adding one to number_of_latch_executions
- overflows; however, in case the number of iterations is symbolic
- expression, the caller is responsible for dealing with this
- the possible overflow. */
-
- tree
- number_of_exit_cond_executions (struct loop *loop)
- {
- tree ret = number_of_latch_executions (loop);
- tree type = chrec_type (ret);
-
- if (chrec_contains_undetermined (ret))
- return ret;
-
- ret = chrec_fold_plus (type, ret, build_int_cst (type, 1));
- if (TREE_CODE (ret) == INTEGER_CST
- && TREE_OVERFLOW (ret))
- return chrec_dont_know;
-
- return ret;
- }
-
/* Counters for the stats. */
--- 2910,2915 ----
Index: gcc/tree-scalar-evolution.h
===================================================================
*** gcc/tree-scalar-evolution.h.orig 2013-11-21 14:48:17.709397027 +0100
--- gcc/tree-scalar-evolution.h 2013-11-21 14:58:51.148747619 +0100
*************** along with GCC; see the file COPYING3.
*** 22,28 ****
#define GCC_TREE_SCALAR_EVOLUTION_H
extern tree number_of_latch_executions (struct loop *);
- extern tree number_of_exit_cond_executions (struct loop *);
extern gimple get_loop_exit_condition (const struct loop *);
extern void scev_initialize (void);
--- 22,27 ----
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig 2013-11-21 14:48:17.709397027 +0100
--- gcc/tree-vect-data-refs.c 2013-11-21 14:58:51.150747642 +0100
*************** vect_enhance_data_refs_alignment (loop_v
*** 1735,1743 ****
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
if (npeel)
! LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
else
! LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
SET_DR_MISALIGNMENT (dr0, 0);
if (dump_enabled_p ())
{
--- 1735,1744 ----
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
if (npeel)
! LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
else
! LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
! = DR_MISALIGNMENT (dr0);
SET_DR_MISALIGNMENT (dr0, 0);
if (dump_enabled_p ())
{
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c.orig 2013-11-21 14:48:17.709397027 +0100
--- gcc/tree-vect-loop.c 2013-11-21 14:58:51.152747666 +0100
*************** vect_analyze_scalar_cycles (loop_vec_inf
*** 771,781 ****
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Function vect_get_loop_niters.
! Determine how many iterations the loop is executed.
! If an expression that represents the number of iterations
! can be constructed, place it in NUMBER_OF_ITERATIONS.
Return the loop exit condition. */
static gimple
--- 771,782 ----
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
+
/* Function vect_get_loop_niters.
! Determine how many iterations the loop is executed and place it
! in NUMBER_OF_ITERATIONS.
!
Return the loop exit condition. */
static gimple
*************** vect_get_loop_niters (struct loop *loop,
*** 786,805 ****
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== get_loop_niters ===\n");
- niters = number_of_exit_cond_executions (loop);
! if (niters != NULL_TREE
! && niters != chrec_dont_know)
! {
! *number_of_iterations = niters;
!
! if (dump_enabled_p ())
! {
! dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
! dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
! dump_printf (MSG_NOTE, "\n");
! }
! }
return get_loop_exit_condition (loop);
}
--- 787,802 ----
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== get_loop_niters ===\n");
! niters = number_of_latch_executions (loop);
! /* We want the number of loop header executions which is the number
! of latch executions plus one.
! ??? For UINT_MAX latch executions this number overflows to zero
! for loops like do { n++; } while (n != 0); */
! if (niters && !chrec_contains_undetermined (niters))
! niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
! build_int_cst (TREE_TYPE (niters), 1));
! *number_of_iterations = niters;
return get_loop_exit_condition (loop);
}
*************** new_loop_vec_info (struct loop *loop)
*** 907,913 ****
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
! LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
LOOP_VINFO_LOOP_NEST (res).create (3);
LOOP_VINFO_DATAREFS (res).create (10);
--- 904,910 ----
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
! LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
LOOP_VINFO_LOOP_NEST (res).create (3);
LOOP_VINFO_DATAREFS (res).create (10);
*************** new_loop_vec_info (struct loop *loop)
*** 924,929 ****
--- 921,927 ----
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
+ LOOP_VINFO_PEELING_FOR_NITER (res) = false;
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
return res;
*************** vect_analyze_loop_form (struct loop *loo
*** 1091,1102 ****
}
if (empty_block_p (loop->header))
! {
! if (dump_enabled_p ())
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: empty loop.\n");
! return NULL;
! }
}
else
{
--- 1089,1100 ----
}
if (empty_block_p (loop->header))
! {
! if (dump_enabled_p ())
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: empty loop.\n");
! return NULL;
! }
}
else
{
*************** vect_analyze_loop_form (struct loop *loo
*** 1243,1249 ****
return NULL;
}
! if (!number_of_iterations)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 1241,1248 ----
return NULL;
}
! if (!number_of_iterations
! || chrec_contains_undetermined (number_of_iterations))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*************** vect_analyze_loop_form (struct loop *loo
*** 1254,1270 ****
return NULL;
}
! if (chrec_contains_undetermined (number_of_iterations))
{
if (dump_enabled_p ())
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "Infinite number of iterations.\n");
if (inner_loop_vinfo)
! destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
! if (!NITERS_KNOWN_P (number_of_iterations))
{
if (dump_enabled_p ())
{
--- 1253,1273 ----
return NULL;
}
! if (integer_zerop (number_of_iterations))
{
if (dump_enabled_p ())
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "not vectorized: number of iterations = 0.\n");
if (inner_loop_vinfo)
! destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
! loop_vinfo = new_loop_vec_info (loop);
! LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
! LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
!
! if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (dump_enabled_p ())
{
*************** vect_analyze_loop_form (struct loop *loo
*** 1274,1292 ****
dump_printf (MSG_NOTE, "\n");
}
}
- else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: number of iterations = 0.\n");
- if (inner_loop_vinfo)
- destroy_loop_vec_info (inner_loop_vinfo, true);
- return NULL;
- }
-
- loop_vinfo = new_loop_vec_info (loop);
- LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
- LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
--- 1277,1282 ----
*************** vect_analyze_loop_operations (loop_vec_i
*** 1588,1610 ****
return false;
}
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < exact_log2 (vectorization_factor)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
- if (!vect_can_advance_ivs_p (loop_vinfo)
- || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't create required "
- "epilog loop\n");
- return false;
- }
- }
-
return true;
}
--- 1578,1583 ----
*************** vect_analyze_loop_2 (loop_vec_info loop_
*** 1760,1765 ****
--- 1733,1772 ----
return false;
}
+ /* Decide whether we need to create an epilogue loop to handle
+ remaining scalar iterations. */
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+ {
+ if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+
+ /* If an epilogue loop is required make sure we can create one. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
+ if (!vect_can_advance_ivs_p (loop_vinfo)
+ || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+ single_exit (LOOP_VINFO_LOOP
+ (loop_vinfo))))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't create required "
+ "epilog loop\n");
+ return false;
+ }
+ }
+
return true;
}
*************** vect_estimate_min_profitable_iters (loop
*** 2689,2695 ****
int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
! int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
--- 2696,2702 ----
int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
! int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
*************** vect_estimate_min_profitable_iters (loop
*** 2880,2886 ****
else
{
/* Cost model check occurs at prologue generation. */
! if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
+ vect_get_stmt_cost (cond_branch_not_taken);
/* Cost model check occurs at epilogue generation. */
--- 2887,2893 ----
else
{
/* Cost model check occurs at prologue generation. */
! if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
+ vect_get_stmt_cost (cond_branch_not_taken);
/* Cost model check occurs at epilogue generation. */
*************** vect_loop_kill_debug_uses (struct loop *
*** 5574,5620 ****
/* This function builds ni_name = number of iterations. Statements
! are queued onto SEQ. */
static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq)
{
- tree ni_name, var;
- gimple_seq stmts = NULL;
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
! var = create_tmp_var (TREE_TYPE (ni), "niters");
! ni_name = force_gimple_operand (ni, &stmts, false, var);
!
! if (stmts)
! gimple_seq_add_seq (seq, stmts);
! return ni_name;
}
/* This function generates the following statements:
! ni_name = number of iterations loop executes
! ratio = ni_name / vf
! ratio_mult_vf_name = ratio * vf
! and places them in COND_EXPR_STMT_LIST. */
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
tree ni_name,
tree *ratio_mult_vf_name_ptr,
! tree *ratio_name_ptr,
! gimple_seq *cond_expr_stmt_list)
{
- gimple_seq stmts;
tree ni_minus_gap_name;
tree var;
tree ratio_name;
tree ratio_mult_vf_name;
tree ni = LOOP_VINFO_NITERS (loop_vinfo);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
tree log_vf;
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
--- 5581,5631 ----
/* This function builds ni_name = number of iterations. Statements
! are emitted on the loop preheader edge. */
static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo)
{
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
+ if (TREE_CODE (ni) == INTEGER_CST)
+ return ni;
+ else
+ {
+ tree ni_name, var;
+ gimple_seq stmts = NULL;
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
! var = create_tmp_var (TREE_TYPE (ni), "niters");
! ni_name = force_gimple_operand (ni, &stmts, false, var);
! if (stmts)
! gsi_insert_seq_on_edge_immediate (pe, stmts);
! return ni_name;
! }
}
/* This function generates the following statements:
! ni_name = number of iterations loop executes
! ratio = ni_name / vf
! ratio_mult_vf_name = ratio * vf
! and places them on the loop preheader edge. */
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
tree ni_name,
tree *ratio_mult_vf_name_ptr,
! tree *ratio_name_ptr)
{
tree ni_minus_gap_name;
tree var;
tree ratio_name;
tree ratio_mult_vf_name;
tree ni = LOOP_VINFO_NITERS (loop_vinfo);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
tree log_vf;
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5630,5640 ****
if (!is_gimple_val (ni_minus_gap_name))
{
var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
!
! stmts = NULL;
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
true, var);
! gimple_seq_add_seq (cond_expr_stmt_list, stmts);
}
}
else
--- 5641,5650 ----
if (!is_gimple_val (ni_minus_gap_name))
{
var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
! gimple stmts = NULL;
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
true, var);
! gsi_insert_seq_on_edge_immediate (pe, stmts);
}
}
else
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5647,5656 ****
if (!is_gimple_val (ratio_name))
{
var = create_tmp_var (TREE_TYPE (ni), "bnd");
!
! stmts = NULL;
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
! gimple_seq_add_seq (cond_expr_stmt_list, stmts);
}
*ratio_name_ptr = ratio_name;
--- 5657,5665 ----
if (!is_gimple_val (ratio_name))
{
var = create_tmp_var (TREE_TYPE (ni), "bnd");
! gimple stmts = NULL;
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
! gsi_insert_seq_on_edge_immediate (pe, stmts);
}
*ratio_name_ptr = ratio_name;
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5663,5673 ****
if (!is_gimple_val (ratio_mult_vf_name))
{
var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
!
! stmts = NULL;
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
true, var);
! gimple_seq_add_seq (cond_expr_stmt_list, stmts);
}
*ratio_mult_vf_name_ptr = ratio_mult_vf_name;
}
--- 5672,5681 ----
if (!is_gimple_val (ratio_mult_vf_name))
{
var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
! gimple stmts = NULL;
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
true, var);
! gsi_insert_seq_on_edge_immediate (pe, stmts);
}
*ratio_mult_vf_name_ptr = ratio_mult_vf_name;
}
*************** vect_transform_loop (loop_vec_info loop_
*** 5739,5758 ****
check_profitability = false;
}
/* Peel the loop if there are data refs with unknown alignment.
! Only one data ref with unknown store is allowed.
! This clobbers LOOP_VINFO_NITERS but retains the original
! in LOOP_VINFO_NITERS_UNCHANGED. So we cannot avoid re-computing
! niters. */
! if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
- gimple_seq stmts = NULL;
- tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
- gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
vect_do_peeling_for_alignment (loop_vinfo, ni_name,
th, check_profitability);
check_profitability = false;
}
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
--- 5747,5766 ----
check_profitability = false;
}
+ tree ni_name = vect_build_loop_niters (loop_vinfo);
+ LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
+
/* Peel the loop if there are data refs with unknown alignment.
! Only one data ref with unknown store is allowed. */
! if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
vect_do_peeling_for_alignment (loop_vinfo, ni_name,
th, check_profitability);
check_profitability = false;
+ /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
+ be re-computed. */
+ ni_name = NULL_TREE;
}
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
*************** vect_transform_loop (loop_vec_info loop_
*** 5763,5778 ****
will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
! if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
! < exact_log2 (vectorization_factor)
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
{
! tree ni_name, ratio_mult_vf;
! gimple_seq stmts = NULL;
! ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
! &ratio, &stmts);
! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
th, check_profitability);
}
--- 5771,5784 ----
will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
! if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
{
! tree ratio_mult_vf;
! if (!ni_name)
! ni_name = vect_build_loop_niters (loop_vinfo);
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
! &ratio);
vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
th, check_profitability);
}
*************** vect_transform_loop (loop_vec_info loop_
*** 5781,5792 ****
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
else
{
! tree ni_name;
! gimple_seq stmts = NULL;
! ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
! vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL,
! &ratio, &stmts);
! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
/* 1) Make sure the loop header has exactly two entries
--- 5787,5795 ----
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
else
{
! if (!ni_name)
! ni_name = vect_build_loop_niters (loop_vinfo);
! vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
}
/* 1) Make sure the loop header has exactly two entries