This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Avoid versioning the vectorized loop if possible
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Cc: irar at il dot ibm dot com
- Date: Tue, 31 Mar 2009 17:48:06 +0200 (CEST)
- Subject: [PATCH] Avoid versioning the vectorized loop if possible
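This is part #1 of the merge of versioning for strides. It eliminates
the need to produce a versioned copy of the loop in the cases where we
peel for the loop bound anyway: the versioning conditions are then added
to the guard that skips the vectorized loop, and the scalar loop that
handles the remaining iterations serves as the fallback.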
Bootstrapped and tested on x86_64-unknown-linux-gnu. Ira, does this
look ok?
Thanks,
Richard.
2009-03-31 Richard Guenther <rguenther@suse.de>
* Makefile.in (tree-ssa-copy.o): Add $(CFGLOOP_H) dependency.
* tree-ssa-copy.c (init_copy_prop): Do not propagate through
single-argument PHIs if we are in loop-closed SSA form.
* tree-vect-loop-manip.c (slpeel_add_loop_guard): Pass extra guards
for the pre-condition.
(slpeel_tree_peel_loop_to_edge): Likewise.
(vect_build_loop_niters): Take an optional sequence to append stmts.
(vect_generate_tmps_on_preheader): Likewise.
(vect_do_peeling_for_loop_bound): Take extra guards for the
pre-condition.
(vect_do_peeling_for_alignment): Adjust.
(vect_loop_versioning): Take a flag, a condition expression and a stmt
list in which to record the pre-condition guards if we are going to
peel. Do not apply versioning in that case.
* tree-vectorizer.h (vect_loop_versioning): Adjust declaration.
(vect_do_peeling_for_loop_bound): Likewise.
* tree-vect-loop.c (vect_transform_loop): If we are peeling for
loop bound only record extra pre-conditions, do not apply loop
versioning.
Index: trunk/gcc/tree-vect-loop-manip.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop-manip.c 2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-vect-loop-manip.c 2009-03-31 17:32:58.000000000 +0200
*************** slpeel_tree_duplicate_loop_to_edge_cfg (
*** 792,802 ****
/* Given the condition statement COND, put it as the last statement
of GUARD_BB; EXIT_BB is the basic block to skip the loop;
Assumes that this is the single exit of the guarded loop.
! Returns the skip edge. */
static edge
! slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
! basic_block dom_bb)
{
gimple_stmt_iterator gsi;
edge new_e, enter_e;
--- 792,803 ----
/* Given the condition statement COND, put it as the last statement
of GUARD_BB; EXIT_BB is the basic block to skip the loop;
Assumes that this is the single exit of the guarded loop.
! Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
static edge
! slpeel_add_loop_guard (basic_block guard_bb, tree cond,
! gimple_seq cond_expr_stmt_list,
! basic_block exit_bb, basic_block dom_bb)
{
gimple_stmt_iterator gsi;
edge new_e, enter_e;
*************** slpeel_add_loop_guard (basic_block guard
*** 809,819 ****
gsi = gsi_last_bb (guard_bb);
cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
cond_stmt = gimple_build_cond (NE_EXPR,
cond, build_int_cst (TREE_TYPE (cond), 0),
NULL_TREE, NULL_TREE);
! if (gimplify_stmt_list)
! gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
gsi = gsi_last_bb (guard_bb);
gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
--- 810,822 ----
gsi = gsi_last_bb (guard_bb);
cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
+ if (gimplify_stmt_list)
+ gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
cond_stmt = gimple_build_cond (NE_EXPR,
cond, build_int_cst (TREE_TYPE (cond), 0),
NULL_TREE, NULL_TREE);
! if (cond_expr_stmt_list)
! gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
gsi = gsi_last_bb (guard_bb);
gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
*************** set_prologue_iterations (basic_block bb_
*** 1011,1016 ****
--- 1014,1023 ----
The second guard is:
if (FIRST_NITERS == NITERS) then skip the second loop.
+ If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
+ then the generated condition is combined with COND_EXPR and the
+ statements in COND_EXPR_STMT_LIST are emitted together with it.
+
FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
FORNOW the resulting code will not be in loop-closed-ssa form.
*/
*************** static struct loop*
*** 1019,1025 ****
slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge e, tree first_niters,
tree niters, bool update_first_loop_count,
! unsigned int th, bool check_profitability)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
--- 1026,1033 ----
slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge e, tree first_niters,
tree niters, bool update_first_loop_count,
! unsigned int th, bool check_profitability,
! tree cond_expr, gimple_seq cond_expr_stmt_list)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1193,1198 ****
--- 1201,1214 ----
pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
cost_pre_condition, pre_condition);
}
+ if (cond_expr)
+ {
+ pre_condition =
+ fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+ pre_condition,
+ fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
+ cond_expr));
+ }
}
/* Prologue peeling. */
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1208,1213 ****
--- 1224,1230 ----
}
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
+ cond_expr_stmt_list,
bb_before_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
first_loop == new_loop,
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1245,1251 ****
pre_condition =
fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
! skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
second_loop == new_loop, &new_exit_bb);
--- 1262,1268 ----
pre_condition =
fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
! skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
second_loop == new_loop, &new_exit_bb);
*************** find_loop_location (struct loop *loop)
*** 1303,1312 ****
/* This function builds ni_name = number of iterations loop executes
! on the loop preheader. */
static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo)
{
tree ni_name, var;
gimple_seq stmts = NULL;
--- 1320,1330 ----
/* This function builds ni_name = number of iterations loop executes
! on the loop preheader. If SEQ is given the stmt is instead emitted
! there. */
static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
{
tree ni_name, var;
gimple_seq stmts = NULL;
*************** vect_build_loop_niters (loop_vec_info lo
*** 1321,1328 ****
pe = loop_preheader_edge (loop);
if (stmts)
{
! basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! gcc_assert (!new_bb);
}
return ni_name;
--- 1339,1351 ----
pe = loop_preheader_edge (loop);
if (stmts)
{
! if (seq)
! gimple_seq_add_seq (&seq, stmts);
! else
! {
! basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! gcc_assert (!new_bb);
! }
}
return ni_name;
*************** vect_build_loop_niters (loop_vec_info lo
*** 1335,1347 ****
ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf
! and places them at the loop preheader edge. */
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
tree *ni_name_ptr,
tree *ratio_mult_vf_name_ptr,
! tree *ratio_name_ptr)
{
edge pe;
--- 1358,1372 ----
ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf
! and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
! if that is non-NULL. */
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
tree *ni_name_ptr,
tree *ratio_mult_vf_name_ptr,
! tree *ratio_name_ptr,
! gimple_seq cond_expr_stmt_list)
{
edge pe;
*************** vect_generate_tmps_on_preheader (loop_ve
*** 1361,1367 ****
/* Generate temporary variable that contains
number of iterations loop executes. */
! ni_name = vect_build_loop_niters (loop_vinfo);
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
/* Create: ratio = ni >> log2(vf) */
--- 1386,1392 ----
/* Generate temporary variable that contains
number of iterations loop executes. */
! ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
/* Create: ratio = ni >> log2(vf) */
*************** vect_generate_tmps_on_preheader (loop_ve
*** 1374,1382 ****
stmts = NULL;
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
! pe = loop_preheader_edge (loop);
! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! gcc_assert (!new_bb);
}
/* Create: ratio_mult_vf = ratio << log2 (vf). */
--- 1399,1412 ----
stmts = NULL;
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
! if (cond_expr_stmt_list)
! gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
! else
! {
! pe = loop_preheader_edge (loop);
! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! gcc_assert (!new_bb);
! }
}
/* Create: ratio_mult_vf = ratio << log2 (vf). */
*************** vect_generate_tmps_on_preheader (loop_ve
*** 1391,1399 ****
stmts = NULL;
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
true, var);
! pe = loop_preheader_edge (loop);
! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! gcc_assert (!new_bb);
}
*ni_name_ptr = ni_name;
--- 1421,1434 ----
stmts = NULL;
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
true, var);
! if (cond_expr_stmt_list)
! gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
! else
! {
! pe = loop_preheader_edge (loop);
! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! gcc_assert (!new_bb);
! }
}
*ni_name_ptr = ni_name;
*************** conservative_cost_threshold (loop_vec_in
*** 1664,1673 ****
NITERS % VECTORIZATION_FACTOR times.
The original loop will later be made to iterate
! NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
void
! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
{
tree ni_name, ratio_mult_vf_name;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
--- 1699,1712 ----
NITERS % VECTORIZATION_FACTOR times.
The original loop will later be made to iterate
! NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
!
! COND_EXPR and COND_EXPR_STMT_LIST are combined with a newly generated
! test. */
void
! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
! tree cond_expr, gimple_seq cond_expr_stmt_list)
{
tree ni_name, ratio_mult_vf_name;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1690,1696 ****
ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf */
vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
! &ratio_mult_vf_name, ratio);
loop_num = loop->num;
--- 1729,1736 ----
ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf */
vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
! &ratio_mult_vf_name, ratio,
! cond_expr_stmt_list);
loop_num = loop->num;
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1698,1704 ****
peeling for alignment. */
if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
&& !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
! && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
check_profitability = true;
--- 1738,1745 ----
peeling for alignment. */
if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
&& !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
! && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
! && !cond_expr)
{
check_profitability = true;
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1711,1717 ****
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
ratio_mult_vf_name, ni_name, false,
! th, check_profitability);
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
--- 1752,1759 ----
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
ratio_mult_vf_name, ni_name, false,
! th, check_profitability,
! cond_expr, cond_expr_stmt_list);
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1935,1941 ****
initialize_original_copy_tables ();
! ni_name = vect_build_loop_niters (loop_vinfo);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
--- 1977,1983 ----
initialize_original_copy_tables ();
! ni_name = vect_build_loop_niters (loop_vinfo, NULL);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1956,1962 ****
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
niters_of_prolog_loop, ni_name, true,
! th, check_profitability);
gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
--- 1998,2004 ----
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
niters_of_prolog_loop, ni_name, true,
! th, check_profitability, NULL_TREE, NULL);
gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
*************** vect_create_cond_for_alias_checks (loop_
*** 2273,2287 ****
The test generated to check which version of loop is executed
is modified to also check for profitability as indicated by the
! cost model initially. */
void
! vect_loop_versioning (loop_vec_info loop_vinfo)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *nloop;
- tree cond_expr = NULL_TREE;
- gimple_seq cond_expr_stmt_list = NULL;
basic_block condition_bb;
gimple_stmt_iterator gsi, cond_exp_gsi;
basic_block merge_bb;
--- 2315,2332 ----
The test generated to check which version of loop is executed
is modified to also check for profitability as indicated by the
! cost model initially.
!
! The versioning precondition(s) are placed in *COND_EXPR and
! *COND_EXPR_STMT_LIST. If DO_VERSIONING is true versioning is
! also performed, otherwise only the conditions are generated. */
void
! vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
! tree *cond_expr, gimple_seq *cond_expr_stmt_list)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *nloop;
basic_block condition_bb;
gimple_stmt_iterator gsi, cond_exp_gsi;
basic_block merge_bb;
*************** vect_loop_versioning (loop_vec_info loop
*** 2301,2329 ****
th = conservative_cost_threshold (loop_vinfo,
min_profitable_iters);
! cond_expr =
fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
build_int_cst (TREE_TYPE (scalar_loop_iters), th));
! cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
! false, NULL_TREE);
if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
! vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
! &cond_expr_stmt_list);
if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
! vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
! &cond_expr_stmt_list);
! cond_expr =
! fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
! cond_expr =
! force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
! gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
initialize_original_copy_tables ();
! nloop = loop_version (loop, cond_expr, &condition_bb,
prob, prob, REG_BR_PROB_BASE - prob, true);
free_original_copy_tables();
--- 2346,2379 ----
th = conservative_cost_threshold (loop_vinfo,
min_profitable_iters);
! *cond_expr =
fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
build_int_cst (TREE_TYPE (scalar_loop_iters), th));
! *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
! false, NULL_TREE);
if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
! vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
! cond_expr_stmt_list);
if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
! vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
! cond_expr_stmt_list);
! *cond_expr =
! fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
! *cond_expr =
! force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
! gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
!
! /* If we only needed the extra conditions and not a new loop copy
! bail out here. */
! if (!do_versioning)
! return;
initialize_original_copy_tables ();
! nloop = loop_version (loop, *cond_expr, &condition_bb,
prob, prob, REG_BR_PROB_BASE - prob, true);
free_original_copy_tables();
*************** vect_loop_versioning (loop_vec_info loop
*** 2354,2363 ****
/* End loop-exit-fixes after versioning. */
update_ssa (TODO_update_ssa);
! if (cond_expr_stmt_list)
{
cond_exp_gsi = gsi_last_bb (condition_bb);
! gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
}
}
--- 2404,2416 ----
/* End loop-exit-fixes after versioning. */
update_ssa (TODO_update_ssa);
! if (*cond_expr_stmt_list)
{
cond_exp_gsi = gsi_last_bb (condition_bb);
! gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
! GSI_SAME_STMT);
! *cond_expr_stmt_list = NULL;
}
+ *cond_expr = NULL_TREE;
}
Index: trunk/gcc/tree-vectorizer.h
===================================================================
*** trunk.orig/gcc/tree-vectorizer.h 2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-vectorizer.h 2009-03-31 16:29:10.000000000 +0200
*************** extern bitmap vect_memsyms_to_rename;
*** 706,713 ****
in tree-vect-loop-manip.c. */
extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
! extern void vect_loop_versioning (loop_vec_info);
! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *);
extern void vect_do_peeling_for_alignment (loop_vec_info);
extern LOC find_loop_location (struct loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
--- 706,714 ----
in tree-vect-loop-manip.c. */
extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
! extern void vect_loop_versioning (loop_vec_info, bool, tree *, gimple_seq *);
! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
! tree, gimple_seq);
extern void vect_do_peeling_for_alignment (loop_vec_info);
extern LOC find_loop_location (struct loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
Index: trunk/gcc/tree-vect-loop.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop.c 2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-vect-loop.c 2009-03-31 16:29:10.000000000 +0200
*************** vect_transform_loop (loop_vec_info loop_
*** 3388,3410 ****
bool strided_store;
bool slp_scheduled = false;
unsigned int nunits;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
|| VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
! vect_loop_versioning (loop_vinfo);
/* CHECKME: we wouldn't need this if we called update_ssa once
for all loops. */
bitmap_zero (vect_memsyms_to_rename);
-
- /* Peel the loop if there are data refs with unknown alignment.
- Only one data ref with unknown store is allowed. */
-
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
- vect_do_peeling_for_alignment (loop_vinfo);
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
compile time constant), or it is a constant that doesn't divide by the
--- 3388,3420 ----
bool strided_store;
bool slp_scheduled = false;
unsigned int nunits;
+ tree cond_expr = NULL_TREE;
+ gimple_seq cond_expr_stmt_list = NULL;
+ bool do_peeling_for_loop_bound;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
+ /* Peel the loop if there are data refs with unknown alignment.
+ Only one data ref with unknown store is allowed. */
+
+ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ vect_do_peeling_for_alignment (loop_vinfo);
+
+ do_peeling_for_loop_bound
+ = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+
if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
|| VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
! vect_loop_versioning (loop_vinfo,
! !do_peeling_for_loop_bound,
! &cond_expr, &cond_expr_stmt_list);
/* CHECKME: we wouldn't need this if we called update_ssa once
for all loops. */
bitmap_zero (vect_memsyms_to_rename);
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
compile time constant), or it is a constant that doesn't divide by the
*************** vect_transform_loop (loop_vec_info loop_
*** 3414,3423 ****
will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
! if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
! || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
! && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
! vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
else
ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
--- 3424,3432 ----
will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
! if (do_peeling_for_loop_bound)
! vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
! cond_expr, cond_expr_stmt_list);
else
ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
Index: trunk/gcc/Makefile.in
===================================================================
*** trunk.orig/gcc/Makefile.in 2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/Makefile.in 2009-03-31 16:29:10.000000000 +0200
*************** tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SY
*** 2121,2127 ****
tree-ssa-copy.o : tree-ssa-copy.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
$(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
! $(BASIC_BLOCK_H) tree-pass.h langhooks.h tree-ssa-propagate.h $(FLAGS_H)
tree-ssa-propagate.o : tree-ssa-propagate.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h \
$(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
--- 2121,2128 ----
tree-ssa-copy.o : tree-ssa-copy.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
$(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
! $(BASIC_BLOCK_H) tree-pass.h langhooks.h tree-ssa-propagate.h $(FLAGS_H) \
! $(CFGLOOP_H)
tree-ssa-propagate.o : tree-ssa-propagate.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h \
$(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
Index: trunk/gcc/tree-ssa-copy.c
===================================================================
*** trunk.orig/gcc/tree-ssa-copy.c 2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-ssa-copy.c 2009-03-31 16:29:10.000000000 +0200
*************** along with GCC; see the file COPYING3.
*** 37,42 ****
--- 37,43 ----
#include "tree-pass.h"
#include "tree-ssa-propagate.h"
#include "langhooks.h"
+ #include "cfgloop.h"
/* This file implements the copy propagation pass and provides a
handful of interfaces for performing const/copy propagation and
*************** init_copy_prop (void)
*** 992,998 ****
tree def;
def = gimple_phi_result (phi);
! if (!is_gimple_reg (def))
prop_set_simulate_again (phi, false);
else
prop_set_simulate_again (phi, true);
--- 993,1005 ----
tree def;
def = gimple_phi_result (phi);
! if (!is_gimple_reg (def)
! /* In loop-closed SSA form do not copy-propagate through
! PHI nodes. Technically this is only needed for loop
! exit PHIs, but this is difficult to query. */
! || (current_loops
! && gimple_phi_num_args (phi) == 1
! && loops_state_satisfies_p (LOOP_CLOSED_SSA)))
prop_set_simulate_again (phi, false);
else
prop_set_simulate_again (phi, true);