This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Avoid versioning the vectorized loop if possible
- From: Ira Rosen <IRAR at il dot ibm dot com>
- To: Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 2 Apr 2009 14:36:22 +0300
- Subject: Re: [PATCH] Avoid versioning the vectorized loop if possible
Richard Guenther <rguenther@suse.de> wrote on 02/04/2009 14:24:10:
> On Wed, 1 Apr 2009, Ira Rosen wrote:
>
> >
> >
> > Richard Guenther <rguenther@suse.de> wrote on 31/03/2009 18:48:06:
> >
> > >
> > > This is part #1 of the merge of versioning for strides. It eliminates
> > > the need to produce a versioned copy in some cases when we can just use
> > > the loop for the remaining iterations.
> > >
> >
> > This patch also allows us to easily support peeling for alignment together
> > with loop versioning in case we also do peeling for loop bound.
> >
> > > Bootstrapped and tested on x86_64-unknown-linux-gnu. Ira, does this
> > > look ok?
> >
> > Yes, just please see two minor comments below.
>
> Like the following?
Yes, thanks.
Ira,
> Re-bootstrapped and tested on
> x86_64-unknown-linux-gnu.
>
> Thanks,
> Richard.
>
> 2009-04-02 Richard Guenther <rguenther@suse.de>
>
> * Makefile.in (tree-ssa-copy.o): Add $(CFGLOOP_H) dependency.
> * tree-ssa-copy.c (init_copy_prop): Do not propagate through
> single-argument PHIs if we are in loop-closed SSA form.
> * tree-vect-loop-manip.c (slpeel_add_loop_guard): Pass extra guards
> for the pre-condition.
> (slpeel_tree_peel_loop_to_edge): Likewise.
> (vect_build_loop_niters): Take an optional sequence to append stmts.
> (vect_generate_tmps_on_preheader): Likewise.
> (vect_do_peeling_for_loop_bound): Take extra guards for the
> pre-condition.
> (vect_do_peeling_for_alignment): Adjust. Unconditionally apply
> the cost model check.
> (vect_loop_versioning): Take stmt and stmt list to put pre-condition
> guards if we are going to peel. Do not apply versioning in that
> case.
> * tree-vectorizer.h (vect_loop_versioning): Adjust declaration.
> (vect_do_peeling_for_loop_bound): Likewise.
> * tree-vect-loop.c (vect_transform_loop): If we are peeling for
> loop bound only record extra pre-conditions, do not apply loop
> versioning.
>
> Index: trunk/gcc/tree-vect-loop-manip.c
> ===================================================================
> *** trunk.orig/gcc/tree-vect-loop-manip.c 2009-04-02 11:08:16.
> 000000000 +0200
> --- trunk/gcc/tree-vect-loop-manip.c 2009-04-02 11:55:44.000000000
+0200
> *************** slpeel_tree_duplicate_loop_to_edge_cfg (
> *** 792,802 ****
> /* Given the condition statement COND, put it as the last statement
> of GUARD_BB; EXIT_BB is the basic block to skip the loop;
> Assumes that this is the single exit of the guarded loop.
> ! Returns the skip edge. */
>
> static edge
> ! slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block
exit_bb,
> ! basic_block dom_bb)
> {
> gimple_stmt_iterator gsi;
> edge new_e, enter_e;
> --- 792,803 ----
> /* Given the condition statement COND, put it as the last statement
> of GUARD_BB; EXIT_BB is the basic block to skip the loop;
> Assumes that this is the single exit of the guarded loop.
> ! Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.
*/
>
> static edge
> ! slpeel_add_loop_guard (basic_block guard_bb, tree cond,
> ! gimple_seq cond_expr_stmt_list,
> ! basic_block exit_bb, basic_block dom_bb)
> {
> gimple_stmt_iterator gsi;
> edge new_e, enter_e;
> *************** slpeel_add_loop_guard (basic_block guard
> *** 809,819 ****
> gsi = gsi_last_bb (guard_bb);
>
> cond = force_gimple_operand (cond, &gimplify_stmt_list, true,
NULL_TREE);
> cond_stmt = gimple_build_cond (NE_EXPR,
> cond, build_int_cst (TREE_TYPE (cond), 0),
> NULL_TREE, NULL_TREE);
> ! if (gimplify_stmt_list)
> ! gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
>
> gsi = gsi_last_bb (guard_bb);
> gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
> --- 810,822 ----
> gsi = gsi_last_bb (guard_bb);
>
> cond = force_gimple_operand (cond, &gimplify_stmt_list, true,
NULL_TREE);
> + if (gimplify_stmt_list)
> + gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
> cond_stmt = gimple_build_cond (NE_EXPR,
> cond, build_int_cst (TREE_TYPE (cond), 0),
> NULL_TREE, NULL_TREE);
> ! if (cond_expr_stmt_list)
> ! gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
>
> gsi = gsi_last_bb (guard_bb);
> gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
> *************** set_prologue_iterations (basic_block bb_
> *** 1011,1016 ****
> --- 1014,1023 ----
> The second guard is:
> if (FIRST_NITERS == NITERS) then skip the second loop.
>
> + If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are
given
> + then the generated condition is combined with COND_EXPR and the
> + statements in COND_EXPR_STMT_LIST are emitted together with it.
> +
> FORNOW only simple loops are supported (see
slpeel_can_duplicate_loop_p).
> FORNOW the resulting code will not be in loop-closed-ssa form.
> */
> *************** static struct loop*
> *** 1019,1025 ****
> slpeel_tree_peel_loop_to_edge (struct loop *loop,
> edge e, tree first_niters,
> tree niters, bool update_first_loop_count,
> ! unsigned int th, bool check_profitability)
> {
> struct loop *new_loop = NULL, *first_loop, *second_loop;
> edge skip_e;
> --- 1026,1033 ----
> slpeel_tree_peel_loop_to_edge (struct loop *loop,
> edge e, tree first_niters,
> tree niters, bool update_first_loop_count,
> ! unsigned int th, bool check_profitability,
> ! tree cond_expr, gimple_seq cond_expr_stmt_list)
> {
> struct loop *new_loop = NULL, *first_loop, *second_loop;
> edge skip_e;
> *************** slpeel_tree_peel_loop_to_edge (struct lo
> *** 1149,1155 ****
> profitable than the vector one. This occurs when
> this function is invoked for epilogue generation
> and the cost model check needs to be done at run
> ! time.
>
> Resulting CFG after prologue peeling would be:
>
> --- 1157,1164 ----
> profitable than the vector one. This occurs when
> this function is invoked for epilogue generation
> and the cost model check needs to be done at run
> ! time. This check is combined with any pre-existing
> ! check in COND_EXPR to avoid versioning.
>
> Resulting CFG after prologue peeling would be:
>
> *************** slpeel_tree_peel_loop_to_edge (struct lo
> *** 1193,1198 ****
> --- 1202,1215 ----
> pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
> cost_pre_condition, pre_condition);
> }
> + if (cond_expr)
> + {
> + pre_condition =
> + fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
> + pre_condition,
> + fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
> + cond_expr));
> + }
> }
>
> /* Prologue peeling. */
> *************** slpeel_tree_peel_loop_to_edge (struct lo
> *** 1208,1213 ****
> --- 1225,1231 ----
> }
>
> skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
> + cond_expr_stmt_list,
> bb_before_second_loop,
> bb_before_first_loop);
> slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
> first_loop == new_loop,
> *************** slpeel_tree_peel_loop_to_edge (struct lo
> *** 1245,1251 ****
>
> pre_condition =
> fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
> ! skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
> bb_after_second_loop,
> bb_before_first_loop);
> slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
> second_loop == new_loop,
&new_exit_bb);
> --- 1263,1269 ----
>
> pre_condition =
> fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
> ! skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
NULL,
> bb_after_second_loop,
> bb_before_first_loop);
> slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
> second_loop == new_loop,
&new_exit_bb);
> *************** find_loop_location (struct loop *loop)
> *** 1303,1312 ****
>
>
> /* This function builds ni_name = number of iterations loop executes
> ! on the loop preheader. */
>
> static tree
> ! vect_build_loop_niters (loop_vec_info loop_vinfo)
> {
> tree ni_name, var;
> gimple_seq stmts = NULL;
> --- 1321,1331 ----
>
>
> /* This function builds ni_name = number of iterations loop executes
> ! on the loop preheader. If SEQ is given the stmt is instead emitted
> ! there. */
>
> static tree
> ! vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
> {
> tree ni_name, var;
> gimple_seq stmts = NULL;
> *************** vect_build_loop_niters (loop_vec_info lo
> *** 1321,1328 ****
> pe = loop_preheader_edge (loop);
> if (stmts)
> {
> ! basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe,
stmts);
> ! gcc_assert (!new_bb);
> }
>
> return ni_name;
> --- 1340,1352 ----
> pe = loop_preheader_edge (loop);
> if (stmts)
> {
> ! if (seq)
> ! gimple_seq_add_seq (&seq, stmts);
> ! else
> ! {
> ! basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
> ! gcc_assert (!new_bb);
> ! }
> }
>
> return ni_name;
> *************** vect_build_loop_niters (loop_vec_info lo
> *** 1335,1347 ****
> ratio = ni_name / vf
> ratio_mult_vf_name = ratio * vf
>
> ! and places them at the loop preheader edge. */
>
> static void
> vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
> tree *ni_name_ptr,
> tree *ratio_mult_vf_name_ptr,
> ! tree *ratio_name_ptr)
> {
>
> edge pe;
> --- 1359,1373 ----
> ratio = ni_name / vf
> ratio_mult_vf_name = ratio * vf
>
> ! and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
> ! if that is non-NULL. */
>
> static void
> vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
> tree *ni_name_ptr,
> tree *ratio_mult_vf_name_ptr,
> ! tree *ratio_name_ptr,
> ! gimple_seq cond_expr_stmt_list)
> {
>
> edge pe;
> *************** vect_generate_tmps_on_preheader (loop_ve
> *** 1361,1367 ****
> /* Generate temporary variable that contains
> number of iterations loop executes. */
>
> ! ni_name = vect_build_loop_niters (loop_vinfo);
> log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
>
> /* Create: ratio = ni >> log2(vf) */
> --- 1387,1393 ----
> /* Generate temporary variable that contains
> number of iterations loop executes. */
>
> ! ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
> log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
>
> /* Create: ratio = ni >> log2(vf) */
> *************** vect_generate_tmps_on_preheader (loop_ve
> *** 1374,1382 ****
>
> stmts = NULL;
> ratio_name = force_gimple_operand (ratio_name, &stmts, true,
var);
> ! pe = loop_preheader_edge (loop);
> ! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
> ! gcc_assert (!new_bb);
> }
>
> /* Create: ratio_mult_vf = ratio << log2 (vf). */
> --- 1400,1413 ----
>
> stmts = NULL;
> ratio_name = force_gimple_operand (ratio_name, &stmts, true,
var);
> ! if (cond_expr_stmt_list)
> ! gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
> ! else
> ! {
> ! pe = loop_preheader_edge (loop);
> ! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
> ! gcc_assert (!new_bb);
> ! }
> }
>
> /* Create: ratio_mult_vf = ratio << log2 (vf). */
> *************** vect_generate_tmps_on_preheader (loop_ve
> *** 1391,1399 ****
> stmts = NULL;
> ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name,
&stmts,
> true, var);
> ! pe = loop_preheader_edge (loop);
> ! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
> ! gcc_assert (!new_bb);
> }
>
> *ni_name_ptr = ni_name;
> --- 1422,1435 ----
> stmts = NULL;
> ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name,
&stmts,
> true, var);
> ! if (cond_expr_stmt_list)
> ! gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
> ! else
> ! {
> ! pe = loop_preheader_edge (loop);
> ! new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
> ! gcc_assert (!new_bb);
> ! }
> }
>
> *ni_name_ptr = ni_name;
> *************** conservative_cost_threshold (loop_vec_in
> *** 1664,1673 ****
> NITERS % VECTORIZATION_FACTOR times.
>
> The original loop will later be made to iterate
> ! NITERS / VECTORIZATION_FACTOR times (this value is placed into
> RATIO). */
>
> void
> ! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
> {
> tree ni_name, ratio_mult_vf_name;
> struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> --- 1700,1713 ----
> NITERS % VECTORIZATION_FACTOR times.
>
> The original loop will later be made to iterate
> ! NITERS / VECTORIZATION_FACTOR times (this value is placed into
RATIO).
> !
> ! COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
> ! test. */
>
> void
> ! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
> ! tree cond_expr, gimple_seq cond_expr_stmt_list)
> {
> tree ni_name, ratio_mult_vf_name;
> struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> *************** vect_do_peeling_for_loop_bound (loop_vec
> *** 1690,1696 ****
> ratio = ni_name / vf
> ratio_mult_vf_name = ratio * vf */
> vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
> ! &ratio_mult_vf_name, ratio);
>
> loop_num = loop->num;
>
> --- 1730,1737 ----
> ratio = ni_name / vf
> ratio_mult_vf_name = ratio * vf */
> vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
> ! &ratio_mult_vf_name, ratio,
> ! cond_expr_stmt_list);
>
> loop_num = loop->num;
>
> *************** vect_do_peeling_for_loop_bound (loop_vec
> *** 1698,1704 ****
> peeling for alignment. */
> if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
> && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
> ! && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
> {
> check_profitability = true;
>
> --- 1739,1746 ----
> peeling for alignment. */
> if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
> && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
> ! && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
> ! && !cond_expr)
> {
> check_profitability = true;
>
> *************** vect_do_peeling_for_loop_bound (loop_vec
> *** 1711,1717 ****
>
> new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
> ratio_mult_vf_name,
> ni_name, false,
> ! th, check_profitability);
> gcc_assert (new_loop);
> gcc_assert (loop_num == loop->num);
> #ifdef ENABLE_CHECKING
> --- 1753,1760 ----
>
> new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
> ratio_mult_vf_name,
> ni_name, false,
> ! th, check_profitability,
> ! cond_expr, cond_expr_stmt_list);
> gcc_assert (new_loop);
> gcc_assert (loop_num == loop->num);
> #ifdef ENABLE_CHECKING
> *************** vect_do_peeling_for_alignment (loop_vec_
> *** 1926,1932 ****
> tree niters_of_prolog_loop, ni_name;
> tree n_iters;
> struct loop *new_loop;
> - bool check_profitability = false;
> unsigned int th = 0;
> int min_profitable_iters;
>
> --- 1969,1974 ----
> *************** vect_do_peeling_for_alignment (loop_vec_
> *** 1935,1962 ****
>
> initialize_original_copy_tables ();
>
> ! ni_name = vect_build_loop_niters (loop_vinfo);
> niters_of_prolog_loop = vect_gen_niters_for_prolog_loop
> (loop_vinfo, ni_name);
>
>
> ! /* If cost model check not done during versioning. */
> ! if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
> ! && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
> ! {
> ! check_profitability = true;
> !
> ! /* Get profitability threshold for vectorized loop. */
> ! min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS
(loop_vinfo);
> !
> ! th = conservative_cost_threshold (loop_vinfo,
> ! min_profitable_iters);
> ! }
>
> /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
> new_loop =
> slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
> niters_of_prolog_loop, ni_name, true,
> ! th, check_profitability);
>
> gcc_assert (new_loop);
> #ifdef ENABLE_CHECKING
> --- 1977,1996 ----
>
> initialize_original_copy_tables ();
>
> ! ni_name = vect_build_loop_niters (loop_vinfo, NULL);
> niters_of_prolog_loop = vect_gen_niters_for_prolog_loop
> (loop_vinfo, ni_name);
>
>
> ! /* Get profitability threshold for vectorized loop. */
> ! min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
> ! th = conservative_cost_threshold (loop_vinfo,
> ! min_profitable_iters);
>
> /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
> new_loop =
> slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
> niters_of_prolog_loop, ni_name, true,
> ! th, true, NULL_TREE, NULL);
>
> gcc_assert (new_loop);
> #ifdef ENABLE_CHECKING
> *************** vect_create_cond_for_alias_checks (loop_
> *** 2273,2287 ****
>
> The test generated to check which version of loop is executed
> is modified to also check for profitability as indicated by the
> ! cost model initially. */
>
> void
> ! vect_loop_versioning (loop_vec_info loop_vinfo)
> {
> struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> struct loop *nloop;
> - tree cond_expr = NULL_TREE;
> - gimple_seq cond_expr_stmt_list = NULL;
> basic_block condition_bb;
> gimple_stmt_iterator gsi, cond_exp_gsi;
> basic_block merge_bb;
> --- 2307,2324 ----
>
> The test generated to check which version of loop is executed
> is modified to also check for profitability as indicated by the
> ! cost model initially.
> !
> ! The versioning precondition(s) are placed in *COND_EXPR and
> ! *COND_EXPR_STMT_LIST. If DO_VERSIONING is true versioning is
> ! also performed, otherwise only the conditions are generated. */
>
> void
> ! vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
> ! tree *cond_expr, gimple_seq *cond_expr_stmt_list)
> {
> struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> struct loop *nloop;
> basic_block condition_bb;
> gimple_stmt_iterator gsi, cond_exp_gsi;
> basic_block merge_bb;
> *************** vect_loop_versioning (loop_vec_info loop
> *** 2301,2329 ****
> th = conservative_cost_threshold (loop_vinfo,
> min_profitable_iters);
>
> ! cond_expr =
> fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
> build_int_cst (TREE_TYPE (scalar_loop_iters), th));
>
> ! cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
> ! false, NULL_TREE);
>
> if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
> ! vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
> ! &cond_expr_stmt_list);
>
> if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
> ! vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
> ! &cond_expr_stmt_list);
>
> ! cond_expr =
> ! fold_build2 (NE_EXPR, boolean_type_node, cond_expr,
integer_zero_node);
> ! cond_expr =
> ! force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
NULL_TREE);
> ! gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
>
> initialize_original_copy_tables ();
> ! nloop = loop_version (loop, cond_expr, &condition_bb,
> prob, prob, REG_BR_PROB_BASE - prob, true);
> free_original_copy_tables();
>
> --- 2338,2371 ----
> th = conservative_cost_threshold (loop_vinfo,
> min_profitable_iters);
>
> ! *cond_expr =
> fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
> build_int_cst (TREE_TYPE (scalar_loop_iters), th));
>
> ! *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
> ! false, NULL_TREE);
>
> if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
> ! vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
> ! cond_expr_stmt_list);
>
> if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
> ! vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
> ! cond_expr_stmt_list);
>
> ! *cond_expr =
> ! fold_build2 (NE_EXPR, boolean_type_node, *cond_expr,
integer_zero_node);
> ! *cond_expr =
> ! force_gimple_operand (*cond_expr, &gimplify_stmt_list, true,
NULL_TREE);
> ! gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
> !
> ! /* If we only needed the extra conditions and not a new loop copy
> ! bail out here. */
> ! if (!do_versioning)
> ! return;
>
> initialize_original_copy_tables ();
> ! nloop = loop_version (loop, *cond_expr, &condition_bb,
> prob, prob, REG_BR_PROB_BASE - prob, true);
> free_original_copy_tables();
>
> *************** vect_loop_versioning (loop_vec_info loop
> *** 2354,2363 ****
> /* End loop-exit-fixes after versioning. */
>
> update_ssa (TODO_update_ssa);
> ! if (cond_expr_stmt_list)
> {
> cond_exp_gsi = gsi_last_bb (condition_bb);
> ! gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
> GSI_SAME_STMT);
> }
> }
>
> --- 2396,2408 ----
> /* End loop-exit-fixes after versioning. */
>
> update_ssa (TODO_update_ssa);
> ! if (*cond_expr_stmt_list)
> {
> cond_exp_gsi = gsi_last_bb (condition_bb);
> ! gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
> ! GSI_SAME_STMT);
> ! *cond_expr_stmt_list = NULL;
> }
> + *cond_expr = NULL_TREE;
> }
>
> Index: trunk/gcc/tree-vectorizer.h
> ===================================================================
> *** trunk.orig/gcc/tree-vectorizer.h 2009-04-02 11:08:16.000000000
+0200
> --- trunk/gcc/tree-vectorizer.h 2009-04-02 11:44:43.000000000 +0200
> *************** extern bitmap vect_memsyms_to_rename;
> *** 706,713 ****
> in tree-vect-loop-manip.c. */
> extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
> extern bool slpeel_can_duplicate_loop_p (const struct loop *,
const_edge);
> ! extern void vect_loop_versioning (loop_vec_info);
> ! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *);
> extern void vect_do_peeling_for_alignment (loop_vec_info);
> extern LOC find_loop_location (struct loop *);
> extern bool vect_can_advance_ivs_p (loop_vec_info);
> --- 706,714 ----
> in tree-vect-loop-manip.c. */
> extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
> extern bool slpeel_can_duplicate_loop_p (const struct loop *,
const_edge);
> ! extern void vect_loop_versioning (loop_vec_info, bool, tree *,
> gimple_seq *);
> ! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
> ! tree, gimple_seq);
> extern void vect_do_peeling_for_alignment (loop_vec_info);
> extern LOC find_loop_location (struct loop *);
> extern bool vect_can_advance_ivs_p (loop_vec_info);
> Index: trunk/gcc/tree-vect-loop.c
> ===================================================================
> *** trunk.orig/gcc/tree-vect-loop.c 2009-04-02 11:08:16.000000000 +0200
> --- trunk/gcc/tree-vect-loop.c 2009-04-02 11:44:43.000000000 +0200
> *************** vect_transform_loop (loop_vec_info loop_
> *** 3388,3410 ****
> bool strided_store;
> bool slp_scheduled = false;
> unsigned int nunits;
>
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "=== vec_transform_loop ===");
>
> if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
> || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
> ! vect_loop_versioning (loop_vinfo);
>
> /* CHECKME: we wouldn't need this if we called update_ssa once
> for all loops. */
> bitmap_zero (vect_memsyms_to_rename);
> -
> - /* Peel the loop if there are data refs with unknown alignment.
> - Only one data ref with unknown store is allowed. */
> -
> - if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
> - vect_do_peeling_for_alignment (loop_vinfo);
>
> /* If the loop has a symbolic number of iterations 'n' (i.e. it's not
a
> compile time constant), or it is a constant that doesn't divide by
the
> --- 3388,3420 ----
> bool strided_store;
> bool slp_scheduled = false;
> unsigned int nunits;
> + tree cond_expr = NULL_TREE;
> + gimple_seq cond_expr_stmt_list = NULL;
> + bool do_peeling_for_loop_bound;
>
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "=== vec_transform_loop ===");
>
> + /* Peel the loop if there are data refs with unknown alignment.
> + Only one data ref with unknown store is allowed. */
> +
> + if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
> + vect_do_peeling_for_alignment (loop_vinfo);
> +
> + do_peeling_for_loop_bound
> + = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> + || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> + && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor !=
0));
> +
> if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
> || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
> ! vect_loop_versioning (loop_vinfo,
> ! !do_peeling_for_loop_bound,
> ! &cond_expr, &cond_expr_stmt_list);
>
> /* CHECKME: we wouldn't need this if we called update_ssa once
> for all loops. */
> bitmap_zero (vect_memsyms_to_rename);
>
> /* If the loop has a symbolic number of iterations 'n' (i.e. it's not
a
> compile time constant), or it is a constant that doesn't divide by
the
> *************** vect_transform_loop (loop_vec_info loop_
> *** 3414,3423 ****
> will remain scalar and will compute the remaining (n%VF)
iterations.
> (VF is the vectorization factor). */
>
> ! if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> ! || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> ! && LOOP_VINFO_INT_NITERS (loop_vinfo) %
> vectorization_factor != 0))
> ! vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
> else
> ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
> LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
> --- 3424,3432 ----
> will remain scalar and will compute the remaining (n%VF)
iterations.
> (VF is the vectorization factor). */
>
> ! if (do_peeling_for_loop_bound)
> ! vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
> ! cond_expr, cond_expr_stmt_list);
> else
> ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
> LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
> Index: trunk/gcc/Makefile.in
> ===================================================================
> *** trunk.orig/gcc/Makefile.in 2009-04-02 11:09:28.000000000 +0200
> --- trunk/gcc/Makefile.in 2009-04-02 11:44:43.000000000 +0200
> *************** tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SY
> *** 2123,2129 ****
> tree-ssa-copy.o : tree-ssa-copy.c $(TREE_FLOW_H) $(CONFIG_H) $
(SYSTEM_H) \
> $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h
> $(DIAGNOSTIC_H) \
> $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
> ! $(BASIC_BLOCK_H) tree-pass.h langhooks.h tree-ssa-propagate.h $
(FLAGS_H)
> tree-ssa-propagate.o : tree-ssa-propagate.c $(TREE_FLOW_H) $(CONFIG_H)
\
> $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h
\
> $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
> --- 2123,2130 ----
> tree-ssa-copy.o : tree-ssa-copy.c $(TREE_FLOW_H) $(CONFIG_H) $
(SYSTEM_H) \
> $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h
> $(DIAGNOSTIC_H) \
> $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
> ! $(BASIC_BLOCK_H) tree-pass.h langhooks.h tree-ssa-propagate.h
> $(FLAGS_H) \
> ! $(CFGLOOP_H)
> tree-ssa-propagate.o : tree-ssa-propagate.c $(TREE_FLOW_H) $(CONFIG_H)
\
> $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h
\
> $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
> Index: trunk/gcc/tree-ssa-copy.c
> ===================================================================
> *** trunk.orig/gcc/tree-ssa-copy.c 2009-04-02 11:08:16.000000000 +0200
> --- trunk/gcc/tree-ssa-copy.c 2009-04-02 11:44:43.000000000 +0200
> *************** along with GCC; see the file COPYING3.
> *** 37,42 ****
> --- 37,43 ----
> #include "tree-pass.h"
> #include "tree-ssa-propagate.h"
> #include "langhooks.h"
> + #include "cfgloop.h"
>
> /* This file implements the copy propagation pass and provides a
> handful of interfaces for performing const/copy propagation and
> *************** init_copy_prop (void)
> *** 992,998 ****
> tree def;
>
> def = gimple_phi_result (phi);
> ! if (!is_gimple_reg (def))
> prop_set_simulate_again (phi, false);
> else
> prop_set_simulate_again (phi, true);
> --- 993,1005 ----
> tree def;
>
> def = gimple_phi_result (phi);
> ! if (!is_gimple_reg (def)
> ! /* In loop-closed SSA form do not copy-propagate through
> ! PHI nodes. Technically this is only needed for loop
> ! exit PHIs, but this is difficult to query. */
> ! || (current_loops
> ! && gimple_phi_num_args (phi) == 1
> ! && loops_state_satisfies_p (LOOP_CLOSED_SSA)))
> prop_set_simulate_again (phi, false);
> else
> prop_set_simulate_again (phi, true);