This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Avoid versioning the vectorized loop if possible


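This is part #1 of the merge of versioning for strides.  It eliminates
the need to produce a separate versioned copy of the loop in the cases
where we peel for the loop bound anyway: the versioning conditions are
then combined into the guard that skips the vectorized loop, and the
peeled scalar loop that handles the remaining iterations is used as the
fallback instead.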

Bootstrapped and tested on x86_64-unknown-linux-gnu.  Ira, does this
look ok?

Thanks,
Richard.

2009-03-31  Richard Guenther  <rguenther@suse.de>

	* Makefile.in (tree-ssa-copy.o): Add $(CFGLOOP_H) dependency.
	* tree-ssa-copy.c (init_copy_prop): Do not propagate through
	single-argument PHIs if we are in loop-closed SSA form.
	* tree-vect-loop-manip.c (slpeel_add_loop_guard): Pass extra guards
	for the pre-condition.
	(slpeel_tree_peel_loop_to_edge): Likewise.
	(vect_build_loop_niters): Take an optional sequence to append stmts.
	(vect_generate_tmps_on_preheader): Likewise.
	(vect_do_peeling_for_loop_bound): Take extra guards for the
	pre-condition.
	(vect_do_peeling_for_alignment): Adjust.
	(vect_loop_versioning): Take a flag, a condition expression and a
	stmt list in which to return the pre-condition guards.  Do not
	apply versioning if we are going to peel for the loop bound.
	* tree-vectorizer.h (vect_loop_versioning): Adjust declaration.
	(vect_do_peeling_for_loop_bound): Likewise.
	* tree-vect-loop.c (vect_transform_loop): If we are peeling for
	loop bound only record extra pre-conditions, do not apply loop
	versioning.

Index: trunk/gcc/tree-vect-loop-manip.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop-manip.c	2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-vect-loop-manip.c	2009-03-31 17:32:58.000000000 +0200
*************** slpeel_tree_duplicate_loop_to_edge_cfg (
*** 792,802 ****
  /* Given the condition statement COND, put it as the last statement
     of GUARD_BB; EXIT_BB is the basic block to skip the loop;
     Assumes that this is the single exit of the guarded loop.  
!    Returns the skip edge.  */
  
  static edge
! slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
! 		       basic_block dom_bb)
  {
    gimple_stmt_iterator gsi;
    edge new_e, enter_e;
--- 792,803 ----
  /* Given the condition statement COND, put it as the last statement
     of GUARD_BB; EXIT_BB is the basic block to skip the loop;
     Assumes that this is the single exit of the guarded loop.  
!    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
  
  static edge
! slpeel_add_loop_guard (basic_block guard_bb, tree cond,
! 		       gimple_seq cond_expr_stmt_list,
! 		       basic_block exit_bb, basic_block dom_bb)
  {
    gimple_stmt_iterator gsi;
    edge new_e, enter_e;
*************** slpeel_add_loop_guard (basic_block guard
*** 809,819 ****
    gsi = gsi_last_bb (guard_bb);
  
    cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
    cond_stmt = gimple_build_cond (NE_EXPR,
  				 cond, build_int_cst (TREE_TYPE (cond), 0),
  				 NULL_TREE, NULL_TREE);
!   if (gimplify_stmt_list)
!     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
  
    gsi = gsi_last_bb (guard_bb);
    gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
--- 810,822 ----
    gsi = gsi_last_bb (guard_bb);
  
    cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
+   if (gimplify_stmt_list)
+     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
    cond_stmt = gimple_build_cond (NE_EXPR,
  				 cond, build_int_cst (TREE_TYPE (cond), 0),
  				 NULL_TREE, NULL_TREE);
!   if (cond_expr_stmt_list)
!     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
  
    gsi = gsi_last_bb (guard_bb);
    gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
*************** set_prologue_iterations (basic_block bb_
*** 1011,1016 ****
--- 1014,1023 ----
     The second guard is:
       if (FIRST_NITERS == NITERS) then skip the second loop.
  
+    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
+    then the generated condition is combined with COND_EXPR and the
+    statements in COND_EXPR_STMT_LIST are emitted together with it.
+ 
     FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
     FORNOW the resulting code will not be in loop-closed-ssa form.
  */
*************** static struct loop*
*** 1019,1025 ****
  slpeel_tree_peel_loop_to_edge (struct loop *loop, 
  			       edge e, tree first_niters, 
  			       tree niters, bool update_first_loop_count,
! 			       unsigned int th, bool check_profitability)
  {
    struct loop *new_loop = NULL, *first_loop, *second_loop;
    edge skip_e;
--- 1026,1033 ----
  slpeel_tree_peel_loop_to_edge (struct loop *loop, 
  			       edge e, tree first_niters, 
  			       tree niters, bool update_first_loop_count,
! 			       unsigned int th, bool check_profitability,
! 			       tree cond_expr, gimple_seq cond_expr_stmt_list)
  {
    struct loop *new_loop = NULL, *first_loop, *second_loop;
    edge skip_e;
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1193,1198 ****
--- 1201,1214 ----
  	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
  				       cost_pre_condition, pre_condition);
  	}
+       if (cond_expr)
+ 	{
+ 	  pre_condition =
+ 	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+ 			 pre_condition,
+ 			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
+ 				      cond_expr));
+ 	}
      }
  
    /* Prologue peeling.  */  
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1208,1213 ****
--- 1224,1230 ----
      }
  
    skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
+ 				  cond_expr_stmt_list,
                                    bb_before_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
  				      first_loop == new_loop,
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1245,1251 ****
  
    pre_condition = 
  	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
!   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                    bb_after_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                       second_loop == new_loop, &new_exit_bb);
--- 1262,1268 ----
  
    pre_condition = 
  	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
!   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                    bb_after_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                       second_loop == new_loop, &new_exit_bb);
*************** find_loop_location (struct loop *loop)
*** 1303,1312 ****
  
  
  /* This function builds ni_name = number of iterations loop executes
!    on the loop preheader.  */
  
  static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo)
  {
    tree ni_name, var;
    gimple_seq stmts = NULL;
--- 1320,1330 ----
  
  
  /* This function builds ni_name = number of iterations loop executes
!    on the loop preheader.  If SEQ is given the stmt is instead emitted
!    there.  */
  
  static tree
! vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
  {
    tree ni_name, var;
    gimple_seq stmts = NULL;
*************** vect_build_loop_niters (loop_vec_info lo
*** 1321,1328 ****
    pe = loop_preheader_edge (loop);
    if (stmts)
      {
!       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
!       gcc_assert (!new_bb);
      }
  
    return ni_name;
--- 1339,1351 ----
    pe = loop_preheader_edge (loop);
    if (stmts)
      {
!       if (seq)
! 	gimple_seq_add_seq (&seq, stmts);
!       else
! 	{
! 	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! 	  gcc_assert (!new_bb);
! 	}
      }
  
    return ni_name;
*************** vect_build_loop_niters (loop_vec_info lo
*** 1335,1347 ****
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf
  
!  and places them at the loop preheader edge.  */
  
  static void 
  vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, 
  				 tree *ni_name_ptr,
  				 tree *ratio_mult_vf_name_ptr, 
! 				 tree *ratio_name_ptr)
  {
  
    edge pe;
--- 1358,1372 ----
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf
  
!  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
!  if that is non-NULL.  */
  
  static void 
  vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, 
  				 tree *ni_name_ptr,
  				 tree *ratio_mult_vf_name_ptr, 
! 				 tree *ratio_name_ptr,
! 				 gimple_seq cond_expr_stmt_list)
  {
  
    edge pe;
*************** vect_generate_tmps_on_preheader (loop_ve
*** 1361,1367 ****
    /* Generate temporary variable that contains 
       number of iterations loop executes.  */
  
!   ni_name = vect_build_loop_niters (loop_vinfo);
    log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
  
    /* Create: ratio = ni >> log2(vf) */
--- 1386,1392 ----
    /* Generate temporary variable that contains 
       number of iterations loop executes.  */
  
!   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
    log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
  
    /* Create: ratio = ni >> log2(vf) */
*************** vect_generate_tmps_on_preheader (loop_ve
*** 1374,1382 ****
  
        stmts = NULL;
        ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
!       pe = loop_preheader_edge (loop);
!       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
!       gcc_assert (!new_bb);
      }
         
    /* Create: ratio_mult_vf = ratio << log2 (vf).  */
--- 1399,1412 ----
  
        stmts = NULL;
        ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
!       if (cond_expr_stmt_list)
! 	gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
!       else
! 	{
! 	  pe = loop_preheader_edge (loop);
! 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! 	  gcc_assert (!new_bb);
! 	}
      }
         
    /* Create: ratio_mult_vf = ratio << log2 (vf).  */
*************** vect_generate_tmps_on_preheader (loop_ve
*** 1391,1399 ****
        stmts = NULL;
        ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
  						 true, var);
!       pe = loop_preheader_edge (loop);
!       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
!       gcc_assert (!new_bb);
      }
  
    *ni_name_ptr = ni_name;
--- 1421,1434 ----
        stmts = NULL;
        ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
  						 true, var);
!       if (cond_expr_stmt_list)
! 	gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
!       else
! 	{
! 	  pe = loop_preheader_edge (loop);
! 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
! 	  gcc_assert (!new_bb);
! 	}
      }
  
    *ni_name_ptr = ni_name;
*************** conservative_cost_threshold (loop_vec_in
*** 1664,1673 ****
     NITERS % VECTORIZATION_FACTOR times.
     
     The original loop will later be made to iterate 
!    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
  
  void 
! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
  {
    tree ni_name, ratio_mult_vf_name;
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
--- 1699,1712 ----
     NITERS % VECTORIZATION_FACTOR times.
     
     The original loop will later be made to iterate 
!    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
! 
!    COND_EXPR and COND_EXPR_STMT_LIST are combined with the newly
!    generated test.  */
  
  void 
! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
! 				tree cond_expr, gimple_seq cond_expr_stmt_list)
  {
    tree ni_name, ratio_mult_vf_name;
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1690,1696 ****
       ratio = ni_name / vf
       ratio_mult_vf_name = ratio * vf  */
    vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
! 				   &ratio_mult_vf_name, ratio);
  
    loop_num  = loop->num; 
  
--- 1729,1736 ----
       ratio = ni_name / vf
       ratio_mult_vf_name = ratio * vf  */
    vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
! 				   &ratio_mult_vf_name, ratio,
! 				   cond_expr_stmt_list);
  
    loop_num  = loop->num; 
  
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1698,1704 ****
       peeling for alignment.  */
    if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
        && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
!       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
      {
        check_profitability = true;
  
--- 1738,1745 ----
       peeling for alignment.  */
    if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
        && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
!       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
!       && !cond_expr)
      {
        check_profitability = true;
  
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1711,1717 ****
  
    new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
                                              ratio_mult_vf_name, ni_name, false,
!                                             th, check_profitability);
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
  #ifdef ENABLE_CHECKING
--- 1752,1759 ----
  
    new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
                                              ratio_mult_vf_name, ni_name, false,
!                                             th, check_profitability,
! 					    cond_expr, cond_expr_stmt_list);
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
  #ifdef ENABLE_CHECKING
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1935,1941 ****
  
    initialize_original_copy_tables ();
  
!   ni_name = vect_build_loop_niters (loop_vinfo);
    niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
    
  
--- 1977,1983 ----
  
    initialize_original_copy_tables ();
  
!   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
    niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
    
  
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1956,1962 ****
    new_loop =
      slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
  				   niters_of_prolog_loop, ni_name, true,
! 				   th, check_profitability);
  
    gcc_assert (new_loop);
  #ifdef ENABLE_CHECKING
--- 1998,2004 ----
    new_loop =
      slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
  				   niters_of_prolog_loop, ni_name, true,
! 				   th, check_profitability, NULL_TREE, NULL);
  
    gcc_assert (new_loop);
  #ifdef ENABLE_CHECKING
*************** vect_create_cond_for_alias_checks (loop_
*** 2273,2287 ****
    
     The test generated to check which version of loop is executed
     is modified to also check for profitability as indicated by the 
!    cost model initially.  */
  
  void
! vect_loop_versioning (loop_vec_info loop_vinfo)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    struct loop *nloop;
-   tree cond_expr = NULL_TREE;
-   gimple_seq cond_expr_stmt_list = NULL;
    basic_block condition_bb;
    gimple_stmt_iterator gsi, cond_exp_gsi;
    basic_block merge_bb;
--- 2315,2332 ----
    
     The test generated to check which version of loop is executed
     is modified to also check for profitability as indicated by the 
!    cost model initially.
! 
!    The versioning precondition(s) are placed in *COND_EXPR and
!    *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
!    also performed, otherwise only the conditions are generated.  */
  
  void
! vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
! 		      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    struct loop *nloop;
    basic_block condition_bb;
    gimple_stmt_iterator gsi, cond_exp_gsi;
    basic_block merge_bb;
*************** vect_loop_versioning (loop_vec_info loop
*** 2301,2329 ****
    th = conservative_cost_threshold (loop_vinfo,
  				    min_profitable_iters);
  
!   cond_expr =
      fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, 
   	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  
!   cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
! 				    false, NULL_TREE);
  
    if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
!       vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
! 					 &cond_expr_stmt_list);
  
    if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
!     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, 
! 				       &cond_expr_stmt_list);
  
!   cond_expr =
!     fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
!   cond_expr =
!     force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
!   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  
    initialize_original_copy_tables ();
!   nloop = loop_version (loop, cond_expr, &condition_bb,
  			prob, prob, REG_BR_PROB_BASE - prob, true);
    free_original_copy_tables();
  
--- 2346,2379 ----
    th = conservative_cost_threshold (loop_vinfo,
  				    min_profitable_iters);
  
!   *cond_expr =
      fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, 
   	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  
!   *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
! 				     false, NULL_TREE);
  
    if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
!       vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
! 					 cond_expr_stmt_list);
  
    if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
!     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
! 				       cond_expr_stmt_list);
  
!   *cond_expr =
!     fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
!   *cond_expr =
!     force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
!   gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
! 
!   /* If we only needed the extra conditions and not a new loop copy,
!      bail out here.  */
!   if (!do_versioning)
!     return;
  
    initialize_original_copy_tables ();
!   nloop = loop_version (loop, *cond_expr, &condition_bb,
  			prob, prob, REG_BR_PROB_BASE - prob, true);
    free_original_copy_tables();
  
*************** vect_loop_versioning (loop_vec_info loop
*** 2354,2363 ****
    /* End loop-exit-fixes after versioning.  */
  
    update_ssa (TODO_update_ssa);
!   if (cond_expr_stmt_list)
      {
        cond_exp_gsi = gsi_last_bb (condition_bb);
!       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
      }
  }
  
--- 2404,2416 ----
    /* End loop-exit-fixes after versioning.  */
  
    update_ssa (TODO_update_ssa);
!   if (*cond_expr_stmt_list)
      {
        cond_exp_gsi = gsi_last_bb (condition_bb);
!       gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
! 			     GSI_SAME_STMT);
!       *cond_expr_stmt_list = NULL;
      }
+   *cond_expr = NULL_TREE;
  }
  
Index: trunk/gcc/tree-vectorizer.h
===================================================================
*** trunk.orig/gcc/tree-vectorizer.h	2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-vectorizer.h	2009-03-31 16:29:10.000000000 +0200
*************** extern bitmap vect_memsyms_to_rename;
*** 706,713 ****
     in tree-vect-loop-manip.c.  */
  extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
  extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
! extern void vect_loop_versioning (loop_vec_info);
! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *);
  extern void vect_do_peeling_for_alignment (loop_vec_info);
  extern LOC find_loop_location (struct loop *);
  extern bool vect_can_advance_ivs_p (loop_vec_info);
--- 706,714 ----
     in tree-vect-loop-manip.c.  */
  extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
  extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
! extern void vect_loop_versioning (loop_vec_info, bool, tree *, gimple_seq *);
! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
! 					    tree, gimple_seq);
  extern void vect_do_peeling_for_alignment (loop_vec_info);
  extern LOC find_loop_location (struct loop *);
  extern bool vect_can_advance_ivs_p (loop_vec_info);
Index: trunk/gcc/tree-vect-loop.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop.c	2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-vect-loop.c	2009-03-31 16:29:10.000000000 +0200
*************** vect_transform_loop (loop_vec_info loop_
*** 3388,3410 ****
    bool strided_store;
    bool slp_scheduled = false;
    unsigned int nunits;
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vec_transform_loop ===");
  
    if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
        || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
!     vect_loop_versioning (loop_vinfo);
  
    /* CHECKME: we wouldn't need this if we called update_ssa once
       for all loops.  */
    bitmap_zero (vect_memsyms_to_rename);
- 
-   /* Peel the loop if there are data refs with unknown alignment.
-      Only one data ref with unknown store is allowed.  */
- 
-   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
-     vect_do_peeling_for_alignment (loop_vinfo);
    
    /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
       compile time constant), or it is a constant that doesn't divide by the
--- 3388,3420 ----
    bool strided_store;
    bool slp_scheduled = false;
    unsigned int nunits;
+   tree cond_expr = NULL_TREE;
+   gimple_seq cond_expr_stmt_list = NULL;
+   bool do_peeling_for_loop_bound;
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vec_transform_loop ===");
  
+   /* Peel the loop if there are data refs with unknown alignment.
+      Only one data ref with unknown store is allowed.  */
+ 
+   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+     vect_do_peeling_for_alignment (loop_vinfo);
+ 
+   do_peeling_for_loop_bound
+     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ 	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+ 
    if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
        || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
!     vect_loop_versioning (loop_vinfo,
! 			  !do_peeling_for_loop_bound,
! 			  &cond_expr, &cond_expr_stmt_list);
  
    /* CHECKME: we wouldn't need this if we called update_ssa once
       for all loops.  */
    bitmap_zero (vect_memsyms_to_rename);
    
    /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
       compile time constant), or it is a constant that doesn't divide by the
*************** vect_transform_loop (loop_vec_info loop_
*** 3414,3423 ****
       will remain scalar and will compute the remaining (n%VF) iterations.
       (VF is the vectorization factor).  */
  
!   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
!     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
    else
      ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
  		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
--- 3424,3432 ----
       will remain scalar and will compute the remaining (n%VF) iterations.
       (VF is the vectorization factor).  */
  
!   if (do_peeling_for_loop_bound)
!     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
! 				    cond_expr, cond_expr_stmt_list);
    else
      ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
  		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
Index: trunk/gcc/Makefile.in
===================================================================
*** trunk.orig/gcc/Makefile.in	2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/Makefile.in	2009-03-31 16:29:10.000000000 +0200
*************** tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SY
*** 2121,2127 ****
  tree-ssa-copy.o : tree-ssa-copy.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
     $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
     $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
!    $(BASIC_BLOCK_H) tree-pass.h langhooks.h tree-ssa-propagate.h $(FLAGS_H)
  tree-ssa-propagate.o : tree-ssa-propagate.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h \
     $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
--- 2121,2128 ----
  tree-ssa-copy.o : tree-ssa-copy.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
     $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
     $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
!    $(BASIC_BLOCK_H) tree-pass.h langhooks.h tree-ssa-propagate.h $(FLAGS_H) \
!    $(CFGLOOP_H)
  tree-ssa-propagate.o : tree-ssa-propagate.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h \
     $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
Index: trunk/gcc/tree-ssa-copy.c
===================================================================
*** trunk.orig/gcc/tree-ssa-copy.c	2009-03-31 16:26:46.000000000 +0200
--- trunk/gcc/tree-ssa-copy.c	2009-03-31 16:29:10.000000000 +0200
*************** along with GCC; see the file COPYING3.
*** 37,42 ****
--- 37,43 ----
  #include "tree-pass.h"
  #include "tree-ssa-propagate.h"
  #include "langhooks.h"
+ #include "cfgloop.h"
  
  /* This file implements the copy propagation pass and provides a
     handful of interfaces for performing const/copy propagation and
*************** init_copy_prop (void)
*** 992,998 ****
            tree def;
  
  	  def = gimple_phi_result (phi);
! 	  if (!is_gimple_reg (def))
              prop_set_simulate_again (phi, false);
  	  else
              prop_set_simulate_again (phi, true);
--- 993,1005 ----
            tree def;
  
  	  def = gimple_phi_result (phi);
! 	  if (!is_gimple_reg (def)
! 	      /* In loop-closed SSA form do not copy-propagate through
! 		 single-argument PHI nodes.  Technically this is only needed
! 		 for loop exit PHIs, but this is difficult to query.  */
! 	      || (current_loops
! 		  && gimple_phi_num_args (phi) == 1
! 		  && loops_state_satisfies_p (LOOP_CLOSED_SSA)))
              prop_set_simulate_again (phi, false);
  	  else
              prop_set_simulate_again (phi, true);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]