cvs diff: Diffing . Index: cfgloop.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/cfgloop.h,v retrieving revision 1.28 diff -c -3 -p -r1.28 cfgloop.h *** cfgloop.h 5 Sep 2004 09:25:24 -0000 1.28 --- cfgloop.h 11 Sep 2004 17:10:31 -0000 *************** extern unsigned expected_loop_iterations *** 304,309 **** --- 304,312 ---- /* Loop manipulation. */ extern bool can_duplicate_loop_p (struct loop *loop); + extern struct loop *duplicate_loop (struct loops *loops, + struct loop *loop, + struct loop *target); #define DLTHE_FLAG_UPDATE_FREQ 1 /* Update frequencies in duplicate_loop_to_header_edge. */ Index: cfgloopmanip.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/cfgloopmanip.c,v retrieving revision 1.29 diff -c -3 -p -r1.29 cfgloopmanip.c *** cfgloopmanip.c 7 Sep 2004 15:46:48 -0000 1.29 --- cfgloopmanip.c 11 Sep 2004 17:10:31 -0000 *************** Software Foundation, 59 Temple Place - S *** 29,36 **** #include "cfglayout.h" #include "output.h" - static struct loop * duplicate_loop (struct loops *, struct loop *, - struct loop *); static void duplicate_subloops (struct loops *, struct loop *, struct loop *); static void copy_loops_to (struct loops *, struct loop **, int, struct loop *); --- 29,34 ---- *************** place_new_loop (struct loops *loops, str *** 701,707 **** /* Copies copy of LOOP as subloop of TARGET loop, placing newly created loop into LOOPS structure. */ ! static struct loop * duplicate_loop (struct loops *loops, struct loop *loop, struct loop *target) { struct loop *cloop; --- 699,705 ---- /* Copies copy of LOOP as subloop of TARGET loop, placing newly created loop into LOOPS structure. */ ! struct loop * duplicate_loop (struct loops *loops, struct loop *loop, struct loop *target) { struct loop *cloop; Index: tree-vectorizer.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-vectorizer.c,v retrieving revision 2.8 diff -c -3 -p -r2.8 tree-vectorizer.c *** tree-vectorizer.c 10 Sep 2004 10:44:47 -0000 2.8 --- tree-vectorizer.c 11 Sep 2004 17:10:32 -0000 *************** Software Foundation, 59 Temple Place - S *** 140,145 **** --- 140,146 ---- #include "cfglayout.h" #include "expr.h" #include "optabs.h" + #include "toplev.h" #include "tree-chrec.h" #include "tree-data-ref.h" #include "tree-scalar-evolution.h" *************** static bool vect_analyze_operations (loo *** 159,165 **** /* Main code transformation functions. */ static void vect_transform_loop (loop_vec_info, struct loops *); ! static void vect_transform_loop_bound (loop_vec_info); static bool vect_transform_stmt (tree, block_stmt_iterator *); static bool vectorizable_load (tree, block_stmt_iterator *, tree *); static bool vectorizable_store (tree, block_stmt_iterator *, tree *); --- 160,166 ---- /* Main code transformation functions. */ static void vect_transform_loop (loop_vec_info, struct loops *); ! static void vect_transform_loop_bound (loop_vec_info, tree niters); static bool vect_transform_stmt (tree, block_stmt_iterator *); static bool vectorizable_load (tree, block_stmt_iterator *, tree *); static bool vectorizable_store (tree, block_stmt_iterator *, tree *); *************** static bool vectorizable_operation (tree *** 167,187 **** static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); static void vect_align_data_ref (tree); static void vect_enhance_data_refs_alignment (loop_vec_info); /* Utility functions for the analyses. */ static bool vect_is_simple_use (tree , struct loop *, tree *); static bool exist_non_indexing_operands_for_use_p (tree, tree); ! static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); static void vect_mark_relevant (varray_type, tree); static bool vect_stmt_relevant_p (tree, loop_vec_info); ! static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *); static void vect_compute_data_ref_alignment (struct data_reference *, loop_vec_info); static bool vect_analyze_data_ref_access (struct data_reference *); static bool vect_get_first_index (tree, tree *); static bool vect_can_force_dr_alignment_p (tree, unsigned int); static tree vect_get_base_decl_and_bit_offset (tree, tree *); ! static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool); /* Utility functions for the code transformation. */ static tree vect_create_destination_var (tree, tree); --- 168,194 ---- static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); static void vect_align_data_ref (tree); static void vect_enhance_data_refs_alignment (loop_vec_info); + static void vect_transform_for_unknown_loop_bound + (loop_vec_info, tree *, struct loops *); /* Utility functions for the analyses. */ static bool vect_is_simple_use (tree , struct loop *, tree *); static bool exist_non_indexing_operands_for_use_p (tree, tree); ! static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, ! bool); static void vect_mark_relevant (varray_type, tree); static bool vect_stmt_relevant_p (tree, loop_vec_info); ! static tree vect_get_loop_niters (struct loop *, tree *); static void vect_compute_data_ref_alignment (struct data_reference *, loop_vec_info); static bool vect_analyze_data_ref_access (struct data_reference *); static bool vect_get_first_index (tree, tree *); static bool vect_can_force_dr_alignment_p (tree, unsigned int); static tree vect_get_base_decl_and_bit_offset (tree, tree *); ! static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, ! bool); ! static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters, ! struct loop *loop); /* Utility functions for the code transformation. */ static tree vect_create_destination_var (tree, tree); *************** static tree get_vectype_for_scalar_type *** 191,198 **** --- 198,238 ---- static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); static tree vect_get_vec_def_for_operand (tree, tree); static tree vect_init_vector (tree, tree); + static tree vect_build_symbol_bound (tree, int, struct loop *); static void vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); + static void vect_generate_tmps_on_preheader (loop_vec_info, + tree *, tree *, + tree *); + static void vect_update_ivs_after_vectorizer (struct loop *, tree); + + /* Loop transformations prior to vectorizeration. */ + + /* Loop transformations entry point function. + It can be used outside of the vectorizer + in case the loop to be manipulated answers conditions specified + in function documentation. */ + struct loop *tree_duplicate_loop_to_edge (struct loop *, + struct loops *, edge, + tree, tree, bool); + + static void allocate_new_names (bitmap); + static void rename_use_op (use_operand_p); + static void rename_def_op (def_operand_p, tree); + static void rename_variables_in_bb (basic_block); + static void free_new_names (bitmap); + static void rename_variables_in_loop (struct loop *); + static void copy_phi_nodes (struct loop *, struct loop *, bool); + static void update_phis_for_duplicate_loop (struct loop *, + struct loop *, + bool after); + static void update_phi_nodes_for_guard (edge, struct loop *); + static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree); + static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *, + struct loops *, + edge); + static edge add_loop_guard (basic_block, tree, basic_block); + static bool verify_loop_for_duplication (struct loop *, bool, edge); /* Utilities for creation and deletion of vec_info structs. */ loop_vec_info new_loop_vec_info (struct loop *loop); *************** stmt_vec_info new_stmt_vec_info (tree st *** 202,207 **** --- 242,989 ---- static bool vect_debug_stats (struct loop *loop); static bool vect_debug_details (struct loop *loop); + + /* This page contains code for general loop transfomations to + be applied to some loops before vectorizing it + to make them suitable for vectorization. */ + + /* For each definition in DEFINITIONS this function allocates + new ssa name. */ + static void + allocate_new_names (bitmap definitions) + { + unsigned ver; + + EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, + { + tree def = ssa_name (ver); + tree *new_name_ptr = xmalloc (sizeof (tree)); + + bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def); + + *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def)); + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal; + + SSA_NAME_AUX (def) = new_name_ptr; + }); + } + + + /* Renames the use *OP_P. */ + static void + rename_use_op (use_operand_p op_p) + { + tree *new_name_ptr; + + if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) + return; + + new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p)); + + /* Something defined outside of the loop. */ + if (!new_name_ptr) + return; + + /* An ordinary ssa name defined in the loop. */ + + SET_USE (op_p, *new_name_ptr); + } + + + /* Renames the def *OP_P in statement STMT. */ + static void + rename_def_op (def_operand_p op_p, tree stmt) + { + tree *new_name_ptr; + + if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME) + return; + + new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p)); + + /* Something defined outside of the loop. */ + if (!new_name_ptr) + return; + + /* An ordinary ssa name defined in the loop. */ + + SET_DEF (op_p, *new_name_ptr); + SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt; + } + + + /* Renames the variables in basic block BB. */ + static void + rename_variables_in_bb (basic_block bb) + { + tree phi; + block_stmt_iterator bsi; + tree stmt; + stmt_ann_t ann; + use_optype uses; + vuse_optype vuses; + def_optype defs; + v_may_def_optype v_may_defs; + v_must_def_optype v_must_defs; + unsigned i; + edge e; + + for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) + rename_def_op (PHI_RESULT_PTR (phi), phi); + + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + stmt = bsi_stmt (bsi); + get_stmt_operands (stmt); + ann = stmt_ann (stmt); + + uses = USE_OPS (ann); + for (i = 0; i < NUM_USES (uses); i++) + rename_use_op (USE_OP_PTR (uses, i)); + + defs = DEF_OPS (ann); + for (i = 0; i < NUM_DEFS (defs); i++) + rename_def_op (DEF_OP_PTR (defs, i), stmt); + + vuses = VUSE_OPS (ann); + for (i = 0; i < NUM_VUSES (vuses); i++) + rename_use_op (VUSE_OP_PTR (vuses, i)); + + v_may_defs = V_MAY_DEF_OPS (ann); + for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++) + { + rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i)); + rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt); + } + + v_must_defs = V_MUST_DEF_OPS (ann); + for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++) + rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt); + } + + for (e = bb->succ; e; e = e->succ_next) + for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi)) + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e)); + } + + + /* Releases the structures holding the new ssa names. */ + static void + free_new_names (bitmap definitions) + { + unsigned ver; + + EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, + { + tree def = ssa_name (ver); + + if (SSA_NAME_AUX (def)) + { + free (SSA_NAME_AUX (def)); + SSA_NAME_AUX (def) = NULL; + } + + }); + } + + + /* Renames variables in new generated LOOP. */ + static void + rename_variables_in_loop (struct loop *loop) + { + unsigned i; + basic_block *bbs; + + bbs = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + rename_variables_in_bb (bbs[i]); + + free (bbs); + } + + + /* This function copies phis from LOOP header to + NEW_LOOP header. AFTER is as + from update_phis_for_duplicate_loop function. */ + static void + copy_phi_nodes (struct loop *loop, struct loop *new_loop, + bool after) + { + tree phi, new_phi, def; + edge new_e; + edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop)); + + /* First copy the phi nodes. + In lno branch this code is a part of + tree_duplicate_bb function. */ + for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) + create_phi_node (PHI_RESULT (phi), new_loop->header); + + set_phi_nodes (new_loop->header, nreverse (phi_nodes (new_loop->header))); + + /* Second add arguments to newly created phi nodes. */ + for (phi = phi_nodes (loop->header), + new_phi = phi_nodes (new_loop->header); + phi; + phi = TREE_CHAIN (phi), + new_phi = TREE_CHAIN (new_phi)) + { + new_e = loop_preheader_edge (new_loop); + def = PHI_ARG_DEF_FROM_EDGE (phi, e); + add_phi_arg (&new_phi, def, new_e); + } + } + + + /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP + executes after LOOP, and false if it executes before it. */ + static void + update_phis_for_duplicate_loop (struct loop *loop, + struct loop *new_loop, bool after) + { + edge old_latch; + tree *new_name_ptr, new_ssa_name; + tree phi_new, phi_old, def; + edge orig_entry_e = loop_preheader_edge (loop); + + /* Copy phis from loop->header to new_loop->header. */ + copy_phi_nodes (loop, new_loop, after); + + old_latch = loop_latch_edge (loop); + + /* Update PHI args for the new loop latch edge, and + the old loop preheader edge, we know that the PHI nodes + are ordered appropriately in copy_phi_nodes. */ + for (phi_new = phi_nodes (new_loop->header), + phi_old = phi_nodes (loop->header); + phi_new && phi_old; + phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old)) + { + def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch); + + if (TREE_CODE (def) != SSA_NAME) + continue; + + new_name_ptr = SSA_NAME_AUX (def); + + /* Something defined outside of the loop. */ + if (!new_name_ptr) + continue; + + /* An ordinary ssa name defined in the loop. */ + new_ssa_name = *new_name_ptr; + + add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop)); + + /* Update PHI args for the original loop pre-header edge. */ + if (! after) + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e), + new_ssa_name); + } + } + + /* Update PHI nodes for a guard of the LOOP. + + LOOP is supposed to have preheader bb at which guard condition is located. + The true edge of this condition skips the LOOP and ends + at destination of the (unique) LOOP exit. Loop exit bb is suppose to be + empty (created by this transformation) bb with one successor. + + This function creates phi nodes at LOOP exit bb. These phis need to be + created as a result of adding true edge coming from guard. + + FORNOW: Only phis which have corresponding phi nodes at the header of the + LOOP are created. Here we use the assumption that after the LOOP there + are no uses of defs generated in LOOP. + + After the phis creation, function updates values of phi nodes from + the LOOP exit successor bb. */ + + static void + update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop) + { + tree phi, phi1; + + for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) + { + tree new_phi; + tree phi_arg; + + /* Generate new phi node. */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), + loop->exit_edges[0]->dest); + + /* Add argument coming from guard true edge. */ + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]); + add_phi_arg (&new_phi, phi_arg, guard_true_edge); + + /* Add argument coming from loop exit edge. */ + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->latch->succ); + add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]); + + /* Update all phi nodes at the loop exit successor. */ + for (phi1 = phi_nodes (loop->exit_edges[0]->dest->succ->dest); phi1; + phi1 = TREE_CHAIN (phi1)) + { + tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, + loop->exit_edges[0]->dest->succ); + if (old_arg == phi_arg) + { + edge e = loop->exit_edges[0]->dest->succ; + + SET_PHI_ARG_DEF (phi1, + phi_arg_from_edge (phi1, e), + PHI_RESULT (new_phi)); + } + } + } + } + + /* Make the LOOP iterate NITERS times. It does so by adding a new IV + that starts at zero increas by one and its limit is niters. */ + static void + make_loop_iterate_ntimes (struct loop *loop, tree niters, + tree begin_label, tree exit_label) + { + tree indx_before_incr, indx_after_incr, cond_stmt, cond; + tree orig_cond = get_loop_exit_condition (loop); + edge exit_edge = loop->exit_edges[0]; + block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src); + + create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, + &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr); + + /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get + back to the exit condition statement. */ + bsi_next (&loop_exit_bsi); + gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond); + + + if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ + cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters); + else /* 'then' edge loops back. */ + cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters); + + begin_label = build1 (GOTO_EXPR, void_type_node, begin_label); + exit_label = build1 (GOTO_EXPR, void_type_node, exit_label); + cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond, + begin_label, exit_label); + bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT); + + /* Remove old loop exit test: */ + bsi_remove (&loop_exit_bsi); + + if (vect_debug_stats (loop) || vect_debug_details (loop)) + print_generic_expr (dump_file, cond_stmt, TDF_SLIM); + } + + + /* Given LOOP this function generates a new copy of it and put it on either + the entry if E is the entry edge or at its exit if E is the exit edge. */ + static struct loop * + tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, + edge e) + { + struct loop *new_loop; + basic_block *new_bbs, *bbs; + bool at_exit; + bool was_imm_dom; + basic_block exit_dest; + tree phi, phi_arg; + + at_exit = (e == loop->exit_edges[0]); + if (!at_exit && e != loop_preheader_edge (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Edge is not an entry nor an exit edge.\n"); + return NULL; + } + + bbs = get_loop_body (loop); + + /* Check whether duplication is possible. */ + if (!can_copy_bbs_p (bbs, loop->num_nodes)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Cannot copy basic blocks.\n"); + free (bbs); + return NULL; + } + + /* Generate new loop structure. */ + new_loop = duplicate_loop (loops, loop, loop->outer); + if (!new_loop) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "The duplicate_loop returns NULL.\n"); + free (bbs); + return NULL; + } + + exit_dest = loop->exit_edges[0]->dest; + was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, + exit_dest) == loop->header ? + true : false); + + new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes); + + copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL); + + /* Duplicating phi args at exit bbs as coming + also from exit of duplicated loop. */ + for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi)) + { + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]); + if (phi_arg) + { + edge new_loop_exit_edge; + + if (new_loop->header->succ->dest == new_loop->latch) + new_loop_exit_edge = new_loop->header->succ->succ_next; + else + new_loop_exit_edge = new_loop->header->succ; + + add_phi_arg (&phi, phi_arg, new_loop_exit_edge); + } + } + + if (at_exit) /* Add the loop copy at exit. */ + { + redirect_edge_and_branch_force (e, new_loop->header); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); + if (was_imm_dom) + set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); + } + else /* Add the copy at entry. */ + { + basic_block new_bb, e_bb; + edge new_exit_e; + + /* This is the only way we can keep PHI node args of e in + loop->header. */ + new_bb = split_edge (e); + add_bb_to_loop (new_bb, loop->outer); + e_bb = new_bb->pred->src; + + redirect_edge_and_branch_force (new_bb->pred, new_loop->header); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e_bb); + + if (!flow_bb_inside_loop_p (new_loop, new_loop->header->succ->dest)) + new_exit_e = new_loop->header->succ; + else + new_exit_e = new_loop->header->succ->succ_next; + + redirect_edge_and_branch_force (new_exit_e, new_bb); + set_immediate_dominator (CDI_DOMINATORS, new_bb, + new_exit_e->src); + } + + flow_loop_scan (new_loop, LOOP_ALL); + flow_loop_scan (loop, LOOP_ALL); + free (new_bbs); + free (bbs); + + return new_loop; + } + + + /* Given the condition statement COND, locate it as last statement + of GUARD_BB; EXIT_BB is the basic block to skip the loop; + Assumes that this is the single exit of the guarded loop. + Returns the skip edge. */ + static edge + add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb) + { + block_stmt_iterator bsi; + edge new_e, enter_e; + tree cond_stmt, then_label, else_label; + + enter_e = guard_bb->succ; + enter_e->flags &= ~EDGE_FALLTHRU; + enter_e->flags |= EDGE_FALSE_VALUE; + bsi = bsi_last (guard_bb); + + then_label = build1 (GOTO_EXPR, void_type_node, + tree_block_label (exit_bb)); + else_label = build1 (GOTO_EXPR, void_type_node, + tree_block_label (enter_e->dest)); + cond_stmt = build (COND_EXPR, void_type_node, cond, + then_label, else_label); + bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); + /* Add new edge to connect entry block to the second loop. */ + new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb); + return new_e; + } + + /* This function verify that LOOP corresponds to + transformation conditions. */ + static bool + verify_loop_for_duplication (struct loop *loop, + bool update_first_loop_count, edge e) + { + edge exit_e = loop->exit_edges [0]; + edge entry_e = loop_preheader_edge (loop); + + /* We duplicate only innermost loops. */ + if (loop->inner) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop duplication failed. Loop is not innermost.\n"); + return false; + } + + /* Only loops with 1 exit. */ + if (loop->num_exits != 1) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "More than one exit from loop.\n"); + return false; + } + + /* Only loops with 1 entry. */ + if (loop->num_entries != 1) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "More than one exit from loop.\n"); + return false; + } + + /* All loops has outers, the only case loop->outer is NULL is for + the function itself. */ + if (!loop->outer) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop is outer-most loop.\n"); + return false; + } + + /* Verify that new IV can be created and loop condition + can be changed to make first loop iterate first_niters times. */ + if (!update_first_loop_count) + { + tree orig_cond = get_loop_exit_condition (loop); + block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); + + if (!orig_cond) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop has no exit condition.\n"); + return false; + } + if (orig_cond != bsi_stmt (loop_exit_bsi)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop exit condition is not loop header last stmt.\n"); + return false; + } + } + + /* Make sure E is either an entry or an exit edge. */ + if (e != exit_e && e != entry_e) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "E is not loop entry or exit edge.\n"); + return false; + } + + return true; + } + + /* Given LOOP this function duplicates it to the edge E. + + This function serves as a basic transformation to be done on LOOP prior + to vectorizing it. For now, there are two main cases when it's used + by vectorizer: to support loops with unknown loop bounds and to deal with + alignment unknown at compile time. In the first case, LOOP is duplicated to + the exit edge, producing epilog loop. In the second case, the LOOP is + duplicated to the preheader edge thus generating prolog loop. In both + cases, the original loop will be vectorized after the transformation. + + The edge E is supposed to be either preheader edge of the LOOP or + its exit edge. If preheader edge is specified, the LOOP copy + will precede the original one. Otherwise the copy will be located + at the exit of the LOOP. + + FIRST_NITERS (SSA_NAME) parameter specifies how many time to iterate + the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first + loop will be iterated FIRST_NITERS times by introducing additional + induction variable and replacing loop exit condition. If + UPDATE_FIRST_LOOP_COUNT is true no change to first loop is made and + the caller to tree_duplicate_loop_to_edge is responsible for updating + the first loop count. + + NITERS (also SSA_NAME) parameter defines the number of iteration the + original loop has been iterated. The function generates two if-then guards: + one prior to the first loop and the other prior to the second loop. + The first guard will be: + + if (FIRST_NITERS == 0) then skip the first loop + + The second guard will be: + + if (FIRST_NITERS == NITERS) then skip the second loop + + Thus the equivalence to the original code is guaranteed by correct values + of NITERS and FIRST_NITERS and generation of if-then loop guards. + + For now this function supports only types of loops that can be + vectorizered after the transformation. Such types are the following: + + (1) only innermost loops + (2) loops built from 2 basic blocks + (3) loops with one entry and one exit + (4) loops without function calls + (5) loops without defs that are used after the loop + + (1), (3) are checked in this function; (2) - in function + vect_analyze_loop_form; (4) - in function vect_analyze_data_refs; + (5) is checked as part of the function vect_mark_stmts_to_be_vectorized, + when excluding induction/reduction support. + + The function returns NULL in case one of these checks or + transformations failed. */ + + struct loop* + tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops, + edge e, tree first_niters, + tree niters, bool update_first_loop_count) + { + struct loop *new_loop = NULL, *first_loop, *second_loop; + edge skip_e; + tree pre_condition; + bitmap definitions; + basic_block first_exit_bb, second_exit_bb; + basic_block pre_header_bb; + edge exit_e = loop->exit_edges [0]; + + gcc_assert (!any_marked_for_rewrite_p ()); + + if (!verify_loop_for_duplication (loop, update_first_loop_count, e)) + return NULL; + + /* We have to initialize cfg_hooks. Then, when calling + cfg_hooks->split_edge, the function tree_split_edge + is actually called and, when calling cfg_hooks->duplicate_block, + the function tree_duplicate_bb is called. */ + tree_register_cfg_hooks (); + + /* 1. Generate a copy of LOOP and put it on E (entry or exit). */ + if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e))) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "The tree_duplicate_loop_to_edge_cfg failed.\n"); + return NULL; + } + + definitions = marked_ssa_names (); + allocate_new_names (definitions); + update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); + /* Here, using assumption (5), we do not propagate new names futher + than on phis of the exit from the second loop. */ + rename_variables_in_loop (new_loop); + free_new_names (definitions); + + if (e == exit_e) + { + first_loop = loop; + second_loop = new_loop; + } + else + { + first_loop = new_loop; + second_loop = loop; + } + + /* 2. Generate bb between the loops. */ + first_exit_bb = split_edge (first_loop->exit_edges[0]); + add_bb_to_loop (first_exit_bb, first_loop->outer); + + /* We need to update here first loop exit edge + and second loop preheader edge. */ + flow_loop_scan (first_loop, LOOP_ALL); + flow_loop_scan (second_loop, LOOP_ALL); + + + /* 3. Make first loop iterate FIRST_NITERS times, if needed. */ + if (!update_first_loop_count) + { + tree first_loop_latch_lbl = tree_block_label (first_loop->latch); + tree first_loop_exit_lbl = tree_block_label (first_exit_bb); + + make_loop_iterate_ntimes (first_loop, first_niters, + first_loop_latch_lbl, + first_loop_exit_lbl); + } + + /* 4. Add the guard before first loop: + + if FIRST_NITERS == 0 + skip first loop + else + enter first loop */ + + /* 4a. Generate bb before first loop. */ + pre_header_bb = split_edge (loop_preheader_edge (first_loop)); + add_bb_to_loop (pre_header_bb, first_loop->outer); + + /* First loop preheader edge is changed. */ + flow_loop_scan (first_loop, LOOP_ALL); + + /* 4b. Generate guard condition. */ + pre_condition = build (EQ_EXPR, boolean_type_node, + first_niters, integer_zero_node); + + /* 4c. Add condition at the end of preheader bb. */ + skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb); + + /* 4d. Updtae phis at first loop exit and propagate changes + to the phis of second loop. */ + update_phi_nodes_for_guard (skip_e, first_loop); + + /* 5. Add the guard before second loop: + + if FIRST_NITERS == NITERS SKIP + skip second loop + else + enter second loop */ + + /* 5a. Generate empty bb at the exit from the second loop. */ + second_exit_bb = split_edge (second_loop->exit_edges[0]); + add_bb_to_loop (second_exit_bb, second_loop->outer); + + /* Second loop preheader edge is changed. */ + flow_loop_scan (second_loop, LOOP_ALL); + + /* 5b. Generate guard condition. */ + pre_condition = build (EQ_EXPR, boolean_type_node, + first_niters, niters); + + /* 5c. Add condition at the end of preheader bb. */ + skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb); + update_phi_nodes_for_guard (skip_e, second_loop); + + BITMAP_XFREE (definitions); + unmark_all_for_rewrite (); + + return new_loop; + } + + + + /* Here the proper Vectorizer starts. */ /* Function new_stmt_vec_info. *************** new_loop_vec_info (struct loop *loop) *** 261,267 **** LOOP_VINFO_LOOP (res) = loop; LOOP_VINFO_BBS (res) = bbs; LOOP_VINFO_EXIT_COND (res) = NULL; ! LOOP_VINFO_NITERS (res) = -1; LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, --- 1043,1049 ---- LOOP_VINFO_LOOP (res) = loop; LOOP_VINFO_BBS (res) = bbs; LOOP_VINFO_EXIT_COND (res) = NULL; ! LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, *************** vect_can_force_dr_alignment_p (tree decl *** 485,491 **** provided. */ static tree ! vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) { const char *prefix; int prefix_len; --- 1267,1274 ---- provided. */ static tree ! vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, ! const char *name) { const char *prefix; int prefix_len; *************** vect_transform_stmt (tree stmt, block_st *** 1359,1370 **** } /* Function vect_transform_loop_bound. Create a new exit condition for the loop. */ static void ! vect_transform_loop_bound (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); edge exit_edge = loop->single_exit; --- 2142,2273 ---- } + /* This function generate the following statements: + + ni_name = number of iterations loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf + + and locate them at the loop preheader edge. */ + + static void + vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p, + tree *ratio_mult_vf_name_p, tree *ratio_p) + { + + edge pe; + basic_block new_bb; + tree stmt, var, ni, ni_name; + tree ratio; + tree ratio_mult_vf_name, ratio_mult_vf; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + + int vf, i; + + /* Generate temporary variable that contains + number of iterations loop executes. */ + + ni = LOOP_VINFO_NITERS(loop_vinfo); + var = create_tmp_var (TREE_TYPE (ni), "niters"); + add_referenced_tmp_var (var); + if (TREE_CODE (ni) == INTEGER_CST) + { + /* This case is generated when treating a known loop bound + indivisible by VF. Here we cannot use force_gimple_operand. */ + stmt = build (MODIFY_EXPR, void_type_node, var, ni); + ni_name = make_ssa_name (var, stmt); + TREE_OPERAND (stmt, 0) = ni_name; + } + else + ni_name = force_gimple_operand (ni, &stmt, false, var); + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, new_bb->pred->src->loop_father); + + /* ratio = ni / vf. + vf is power of 2; then if ratio = = n >> log2 (vf). */ + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + ratio = vect_build_symbol_bound (ni_name, vf, loop); + + /* Update initial conditions of loop copy. */ + + /* ratio_mult_vf = ratio * vf; + then if ratio_mult_vf = ratio << log2 (vf). */ + + i = exact_log2 (vf); + ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); + add_referenced_tmp_var (ratio_mult_vf); + + ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE); + + stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, + build2 (LSHIFT_EXPR, TREE_TYPE (ratio), + ratio, build_int_cst (unsigned_type_node, + i))); + + SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, new_bb->pred->src->loop_father); + + *ni_name_p = ni_name; + *ratio_mult_vf_name_p = ratio_mult_vf_name; + *ratio_p = ratio; + + return; + } + + /* This function generates stmt + + tmp = n / vf; + + and attaches it to preheader of LOOP. */ + + static tree + vect_build_symbol_bound (tree n, int vf, struct loop * loop) + { + tree var, stmt, var_name; + edge pe; + basic_block new_bb; + int i; + + /* create temporary variable */ + var = create_tmp_var (TREE_TYPE (n), "bnd"); + add_referenced_tmp_var (var); + + var_name = make_ssa_name (var, NULL_TREE); + + /* vf is power of 2; then n/vf = n >> log2 (vf). */ + + i = exact_log2 (vf); + stmt = build2 (MODIFY_EXPR, void_type_node, var_name, + build2 (RSHIFT_EXPR, TREE_TYPE (n), + n, build_int_cst (unsigned_type_node,i))); + + SSA_NAME_DEF_STMT (var_name) = stmt; + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, new_bb->pred->src->loop_father); + else + if (vect_debug_details (NULL)) + fprintf (dump_file, "New bb on preheader edge was not generated."); + + return var_name; + } + + /* Function vect_transform_loop_bound. Create a new exit condition for the loop. */ static void ! vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); edge exit_edge = loop->single_exit; *************** vect_transform_loop_bound (loop_vec_info *** 1375,1393 **** int vf; tree cond_stmt; tree new_loop_bound; tree cond; tree lb_type; ! gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)); ! old_N = LOOP_VINFO_NITERS (loop_vinfo); ! vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); ! /* FORNOW: ! assuming number-of-iterations divides by the vectorization factor. */ ! gcc_assert (!(old_N % vf)); orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo); gcc_assert (orig_cond_expr); gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi)); create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, --- 2278,2298 ---- int vf; tree cond_stmt; tree new_loop_bound; + bool symbol_niters; tree cond; tree lb_type; ! symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo); ! if (!symbol_niters) ! old_N = LOOP_VINFO_INT_NITERS (loop_vinfo); ! ! vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo); + #ifdef ENABLE_CHECKING gcc_assert (orig_cond_expr); + #endif gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi)); create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, *************** vect_transform_loop_bound (loop_vec_info *** 1400,1411 **** /* new loop exit test: */ lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1)); ! new_loop_bound = build_int_cst (lb_type, old_N/vf); if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ ! cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound); else /* 'then' edge loops back. */ ! cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound); cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond, TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2)); --- 2305,2323 ---- /* new loop exit test: */ lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1)); ! if(!symbol_niters) ! new_loop_bound = fold_convert (lb_type, ! build_int_cst (unsigned_type_node, ! old_N/vf)); ! else ! new_loop_bound = niters; if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ ! cond = build2 (GE_EXPR, boolean_type_node, ! indx_after_incr, new_loop_bound); else /* 'then' edge loops back. */ ! cond = build2 (LT_EXPR, boolean_type_node, ! indx_after_incr, new_loop_bound); cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond, TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2)); *************** vect_transform_loop_bound (loop_vec_info *** 1420,1425 **** --- 2332,2497 ---- } + /* Advance IVs of the loop (to be vectorized later) to correct position. + + When loop is vectorized, its IVs are not always advanced + correctly since vectorization changes the loop count. It's ok + in case epilog loop was not produced after original one before + vectorization process (the vectorizer checks that there is no uses + of IVs after the loop). However, in case the epilog loop was peeled, + IVs from original loop are used in epilog loop and should be + advanced correctly. + + Here we use access functions of IVs and number of + iteration loop executes in order to bring IVs to correct position. + + Function also update phis of basic block at the exit + from the loop. */ + + static void + vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) + { + edge exit = loop->exit_edges[0]; + tree phi; + edge latch = loop_latch_edge (loop); + + /* Generate basic block at the exit from the loop. */ + basic_block new_bb = split_edge (exit); + add_bb_to_loop (new_bb, loop->outer); + + loop->exit_edges[0] = new_bb->pred; + + for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) + { + tree access_fn = NULL; + tree evolution_part; + tree init_expr; + tree step_expr; + tree var, stmt, ni, ni_name; + int i, j, num_elem1, num_elem2; + tree phi1; + block_stmt_iterator last_bsi; + + + + /* Skip virtual phi's. The data dependences that are associated with + virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ + + if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "virtual phi. skip."); + continue; + } + + access_fn = instantiate_parameters + (loop, + analyze_scalar_evolution (loop, PHI_RESULT (phi))); + + evolution_part = evolution_part_in_loop_num (access_fn, loop->num); + + /* FORNOW: We do not transform initial conditions of IVs + which evolution functions are a polynomial of degree >= 2 or + exponential. */ + + step_expr = evolution_part; + init_expr = initial_condition (access_fn); + + ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr), + build2 (MULT_EXPR, TREE_TYPE (niters), + niters, step_expr), init_expr); + + var = create_tmp_var (TREE_TYPE (init_expr), "tmp"); + add_referenced_tmp_var (var); + + ni_name = force_gimple_operand (ni, &stmt, false, var); + + /* Insert stmt into new_bb. */ + last_bsi = bsi_last (new_bb); + bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT); + + /* Fix phi expressions in duplicated loop. */ + num_elem1 = PHI_NUM_ARGS (phi); + for (i = 0; i < num_elem1; i++) + if (PHI_ARG_EDGE (phi, i) == latch) + { + tree def = PHI_ARG_DEF (phi, i); + + for (phi1 = phi_nodes (new_bb->succ->dest); phi1; + phi1 = TREE_CHAIN (phi1)) + { + num_elem2 = PHI_NUM_ARGS (phi1); + for (j = 0; j < num_elem2; j++) + if (PHI_ARG_DEF (phi1, j) == def) + { + SET_PHI_ARG_DEF (phi1, j, ni_name); + PHI_ARG_EDGE (phi1, j) = new_bb->succ; + break; + } + } + break; + } + } + + } + + + /* This function is the main driver of tranformation + to be done for loop before vectorizing it in case of + unknown loop bound. */ + static void + vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio, + struct loops *loops) + { + + tree ni_name, ratio_mult_vf_name; + #ifdef ENABLE_CHECKING + int loop_num; + #endif + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *new_loop; + + if (vect_debug_details (NULL)) + fprintf (dump_file, "\n<>\n"); + + /* Generate the following variables on the preheader of original loop: + + ni_name = number of iteration the original loop executes + ratio = ni_name / vf + ration_mult_vf_name = ration * vf */ + vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, + &ratio_mult_vf_name, ratio); + + /* Update loop info. */ + loop->pre_header = loop_preheader_edge (loop)->src; + loop->pre_header_edges[0] = loop_preheader_edge (loop); + + /* Call duplicate_loop_body_on_entry_exit to have the vectorized + loop executes RATION_MULT_VF times and the rest in a new duplication + after that - so we duplicate on exit edge. */ + #ifdef ENABLE_CHECKING + loop_num = loop->num; + #endif + new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0], + ratio_mult_vf_name, ni_name, true); + #ifdef ENABLE_CHECKING + gcc_assert (new_loop); + gcc_assert (loop_num == loop->num); + #endif + + /* Update IVs of original loop as if they were advanced + by ratio_mult_vf_name steps. */ + + #ifdef ENABLE_CHECKING + /* Check existence of intermediate bb. */ + gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header); + #endif + vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name); + + return; + } + + /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. *************** vect_transform_loop (loop_vec_info loop_ *** 1435,1440 **** --- 2507,2513 ---- int nbbs = loop->num_nodes; block_stmt_iterator si; int i; + tree ratio = NULL; #ifdef ENABLE_CHECKING int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); #endif *************** vect_transform_loop (loop_vec_info loop_ *** 1442,1447 **** --- 2515,2546 ---- if (vect_debug_details (NULL)) fprintf (dump_file, "\n<>\n"); + /* If the loop has a symbolic number of iterations 'n' + (i.e. it's not a compile time constant), + then an epilog loop needs to be created. We therefore duplicate + the initial loop. The original loop will be vectorized, and will compute + the first (n/VF) iterations. The second copy of the loop will remain + serial and will compute the remaining (n%VF) iterations. + (VF is the vectorization factor). */ + + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops); + } + + /* FORNOW: we'll treat the case where niters is constant and + + niters % vf != 0 + + in the way similar to one with symbolic niters. + For this we'll generate variable which value is equal to niters. */ + + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)) + { + vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops); + } + /* 1) Make sure the loop header has exactly two entries 2) Make sure we have a preheader basic block. */ *************** vect_transform_loop (loop_vec_info loop_ *** 1475,1481 **** --- 2574,2582 ---- print_generic_expr (dump_file, stmt, TDF_SLIM); } stmt_info = vinfo_for_stmt (stmt); + #ifdef ENABLE_CHECKING gcc_assert (stmt_info); + #endif if (!STMT_VINFO_RELEVANT_P (stmt_info)) { bsi_next (&si); *************** vect_transform_loop (loop_vec_info loop_ *** 1507,1513 **** } /* stmts in BB */ } /* BBs in loop */ ! vect_transform_loop_bound (loop_vinfo); if (vect_debug_details (loop)) fprintf (dump_file,"Success! loop vectorized."); --- 2607,2613 ---- } /* stmts in BB */ } /* BBs in loop */ ! vect_transform_loop_bound (loop_vinfo, ratio); if (vect_debug_details (loop)) fprintf (dump_file,"Success! loop vectorized."); *************** vect_analyze_operations (loop_vec_info l *** 1727,1754 **** } LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; ! /* FORNOW: handle only cases where the loop bound divides by the ! vectorization factor. */ ! ! if (vect_debug_details (NULL)) fprintf (dump_file, "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, ! vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo)); ! if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { ! if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, "not vectorized: Unknown loop bound."); ! return false; ! } ! ! if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ! && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0) ! { ! if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.", ! vectorization_factor); ! return false; } return true; --- 2827,2851 ---- } LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; ! ! if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ! && vect_debug_details (NULL)) fprintf (dump_file, "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, ! vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo)); ! if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ! && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) { ! /* In this case we have to generate epilog loop, that ! can be done only for loops with one entry edge. */ ! if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1 ! || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header)) ! { ! if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, "not vectorized: More than one loop preheader."); ! return false; ! } } return true; *************** vect_mark_stmts_to_be_vectorized (loop_v *** 3092,3103 **** } /* Function vect_get_loop_niters. Determine how many iterations the loop is executed. */ static tree ! vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations) { tree niters; --- 4189,4296 ---- } + /* Function vect_analyze_loop_with_symbolic_num_of_iters. + + In case the number of iterations that LOOP iterates in unknown at compile + time, an epilog loop will be generated, and the loop induction variables + (IVs) will be "advanced" to the value they are supposed to take just before + the epilog loop. Here we check that the access function of the loop IVs + and the expression that represents the loop bound are simple enough. + These restrictions will be relxed in the future. */ + + static bool + vect_analyze_loop_with_symbolic_num_of_iters (tree niters, + struct loop *loop) + { + basic_block bb = loop->header; + tree phi; + + if (vect_debug_details (NULL)) + fprintf (dump_file, + "\n<>\n"); + + if (chrec_contains_undetermined (niters)) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "Infinite number of iterations."); + return false; + } + + if (!niters) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "niters is NULL pointer."); + return false; + } + + if (vect_debug_details (NULL)) + { + fprintf (dump_file, "Symbolic number of iterations is "); + print_generic_expr (dump_file, niters, TDF_DETAILS); + } + + /* Analyze phi functions of the loop header. */ + + for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) + { + tree access_fn = NULL; + tree evolution_part; + + if (vect_debug_details (NULL)) + { + fprintf (dump_file, "Analyze phi: "); + print_generic_expr (dump_file, phi, TDF_SLIM); + } + + /* Skip virtual phi's. The data dependences that are associated with + virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ + + if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "virtual phi. skip."); + continue; + } + + /* Analyze the evolution function. */ + + access_fn = instantiate_parameters + (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); + + if (!access_fn) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "No Access function."); + return false; + } + + if (vect_debug_details (NULL)) + { + fprintf (dump_file, "Access function of PHI: "); + print_generic_expr (dump_file, access_fn, TDF_SLIM); + } + + evolution_part = evolution_part_in_loop_num (access_fn, loop->num); + + if (evolution_part == NULL_TREE) + return false; + + /* FORNOW: We do not transform initial conditions of IVs + which evolution functions are a polynomial of degree >= 2. */ + + if (tree_is_chrec (evolution_part)) + return false; + } + + return true; + } + /* Function vect_get_loop_niters. Determine how many iterations the loop is executed. */ static tree ! vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) { tree niters; *************** vect_get_loop_niters (struct loop *loop, *** 3107,3120 **** niters = number_of_iterations_in_loop (loop); if (niters != NULL_TREE ! && niters != chrec_dont_know ! && host_integerp (niters,0)) { ! *number_of_iterations = TREE_INT_CST_LOW (niters); if (vect_debug_details (NULL)) ! fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC, ! *number_of_iterations); } return get_loop_exit_condition (loop); --- 4300,4314 ---- niters = number_of_iterations_in_loop (loop); if (niters != NULL_TREE ! && niters != chrec_dont_know) { ! *number_of_iterations = niters; if (vect_debug_details (NULL)) ! { ! fprintf (dump_file, "==> get_loop_niters:" ); ! print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM); ! } } return get_loop_exit_condition (loop); *************** vect_analyze_loop_form (struct loop *loo *** 3136,3142 **** { loop_vec_info loop_vinfo; tree loop_cond; ! HOST_WIDE_INT number_of_iterations = -1; if (vect_debug_details (loop)) fprintf (dump_file, "\n<>\n"); --- 4330,4336 ---- { loop_vec_info loop_vinfo; tree loop_cond; ! tree number_of_iterations = NULL; if (vect_debug_details (loop)) fprintf (dump_file, "\n<>\n"); *************** vect_analyze_loop_form (struct loop *loo *** 3184,3207 **** fprintf (dump_file, "not vectorized: complicated exit condition."); return NULL; } ! ! if (number_of_iterations < 0) { if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, "not vectorized: unknown loop bound."); return NULL; } ! if (number_of_iterations == 0) /* CHECKME: can this happen? */ { if (vect_debug_stats (loop) || vect_debug_details (loop)) fprintf (dump_file, "not vectorized: number of iterations = 0."); return NULL; } - loop_vinfo = new_loop_vec_info (loop); LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; - LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; return loop_vinfo; } --- 4378,4429 ---- fprintf (dump_file, "not vectorized: complicated exit condition."); return NULL; } ! ! if (!number_of_iterations) { if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, ! "not vectorized: number of iterations cannot be specified."); return NULL; } ! loop_vinfo = new_loop_vec_info (loop); ! LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; ! if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) ! { ! if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, "loop bound unknown."); ! ! /* Unknown loop bound. */ ! if (!vect_analyze_loop_with_symbolic_num_of_iters ! (number_of_iterations, loop)) ! { ! if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, ! "not vectorized: can't determine loop bound."); ! return NULL; ! } ! else ! { ! /* We need only one loop entry for unknown loop bound support. */ ! if (loop->num_entries != 1 || !loop->pre_header) ! { ! if (vect_debug_stats (loop) || vect_debug_details (loop)) ! fprintf (dump_file, ! "not vectorized: more than one loop entry."); ! return NULL; ! } ! } ! } ! else ! if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) { if (vect_debug_stats (loop) || vect_debug_details (loop)) fprintf (dump_file, "not vectorized: number of iterations = 0."); return NULL; } LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; return loop_vinfo; } Index: tree-vectorizer.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-vectorizer.h,v retrieving revision 2.3 diff -c -3 -p -r2.3 tree-vectorizer.h *** tree-vectorizer.h 10 Sep 2004 10:44:48 -0000 2.3 --- tree-vectorizer.h 11 Sep 2004 17:10:32 -0000 *************** typedef struct _loop_vec_info { *** 138,145 **** /* The loop exit_condition. */ tree exit_cond; ! /* Number of iterations. -1 if unknown. */ ! HOST_WIDE_INT num_iters; /* Is the loop vectorizable? */ bool vectorizable; --- 138,145 ---- /* The loop exit_condition. */ tree exit_cond; ! /* Number of iterations. */ ! tree num_iters; /* Is the loop vectorizable? */ bool vectorizable; *************** typedef struct _loop_vec_info { *** 163,170 **** #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes #define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads ! #define LOOP_VINFO_NITERS_KNOWN_P(L) ((L)->num_iters > 0) /*-----------------------------------------------------------------*/ /* Function prototypes. */ --- 163,174 ---- #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes #define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads + #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) + ! #define LOOP_VINFO_NITERS_KNOWN_P(L) \ ! (host_integerp ((L)->num_iters,0) \ ! && TREE_INT_CST_LOW ((L)->num_iters) > 0) /*-----------------------------------------------------------------*/ /* Function prototypes. */ Index: testsuite/gcc.dg/vect/vect-30.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-30.c,v retrieving revision 1.1 diff -c -3 -p -r1.1 vect-30.c *** testsuite/gcc.dg/vect/vect-30.c 17 Aug 2004 16:17:14 -0000 1.1 --- testsuite/gcc.dg/vect/vect-30.c 11 Sep 2004 17:10:36 -0000 *************** int main (void) *** 62,65 **** return 0; } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ --- 62,65 ---- return 0; } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ Index: testsuite/gcc.dg/vect/vect-46.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-46.c,v retrieving revision 1.1 diff -c -3 -p -r1.1 vect-46.c *** testsuite/gcc.dg/vect/vect-46.c 17 Aug 2004 16:17:14 -0000 1.1 --- testsuite/gcc.dg/vect/vect-46.c 11 Sep 2004 17:10:36 -0000 *************** int main (void) *** 53,56 **** return 0; } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ --- 53,56 ---- return 0; } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ Index: testsuite/gcc.dg/vect/vect-8.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-8.c,v retrieving revision 1.1 diff -c -3 -p -r1.1 vect-8.c *** testsuite/gcc.dg/vect/vect-8.c 17 Aug 2004 16:17:14 -0000 1.1 --- testsuite/gcc.dg/vect/vect-8.c 11 Sep 2004 17:10:36 -0000 *************** int main (void) *** 37,40 **** return main1 (N); } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ --- 37,40 ---- return main1 (N); } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ Index: testsuite/gcc.dg/vect/vect-none.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-none.c,v retrieving revision 1.1 diff -c -3 -p -r1.1 vect-none.c *** testsuite/gcc.dg/vect/vect-none.c 17 Aug 2004 16:17:14 -0000 1.1 --- testsuite/gcc.dg/vect/vect-none.c 11 Sep 2004 17:10:36 -0000 *************** foo (int n) *** 189,193 **** } /* { dg-final { scan-tree-dump-times "vectorized " 3 "vect"} } */ ! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect"} } */ ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ --- 189,193 ---- } /* { dg-final { scan-tree-dump-times "vectorized " 3 "vect"} } */ ! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect"} } */ ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */