diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c old mode 100644 new mode 100755 index 1f8ef03..f213506 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -120,6 +120,9 @@ along with GCC; see the file COPYING3. If not see /* List of basic blocks in if-conversion-suitable order. */ static basic_block *ifc_bbs; +/* Copy of 'force_vectorize' field of loop. */ +static bool flag_force_vectorize; + /* Structure used to predicate basic blocks. This is attached to the ->aux field of the BBs in the loop to be if-converted. */ typedef struct bb_predicate_s { @@ -149,6 +152,16 @@ bb_predicate (basic_block bb) return ((bb_predicate_p) bb->aux)->predicate; } +/* Returns predicate for critical edge E. */ + +static inline tree +edge_predicate (edge e) +{ + gcc_assert (EDGE_COUNT (e->dest->preds) >= 2); + gcc_assert (e->aux != NULL); + return (tree) e->aux; +} + /* Sets the gimplified predicate COND for basic block BB. */ static inline void @@ -160,6 +173,16 @@ set_bb_predicate (basic_block bb, tree cond) ((bb_predicate_p) bb->aux)->predicate = cond; } +/* Sets predicate COND for critical edge E. */ + +static inline void +set_edge_predicate (edge e, tree cond) +{ + gcc_assert (EDGE_COUNT (e->dest->preds) >= 2); + gcc_assert (cond != NULL_TREE); + e->aux = cond; +} + /* Returns the sequence of statements of the gimplification of the predicate for basic block BB. */ @@ -396,25 +419,51 @@ fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs) } /* Add condition NC to the predicate list of basic block BB. LOOP is - the loop to be if-converted. */ + the loop to be if-converted. Use predicate of cd-equivalent block + if it exists for join bb. */ static inline void add_to_predicate_list (struct loop *loop, basic_block bb, tree nc) { tree bc, *tp; + basic_block dom_bb; + static basic_block join_bb = NULL; if (is_true_predicate (nc)) return; - if (!is_predicated (bb)) + /* If dominance tells us this basic block is always executed, + don't record any predicates for it. */ + if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + return; + + /* If predicate has been already set up for given bb using cd-equivalent + block predicate, simply escape. Post-dominator tree was built under + flag_force_vectorize only. */ + if (flag_force_vectorize) { - /* If dominance tells us this basic block is always executed, don't - record any predicates for it. */ - if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + if (join_bb == bb) return; + dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb); + /* We use notion of cd equivalence to get simplier predicate for + join block, e.g. if join block has 2 predecessors with predicates + p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of + p1 & p2 | p1 & !p2. */ + if (dom_bb != loop->header + && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb) + { + gcc_assert (flow_bb_inside_loop_p (loop, dom_bb)); + bc = bb_predicate (dom_bb); + gcc_assert (!is_true_predicate (bc)); + set_bb_predicate (bb, bc); - bc = nc; + /* Save bb in join_bb to not handle it once more. */ + join_bb = bb; + return; + } } + if (!is_predicated (bb)) + bc = nc; else { bc = bb_predicate (bb); @@ -455,10 +504,15 @@ add_to_dst_predicate_list (struct loop *loop, edge e, cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, prev_cond, cond); - add_to_predicate_list (loop, e->dest, cond); + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest)) + add_to_predicate_list (loop, e->dest, cond); + + /* If edge E is critical save predicate on it. */ + if (EDGE_COUNT (e->dest->preds) >= 2) + set_edge_predicate (e, cond); } -/* Return true if one of the successor edges of BB exits LOOP. */ +/* Returns true if one of the successor edges of BB exits LOOP. */ static bool bb_with_exit_edge_p (struct loop *loop, basic_block bb) @@ -482,7 +536,9 @@ bb_with_exit_edge_p (struct loop *loop, basic_block bb) When the flag_tree_loop_if_convert_stores is not set, PHI is not if-convertible if: - a virtual PHI is immediately used in another PHI node, - - there is a virtual PHI in a BB other than the loop->header. */ + - there is a virtual PHI in a BB other than the loop->header. + When the flag_force_vectorize is set, PHI can have more than + two arguments. */ static bool if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi, @@ -494,11 +550,18 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi, print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); } - if (bb != loop->header && gimple_phi_num_args (phi) != 2) + if (bb != loop->header) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "More than two phi node args.\n"); - return false; + if (gimple_phi_num_args (phi) != 2) + { + if (!flag_force_vectorize) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "More than two phi node args.\n"); + return false; + } + + } } if (flag_tree_loop_if_convert_stores || any_mask_load_store) @@ -728,7 +791,7 @@ ifcvt_can_use_mask_load_store (gimple stmt) basic_block bb = gimple_bb (stmt); bool is_load; - if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize) + if (!(flag_tree_loop_vectorize || flag_force_vectorize) || bb->loop_father->dont_vectorize || !gimple_assign_single_p (stmt) || gimple_has_volatile_ops (stmt)) @@ -865,7 +928,8 @@ if_convertible_gimple_assign_stmt_p (gimple stmt, A statement is if-convertible if: - it is an if-convertible GIMPLE_ASSIGN, - - it is a GIMPLE_LABEL or a GIMPLE_COND. */ + - it is a GIMPLE_LABEL or a GIMPLE_COND, + - it is builtins call. */ static bool if_convertible_stmt_p (gimple stmt, vec refs, @@ -912,6 +976,22 @@ if_convertible_stmt_p (gimple stmt, vec refs, return true; } +/* Assumes that BB has more than 2 predecessors. + Returns false if at least one successor is not on critical edge + and true otherwise. */ + +static inline bool +all_edges_are_critical (basic_block bb) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (EDGE_COUNT (e->src->succs) == 1) + return false; + return true; +} + /* Return true when BB is if-convertible. This routine does not check basic block's statements and phis. @@ -920,6 +1000,8 @@ if_convertible_stmt_p (gimple stmt, vec refs, - it is after the exit block but before the latch, - its edges are not normal. + Last restriction is not applicable for loops marked with simd pragma. + EXIT_BB is the basic block containing the exit of the LOOP. BB is inside LOOP. */ @@ -932,9 +1014,13 @@ if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb) if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "----------[%d]-------------\n", bb->index); - if (EDGE_COUNT (bb->preds) > 2 - || EDGE_COUNT (bb->succs) > 2) + if (EDGE_COUNT (bb->succs) > 2) return false; + if (EDGE_COUNT (bb->preds) > 2) + { + if (!flag_force_vectorize) + return false; + } if (exit_bb) { @@ -971,18 +1057,17 @@ if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb) /* At least one incoming edge has to be non-critical as otherwise edge predicates are not equal to basic-block predicates of the edge - source. */ + source. This restriction is not valid for loops marked with + simd pragma. */ if (EDGE_COUNT (bb->preds) > 1 && bb != loop->header) { - bool found = false; - FOR_EACH_EDGE (e, ei, bb->preds) - if (EDGE_COUNT (e->src->succs) == 1) - found = true; - if (!found) + if (!flag_force_vectorize && all_edges_are_critical (bb)) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "only critical predecessors\n"); + fprintf (dump_file, "only critical predecessors in bb#%d\n", + bb->index); + return false; } } @@ -1064,6 +1149,7 @@ get_loop_body_in_if_conv_order (const struct loop *loop) return blocks; } + /* Returns true when the analysis of the predicates for all the basic blocks in LOOP succeeded. @@ -1096,9 +1182,10 @@ predicate_bbs (loop_p loop) tree cond; gimple stmt; - /* The loop latch is always executed and has no extra conditions - to be processed: skip it. */ - if (bb == loop->latch) + /* The loop latch and loop exit block are always executed and + have no extra conditions to be processed: skip them. */ + if (bb == loop->latch + || bb_with_exit_edge_p (loop, bb)) { reset_bb_predicate (loop->latch); continue; @@ -1108,27 +1195,41 @@ predicate_bbs (loop_p loop) stmt = last_stmt (bb); if (stmt && gimple_code (stmt) == GIMPLE_COND) { - tree c2; + tree c, c2; edge true_edge, false_edge; location_t loc = gimple_location (stmt); - tree c = fold_build2_loc (loc, gimple_cond_code (stmt), - boolean_type_node, - gimple_cond_lhs (stmt), - gimple_cond_rhs (stmt)); - - /* Add new condition into destination's predicate list. */ - extract_true_false_edges_from_block (gimple_bb (stmt), - &true_edge, &false_edge); + tree lopnd = gimple_cond_lhs (stmt); + enum tree_code code = gimple_cond_code (stmt); + + /* Compute predicates for true and false edges. */ + c = fold_build2_loc (loc, code, + boolean_type_node, + lopnd, + gimple_cond_rhs (stmt)); + /* Fold_build2 can produce bool conversion which is not + supported by vectorizer, so re-build it without folding. + For example, such conversion is generated for sequence: + _Bool _7, _8, _9; + _7 = _6 != 13; _8 = _6 != 0; _9 = _8 & _9; + if (_9 != 0) --> (bool)_9. */ + + if (CONVERT_EXPR_P (c) + && TREE_CODE_CLASS (code) == tcc_comparison) + c = build2_loc (loc, code, boolean_type_node, + lopnd, gimple_cond_rhs (stmt)); + c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, + unshare_expr (c)); + extract_true_false_edges_from_block (bb, &true_edge, &false_edge); + if (flag_force_vectorize) + true_edge->aux = false_edge->aux = NULL; /* If C is true, then TRUE_EDGE is taken. */ add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond), unshare_expr (c)); /* If C is false, then FALSE_EDGE is taken. */ - c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, - unshare_expr (c)); - add_to_dst_predicate_list (loop, false_edge, - unshare_expr (cond), c2); + add_to_dst_predicate_list (loop, false_edge, unshare_expr (cond), + unshare_expr (c2)); cond = NULL_TREE; } @@ -1176,6 +1277,8 @@ if_convertible_loop_p_1 (struct loop *loop, return false; calculate_dominance_info (CDI_DOMINATORS); + if (flag_force_vectorize) + calculate_dominance_info (CDI_POST_DOMINATORS); /* Allow statements that can be handled during if-conversion. */ ifc_bbs = get_loop_body_in_if_conv_order (loop); @@ -1337,7 +1440,9 @@ if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store) replacement. Return the true block whose phi arguments are selected when cond is true. LOOP is the loop containing the if-converted region, GSI is the place to insert the code for the - if-conversion. */ + if-conversion. + Returns NULL if given phi node must be handled by means of extended + phi node predication. */ static basic_block find_phi_replacement_condition (basic_block bb, tree *cond, @@ -1346,7 +1451,13 @@ find_phi_replacement_condition (basic_block bb, tree *cond, edge first_edge, second_edge; tree tmp_cond; - gcc_assert (EDGE_COUNT (bb->preds) == 2); + if (EDGE_COUNT (bb->preds) != 2 + || all_edges_are_critical (bb)) + { + gcc_assert (flag_force_vectorize); + return NULL; + } + first_edge = EDGE_PRED (bb, 0); second_edge = EDGE_PRED (bb, 1); @@ -1624,6 +1735,237 @@ predicate_scalar_phi (gimple phi, tree cond, } } +/* Returns predicate of edge associated with argument of phi node. */ + +static tree +get_predicate_for_edge (edge e) +{ + tree c; + basic_block b = e->src; + + if (EDGE_COUNT (b->succs) == 1 || EDGE_COUNT (e->dest->preds) == 1) + /* Edge E is not critical, use predicate of edge source bb. */ + c = bb_predicate (b); + else + /* Edge E is critical and its aux field contains predicate. */ + c = edge_predicate (e); + return c; +} + +/* This is enhancement for predication of a phi node with arbitrary + number of arguments, i.e. for + x = phi (x_1, x_2, ..., x_k) + a chain of recurrent cond expressions will be produced. + For example, + bb_0 + if (_5 != 0) goto bb_1 else goto bb_2 + end_bb_0 + + bb_1 + res_2 = some computations; + goto bb_5 + end_bb_1 + + bb_2 + if (_9 != 0) goto bb_3 else goto bb_4 + end_bb_2 + + bb_3 + res_3 = ...; + goto bb_5 + end_bb_3 + + bb4 + res_4 = ...; + end_bb_4 + + bb_5 + # res_1 = PHI + + will be if-converted into chain of unconditional assignments: + _ifc__42 = ? res_3 : res_4; + res_1 = _5 != 0 ? res_2 : _ifc__42; + + where is predicate of . + + All created intermediate statements are inserted at GSI point. */ + +static void +predicate_arbitrary_scalar_phi (gimple phi, gimple_stmt_iterator *gsi, + bool before) +{ + int i; + int num = (int) gimple_phi_num_args (phi); + tree last = gimple_phi_arg_def (phi, num - 1); + tree type = TREE_TYPE (gimple_phi_result (phi)); + tree curr; + gimple stmt; + tree lhs; + tree rhs; + tree res; + tree cond; + bool swap = false; + + res = gimple_phi_result (phi); + if (virtual_operand_p (res)) + return; + + for (i = num - 2; i > 0; i--) + { + curr = gimple_phi_arg_def (phi, i); + lhs = make_temp_ssa_name (type, NULL, "_ifc_"); + cond = get_predicate_for_edge (gimple_phi_arg_edge (phi, i)); + swap = false; + if (TREE_CODE (cond) == TRUTH_NOT_EXPR) + { + cond = TREE_OPERAND (cond, 0); + swap = true; + } + /* Gimplify the condition to a valid cond-expr conditonal operand. */ + if (before) + cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), + is_gimple_condexpr, NULL_TREE, + true, GSI_SAME_STMT); + else + cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), + is_gimple_condexpr, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + stmt = gimple_build_assign_with_ops (COND_EXPR, lhs, + unshare_expr (cond), + swap? last : curr, + swap? curr : last); + + if (before) + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + else + gsi_insert_after (gsi, stmt, GSI_NEW_STMT); + update_stmt (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Create new assign stmt for phi arg#%d\n", i); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + } + last = lhs; + } + curr = gimple_phi_arg_def (phi, 0); + cond = get_predicate_for_edge (gimple_phi_arg_edge (phi, 0)); + swap = false; + if (TREE_CODE (cond) == TRUTH_NOT_EXPR) + { + cond = TREE_OPERAND (cond, 0); + swap = true; + } + if (before) + cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), + is_gimple_condexpr, NULL_TREE, true, + GSI_SAME_STMT); + else + cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), + is_gimple_condexpr, NULL_TREE, false, + GSI_CONTINUE_LINKING); + rhs = fold_build_cond_expr (type, + unshare_expr (cond), + swap? last : curr, + swap? curr : last); + stmt = gimple_build_assign (res, rhs); + if (before) + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + else + gsi_insert_after (gsi, stmt, GSI_NEW_STMT); + update_stmt (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "new phi replacement stmt\n"); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + } +} + +/* Returns gimple statement iterator to insert code for predicated phi. */ + +static gimple_stmt_iterator +find_insertion_point (basic_block bb, bool* before) +{ + edge e; + edge_iterator ei; + tree cond; + gimple last = NULL; + gimple curr; + int num_opnd; + tree opnd1, opnd2; + + /* Found last statement in bb after which code for predicated phi can be + inserted using edge predicates. */ + FOR_EACH_EDGE (e, ei, bb->preds) + { + cond = get_predicate_for_edge (e); + if (TREE_CODE (cond) == SSA_NAME) + { + opnd1 = cond; + opnd2 = NULL_TREE; + } + else if (TREE_CONSTANT (cond)) + continue; + else if ((num_opnd = TREE_OPERAND_LENGTH (cond)) == 2) + { + opnd1 = TREE_OPERAND (cond, 0); + opnd2 = TREE_OPERAND (cond, 1); + } + else + { + gcc_assert (num_opnd == 1); + opnd1 = TREE_OPERAND (cond, 0); + opnd2 = NULL_TREE; + } + /* Process each operand of cond to determine the latest defenition. */ + while (true) + { + if (TREE_CODE (opnd1) == SSA_NAME) + { + curr = SSA_NAME_DEF_STMT (opnd1); + /* Skip defenition in other bb's. */ + if (gimple_bb (curr) == bb) + { + if (last == NULL) + last = curr; + else + { + /* Determine what stmt is latest in bb. */ + gimple_stmt_iterator gsi; + gimple stmt; + for (gsi = gsi_last_bb (bb); + !gsi_end_p (gsi); + gsi_prev (&gsi)) + if ((stmt = gsi_stmt (gsi)) == last) + break; + else if (stmt == curr) + { + last = curr; + break; + } + } + } + } + if (opnd2 != NULL_TREE) + { + opnd1 = opnd2; + opnd2 = NULL_TREE; + } + else + break; + } + } + + if (last == NULL) + { + *before = true; + return gsi_after_labels (bb); + } + *before = false; + return gsi_for_stmt (last); +} + /* Replaces in LOOP all the scalar phi nodes other than those in the LOOP->header block with conditional modify expressions. */ @@ -1633,6 +1975,7 @@ predicate_all_scalar_phis (struct loop *loop) basic_block bb; unsigned int orig_loop_num_nodes = loop->num_nodes; unsigned int i; + bool before = false; for (i = 1; i < orig_loop_num_nodes; i++) { @@ -1653,11 +1996,17 @@ predicate_all_scalar_phis (struct loop *loop) appropriate condition for the PHI node replacement. */ gsi = gsi_after_labels (bb); true_bb = find_phi_replacement_condition (bb, &cond, &gsi); + if (!true_bb) + /* Will use extended predication, find out insertion point. */ + gsi = find_insertion_point (bb, &before); while (!gsi_end_p (phi_gsi)) { phi = gsi_stmt (phi_gsi); - predicate_scalar_phi (phi, cond, true_bb, &gsi); + if (true_bb) + predicate_scalar_phi (phi, cond, true_bb, &gsi); + else + predicate_arbitrary_scalar_phi (phi, &gsi, before); release_phi_node (phi); gsi_next (&phi_gsi); } @@ -1673,13 +2022,12 @@ static void insert_gimplified_predicates (loop_p loop, bool any_mask_load_store) { unsigned int i; - for (i = 0; i < loop->num_nodes; i++) { basic_block bb = ifc_bbs[i]; gimple_seq stmts; - if (!is_predicated (bb)) + if (!is_predicated (bb) && bb_predicate_gimplified_stmts (bb) == NULL) { /* Do not insert statements for a basic block that is not predicated. Also make sure that the predicate of the @@ -1692,7 +2040,8 @@ insert_gimplified_predicates (loop_p loop, bool any_mask_load_store) if (stmts) { if (flag_tree_loop_if_convert_stores - || any_mask_load_store) + || any_mask_load_store + || flag_force_vectorize) { /* Insert the predicate of the BB just after the label, as the if-conversion of memory writes will use this @@ -1849,7 +2198,6 @@ predicate_mem_writes (loop_p loop) swap = true; cond = TREE_OPERAND (cond, 0); } - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) if (!gimple_assign_single_p (stmt = gsi_stmt (gsi))) continue; @@ -2102,6 +2450,7 @@ version_loop_for_if_conversion (struct loop *loop) return true; } + /* If-convert LOOP when it is legal. For the moment this pass has no profitability analysis. Returns non-zero todo flags when something changed. */ @@ -2113,6 +2462,15 @@ tree_if_conversion (struct loop *loop) ifc_bbs = NULL; bool any_mask_load_store = false; + flag_force_vectorize = loop->force_vectorize; + /* Check either outer loop was marked with simd pragma. */ + if (!flag_force_vectorize) + { + struct loop *outer_loop = loop_outer (loop); + if (outer_loop && outer_loop->force_vectorize) + flag_force_vectorize = true; + } + if (!if_convertible_loop_p (loop, &any_mask_load_store) || !dbg_cnt (if_conversion_tree)) goto cleanup; @@ -2122,7 +2480,9 @@ tree_if_conversion (struct loop *loop) || loop->dont_vectorize)) goto cleanup; - if (any_mask_load_store && !version_loop_for_if_conversion (loop)) + if ((any_mask_load_store + || (loop->force_vectorize && flag_tree_loop_if_convert != 1)) + && !version_loop_for_if_conversion (loop)) goto cleanup; /* Now all statements are if-convertible. Combine all the basic @@ -2143,7 +2503,15 @@ tree_if_conversion (struct loop *loop) unsigned int i; for (i = 0; i < loop->num_nodes; i++) - free_bb_predicate (ifc_bbs[i]); + { + basic_block bb = ifc_bbs[i]; + free_bb_predicate (bb); + if (EDGE_COUNT (bb->succs) == 2) + { + EDGE_SUCC (bb, 0)->aux = NULL; + EDGE_SUCC (bb, 1)->aux = NULL; + } + } free (ifc_bbs); ifc_bbs = NULL;