Index: doc/tm.texi =================================================================== --- doc/tm.texi (revision 161484) +++ doc/tm.texi (working copy) @@ -5750,6 +5750,14 @@ the elements in the vectors should be of parameter is true if the memory access is defined in a packed struct. @end deftypefn +@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VEC_CMP (unsigned int @var{code}, const_tree @var{type}, tree *@var{result_type}) +Target builtin that implements vector element-wise comparison. +The value of @var{code} is one of the enumerators in @code{enum tree_code} and +specifies comparison operation, @var{type} specifies the type of input vectors. +The function returns the type of the comparison result in @var{result_type}. +@end deftypefn + + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses Index: target.h =================================================================== --- target.h (revision 161484) +++ target.h (working copy) @@ -545,6 +545,8 @@ struct gcc_target is true if the access is defined in a packed struct. */ bool (* builtin_support_vector_misalignment) (enum machine_mode, const_tree, int, bool); + /* Target builtin that implements vector element-wise comparison. */ + tree (* builtin_vect_compare) (unsigned int, tree, tree *); } vectorize; /* The initial value of target_flags. */ Index: tree-vectorizer.h =================================================================== --- tree-vectorizer.h (revision 161484) +++ tree-vectorizer.h (working copy) @@ -389,6 +389,17 @@ enum slp_vect_type { hybrid }; +/* Compound pattern is a pattern consisting more than one statement that need + to be vectorized. Currently min/max location pattern is the only supported + compound pattern. It has two statements: the first statement calculates the + minimum (marked MINMAX_STMT) and the second one calculates the location + (marked MINMAX_LOC_STMT).
*/ +enum vect_compound_pattern { + not_in_pattern = 0, + minmax_stmt, + minmax_loc_stmt +}; + typedef struct data_reference *dr_p; DEF_VEC_P(dr_p); @@ -405,6 +416,10 @@ typedef struct _stmt_vec_info { /* Stmt is part of some pattern (computation idiom) */ bool in_pattern_p; + /* Statement is a part of a compound pattern, i.e., a pattern consisting + more than one statement. */ + enum vect_compound_pattern compound_pattern; + /* For loads only, if there is a store with the same location, this field is TRUE. */ bool read_write_dep; @@ -491,6 +506,10 @@ typedef struct _stmt_vec_info { /* The bb_vec_info with respect to which STMT is vectorized. */ bb_vec_info bb_vinfo; + /* The scalar result of vectorized reduction computation generated in + reduction epilogue. */ + gimple reduc_scalar_result_stmt; + /* Is this statement vectorizable or should it be skipped in (partial) vectorization. */ bool vectorizable; @@ -515,6 +534,7 @@ typedef struct _stmt_vec_info { #define STMT_VINFO_DR_ALIGNED_TO(S) (S)->dr_aligned_to #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p +#define STMT_VINFO_COMPOUND_PATTERN(S) (S)->compound_pattern #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs #define STMT_VINFO_DEF_TYPE(S) (S)->def_type @@ -526,6 +546,7 @@ typedef struct _stmt_vec_info { #define STMT_VINFO_DR_GROUP_SAME_DR_STMT(S)(S)->same_dr_stmt #define STMT_VINFO_DR_GROUP_READ_WRITE_DEPENDENCE(S) (S)->read_write_dep #define STMT_VINFO_STRIDED_ACCESS(S) ((S)->first_dr != NULL) +#define STMT_VINFO_REDUC_SCALAR_RES_STMT(S) (S)->reduc_scalar_result_stmt #define DR_GROUP_FIRST_DR(S) (S)->first_dr #define DR_GROUP_NEXT_DR(S) (S)->next_dr @@ -620,7 +641,8 @@ is_pattern_stmt_p (stmt_vec_info stmt_in related_stmt = STMT_VINFO_RELATED_STMT (stmt_info); if (related_stmt && (related_stmt_info = vinfo_for_stmt (related_stmt)) - && STMT_VINFO_IN_PATTERN_P (related_stmt_info)) + && (STMT_VINFO_IN_PATTERN_P (related_stmt_info) + || 
STMT_VINFO_COMPOUND_PATTERN (related_stmt_info))) return true; return false; @@ -741,8 +763,10 @@ extern bool vect_transform_stmt (gimple, bool *, slp_tree, slp_instance); extern void vect_remove_stores (gimple); extern bool vect_analyze_stmt (gimple, bool *, slp_tree); -extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, - tree, int); +extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, + tree, int, tree, int); +extern gimple vectorize_minmax_location_pattern (gimple, gimple_stmt_iterator*, + enum tree_code, tree, tree, tree, tree); /* In tree-vect-data-refs.c. */ extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); @@ -818,8 +842,11 @@ extern void vect_slp_transform_bb (basic Additional pattern recognition functions can (and will) be added in the future. */ typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); -#define NUM_PATTERNS 4 +typedef bool (* vect_recog_compound_func_ptr) (unsigned int, va_list); +#define NUM_PATTERNS 4 +#define NUM_COMPOUND_PATTERNS 1 void vect_pattern_recog (loop_vec_info); +void vect_compound_pattern_recog (unsigned int, ...); /* In tree-vectorizer.c. 
*/ unsigned vectorize_loops (void); Index: tree-vect-loop.c =================================================================== --- tree-vect-loop.c (revision 161484) +++ tree-vect-loop.c (working copy) @@ -295,7 +295,8 @@ vect_determine_vectorization_factor (loo else { gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) - && !is_pattern_stmt_p (stmt_info)); + && (!is_pattern_stmt_p (stmt_info) + || STMT_VINFO_COMPOUND_PATTERN (stmt_info))); scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); if (vect_print_dump_info (REPORT_DETAILS)) @@ -444,10 +445,15 @@ static void vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) { basic_block bb = loop->header; - tree dumy; + tree dummy; VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64); gimple_stmt_iterator gsi; - bool double_reduc; + bool double_reduc, found, minmax_loc = false; + gimple first_cond_stmt = NULL, second_cond_stmt = NULL; + gimple first_phi = NULL, second_phi = NULL, phi, use_stmt; + int i; + imm_use_iterator imm_iter; + use_operand_p use_p; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); @@ -484,7 +490,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i } if (!access_fn - || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy)) + || !vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, + &dummy)) { VEC_safe_push (gimple, heap, worklist, phi); continue; @@ -495,8 +502,56 @@ vect_analyze_scalar_cycles_1 (loop_vec_i STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; } + /* Detect compound reduction patterns (before reduction detection): + we currently support only min/max location pattern, so we look for two + reduction condition statements. */ + for (i = 0; VEC_iterate (gimple, worklist, i, phi); i++) + { + tree def = PHI_RESULT (phi); - /* Second - identify all reductions and nested cycles. 
*/ + found = false; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) + { + use_stmt = USE_STMT (use_p); + if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)) + && vinfo_for_stmt (use_stmt) + && is_gimple_assign (use_stmt) + && gimple_assign_rhs_code (use_stmt) == COND_EXPR) + { + found = true; + break; + } + } + + if (!found) + continue; + + if (!first_cond_stmt) + { + first_cond_stmt = use_stmt; + first_phi = phi; + } + else + { + if (second_cond_stmt) + { + /* This one is the third reduction condition statement in the + loop. This is too confusing, we bail out. */ + minmax_loc = false; + break; + } + + second_cond_stmt = use_stmt; + second_phi = phi; + minmax_loc = true; + } + } + + if (minmax_loc) + vect_compound_pattern_recog (4, first_phi, first_cond_stmt, + second_phi, second_cond_stmt); + + /* Identify all reductions and nested cycles. */ while (VEC_length (gimple, worklist) > 0) { gimple phi = VEC_pop (gimple, worklist); @@ -595,11 +650,9 @@ vect_analyze_scalar_cycles (loop_vec_inf /* When vectorizing an outer-loop, the inner-loop is executed sequentially. Reductions in such inner-loop therefore have different properties than the reductions in the nest that gets vectorized: - 1. When vectorized, they are executed in the same order as in the original - scalar loop, so we can't change the order of computation when - vectorizing them. - 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the - current checks are too strict. */ + when vectorized, they are executed in the same order as in the original + scalar loop, so we can't change the order of computation when + vectorizing them. */ if (loop->inner) vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); @@ -819,7 +872,15 @@ destroy_loop_vec_info (loop_vec_info loo if (orig_stmt_info && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) remove_stmt_p = true; - } + + /* We are removing statement inserted by the pattern + detection pass. 
Update the original statement to be the + def stmt of the statement's LHS. */ + if (remove_stmt_p && is_gimple_assign (orig_stmt) + && TREE_CODE (gimple_assign_lhs (orig_stmt)) == SSA_NAME) + SSA_NAME_DEF_STMT (gimple_assign_lhs (orig_stmt)) + = orig_stmt; + } /* Free stmt_vec_info. */ free_stmt_vec_info (stmt); @@ -1662,13 +1723,16 @@ vect_is_simple_reduction_1 (loop_vec_inf gimple use_stmt = USE_STMT (use_p); if (is_gimple_debug (use_stmt)) continue; + if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)) && vinfo_for_stmt (use_stmt) - && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))) + && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)) + && !STMT_VINFO_COMPOUND_PATTERN (vinfo_for_stmt (use_stmt))) nloop_uses++; + if (nloop_uses > 1) { - if (vect_print_dump_info (REPORT_DETAILS)) + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "reduction used in loop."); return NULL; } @@ -1716,10 +1780,12 @@ vect_is_simple_reduction_1 (loop_vec_inf gimple use_stmt = USE_STMT (use_p); if (is_gimple_debug (use_stmt)) continue; + if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)) && vinfo_for_stmt (use_stmt) && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))) nloop_uses++; + if (nloop_uses > 1) { if (vect_print_dump_info (REPORT_DETAILS)) @@ -1769,6 +1835,9 @@ vect_is_simple_reduction_1 (loop_vec_inf code = PLUS_EXPR; if (check_reduction + && (!vinfo_for_stmt (def_stmt) + || STMT_VINFO_COMPOUND_PATTERN (vinfo_for_stmt (def_stmt)) + != minmax_loc_stmt) && (!commutative_tree_code (code) || !associative_tree_code (code))) { if (vect_print_dump_info (REPORT_DETAILS)) @@ -1819,14 +1888,16 @@ vect_is_simple_reduction_1 (loop_vec_inf } type = TREE_TYPE (gimple_assign_lhs (def_stmt)); - if ((TREE_CODE (op1) == SSA_NAME - && !types_compatible_p (type,TREE_TYPE (op1))) - || (TREE_CODE (op2) == SSA_NAME - && !types_compatible_p (type, TREE_TYPE (op2))) - || (op3 && TREE_CODE (op3) == SSA_NAME - && !types_compatible_p (type, TREE_TYPE (op3))) - || (op4 && TREE_CODE (op4) 
== SSA_NAME - && !types_compatible_p (type, TREE_TYPE (op4)))) + if (STMT_VINFO_COMPOUND_PATTERN (vinfo_for_stmt (def_stmt)) + != minmax_loc_stmt + && ((TREE_CODE (op1) == SSA_NAME + && !types_compatible_p (type, TREE_TYPE (op1))) + || (TREE_CODE (op2) == SSA_NAME + && !types_compatible_p (type, TREE_TYPE (op2))) + || (op3 && TREE_CODE (op3) == SSA_NAME + && !types_compatible_p (type, TREE_TYPE (op3))) + || (op4 && TREE_CODE (op4) == SSA_NAME + && !types_compatible_p (type, TREE_TYPE (op4))))) { if (vect_print_dump_info (REPORT_DETAILS)) { @@ -1834,17 +1905,17 @@ vect_is_simple_reduction_1 (loop_vec_inf print_generic_expr (vect_dump, type, TDF_SLIM); fprintf (vect_dump, ", operands types: "); print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM); - fprintf (vect_dump, ","); + fprintf (vect_dump, ", "); print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM); if (op3) { - fprintf (vect_dump, ","); + fprintf (vect_dump, ", "); print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM); } if (op4) { - fprintf (vect_dump, ","); + fprintf (vect_dump, ", "); print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM); } } @@ -1952,7 +2023,7 @@ vect_is_simple_reduction_1 (loop_vec_inf == vect_internal_def && !is_loop_header_bb_p (gimple_bb (def2))))))) { - if (check_reduction) + if (check_reduction && code != COND_EXPR) { /* Swap operands (just for simplicity - so that the rest of the code can assume that the reduction variable is always the last (second) @@ -2354,7 +2425,6 @@ vect_model_reduction_cost (stmt_vec_info loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - /* Cost of reduction op inside loop. 
*/ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * vect_get_cost (vector_stmt); @@ -2391,11 +2461,15 @@ vect_model_reduction_cost (stmt_vec_info mode = TYPE_MODE (vectype); orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - if (!orig_stmt) + if (!orig_stmt || STMT_VINFO_COMPOUND_PATTERN (stmt_info)) orig_stmt = STMT_VINFO_STMT (stmt_info); code = gimple_assign_rhs_code (orig_stmt); + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info) == minmax_loc_stmt) + STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) + += ncopies * 5 * vect_get_cost (vector_stmt); + /* Add in cost for initial definition. */ outer_cost += vect_get_cost (scalar_to_vec); @@ -2411,28 +2485,35 @@ vect_model_reduction_cost (stmt_vec_info + vect_get_cost (vec_to_scalar); else { - int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); - tree bitsize = - TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt))); - int element_bitsize = tree_low_cst (bitsize, 1); - int nelements = vec_size_in_bits / element_bitsize; - - optab = optab_for_tree_code (code, vectype, optab_default); - - /* We have a whole vector shift available. */ - if (VECTOR_MODE_P (mode) - && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing - && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing) - /* Final reduction via vector shifts and the reduction operator. Also - requires scalar extract. */ - outer_cost += ((exact_log2(nelements) * 2) - * vect_get_cost (vector_stmt) - + vect_get_cost (vec_to_scalar)); - else - /* Use extracts and reduction op for final reduction. For N elements, - we have N extracts and N-1 reduction ops. 
*/ - outer_cost += ((nelements + nelements - 1) - * vect_get_cost (vector_stmt)); + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info)) + outer_cost += 6 * vect_get_cost (vector_stmt) + + vect_get_cost (vec_to_scalar); + else + { + int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); + tree bitsize = + TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt))); + int element_bitsize = tree_low_cst (bitsize, 1); + int nelements = vec_size_in_bits / element_bitsize; + + optab = optab_for_tree_code (code, vectype, optab_default); + + /* We have a whole vector shift available. */ + if (VECTOR_MODE_P (mode) + && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing + && optab_handler (vec_shr_optab, mode)->insn_code + != CODE_FOR_nothing) + /* Final reduction via vector shifts and the reduction operator. + Also requires scalar extract. */ + outer_cost += ((exact_log2(nelements) * 2) + * vect_get_cost (vector_stmt) + + vect_get_cost (vec_to_scalar)); + else + /* Use extracts and reduction op for final reduction. For N + elements, we have N extracts and N-1 reduction ops. */ + outer_cost += ((nelements + nelements - 1) + * vect_get_cost (vector_stmt)); + } } } @@ -2933,6 +3014,128 @@ get_initial_def_for_reduction (gimple st return init_def; } +/* Create min/max location epilogue calculation. We have both vector and + extracted scalar results of min/max computation, and a vector of locations + that we need to reduce to a scalar result now. + We use a technique described in the documentation of + vectorize_minmax_location_pattern ().
*/ + +static void +vect_create_epilogue_for_compound_pattern (gimple stmt, tree vectype, + enum tree_code *reduc_code, + gimple *new_phi, + gimple_stmt_iterator *exit_gsi, + enum tree_code *code) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree t = NULL_TREE, minmax_vec, minmax_res = NULL_TREE, orig_cond, val; + gimple related, min_max_stmt, related_res; + enum machine_mode vec_mode; + optab reduc_optab; + unsigned int nunits; + int i; + imm_use_iterator imm_iter; + use_operand_p use_p; + basic_block exit_bb; + enum tree_code orig_code; + + if (nested_in_vect_loop_p (loop, stmt)) + loop = loop->inner; + + exit_bb = single_exit (loop)->dest; + + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info) != minmax_loc_stmt) + return; + + related = STMT_VINFO_RELATED_STMT (stmt_info); + related_res = STMT_VINFO_REDUC_SCALAR_RES_STMT (vinfo_for_stmt (related)); + gcc_assert (related_res); + + /* Get a vector result of min/max computation. */ + min_max_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (related)); + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (min_max_stmt)) + if (gimple_bb (USE_STMT (use_p)) == exit_bb + && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI) + minmax_res = PHI_RESULT (USE_STMT (use_p)); + + gcc_assert (minmax_res); + + /* Create vector {min, min,...} or {max, max, ...}. */ + nunits = TYPE_VECTOR_SUBPARTS (vectype); + for (i = nunits - 1; i >= 0; --i) + t = tree_cons (NULL_TREE, gimple_assign_lhs (related_res), t); + + minmax_vec = build_constructor_from_list (TREE_TYPE (minmax_res), t); + + /* To extract the final position value, we need to know whether to look + for maximum (GT_EXPR and LT_EXPR) or minimum (GE_EXPR or LE_EXPR). 
*/ + orig_cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); + if (TREE_CODE (orig_cond) == SSA_NAME) + { + gimple cond_def_stmt = SSA_NAME_DEF_STMT (orig_cond); + orig_code = gimple_assign_rhs_code (cond_def_stmt); + } + else + orig_code = TREE_CODE (orig_cond); + + if (orig_code == GT_EXPR || orig_code == LT_EXPR) + { + val = TYPE_MAX_VALUE (TREE_TYPE (gimple_assign_lhs (stmt))); + *code = MIN_EXPR; + } + else + { + val = TYPE_MIN_VALUE (TREE_TYPE (gimple_assign_lhs (stmt))); + *code = MAX_EXPR; + } + + /* Build a vector of maximum or minimum values. */ + t = NULL_TREE; + for (i = nunits - 1; i >= 0; --i) + t = tree_cons (NULL_TREE, val, t); + + /* Promote GSI to after the min/max result extraction, since we use it + in index calculation. (We insert the min/max scalar statement before + the index calculation statement (in vect_recog_min_max_loc_pattern()), + therefore, its epilogue is created before the epilogue of the index + calculation statement. */ + *exit_gsi = gsi_for_stmt (related_res); + gsi_next (exit_gsi); + minmax_vec = vect_init_vector (stmt, minmax_vec, TREE_TYPE (minmax_res), + exit_gsi); + *new_phi = vectorize_minmax_location_pattern (stmt, exit_gsi, EQ_EXPR, + minmax_res, minmax_vec, + PHI_RESULT (*new_phi), + build_vector (vectype, t)); + + /* Extract minimum or maximum from VECTOR_RESULT to get the first or the last + index (using one of the above techniques). 
*/ + *reduc_code = ERROR_MARK; + if (reduction_code_for_scalar_code (*code, reduc_code)) + { + reduc_optab = optab_for_tree_code (*reduc_code, vectype, optab_default); + if (!reduc_optab) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "no optab for reduction."); + + *reduc_code = ERROR_MARK; /* Clear the caller's code. */ + } + + vec_mode = TYPE_MODE (vectype); + if (reduc_optab + && optab_handler (reduc_optab, vec_mode)->insn_code + == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduc op not supported by target."); + + *reduc_code = ERROR_MARK; + } + } +} /* Function vect_create_epilog_for_reduction @@ -3035,6 +3238,7 @@ vect_create_epilog_for_reduction (VEC (t unsigned int group_size = 1, k, ratio; VEC (tree, heap) *vec_initial_defs = NULL; VEC (gimple, heap) *phis; + tree vec_temp; if (slp_node) group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node)); @@ -3092,9 +3296,9 @@ vect_create_epilog_for_reduction (VEC (t else { vec_initial_defs = VEC_alloc (tree, heap, 1); - /* For the case of reduction, vect_get_vec_def_for_operand returns - the scalar def before the loop, that defines the initial value - of the reduction variable. */ + /* For the case of reduction, vect_get_vec_def_for_operand returns + the scalar def before the loop, that defines the initial value + of the reduction variable. */ vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, &adjustment_def); VEC_quick_push (tree, vec_initial_defs, vec_initial_def); @@ -3194,18 +3398,18 @@ vect_create_epilog_for_reduction (VEC (t defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it represents a reduction pattern), the tree-code and scalar-def are taken from the original stmt that the pattern-stmt (STMT) replaces. - Otherwise (it is a regular reduction) - the tree-code and scalar-def - are taken from STMT. */ + Otherwise (it is a regular reduction or a compound pattern) - the + tree-code and scalar-def are taken from STMT.
*/ orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - if (!orig_stmt) + if (!orig_stmt || STMT_VINFO_COMPOUND_PATTERN (stmt_info)) { - /* Regular reduction */ + /* Regular reduction or compound pattern. */ orig_stmt = stmt; } else { - /* Reduction pattern */ + /* Reduction pattern. */ stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt); gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)); gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); @@ -3232,6 +3436,16 @@ vect_create_epilog_for_reduction (VEC (t if (nested_in_vect_loop && !double_reduc) goto vect_finalize_reduction; + /* Create an epilogue for compound pattern. */ + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info)) + { + /* FORNOW: SLP with compound patterns is not supported. */ + new_phi = VEC_index (gimple, new_phis, 0); + vect_create_epilogue_for_compound_pattern (stmt, vectype, &reduc_code, + &new_phi, &exit_gsi, &code); + VEC_replace (gimple, new_phis, 0, new_phi); + } + /* 2.3 Create the reduction code, using one of the three schemes described above. In SLP we simply need to extract all the elements from the vector (without reducing them), so we use scalar shifts. 
*/ @@ -3247,7 +3461,11 @@ vect_create_epilog_for_reduction (VEC (t vec_dest = vect_create_destination_var (scalar_dest, vectype); new_phi = VEC_index (gimple, new_phis, 0); - tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi)); + if (gimple_code (new_phi) == GIMPLE_PHI) + vec_temp = PHI_RESULT (new_phi); + else + vec_temp = gimple_assign_lhs (new_phi); + tmp = build1 (reduc_code, vectype, vec_temp); epilog_stmt = gimple_build_assign (vec_dest, tmp); new_temp = make_ssa_name (vec_dest, epilog_stmt); gimple_assign_set_lhs (epilog_stmt, new_temp); @@ -3262,7 +3480,6 @@ vect_create_epilog_for_reduction (VEC (t int bit_offset; int element_bitsize = tree_low_cst (bitsize, 1); int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); - tree vec_temp; if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing) shift_code = VEC_RSHIFT_EXPR; @@ -3278,11 +3495,11 @@ vect_create_epilog_for_reduction (VEC (t if (!VECTOR_MODE_P (mode)) have_whole_vector_shift = false; else - { - optab optab = optab_for_tree_code (code, vectype, optab_default); - if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing) - have_whole_vector_shift = false; - } + { + optab optab = optab_for_tree_code (code, vectype, optab_default); + if (!optab || optab_handler (optab, mode)->insn_code == CODE_FOR_nothing) + have_whole_vector_shift = false; + } if (have_whole_vector_shift && !slp_node) { @@ -3298,7 +3515,10 @@ vect_create_epilog_for_reduction (VEC (t vec_dest = vect_create_destination_var (scalar_dest, vectype); new_phi = VEC_index (gimple, new_phis, 0); - new_temp = PHI_RESULT (new_phi); + if (gimple_code (new_phi) == GIMPLE_PHI) + new_temp = PHI_RESULT (new_phi); + else + new_temp = gimple_assign_lhs (new_phi); for (bit_offset = vec_size_in_bits/2; bit_offset >= element_bitsize; bit_offset /= 2) @@ -3340,7 +3560,10 @@ vect_create_epilog_for_reduction (VEC (t vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); for (i = 0; VEC_iterate (gimple, new_phis, i, new_phi); 
i++) { - vec_temp = PHI_RESULT (new_phi); + if (gimple_code (new_phi) == GIMPLE_PHI) + vec_temp = PHI_RESULT (new_phi); + else + vec_temp = gimple_assign_lhs (new_phi); rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, bitsize_zero_node); epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); @@ -3410,6 +3633,7 @@ vect_create_epilog_for_reduction (VEC (t /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ VEC_safe_push (tree, heap, scalar_results, new_temp); + STMT_VINFO_REDUC_SCALAR_RES_STMT (stmt_info) = epilog_stmt; extract_scalar_result = false; } } @@ -3437,6 +3661,7 @@ vect_create_epilog_for_reduction (VEC (t gimple_assign_set_lhs (epilog_stmt, new_temp); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); VEC_safe_push (tree, heap, scalar_results, new_temp); + STMT_VINFO_REDUC_SCALAR_RES_STMT (stmt_info) = epilog_stmt; } vect_finalize_reduction: @@ -3452,8 +3677,13 @@ vect_finalize_reduction: if (nested_in_vect_loop) { new_phi = VEC_index (gimple, new_phis, 0); + if (gimple_code (new_phi) == GIMPLE_PHI) + vec_temp = PHI_RESULT (new_phi); + else + vec_temp = gimple_assign_lhs (new_phi); + gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE); - expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def); + expr = build2 (code, vectype, vec_temp, adjustment_def); new_dest = vect_create_destination_var (scalar_dest, vectype); } else @@ -3486,6 +3716,7 @@ vect_finalize_reduction: VEC_replace (tree, scalar_results, 0, new_temp); VEC_replace (gimple, new_phis, 0, epilog_stmt); + STMT_VINFO_REDUC_SCALAR_RES_STMT (stmt_info) = epilog_stmt; } /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit @@ -3555,8 +3786,10 @@ vect_finalize_reduction: VEC_safe_push (gimple, heap, phis, USE_STMT (use_p)); /* We expect to have found an exit_phi because of loop-closed-ssa - form. 
*/ - gcc_assert (!VEC_empty (gimple, phis)); + form, unless it's a min/max statement of min/max location pattern, + which is inserted by the pattern recognition phase. */ + gcc_assert (!VEC_empty (gimple, phis) + || STMT_VINFO_COMPOUND_PATTERN (stmt_info) == minmax_stmt); for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++) { @@ -3637,7 +3870,12 @@ vect_finalize_reduction: add_phi_arg (vect_phi, vect_phi_init, loop_preheader_edge (outer_loop), UNKNOWN_LOCATION); - add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt), + if (gimple_code (epilog_stmt) == GIMPLE_PHI) + vec_temp = PHI_RESULT (epilog_stmt); + else + vec_temp = gimple_assign_lhs (epilog_stmt); + + add_phi_arg (vect_phi, vec_temp, loop_latch_edge (outer_loop), UNKNOWN_LOCATION); if (vect_print_dump_info (REPORT_DETAILS)) { @@ -3760,11 +3998,11 @@ vectorizable_reduction (gimple stmt, gim basic_block def_bb; struct loop * def_stmt_loop, *outer_loop = NULL; tree def_arg; - gimple def_arg_stmt; + gimple def_arg_stmt, related; VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL; VEC (gimple, heap) *phis = NULL; - int vec_num; - tree def0, def1; + int vec_num, cond_reduc_index = 0; + tree def0, def1, cond_reduc_def = NULL_TREE; if (nested_in_vect_loop_p (loop, stmt)) { @@ -3774,8 +4012,10 @@ vectorizable_reduction (gimple stmt, gim } /* 1. Is vectorizable reduction? */ - /* Not supportable if the reduction variable is used in the loop. */ - if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer) + /* Not supportable if the reduction variable is used in the loop, + unless it's a pattern. */ + if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer + && !STMT_VINFO_COMPOUND_PATTERN (stmt_info)) return false; /* Reductions that are not used even in an enclosing outer-loop, @@ -3797,14 +4037,17 @@ vectorizable_reduction (gimple stmt, gim the original sequence that constitutes the pattern.
*/ orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - if (orig_stmt) + if (orig_stmt + && !STMT_VINFO_COMPOUND_PATTERN (stmt_info)) { orig_stmt_info = vinfo_for_stmt (orig_stmt); gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt); gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); } - + else + orig_stmt = NULL; + /* 3. Check the operands of the operation. The first operands are defined inside the loop body. The last operand is the reduction variable, which is defined by the loop-header-phi. */ @@ -3917,12 +4160,13 @@ vectorizable_reduction (gimple stmt, gim if (code == COND_EXPR) { - if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) + if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, + cond_reduc_def, cond_reduc_index)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "unsupported condition in reduction"); - return false; + return false; } } else @@ -4055,7 +4299,12 @@ vectorizable_reduction (gimple stmt, gim } else { - if (!nested_cycle || double_reduc) + /* There is no need in reduction epilogue in case of a nested cycle, + unless it is double reduction. For reduction pattern, we assume that + we know how to create an epilogue even if there is no reduction code + for it. */ + if ((!nested_cycle || double_reduc) + && !STMT_VINFO_COMPOUND_PATTERN (stmt_info)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "no reduc code for scalar code."); @@ -4075,8 +4324,9 @@ vectorizable_reduction (gimple stmt, gim if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; - if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies)) + if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies)) return false; + return true; } @@ -4127,6 +4377,32 @@ vectorizable_reduction (gimple stmt, gim else epilog_copies = ncopies; + /* Prepare vector operands for min/max location. 
*/ + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info) == minmax_loc_stmt) + { + tree cond_op; + gimple cond_def_stmt; + + related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); + cond_op = TREE_OPERAND (ops[0], 0); + cond_def_stmt = SSA_NAME_DEF_STMT (cond_op); + if (gimple_code (cond_def_stmt) == GIMPLE_PHI) + { + cond_reduc_index = 1; + cond_reduc_def = gimple_assign_rhs1 (STMT_VINFO_VEC_STMT ( + vinfo_for_stmt (related))); + } + else + { + cond_op = TREE_OPERAND (ops[0], 1); + cond_def_stmt = SSA_NAME_DEF_STMT (cond_op); + gcc_assert (gimple_code (cond_def_stmt) == GIMPLE_PHI); + cond_reduc_index = 2; + cond_reduc_def = gimple_assign_rhs2 (STMT_VINFO_VEC_STMT ( + vinfo_for_stmt (related))); + } + } + prev_stmt_info = NULL; prev_phi_info = NULL; if (slp_node) @@ -4170,7 +4446,8 @@ vectorizable_reduction (gimple stmt, gim gcc_assert (!slp_node); vectorizable_condition (stmt, gsi, vec_stmt, PHI_RESULT (VEC_index (gimple, phis, 0)), - reduc_index); + reduc_index, cond_reduc_def, + cond_reduc_index); /* Multiple types are not supported for condition. */ break; } @@ -4406,6 +4683,9 @@ vectorizable_live_operation (gimple stmt gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info)) + return true; + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) return false; Index: tree-vect-patterns.c =================================================================== --- tree-vect-patterns.c (revision 161484) +++ tree-vect-patterns.c (working copy) @@ -53,6 +53,10 @@ static vect_recog_func_ptr vect_vect_rec vect_recog_widen_sum_pattern, vect_recog_dot_prod_pattern, vect_recog_pow_pattern}; +static bool vect_recog_min_max_loc_pattern (unsigned int, va_list); +static vect_recog_compound_func_ptr + vect_recog_compound_func_ptrs[NUM_COMPOUND_PATTERNS] = { + vect_recog_min_max_loc_pattern}; /* Function widened_name_p @@ -847,3 +851,286 @@ vect_pattern_recog (loop_vec_info loop_v } } } + + +/* Detect min/max location pattern. 
+ Given two reduction condition statements and their phi nodes, we check + if one of the statements calculates minimum or maximum, and the other one + records its location. If the pattern is detected, we replace the min/max + condition statement with MIN_EXPR or MAX_EXPR, and mark the old statement + as pattern statement. + + The pattern we are looking for: + + s1: min = [cond_expr] a < min ? a : min + s2: index = [cond_expr] a < min ? new_index : index + + We add MIN_EXPR statement before the index calculation statement: + + s1: min = [cond_expr] a < min ? a : min + s1': min = [min_expr] + s2: index = [cond_expr] a < min ? new_index : index + + s1 is marked as pattern statement + s1' points to s1 via related_stmt field + s1 points to s1' via related_stmt field + s2 points to s1' via related_stmt field. + s1' and s2 are marked as compound pattern min/max and min/max location + statements. */ + +static bool +vect_recog_min_max_loc_pattern (unsigned int nargs, va_list args) +{ + gimple first_phi, first_stmt, second_phi, second_stmt, loop_op_def_stmt; + stmt_vec_info stmt_vinfo, new_stmt_info, minmax_stmt_info, pos_stmt_info; + loop_vec_info loop_info; + struct loop *loop; + enum tree_code code, first_code, second_code; + gimple first_cond_def_stmt = NULL, second_cond_def_stmt = NULL; + tree first_cond_op0, first_cond_op1, second_cond_op0, second_cond_op1; + tree first_stmt_oprnd0, first_stmt_oprnd1, second_stmt_oprnd0; + tree second_stmt_oprnd1, first_cond, second_cond; + int phi_def_index; + tree first_loop_op, second_loop_op, pos_stmt_loop_op, def, result; + gimple pos_stmt, min_max_stmt, new_stmt, def_stmt; + gimple_stmt_iterator gsi; + + if (nargs < 4) + return false; + + first_phi = va_arg (args, gimple); + first_stmt = va_arg (args, gimple); + second_phi = va_arg (args, gimple); + second_stmt = va_arg (args, gimple); + + stmt_vinfo = vinfo_for_stmt (first_stmt); + loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + loop = LOOP_VINFO_LOOP (loop_info); + + /* Check
that the condition is the same and is GT or LT. */ + first_cond = TREE_OPERAND (gimple_assign_rhs1 (first_stmt), 0); + if (TREE_CODE (first_cond) == SSA_NAME) + { + first_cond_def_stmt = SSA_NAME_DEF_STMT (first_cond); + first_code = gimple_assign_rhs_code (first_cond_def_stmt); + first_cond_op0 = gimple_assign_rhs1 (first_cond_def_stmt); + first_cond_op1 = gimple_assign_rhs2 (first_cond_def_stmt); + } + else + { + first_code = TREE_CODE (first_cond); + first_cond_op0 = TREE_OPERAND (first_cond, 0); + first_cond_op1 = TREE_OPERAND (first_cond, 1); + } + + if (first_code != GT_EXPR && first_code != LT_EXPR + && first_code != GE_EXPR && first_code != LE_EXPR) + return false; + + second_cond = TREE_OPERAND (gimple_assign_rhs1 (second_stmt), 0); + if (TREE_CODE (second_cond) == SSA_NAME) + { + second_cond_def_stmt = SSA_NAME_DEF_STMT (second_cond); + second_code = gimple_assign_rhs_code (second_cond_def_stmt); + second_cond_op0 = gimple_assign_rhs1 (second_cond_def_stmt); + second_cond_op1 = gimple_assign_rhs2 (second_cond_def_stmt); + } + else + { + second_code = TREE_CODE (second_cond); + second_cond_op0 = TREE_OPERAND (second_cond, 0); + second_cond_op1 = TREE_OPERAND (second_cond, 1); + } + + if (first_code != second_code) + return false; + + if (first_cond_def_stmt + && (!second_cond_def_stmt + || first_cond_def_stmt != second_cond_def_stmt + || !operand_equal_p (first_cond_op0, second_cond_op0, 0) + || !operand_equal_p (first_cond_op1, second_cond_op1, 0))) + return false; + + /* Both statements have the same condition. 
*/ + + first_stmt_oprnd0 = TREE_OPERAND (gimple_assign_rhs1 (first_stmt), 1); + first_stmt_oprnd1 = TREE_OPERAND (gimple_assign_rhs1 (first_stmt), 2); + + second_stmt_oprnd0 = TREE_OPERAND (gimple_assign_rhs1 (second_stmt), 1); + second_stmt_oprnd1 = TREE_OPERAND (gimple_assign_rhs1 (second_stmt), 2); + + if (TREE_CODE (first_stmt_oprnd0) != SSA_NAME + || TREE_CODE (first_stmt_oprnd1) != SSA_NAME + || TREE_CODE (second_stmt_oprnd0) != SSA_NAME + || TREE_CODE (second_stmt_oprnd1) != SSA_NAME) + return false; + + if (operand_equal_p (PHI_RESULT (first_phi), first_stmt_oprnd0, 0) + && operand_equal_p (PHI_RESULT (second_phi), second_stmt_oprnd0, 0)) + { + phi_def_index = 0; + first_loop_op = first_stmt_oprnd1; + second_loop_op = second_stmt_oprnd1; + } + else + { + if (operand_equal_p (PHI_RESULT (first_phi), first_stmt_oprnd1, 0) + && operand_equal_p (PHI_RESULT (second_phi), second_stmt_oprnd1, 0)) + { + phi_def_index = 1; + first_loop_op = first_stmt_oprnd0; + second_loop_op = second_stmt_oprnd0; + } + else + return false; + } + + /* Now we know which operand is defined by phi node. Analyze the second + one. */ + + /* The min/max stmt must be x < y ? x : y. */ + if (operand_equal_p (first_cond_op0, first_stmt_oprnd0, 0) + && operand_equal_p (first_cond_op1, first_stmt_oprnd1, 0)) + { + pos_stmt = second_stmt; + min_max_stmt = first_stmt; + pos_stmt_loop_op = second_loop_op; + } + else + { + if (operand_equal_p (second_cond_op0, second_stmt_oprnd0, 0) + && operand_equal_p (second_cond_op1, second_stmt_oprnd1, 0)) + { + pos_stmt = first_stmt; + min_max_stmt = second_stmt; + pos_stmt_loop_op = first_loop_op; + } + else + return false; + } + + /* Analyze the position stmt. We expect it to be either induction or + induction plus constant. 
*/ + loop_op_def_stmt = SSA_NAME_DEF_STMT (pos_stmt_loop_op); + + if (!flow_bb_inside_loop_p (loop, gimple_bb (loop_op_def_stmt))) + return false; + + if (gimple_code (loop_op_def_stmt) == GIMPLE_PHI) + { + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (loop_op_def_stmt)) + != vect_induction_def) + return false; + } + else + { + if (!is_gimple_assign (loop_op_def_stmt)) + return false; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (loop_op_def_stmt)) + == GIMPLE_UNARY_RHS) + def = gimple_assign_rhs1 (loop_op_def_stmt); + else + { + tree op1, op2; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (loop_op_def_stmt)) + != GIMPLE_BINARY_RHS + || gimple_assign_rhs_code (loop_op_def_stmt) != PLUS_EXPR) + return false; + + op1 = gimple_assign_rhs1 (loop_op_def_stmt); + op2 = gimple_assign_rhs2 (loop_op_def_stmt); + + if (TREE_CONSTANT (op1)) + def = op2; + else + { + if (TREE_CONSTANT (op2)) + def = op1; + else + return false; + } + } + + if (TREE_CODE (def) != SSA_NAME) + return false; + + def_stmt = SSA_NAME_DEF_STMT (def); + if (!flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + || gimple_code (def_stmt) != GIMPLE_PHI + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)) + != vect_induction_def) + return false; + } + + /* Pattern detected. Create a stmt to be used to replace the pattern. */ + if (first_code == GT_EXPR || first_code == GE_EXPR) + code = phi_def_index ? MAX_EXPR : MIN_EXPR; + else + code = phi_def_index ? 
MIN_EXPR : MAX_EXPR; + + result = gimple_assign_lhs (min_max_stmt); + new_stmt = gimple_build_assign_with_ops (code, result, + TREE_OPERAND (gimple_assign_rhs1 (min_max_stmt), 1), + TREE_OPERAND (gimple_assign_rhs1 (min_max_stmt), 2)); + gsi = gsi_for_stmt (pos_stmt); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + SSA_NAME_DEF_STMT (result) = new_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "Detected min/max location pattern:\nmin/max stmt "); + print_gimple_stmt (vect_dump, min_max_stmt, 0, TDF_SLIM); + fprintf (vect_dump, "\nlocation stmt "); + print_gimple_stmt (vect_dump, pos_stmt, 0, TDF_SLIM); + fprintf (vect_dump, "\nCreated stmt: "); + print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM); + } + + /* Mark the stmts that are involved in the pattern. */ + set_vinfo_for_stmt (new_stmt, + new_stmt_vec_info (new_stmt, loop_info, NULL)); + new_stmt_info = vinfo_for_stmt (new_stmt); + + pos_stmt_info = vinfo_for_stmt (pos_stmt); + minmax_stmt_info = vinfo_for_stmt (min_max_stmt); + + STMT_VINFO_DEF_TYPE (new_stmt_info) = STMT_VINFO_DEF_TYPE (minmax_stmt_info); + STMT_VINFO_VECTYPE (new_stmt_info) = STMT_VINFO_VECTYPE (minmax_stmt_info); + + STMT_VINFO_IN_PATTERN_P (minmax_stmt_info) = true; + STMT_VINFO_COMPOUND_PATTERN (new_stmt_info) = minmax_stmt; + STMT_VINFO_COMPOUND_PATTERN (pos_stmt_info) = minmax_loc_stmt; + STMT_VINFO_RELATED_STMT (new_stmt_info) = min_max_stmt; + STMT_VINFO_RELATED_STMT (minmax_stmt_info) = new_stmt; + STMT_VINFO_RELATED_STMT (pos_stmt_info) = new_stmt; + + return true; +} + +/* Detect patterns consisting of two more statements to be vectorized. + Currently the only supported pattern is min/max location. */ + +void +vect_compound_pattern_recog (unsigned int nargs, ...) 
+{ + unsigned int j; + va_list args; + bool detected = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_compound_pattern_recog ==="); + + /* Try each recognizer in vect_recog_compound_func_ptrs in turn and stop at + the first one that reports a match. The variadic arguments are restarted + with va_start for every recognizer, so each one reads them from the + beginning. */ + for (j = 0; j < NUM_COMPOUND_PATTERNS; j++) + { + va_start (args, nargs); + detected = (* vect_recog_compound_func_ptrs[j]) (nargs, args); + va_end (args); + if (detected) + break; + } +} + Index: target-def.h =================================================================== --- target-def.h (revision 161484) +++ target-def.h (working copy) @@ -431,7 +431,7 @@ hook_bool_tree_tree_true #define TARGET_SUPPORT_VECTOR_MISALIGNMENT \ default_builtin_support_vector_misalignment - +#define TARGET_VECTORIZE_BUILTIN_VEC_CMP 0 #define TARGET_VECTORIZE \ { \ @@ -444,7 +444,8 @@ TARGET_VECTOR_ALIGNMENT_REACHABLE, \ TARGET_VECTORIZE_BUILTIN_VEC_PERM, \ TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK, \ - TARGET_SUPPORT_VECTOR_MISALIGNMENT \ + TARGET_SUPPORT_VECTOR_MISALIGNMENT, \ + TARGET_VECTORIZE_BUILTIN_VEC_CMP \ } #define TARGET_DEFAULT_TARGET_FLAGS 0 Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 161484) +++ tree-vect-stmts.c (working copy) @@ -271,8 +271,10 @@ process_use (gimple stmt, tree use, loop /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT). DEF_STMT must have already been processed, because this should be the only way that STMT, which is a reduction-phi, was put in the worklist, - as there should be no other uses for DEF_STMT in the loop. So we just - check that everything is as expected, and we are done. */ + as there should be no other uses for DEF_STMT in the loop, unless it is + min/max location pattern. So we just check that everything is as + expected, and mark the min/max stmt of the location pattern stmt as + used by reduction (it is used by the reduction of location). 
*/ dstmt_vinfo = vinfo_for_stmt (def_stmt); bb = gimple_bb (stmt); if (gimple_code (stmt) == GIMPLE_PHI @@ -283,11 +285,22 @@ process_use (gimple stmt, tree use, loop { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest."); + + /* Compound reduction pattern: DEF_STMT is marked as used by reduction + and no further checks are done for this use. */ + if (STMT_VINFO_COMPOUND_PATTERN (dstmt_vinfo)) + { + relevant = vect_used_by_reduction; + vect_mark_relevant (worklist, def_stmt, relevant, live_p); + return true; + } + if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo)) dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo)); + gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction); - gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo) - || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope); + gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo) + || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope + || STMT_VINFO_COMPOUND_PATTERN (dstmt_vinfo)); return true; } @@ -481,7 +494,8 @@ vect_mark_stmts_to_be_vectorized (loop_v break; case vect_used_by_reduction: - if (gimple_code (stmt) == GIMPLE_PHI) + if (gimple_code (stmt) == GIMPLE_PHI + || STMT_VINFO_COMPOUND_PATTERN (stmt_vinfo)) break; /* fall through */ @@ -3873,6 +3887,106 @@ vect_is_simple_cond (tree cond, loop_vec return true; } +/* Create a sequence of statements that vectorizes min/max location pattern + either inside the loop body, or in reduction epilogue. The technique used + here was taken from "Multimedia vectorization of floating-point MIN/MAX + reductions" by A.J.C.Bik, X.Tian and M.B.Girkar, + http://portal.acm.org/citation.cfm?id=1145765. 
+ Vectorized loop (maxloc, first index): + vcx[0:vl-1:1] = | x |..| x |; - vector of max values + vck[0:vl-1:1] = | k |..| k |; - vector of positions + ind[0:vl-1:1] = |vl-1|..| 0 |; + inc[0:vl-1:1] = | vl |..| vl |; + for (i = 0; i < N; i += vl) { + msk[0:vl-1:1] = (a[i:i+vl-1:1] > vcx[0:vl-1:1]); + vck[0:vl-1:1] = (ind[0:vl-1:1] & msk[0:vl-1:1]) | + (vck[0:vl-1:1] & !msk[0:vl-1:1]); + vcx[0:vl-1:1] = VMAX(vcx[0:vl-1:1], a[i:i+vl-1:1]); + ind[0:vl-1:1] += inc[0:vl-1:1]; + } + x = HMAX(vcx[0:vl-1:1]); - scalar maximum extraction + msk[0:vl-1:1] = (vcx[0:vl-1:1] == |x|..|x|); + vck[0:vl-1:1] = (vck[0:vl-1:1] & msk[0:vl-1:1]) | + (|MaxInt|..|MaxInt| & !msk[0:vl-1:1]); + k = HMIN(vck[0:vl-1:1]); - first position extraction + + In this function we generate: + MASK = CODE (COMPARE_OPRND1, COMPARE_OPRND2) + VEC_DEST = (VEC_OPRND1 & MASK) | (VEC_OPRND2 & !MASK) + + When called from vectorizable_condition(), the loop body code is generated. + When called from vect_create_epilog_for_reduction(), the function generates + the code for scalar extraction in the reduction epilogue. + + The return value is the last statement in the above sequence. + + STMT supplies the vector type and the scalar destination of the generated + statements. The comparison builtin is requested from the target for the + vector type of the lhs of the statement related to STMT. All statements + are emitted with vect_finish_stmt_generation (STMT, ..., GSI). */ + +gimple +vectorize_minmax_location_pattern (gimple stmt, gimple_stmt_iterator *gsi, + enum tree_code code, + tree compare_oprnd1, tree compare_oprnd2, + tree vec_oprnd1, tree vec_oprnd2) +{ + tree mask_type, builtin_decl, vec_dest, new_temp, vect_mask; + tree and_res1, and_res2, and_dest1, and_dest2, tmp, not_mask, mask, tmp_mask; + gimple mask_stmt, new_stmt; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree scalar_dest = gimple_assign_lhs (stmt); + gimple related = STMT_VINFO_RELATED_STMT (stmt_info); + tree related_lhs = gimple_assign_lhs (related); + tree comparison_type = get_vectype_for_scalar_type (TREE_TYPE (related_lhs)); + + /* Create mask: MASK = CODE (COMPARE_OPRND1, COMPARE_OPRND2). NOTE(review): + the decl returned by the target hook is used without a NULL check; callers + are presumably expected to have verified target support first - confirm. 
*/ + builtin_decl = targetm.vectorize.builtin_vect_compare (code, + comparison_type, &mask_type); + vect_mask = vect_create_destination_var (related_lhs, mask_type); + mask_stmt = gimple_build_call (builtin_decl, 2, compare_oprnd1, + compare_oprnd2); + tmp_mask = make_ssa_name (vect_mask, mask_stmt); + gimple_call_set_lhs (mask_stmt, tmp_mask); + vect_finish_stmt_generation (stmt, mask_stmt, gsi); + + /* Convert the mask to VECTYPE. */ + vect_mask = vect_create_destination_var (scalar_dest, vectype); + mask_stmt = gimple_build_assign (vect_mask, fold_build1 (VIEW_CONVERT_EXPR, + vectype, tmp_mask)); + mask = make_ssa_name (vect_mask, mask_stmt); + gimple_assign_set_lhs (mask_stmt, mask); + vect_finish_stmt_generation (stmt, mask_stmt, gsi); + + /* Create: VEC_DEST = (VEC_OPRND1 & MASK) | (VEC_OPRND2 & !MASK). */ + and_dest1 = vect_create_destination_var (scalar_dest, vectype); + and_dest2 = vect_create_destination_var (scalar_dest, vectype); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + + tmp = build2 (BIT_AND_EXPR, vectype, vec_oprnd1, mask); + new_stmt = gimple_build_assign (and_dest1, tmp); + and_res1 = make_ssa_name (and_dest1, new_stmt); + gimple_assign_set_lhs (new_stmt, and_res1); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + tmp = build1 (BIT_NOT_EXPR, vectype, mask); + new_stmt = gimple_build_assign (vec_dest, tmp); + not_mask = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, not_mask); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + tmp = build2 (BIT_AND_EXPR, vectype, vec_oprnd2, not_mask); + new_stmt = gimple_build_assign (and_dest2, tmp); + and_res2 = make_ssa_name (and_dest2, new_stmt); + gimple_assign_set_lhs (new_stmt, and_res2); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + vec_dest = vect_create_destination_var (scalar_dest, vectype); + tmp = build2 (BIT_IOR_EXPR, vectype, and_res1, and_res2); + new_stmt = gimple_build_assign (vec_dest, tmp); + new_temp = make_ssa_name 
(vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + return new_stmt; +} + /* vectorizable_condition. Check if STMT is conditional modify expression that can be vectorized. @@ -3884,11 +3998,16 @@ vect_is_simple_cond (tree cond, loop_vec to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in else caluse if it is 2). + In min/max location pattern, reduction defs are used in both condition part + and then/else clause. In that case COND_REDUC_DEF contains such vector def, + and COND_REDUC_INDEX specifies its place in the condition. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ bool vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, - gimple *vec_stmt, tree reduc_def, int reduc_index) + gimple *vec_stmt, tree reduc_def, int reduc_index, + tree cond_reduc_def, int cond_reduc_index) { tree scalar_dest = NULL_TREE; tree vec_dest = NULL_TREE; @@ -3906,6 +4025,7 @@ vectorizable_condition (gimple stmt, gim int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; enum tree_code code; + tree comparison_type, mask_type; /* FORNOW: unsupported in basic block SLP. */ gcc_assert (loop_vinfo); @@ -3914,20 +4034,23 @@ vectorizable_condition (gimple stmt, gim if (ncopies > 1) return false; /* FORNOW */ - if (!STMT_VINFO_RELEVANT_P (stmt_info)) + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_COMPOUND_PATTERN (stmt_info)) return false; if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def - && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + && !((STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) && reduc_def)) - return false; + return false; /* FORNOW: SLP not supported. */ if (STMT_SLP_TYPE (stmt_info)) return false; /* FORNOW: not yet supported. 
*/ - if (STMT_VINFO_LIVE_P (stmt_info)) + if (STMT_VINFO_LIVE_P (stmt_info) + && !STMT_VINFO_COMPOUND_PATTERN (stmt_info)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "value used after loop."); @@ -3955,7 +4078,10 @@ vectorizable_condition (gimple stmt, gim /* We do not handle two different vector types for the condition and the values. */ if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)), - TREE_TYPE (vectype))) + TREE_TYPE (vectype)) + && !(STMT_VINFO_COMPOUND_PATTERN (stmt_info) + && TYPE_SIZE_UNIT (TREE_TYPE (TREE_OPERAND (cond_expr, 0))) + == TYPE_SIZE_UNIT (TREE_TYPE (vectype)))) return false; if (TREE_CODE (then_clause) == SSA_NAME) @@ -3985,42 +4111,77 @@ vectorizable_condition (gimple stmt, gim vec_mode = TYPE_MODE (vectype); - if (!vec_stmt) + comparison_type = + get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0))); + + /* Check that min/max location pattern is supported, i.e., the relevant + vector comparisons exist (including EQ_EXPR for reduction epilogue). */ + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info) == minmax_loc_stmt + && (!targetm.vectorize.builtin_vect_compare + || !targetm.vectorize.builtin_vect_compare (TREE_CODE (cond_expr), + comparison_type, &mask_type) + || !targetm.vectorize.builtin_vect_compare (EQ_EXPR, comparison_type, + &mask_type))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported comparison"); + + return false; + } + + if (!vec_stmt) { STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); } - /* Transform */ + /* Transform. */ /* Handle def. */ scalar_dest = gimple_assign_lhs (stmt); vec_dest = vect_create_destination_var (scalar_dest, vectype); /* Handle cond expr. 
*/ - vec_cond_lhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); - vec_cond_rhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); + if (cond_reduc_index == 1) + vec_cond_lhs = cond_reduc_def; + else + vec_cond_lhs = + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); + + if (cond_reduc_index == 2) + vec_cond_rhs = cond_reduc_def; + else + vec_cond_rhs = + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); + if (reduc_index == 1) vec_then_clause = reduc_def; else vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); + if (reduc_index == 2) vec_else_clause = reduc_def; else vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); /* Arguments are ready. Create the new vector stmt. */ - vec_compare = build2 (TREE_CODE (cond_expr), vectype, - vec_cond_lhs, vec_cond_rhs); - vec_cond_expr = build3 (VEC_COND_EXPR, vectype, - vec_compare, vec_then_clause, vec_else_clause); - - *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - gimple_assign_set_lhs (*vec_stmt, new_temp); - vect_finish_stmt_generation (stmt, *vec_stmt, gsi); + if (STMT_VINFO_COMPOUND_PATTERN (stmt_info) == minmax_loc_stmt) + { + *vec_stmt = vectorize_minmax_location_pattern (stmt, gsi, + TREE_CODE (cond_expr), vec_cond_lhs, vec_cond_rhs, + vec_then_clause, vec_else_clause); + } + else + { + vec_compare = build2 (TREE_CODE (cond_expr), vectype, vec_cond_lhs, + vec_cond_rhs); + vec_cond_expr = build3 (VEC_COND_EXPR, vectype, vec_compare, + vec_then_clause, vec_else_clause); + *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + gimple_assign_set_lhs (*vec_stmt, new_temp); + vect_finish_stmt_generation (stmt, *vec_stmt, gsi); + } return true; } @@ -4077,7 +4238,8 @@ vect_analyze_stmt (gimple stmt, bool *ne case vect_nested_cycle: gcc_assert (!bb_vinfo && 
(relevance == vect_used_in_outer || relevance == vect_used_in_outer_by_reduction - || relevance == vect_unused_in_scope)); + || relevance == vect_unused_in_scope + || relevance == vect_used_by_reduction)); break; case vect_induction_def: @@ -4139,7 +4301,7 @@ vect_analyze_stmt (gimple stmt, bool *ne || vectorizable_call (stmt, NULL, NULL) || vectorizable_store (stmt, NULL, NULL, NULL) || vectorizable_reduction (stmt, NULL, NULL, NULL) - || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); + || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL, 0)); else { if (bb_vinfo) @@ -4280,7 +4442,7 @@ vect_transform_stmt (gimple stmt, gimple case condition_vec_info_type: gcc_assert (!slp_node); - done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); + done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, NULL, 0); gcc_assert (done); break; @@ -4418,6 +4580,7 @@ new_stmt_vec_info (gimple stmt, loop_vec STMT_VINFO_VEC_STMT (res) = NULL; STMT_VINFO_VECTORIZABLE (res) = true; STMT_VINFO_IN_PATTERN_P (res) = false; + STMT_VINFO_COMPOUND_PATTERN (res) = not_in_pattern; STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL; @@ -4436,6 +4599,7 @@ new_stmt_vec_info (gimple stmt, loop_vec STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5); STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0; STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0; + STMT_VINFO_REDUC_SCALAR_RES_STMT (res) = NULL; STMT_SLP_TYPE (res) = loop_vect; DR_GROUP_FIRST_DR (res) = NULL; DR_GROUP_NEXT_DR (res) = NULL; Index: config/rs6000/rs6000-builtin.def =================================================================== --- config/rs6000/rs6000-builtin.def (revision 161484) +++ config/rs6000/rs6000-builtin.def (working copy) @@ -73,6 +73,8 @@ RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSH, RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUW, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTC_FP_PURE) 
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPLTFP, RS6000_BTC_FP_PURE) +RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPLEFP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEXPTEFP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VLOGEFP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VMADDFP, RS6000_BTC_FP_PURE) Index: config/rs6000/rs6000.c =================================================================== --- config/rs6000/rs6000.c (revision 161484) +++ config/rs6000/rs6000.c (working copy) @@ -1056,6 +1056,7 @@ static bool rs6000_builtin_support_vecto machine_mode, const_tree, int, bool); +static tree rs6000_builtin_vect_compare (unsigned int, tree, tree *); static void def_builtin (int, const char *, tree, int); static bool rs6000_vector_alignment_reachable (const_tree, bool); @@ -1448,6 +1449,8 @@ static const struct attribute_spec rs600 rs6000_builtin_support_vector_misalignment #undef TARGET_VECTOR_ALIGNMENT_REACHABLE #define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable +#undef TARGET_VECTORIZE_BUILTIN_VEC_CMP +#define TARGET_VECTORIZE_BUILTIN_VEC_CMP rs6000_builtin_vect_compare #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins @@ -3371,6 +3374,45 @@ rs6000_builtin_vec_perm (tree type, tree return d; } +/* Implement targetm.vectorize.builtin_vect_compare. Return the AltiVec + builtin decl that performs comparison TCODE (an enum tree_code value) on + vectors of TYPE. Only V4SFmode with GT, LT, GE, LE and EQ is handled, and + in that mode *RETURN_TYPE is set to V4SF_type_node. Return NULL_TREE when + AltiVec is not enabled or the mode or comparison is not handled. 
*/ +tree +rs6000_builtin_vect_compare (unsigned int tcode, tree type, tree *return_type) +{ + enum tree_code code = (enum tree_code) tcode; + + if (!TARGET_ALTIVEC) + return NULL_TREE; + + switch (TYPE_MODE (type)) + { + case V4SFmode: + *return_type = V4SF_type_node; + switch (code) + { + case GT_EXPR: + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VCMPGTFP]; + + case LT_EXPR: + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VCMPLTFP]; + + case GE_EXPR: + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VCMPGEFP]; + + case LE_EXPR: + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VCMPLEFP]; + + case EQ_EXPR: + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VCMPEQFP]; + + default: + return NULL_TREE; + } + + default: + return NULL_TREE; + } +} + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. @@ -9182,6 +9224,8 @@ static struct builtin_description bdesc_ { MASK_ALTIVEC, CODE_FOR_vector_gtuv4si, "__builtin_altivec_vcmpgtuw", ALTIVEC_BUILTIN_VCMPGTUW }, { MASK_ALTIVEC, CODE_FOR_vector_gtv4si, "__builtin_altivec_vcmpgtsw", ALTIVEC_BUILTIN_VCMPGTSW }, { MASK_ALTIVEC, CODE_FOR_vector_gtv4sf, "__builtin_altivec_vcmpgtfp", ALTIVEC_BUILTIN_VCMPGTFP }, + { MASK_ALTIVEC, CODE_FOR_altivec_vcmpltfp, "__builtin_altivec_vcmpltfp", ALTIVEC_BUILTIN_VCMPLTFP }, + { MASK_ALTIVEC, CODE_FOR_altivec_vcmplefp, "__builtin_altivec_vcmplefp", ALTIVEC_BUILTIN_VCMPLEFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vctsxs, "__builtin_altivec_vctsxs", ALTIVEC_BUILTIN_VCTSXS }, { MASK_ALTIVEC, CODE_FOR_altivec_vctuxs, "__builtin_altivec_vctuxs", ALTIVEC_BUILTIN_VCTUXS }, { MASK_ALTIVEC, CODE_FOR_umaxv16qi3, "__builtin_altivec_vmaxub", ALTIVEC_BUILTIN_VMAXUB }, Index: config/rs6000/altivec.md =================================================================== --- config/rs6000/altivec.md (revision 161484) +++ config/rs6000/altivec.md (working copy) @@ -144,6 +144,8 @@ (UNSPEC_VUPKHU_V4SF 326) (UNSPEC_VUPKLU_V4SF 327) (UNSPEC_VNMSUBFP 328) + (UNSPEC_VCMPLTFP 329) + 
(UNSPEC_VCMPLEFP 330) ]) (define_constants @@ -2802,3 +2804,22 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +(define_insn "altivec_vcmpltfp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_VCMPLTFP))] + "TARGET_ALTIVEC" + "vcmpgtfp %0,%2,%1" + [(set_attr "type" "veccmp")]) + +(define_insn "altivec_vcmplefp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_VCMPLEFP))] + "TARGET_ALTIVEC" + "vcmpgefp %0,%2,%1" + [(set_attr "type" "veccmp")]) Index: tree-vect-slp.c =================================================================== --- tree-vect-slp.c (revision 161484) +++ tree-vect-slp.c (working copy) @@ -146,6 +146,18 @@ vect_get_and_check_slp_defs (loop_vec_in return false; } + if (def_stmt && vinfo_for_stmt (def_stmt) + && STMT_VINFO_COMPOUND_PATTERN (vinfo_for_stmt (def_stmt))) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: compound pattern "); + print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM); + } + + return false; + } + /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt from the pattern. Check that all the stmts of the node are in the pattern. */