From: Xinliang David Li Date: Fri, 27 Sep 2013 16:48:15 +0000 (+0000) Subject: vectorizer cost model enhancement X-Git-Tag: releases/gcc-4.9.0~3843 X-Git-Url: https://gcc.gnu.org/git/?a=commitdiff_plain;h=d6d1127249564146429009e0682f25bd58d7a791;p=gcc.git vectorizer cost model enhancement From-SVN: r202980 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4b330457c131..9712bc80dfeb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2013-09-27 Xinliang David Li + + * opts.c (finish_options): Adjust parameters + according to vect cost model. + (common_handle_option): Set dynamic vect cost + model for FDO. + * targhooks.c (default_add_stmt_cost): Compute stmt cost + unconditionally. + * tree-vect-loop.c (vect_estimate_min_profitable_iters): + Use helper function. + * tree-vectorizer.h (unlimited_cost_model): New function. + * tree-vect-slp.c (vect_slp_analyze_bb_1): Use helper function. + * tree-vect-data-refs.c (vect_peeling_hash_insert): Use helper + function. + (vect_enhance_data_refs_alignment): Ditto. + * flag-types.h: New enum. + * common/config/i386/i386-common.c (ix86_option_init_struct): + No need to initialize vect_cost_model flag. + * config/i386/i386.c (ix86_add_stmt_cost): Compute stmt cost + unconditionally. + 2013-09-27 Diego Novillo * gimple.h (enum ssa_mode): Remove. 
diff --git a/gcc/common.opt b/gcc/common.opt index 202e169d281c..c2b3d3573025 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2278,13 +2278,33 @@ ftree-slp-vectorize Common Report Var(flag_tree_slp_vectorize) Optimization Enable basic block vectorization (SLP) on trees +fvect-cost-model= +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) +Specifies the cost model for vectorization + +Enum +Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs) + +EnumValue +Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED) + +EnumValue +Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC) + +EnumValue +Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP) + fvect-cost-model -Common Report Var(flag_vect_cost_model) Optimization -Enable use of cost model in vectorization +Common RejectNegative Alias(fvect-cost-model=,dynamic) +Enables the dynamic vectorizer cost model. Preserved for backward compatibility. + +fno-vect-cost-model +Common RejectNegative Alias(fvect-cost-model=,unlimited) +Enables the unlimited vectorizer cost model. Preserved for backward compatibility. ftree-vect-loop-version -Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization -Enable loop versioning when doing loop vectorization on trees +Common Ignore +Does nothing. Preserved for backward compatibility. 
ftree-scev-cprop Common Report Var(flag_tree_scev_cprop) Init(1) Optimization diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 8ca74b9be494..341637b4a10a 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -811,7 +811,6 @@ ix86_option_init_struct (struct gcc_options *opts) opts->x_flag_pcc_struct_return = 2; opts->x_flag_asynchronous_unwind_tables = 2; - opts->x_flag_vect_cost_model = 1; } /* On the x86 -fsplit-stack and -fstack-protector both use the same diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f10113fd3c41..21fc5314861c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -42782,20 +42782,17 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, unsigned *cost = (unsigned *) data; unsigned retval = 0; - if (flag_vect_cost_model) - { - tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; - int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); - /* Statements in an inner loop relative to the loop being - vectorized are weighted more heavily. The value here is - arbitrary and could potentially be improved with analysis. */ - if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) - count *= 50; /* FIXME. */ + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. 
*/ - retval = (unsigned) (count * stmt_cost); - cost[where] += retval; - } + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; return retval; } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 508bbb43838a..8bfd3db6c66a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -423,7 +423,7 @@ Objective-C and Objective-C++ Dialects}. -ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol -ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol -ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol --ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol +-ftree-vectorize -ftree-vrp @gol -funit-at-a-time -funroll-all-loops -funroll-loops @gol -funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol -fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol @@ -6770,7 +6770,7 @@ optimizations designed to reduce code size. @option{-Os} disables the following optimization flags: @gccoptlist{-falign-functions -falign-jumps -falign-loops @gol -falign-labels -freorder-blocks -freorder-blocks-and-partition @gol --fprefetch-loop-arrays -ftree-vect-loop-version} +-fprefetch-loop-arrays} @item -Ofast @opindex Ofast @@ -8025,19 +8025,20 @@ Perform loop vectorization on trees. This flag is enabled by default at Perform basic block vectorization on trees. This flag is enabled by default at @option{-O3} and when @option{-ftree-vectorize} is enabled. -@item -ftree-vect-loop-version -@opindex ftree-vect-loop-version -Perform loop versioning when doing loop vectorization on trees. When a loop -appears to be vectorizable except that data alignment or data dependence cannot -be determined at compile time, then vectorized and non-vectorized versions of -the loop are generated along with run-time checks for alignment or dependence -to control which version is executed. This option is enabled by default -except at level @option{-Os} where it is disabled. 
- -@item -fvect-cost-model +@item -fvect-cost-model=@var{model} @opindex fvect-cost-model -Enable cost model for vectorization. This option is enabled by default at -@option{-O3}. +Alter the cost model used for vectorization. The @var{model} argument +should be one of @code{unlimited}, @code{dynamic} or @code{cheap}. +With the @code{unlimited} model the vectorized code-path is assumed +to be profitable, while with the @code{dynamic} model a runtime check +will guard the vectorized code-path to enable it only for iteration +counts that will likely execute faster than when executing the original +scalar loop. The @code{cheap} model will disable vectorization of +loops where doing so would be cost prohibitive, for example due to +required runtime checks for data dependence or alignment, but otherwise +is equal to the @code{dynamic} model. +The default cost model depends on other optimization flags and is +either @code{dynamic} or @code{cheap}. @item -ftree-vrp @opindex ftree-vrp @@ -9443,13 +9444,11 @@ constraints. The default value is 0. @item vect-max-version-for-alignment-checks The maximum number of run-time checks that can be performed when -doing loop versioning for alignment in the vectorizer. See option -@option{-ftree-vect-loop-version} for more information. +doing loop versioning for alignment in the vectorizer. @item vect-max-version-for-alias-checks The maximum number of run-time checks that can be performed when -doing loop versioning for alias in the vectorizer. See option -@option{-ftree-vect-loop-version} for more information. +doing loop versioning for alias in the vectorizer. @item vect-max-peeling-for-alignment The maximum number of loop peels to enhance access alignment diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 45616bc74f5a..a2be8bb1ad2e 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -191,6 +191,15 @@ enum fp_contract_mode { FP_CONTRACT_FAST = 2 }; +/* Vectorizer cost-model. 
*/ +enum vect_cost_model { + VECT_COST_MODEL_UNLIMITED = 0, + VECT_COST_MODEL_CHEAP = 1, + VECT_COST_MODEL_DYNAMIC = 2, + VECT_COST_MODEL_DEFAULT = 3 +}; + + /* Different instrumentation modes. */ enum sanitize_code { /* AddressSanitizer. */ diff --git a/gcc/opts.c b/gcc/opts.c index 944834c44386..b1fadb162328 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -486,6 +486,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 }, @@ -500,7 +501,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC }, { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, @@ -825,6 +826,17 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, } } + /* Tune vectorization-related parameters according to the cost model. */ + if (opts->x_flag_vect_cost_model == VECT_COST_MODEL_CHEAP) + { + maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, + 6, opts->x_param_values, opts_set->x_param_values); + maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS, + 0, opts->x_param_values, opts_set->x_param_values); + maybe_set_param_value (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT, + 0, opts->x_param_values, opts_set->x_param_values); + } + /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion is disabled. 
*/ if ((!opts->x_flag_tree_loop_vectorize && !opts->x_flag_tree_slp_vectorize) @@ -1669,7 +1681,7 @@ common_handle_option (struct gcc_options *opts, && !opts_set->x_flag_tree_vectorize) opts->x_flag_tree_slp_vectorize = value; if (!opts_set->x_flag_vect_cost_model) - opts->x_flag_vect_cost_model = value; + opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC; if (!opts_set->x_flag_tree_loop_distribute_patterns) opts->x_flag_tree_loop_distribute_patterns = value; /* Indirect call profiling should do all useful transformations diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 03db7b4da1ee..798aacf73a0f 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1057,20 +1057,17 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, unsigned *cost = (unsigned *) data; unsigned retval = 0; - if (flag_vect_cost_model) - { - tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; - int stmt_cost = default_builtin_vectorization_cost (kind, vectype, + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign); - /* Statements in an inner loop relative to the loop being - vectorized are weighted more heavily. The value here is - arbitrary and could potentially be improved with analysis. */ - if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) - count *= 50; /* FIXME. */ - - retval = (unsigned) (count * stmt_cost); - cost[where] += retval; - } + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. 
*/ + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; return retval; } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index c8cdcb85df08..b8988d9d264a 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1115,7 +1115,7 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr, *new_slot = slot; } - if (!supportable_dr_alignment && !flag_vect_cost_model) + if (!supportable_dr_alignment && unlimited_cost_model ()) slot->count += VECT_MAX_COST; } @@ -1225,7 +1225,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo, res.peel_info.dr = NULL; res.body_cost_vec = stmt_vector_for_cost(); - if (flag_vect_cost_model) + if (!unlimited_cost_model ()) { res.inside_cost = INT_MAX; res.outside_cost = INT_MAX; @@ -1454,7 +1454,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) vectorization factor. We do this automtically for cost model, since we calculate cost for every peeling option. */ - if (!flag_vect_cost_model) + if (unlimited_cost_model ()) possible_npeel_number = vf /nelements; /* Handle the aligned case. We may decide to align some other @@ -1462,7 +1462,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) if (DR_MISALIGNMENT (dr) == 0) { npeel_tmp = 0; - if (!flag_vect_cost_model) + if (unlimited_cost_model ()) possible_npeel_number++; } @@ -1795,16 +1795,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) /* (2) Versioning to force alignment. */ /* Try versioning if: - 1) flag_tree_vect_loop_version is TRUE - 2) optimize loop for speed - 3) there is at least one unsupported misaligned data ref with an unknown + 1) optimize loop for speed + 2) there is at least one unsupported misaligned data ref with an unknown misalignment, and - 4) all misaligned data refs with a known misalignment are supported, and - 5) the number of runtime alignment checks is within reason. 
*/ + 3) all misaligned data refs with a known misalignment are supported, and + 4) the number of runtime alignment checks is within reason. */ do_versioning = - flag_tree_vect_loop_version - && optimize_loop_nest_for_speed_p (loop) + optimize_loop_nest_for_speed_p (loop) && (!loop->inner); /* FORNOW */ if (do_versioning) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 072d44ef2b26..baea8b7f8275 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2680,7 +2680,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost model disabled. */ - if (!flag_vect_cost_model) + if (unlimited_cost_model ()) { dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); *ret_min_profitable_niters = 0; diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 8ed0fc58c593..b3b3abec1100 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2168,7 +2168,7 @@ vect_slp_analyze_bb_1 (basic_block bb) } /* Cost model: check if the vectorization is worthwhile. */ - if (flag_vect_cost_model + if (!unlimited_cost_model () && !vect_bb_vectorization_profitable_p (bb_vinfo)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 9c7753e2eaf4..7cb8f4d9d06a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -880,6 +880,14 @@ known_alignment_for_access_p (struct data_reference *data_ref_info) return (DR_MISALIGNMENT (data_ref_info) != -1); } + +/* Return true if the vect cost model is unlimited. */ +static inline bool +unlimited_cost_model () +{ + return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED; +} + /* Source location */ extern LOC vect_location;