This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][2/n] 2nd try: Re-organize -fvect-cost-model, enable basic vectorization at -O2
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 16 May 2013 14:04:12 +0200 (CEST)
- Subject: [PATCH][2/n] 2nd try: Re-organize -fvect-cost-model, enable basic vectorization at -O2
The following is a revision of the original patch in that it
also constrains versioning for aliasing with the cheap cost model
(formerly -ftree-vect-loop-version only constrained versioning for
alignment). It also changes vectorizer-related gates to rely
on global_options_set instead of playing magic games with
flag values (which won't extend to vectorization being enabled by
default at -O2 with a different default cost model). It also
disables if-conversion at -O2, as the way it currently works
changes non-vectorized code as well.
I've done some measurements with that (also with the previous
patch, not quoted here), plus [3/n] that just changes -ftree-vectorize
to be enabled at -O2.
The numbers below are thus for SPEC CPU 2006, -O2 -fno-tree-vectorize
vs. -O2, on x86_64 SandyBridge (but without any extra -m flag),
basically checking what differences we can expect in distribution builds.
> du -k */exe/*
1212 400.perlbench/exe/perlbench_base.amd64-m64-gcc42-nn
1216 400.perlbench/exe/perlbench_peak.amd64-m64-gcc42-nn
76 401.bzip2/exe/bzip2_base.amd64-m64-gcc42-nn
80 401.bzip2/exe/bzip2_peak.amd64-m64-gcc42-nn
3632 403.gcc/exe/gcc_base.amd64-m64-gcc42-nn
3640 403.gcc/exe/gcc_peak.amd64-m64-gcc42-nn
48 410.bwaves/exe/bwaves_base.amd64-m64-gcc42-nn
60 410.bwaves/exe/bwaves_peak.amd64-m64-gcc42-nn
8952 416.gamess/exe/gamess_base.amd64-m64-gcc42-nn
9556 416.gamess/exe/gamess_peak.amd64-m64-gcc42-nn
28 429.mcf/exe/mcf_base.amd64-m64-gcc42-nn
28 429.mcf/exe/mcf_peak.amd64-m64-gcc42-nn
148 433.milc/exe/milc_base.amd64-m64-gcc42-nn
148 433.milc/exe/milc_peak.amd64-m64-gcc42-nn
280 434.zeusmp/exe/zeusmp_base.amd64-m64-gcc42-nn
348 434.zeusmp/exe/zeusmp_peak.amd64-m64-gcc42-nn
1096 435.gromacs/exe/gromacs_base.amd64-m64-gcc42-nn
1104 435.gromacs/exe/gromacs_peak.amd64-m64-gcc42-nn
804 436.cactusADM/exe/cactusADM_base.amd64-m64-gcc42-nn
932 436.cactusADM/exe/cactusADM_peak.amd64-m64-gcc42-nn
132 437.leslie3d/exe/leslie3d_base.amd64-m64-gcc42-nn
204 437.leslie3d/exe/leslie3d_peak.amd64-m64-gcc42-nn
340 444.namd/exe/namd_base.amd64-m64-gcc42-nn
348 444.namd/exe/namd_peak.amd64-m64-gcc42-nn
3956 445.gobmk/exe/gobmk_base.amd64-m64-gcc42-nn
3960 445.gobmk/exe/gobmk_peak.amd64-m64-gcc42-nn
4012 447.dealII/exe/dealII_base.amd64-m64-gcc42-nn
4136 447.dealII/exe/dealII_peak.amd64-m64-gcc42-nn
468 450.soplex/exe/soplex_base.amd64-m64-gcc42-nn
476 450.soplex/exe/soplex_peak.amd64-m64-gcc42-nn
1152 453.povray/exe/povray_base.amd64-m64-gcc42-nn
1156 453.povray/exe/povray_peak.amd64-m64-gcc42-nn
1796 454.calculix/exe/calculix_base.amd64-m64-gcc42-nn
1824 454.calculix/exe/calculix_peak.amd64-m64-gcc42-nn
324 456.hmmer/exe/hmmer_base.amd64-m64-gcc42-nn
328 456.hmmer/exe/hmmer_peak.amd64-m64-gcc42-nn
160 458.sjeng/exe/sjeng_base.amd64-m64-gcc42-nn
164 458.sjeng/exe/sjeng_peak.amd64-m64-gcc42-nn
432 459.GemsFDTD/exe/GemsFDTD_base.amd64-m64-gcc42-nn
576 459.GemsFDTD/exe/GemsFDTD_peak.amd64-m64-gcc42-nn
68 462.libquantum/exe/libquantum_base.amd64-m64-gcc42-nn
68 462.libquantum/exe/libquantum_peak.amd64-m64-gcc42-nn
572 464.h264ref/exe/h264ref_base.amd64-m64-gcc42-nn
576 464.h264ref/exe/h264ref_peak.amd64-m64-gcc42-nn
4488 465.tonto/exe/tonto_base.amd64-m64-gcc42-nn
4580 465.tonto/exe/tonto_peak.amd64-m64-gcc42-nn
28 470.lbm/exe/lbm_base.amd64-m64-gcc42-nn
28 470.lbm/exe/lbm_peak.amd64-m64-gcc42-nn
784 471.omnetpp/exe/omnetpp_base.amd64-m64-gcc42-nn
784 471.omnetpp/exe/omnetpp_peak.amd64-m64-gcc42-nn
60 473.astar/exe/astar_base.amd64-m64-gcc42-nn
64 473.astar/exe/astar_peak.amd64-m64-gcc42-nn
4460 481.wrf/exe/wrf_base.amd64-m64-gcc42-nn
5332 481.wrf/exe/wrf_peak.amd64-m64-gcc42-nn
208 482.sphinx3/exe/sphinx_livepretend_base.amd64-m64-gcc42-nn
212 482.sphinx3/exe/sphinx_livepretend_peak.amd64-m64-gcc42-nn
5660 483.xalancbmk/exe/Xalan_base.amd64-m64-gcc42-nn
5668 483.xalancbmk/exe/Xalan_peak.amd64-m64-gcc42-nn
12 998.specrand/exe/specrand_base.amd64-m64-gcc42-nn
12 998.specrand/exe/specrand_peak.amd64-m64-gcc42-nn
12 999.specrand/exe/specrand_base.amd64-m64-gcc42-nn
12 999.specrand/exe/specrand_peak.amd64-m64-gcc42-nn
(serial make)
> grep 'Elapsed compile ' /abuild/rguenther/spec2k6/result/CPU2006.497.log
Elapsed compile for '400.perlbench': 00:00:28 (28)
Elapsed compile for '401.bzip2': 00:00:05 (5)
Elapsed compile for '403.gcc': 00:01:07 (67)
Elapsed compile for '429.mcf': 00:00:03 (3)
Elapsed compile for '445.gobmk': 00:00:21 (21)
Elapsed compile for '456.hmmer': 00:00:11 (11)
Elapsed compile for '458.sjeng': 00:00:06 (6)
Elapsed compile for '462.libquantum': 00:00:05 (5)
Elapsed compile for '464.h264ref': 00:00:15 (15)
Elapsed compile for '471.omnetpp': 00:00:29 (29)
Elapsed compile for '473.astar': 00:00:04 (4)
Elapsed compile for '483.xalancbmk': 00:03:02 (182)
Elapsed compile for '999.specrand': 00:00:03 (3)
Elapsed compile for '410.bwaves': 00:00:04 (4)
Elapsed compile for '416.gamess': 00:03:14 (194)
Elapsed compile for '433.milc': 00:00:07 (7)
Elapsed compile for '434.zeusmp': 00:00:11 (11)
Elapsed compile for '435.gromacs': 00:00:25 (25)
Elapsed compile for '436.cactusADM': 00:00:23 (23)
Elapsed compile for '437.leslie3d': 00:00:06 (6)
Elapsed compile for '444.namd': 00:00:10 (10)
Elapsed compile for '447.dealII': 00:02:00 (120)
Elapsed compile for '450.soplex': 00:00:24 (24)
Elapsed compile for '453.povray': 00:00:28 (28)
Elapsed compile for '454.calculix': 00:00:47 (47)
Elapsed compile for '459.GemsFDTD': 00:00:15 (15)
Elapsed compile for '465.tonto': 00:01:41 (101)
Elapsed compile for '470.lbm': 00:00:03 (3)
Elapsed compile for '481.wrf': 00:02:08 (128)
Elapsed compile for '482.sphinx3': 00:00:08 (8)
Elapsed compile for '998.specrand': 00:00:03 (3)
Elapsed compile for '400.perlbench': 00:00:28 (28)
Elapsed compile for '401.bzip2': 00:00:05 (5)
Elapsed compile for '403.gcc': 00:01:07 (67)
Elapsed compile for '429.mcf': 00:00:04 (4)
Elapsed compile for '445.gobmk': 00:00:22 (22)
Elapsed compile for '456.hmmer': 00:00:11 (11)
Elapsed compile for '458.sjeng': 00:00:06 (6)
Elapsed compile for '462.libquantum': 00:00:04 (4)
Elapsed compile for '464.h264ref': 00:00:15 (15)
Elapsed compile for '471.omnetpp': 00:00:30 (30)
Elapsed compile for '473.astar': 00:00:04 (4)
Elapsed compile for '483.xalancbmk': 00:03:02 (182)
Elapsed compile for '999.specrand': 00:00:03 (3)
Elapsed compile for '410.bwaves': 00:00:04 (4)
Elapsed compile for '416.gamess': 00:03:31 (211)
Elapsed compile for '433.milc': 00:00:07 (7)
Elapsed compile for '434.zeusmp': 00:00:13 (13)
Elapsed compile for '435.gromacs': 00:00:26 (26)
Elapsed compile for '436.cactusADM': 00:00:27 (27)
Elapsed compile for '437.leslie3d': 00:00:08 (8)
Elapsed compile for '444.namd': 00:00:11 (11)
Elapsed compile for '447.dealII': 00:02:03 (123)
Elapsed compile for '450.soplex': 00:00:24 (24)
Elapsed compile for '453.povray': 00:00:28 (28)
Elapsed compile for '454.calculix': 00:00:48 (48)
Elapsed compile for '459.GemsFDTD': 00:00:21 (21)
Elapsed compile for '465.tonto': 00:01:42 (102)
Elapsed compile for '470.lbm': 00:00:04 (4)
Elapsed compile for '481.wrf': 00:02:31 (151)
Elapsed compile for '482.sphinx3': 00:00:07 (7)
Elapsed compile for '998.specrand': 00:00:03 (3)
> grep VECTORIZED /abuild/rguenther/spec2k6/result/CPU2006.497.log | wc -l
9374
> grep 'VECTORIZED\|Compile for.*started' /abuild/rguenther/spec2k6/result/CPU2006.497.log | sed -e 's/^.*VECTORIZED.*$/VECTORIZED/' | uniq -c
1 Compile for '400.perlbench' started at: Wed May 15 17:22:50 2013
(1368631370)
24 VECTORIZED
1 Compile for '401.bzip2' started at: Wed May 15 17:23:18 2013
(1368631398)
19 VECTORIZED
1 Compile for '403.gcc' started at: Wed May 15 17:23:24 2013
(1368631404)
91 VECTORIZED
1 Compile for '429.mcf' started at: Wed May 15 17:24:31 2013
(1368631471)
1 Compile for '445.gobmk' started at: Wed May 15 17:24:35 2013
(1368631475)
44 VECTORIZED
1 Compile for '456.hmmer' started at: Wed May 15 17:24:57 2013
(1368631497)
40 VECTORIZED
1 Compile for '458.sjeng' started at: Wed May 15 17:25:08 2013
(1368631508)
5 VECTORIZED
1 Compile for '462.libquantum' started at: Wed May 15 17:25:14
2013 (1368631514)
2 VECTORIZED
1 Compile for '464.h264ref' started at: Wed May 15 17:25:19 2013
(1368631519)
97 VECTORIZED
1 Compile for '471.omnetpp' started at: Wed May 15 17:25:34 2013
(1368631534)
4 VECTORIZED
1 Compile for '473.astar' started at: Wed May 15 17:26:04 2013
(1368631564)
5 VECTORIZED
1 Compile for '483.xalancbmk' started at: Wed May 15 17:26:10 2013
(1368631570)
76 VECTORIZED
1 Compile for '999.specrand' started at: Wed May 15 17:29:12 2013
(1368631752)
1 Compile for '410.bwaves' started at: Wed May 15 17:29:16 2013
(1368631756)
7 VECTORIZED
1 Compile for '416.gamess' started at: Wed May 15 17:29:22 2013
(1368631762)
2764 VECTORIZED
1 Compile for '433.milc' started at: Wed May 15 17:32:53 2013
(1368631973)
9 VECTORIZED
1 Compile for '434.zeusmp' started at: Wed May 15 17:33:00 2013
(1368631980)
111 VECTORIZED
1 Compile for '435.gromacs' started at: Wed May 15 17:33:13 2013
(1368631993)
72 VECTORIZED
1 Compile for '436.cactusADM' started at: Wed May 15 17:33:39 2013
(1368632019)
243 VECTORIZED
1 Compile for '437.leslie3d' started at: Wed May 15 17:34:06 2013
(1368632046)
179 VECTORIZED
1 Compile for '444.namd' started at: Wed May 15 17:34:14 2013
(1368632054)
22 VECTORIZED
1 Compile for '447.dealII' started at: Wed May 15 17:34:26 2013
(1368632066)
2152 VECTORIZED
1 Compile for '450.soplex' started at: Wed May 15 17:36:30 2013
(1368632190)
25 VECTORIZED
1 Compile for '453.povray' started at: Wed May 15 17:36:54 2013
(1368632214)
43 VECTORIZED
1 Compile for '454.calculix' started at: Wed May 15 17:37:23 2013
(1368632243)
358 VECTORIZED
1 Compile for '459.GemsFDTD' started at: Wed May 15 17:38:11 2013
(1368632291)
312 VECTORIZED
1 Compile for '465.tonto' started at: Wed May 15 17:38:33 2013
(1368632313)
439 VECTORIZED
1 Compile for '470.lbm' started at: Wed May 15 17:40:15 2013
(1368632415)
1 Compile for '481.wrf' started at: Wed May 15 17:40:20 2013
(1368632420)
2210 VECTORIZED
1 Compile for '482.sphinx3' started at: Wed May 15 17:42:52 2013
(1368632572)
21 VECTORIZED
1 Compile for '998.specrand' started at: Wed May 15 17:43:00 2013
(1368632580)
                                  Estimated                       Estimated
                 Base     Base       Base        Peak     Peak       Peak
Benchmarks       Ref.   Run Time     Ratio       Ref.   Run Time     Ratio
-------------- ------  ---------  ---------    ------  ---------  ---------
400.perlbench    9770        310       31.5 *    9770        308       31.7 *
401.bzip2        9650        460       21.0 *    9650        459       21.0 *
403.gcc          8050        304       26.4 *    8050        296       27.2 *
429.mcf          9120        235       38.7 *    9120        235       38.9 *
445.gobmk       10490        398       26.3 *   10490        395       26.6 *
456.hmmer        9330        390       23.9 *    9330        372       25.1 *
458.sjeng       12100        467       25.9 *   12100        462       26.2 *
462.libquantum  20720        376       55.1 *   20720        377       54.9 *
464.h264ref     22130        559       39.6 *   22130        557       39.7 *
471.omnetpp      6250        273       22.9 *    6250        267       23.4 *
473.astar        7020        391       18.0 *    7020        399       17.6 *
483.xalancbmk    6900        211       32.6 *    6900        216       32.0 *
 Est. SPECint_base2006                   --
 Est. SPECint2006                                                        --
                                  Estimated                       Estimated
                 Base     Base       Base        Peak     Peak       Peak
Benchmarks       Ref.   Run Time     Ratio       Ref.   Run Time     Ratio
-------------- ------  ---------  ---------    ------  ---------  ---------
410.bwaves      13590        332       41.0 *   13590        331       41.0 *
416.gamess                                 NR                              NR
433.milc         9180        487       18.9 *    9180        491       18.7 *
434.zeusmp       9100        461       19.7 *    9100        441       20.6 *
435.gromacs      7140        475       15.0 *    7140        475       15.0 *
436.cactusADM   11950        891       13.4 *   11950        579       20.7 *
437.leslie3d     9400        391       24.1 *    9400        320       29.4 *
444.namd         8020        393       20.4 *    8020        393       20.4 *
447.dealII      11440        310       36.9 *   11440        339       33.8 *
450.soplex       8340        212       39.3 *    8340        221       37.7 *
453.povray       5320        182       29.2 *    5320        182       29.2 *
454.calculix     8250        706       11.7 *    8250        741       11.1 *
459.GemsFDTD    10610        380       27.9 *   10610        366       29.0 *
465.tonto        9840        362       27.2 *    9840        361       27.3 *
470.lbm         13740        269       51.1 *   13740        269       51.2 *
481.wrf         11170        469       23.8 *   11170        335       33.3 *
482.sphinx3     19490        529       36.8 *   19490        531       36.7 *
 Est. SPECfp_base2006                    --
 Est. SPECfp2006                                                         --
Compile time correlates somewhat with the number of vectorized loops
(up to 2 extra loops are created per vectorized loop); bigger offenders
would need to be investigated for inherent vectorizer slowness (I didn't
see anything obvious there looking at polyhedron). Runtime effects
are in the noise for SPEC INT. For SPEC FP there are significant
improvements for 436.cactusADM, 437.leslie3d and 481.wrf, but slowdowns
for 447.dealII, 450.soplex and 454.calculix (the observed slowdowns are
bigger when you compare -O2 vs. -O2 -ftree-vectorize unpatched).
The slowdowns hint at cost-model issues and/or cost-model checking
overhead (I have patches and further ideas here).
Bootstrapped / tested on x86_64-unknown-linux-gnu.
Comments? Especially on the new -fvect-cost-model=
interface (I can see that enabling vectorization at -O2 by default
is non-obvious)?
I'm currently running another comparison only vectorizing loops
where the cost model check can be performed at compile-time.
Further restricting vectorization to loops where no prologue/epilogue
loops are necessary would also be possible, but that leaves only
trivial loops with constant bounds to be vectorized.
Thanks,
Richard.
2013-05-14 Richard Biener <rguenther@suse.de>
common/
* config/i386/i386-common.c (ix86_option_init_struct): Do not
enable OPT_fvect_cost_model.
* common.opt (fvect-cost-model=): New option.
(vect_cost_model): New enum and values.
(fvect-cost-model): Alias to -fvect-cost-model=dynamic.
(fno-vect-cost-model): Alias to -fvect-cost-model=unlimited.
(ftree-vect-loop-version): Ignore.
* opts.c (default_options_table): Do not set OPT_fvect_cost_model.
(common_handle_option): Likewise.
* flag-types.h (enum vect_cost_model): New enum.
* doc/invoke.texi (ftree-vect-loop-version): Remove.
(fvect-cost-model): Adjust documentation.
* targhooks.c (default_add_stmt_cost): Do not check
flag_vect_cost_model.
* tree-vectorizer.h (struct _loop_vec_info): Add cost model field.
(struct _bb_vec_info): Likewise.
(vectorizer_cost_model): Declare.
* tree-vect-data-refs.c (vect_peeling_hash_insert): Check the
loops cost-model flag.
(vect_peeling_hash_choose_best_peeling): Likewise.
(vect_enhance_data_refs_alignment): Likewise. Do not check
flag_tree_vect_loop_version but check the cost model.
(vect_mark_for_runtime_alias_test): Do not add runtime alias checks
for the cheap cost model.
* tree-vect-loop.c (vect_analyze_loop): Initialize the loops
cost model flag.
(vect_estimate_min_profitable_iters): Use the loops cost model flag.
* tree-vect-slp.c (vect_slp_analyze_bb_1): Initialize and use the BBs
cost model flag.
* tree-vectorizer.c (gate_vect_slp): Enable SLP via the vectorizer
only at -O3.
(vectorizer_cost_model): Return the active cost model.
* Makefile.in (tree-if-conv.o): Depend on $(TREE_VECTORIZER_H).
* tree-if-conv.c: Include tree-vectorizer.h.
(gate_tree_if_conversion): Enable if-conversion via the vectorizer
only at -O3.
Index: trunk/gcc/common.opt
===================================================================
*** trunk.orig/gcc/common.opt 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/common.opt 2013-05-15 14:43:01.046249528 +0200
*************** EnumValue
*** 1304,1310 ****
Enum(stack_reuse_level) String(none) Value(SR_NONE)
ftree-loop-if-convert
! Common Report Var(flag_tree_loop_if_convert) Init(-1) Optimization
Convert conditional jumps in innermost loops to branchless equivalents
ftree-loop-if-convert-stores
--- 1304,1310 ----
Enum(stack_reuse_level) String(none) Value(SR_NONE)
ftree-loop-if-convert
! Common Report Var(flag_tree_loop_if_convert) Optimization
Convert conditional jumps in innermost loops to branchless equivalents
ftree-loop-if-convert-stores
*************** Common RejectNegative Joined UInteger Va
*** 2267,2282 ****
-ftree-vectorizer-verbose=<number> This switch is deprecated. Use -fopt-info instead.
ftree-slp-vectorize
! Common Report Var(flag_tree_slp_vectorize) Init(2) Optimization
Enable basic block vectorization (SLP) on trees
fvect-cost-model
! Common Report Var(flag_vect_cost_model) Optimization
! Enable use of cost model in vectorization
ftree-vect-loop-version
! Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization
! Enable loop versioning when doing loop vectorization on trees
ftree-scev-cprop
Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
--- 2267,2302 ----
-ftree-vectorizer-verbose=<number> This switch is deprecated. Use -fopt-info instead.
ftree-slp-vectorize
! Common Report Var(flag_tree_slp_vectorize) Optimization
Enable basic block vectorization (SLP) on trees
+ fvect-cost-model=
+ Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT)
+ Specifies the cost model for vectorization
+
+ Enum
+ Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
+
+ EnumValue
+ Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED)
+
+ EnumValue
+ Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC)
+
+ EnumValue
+ Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
+
fvect-cost-model
! Common RejectNegative Alias(fvect-cost-model=,dynamic)
! Enables the dynamic vectorizer cost model. Preserved for backward compatibility.
!
! fno-vect-cost-model
! Common RejectNegative Alias(fvect-cost-model=,unlimited)
! Enables the unlimited vectorizer cost model. Preserved for backward compatibility.
ftree-vect-loop-version
! Common Ignore
! Does nothing. Preserved for backward compatibility.
ftree-scev-cprop
Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
Index: trunk/gcc/opts.c
===================================================================
*** trunk.orig/gcc/opts.c 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/opts.c 2013-05-15 14:47:03.820005049 +0200
*************** static const struct default_options defa
*** 498,504 ****
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_vectorize, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
--- 498,503 ----
*************** common_handle_option (struct gcc_options
*** 1597,1604 ****
opts->x_flag_gcse_after_reload = value;
if (!opts_set->x_flag_tree_vectorize)
opts->x_flag_tree_vectorize = value;
- if (!opts_set->x_flag_vect_cost_model)
- opts->x_flag_vect_cost_model = value;
if (!opts_set->x_flag_tree_loop_distribute_patterns)
opts->x_flag_tree_loop_distribute_patterns = value;
break;
--- 1596,1601 ----
Index: trunk/gcc/common/config/i386/i386-common.c
===================================================================
*** trunk.orig/gcc/common/config/i386/i386-common.c 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/common/config/i386/i386-common.c 2013-05-15 13:24:32.139871020 +0200
*************** ix86_option_init_struct (struct gcc_opti
*** 729,735 ****
opts->x_flag_pcc_struct_return = 2;
opts->x_flag_asynchronous_unwind_tables = 2;
- opts->x_flag_vect_cost_model = 1;
}
/* On the x86 -fsplit-stack and -fstack-protector both use the same
--- 729,734 ----
Index: trunk/gcc/flag-types.h
===================================================================
*** trunk.orig/gcc/flag-types.h 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/flag-types.h 2013-05-15 13:24:32.139871020 +0200
*************** enum fp_contract_mode {
*** 191,194 ****
--- 191,202 ----
FP_CONTRACT_FAST = 2
};
+ /* Vectorizer cost-model. */
+ enum vect_cost_model {
+ VECT_COST_MODEL_UNLIMITED = 0,
+ VECT_COST_MODEL_CHEAP = 1,
+ VECT_COST_MODEL_DYNAMIC = 2,
+ VECT_COST_MODEL_DEFAULT = 3
+ };
+
#endif /* ! GCC_FLAG_TYPES_H */
Index: trunk/gcc/targhooks.c
===================================================================
*** trunk.orig/gcc/targhooks.c 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/targhooks.c 2013-05-15 13:24:32.140871032 +0200
*************** default_add_stmt_cost (void *data, int c
*** 1050,1070 ****
{
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
! if (flag_vect_cost_model)
! {
! tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
! int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
! misalign);
! /* Statements in an inner loop relative to the loop being
! vectorized are weighted more heavily. The value here is
! arbitrary and could potentially be improved with analysis. */
! if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
! count *= 50; /* FIXME. */
!
! retval = (unsigned) (count * stmt_cost);
! cost[where] += retval;
! }
return retval;
}
--- 1050,1066 ----
{
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
+ misalign);
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ arbitrary and could potentially be improved with analysis. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ count *= 50; /* FIXME. */
! retval = (unsigned) (count * stmt_cost);
! cost[where] += retval;
return retval;
}
Index: trunk/gcc/tree-vect-data-refs.c
===================================================================
*** trunk.orig/gcc/tree-vect-data-refs.c 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/tree-vect-data-refs.c 2013-05-15 13:24:32.142871055 +0200
*************** vect_mark_for_runtime_alias_test (ddr_p
*** 173,179 ****
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
! if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
return false;
if (dump_enabled_p ())
--- 173,180 ----
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
! if (loop_vinfo->cost_model == VECT_COST_MODEL_CHEAP
! || (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
return false;
if (dump_enabled_p ())
*************** vect_peeling_hash_insert (loop_vec_info
*** 1087,1093 ****
*new_slot = slot;
}
! if (!supportable_dr_alignment && !flag_vect_cost_model)
slot->count += VECT_MAX_COST;
}
--- 1088,1095 ----
*new_slot = slot;
}
! if (!supportable_dr_alignment
! && loop_vinfo->cost_model == VECT_COST_MODEL_UNLIMITED)
slot->count += VECT_MAX_COST;
}
*************** vect_peeling_hash_choose_best_peeling (l
*** 1197,1203 ****
res.peel_info.dr = NULL;
res.body_cost_vec = stmt_vector_for_cost();
! if (flag_vect_cost_model)
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
--- 1199,1205 ----
res.peel_info.dr = NULL;
res.body_cost_vec = stmt_vector_for_cost();
! if (loop_vinfo->cost_model != VECT_COST_MODEL_UNLIMITED)
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
*************** vect_enhance_data_refs_alignment (loop_v
*** 1426,1432 ****
vectorization factor.
We do this automtically for cost model, since we calculate cost
for every peeling option. */
! if (!flag_vect_cost_model)
possible_npeel_number = vf /nelements;
/* Handle the aligned case. We may decide to align some other
--- 1428,1434 ----
vectorization factor.
We do this automtically for cost model, since we calculate cost
for every peeling option. */
! if (loop_vinfo->cost_model == VECT_COST_MODEL_UNLIMITED)
possible_npeel_number = vf /nelements;
/* Handle the aligned case. We may decide to align some other
*************** vect_enhance_data_refs_alignment (loop_v
*** 1434,1440 ****
if (DR_MISALIGNMENT (dr) == 0)
{
npeel_tmp = 0;
! if (!flag_vect_cost_model)
possible_npeel_number++;
}
--- 1436,1442 ----
if (DR_MISALIGNMENT (dr) == 0)
{
npeel_tmp = 0;
! if (loop_vinfo->cost_model == VECT_COST_MODEL_UNLIMITED)
possible_npeel_number++;
}
*************** vect_enhance_data_refs_alignment (loop_v
*** 1743,1749 ****
/* (2) Versioning to force alignment. */
/* Try versioning if:
! 1) flag_tree_vect_loop_version is TRUE
2) optimize loop for speed
3) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
--- 1745,1751 ----
/* (2) Versioning to force alignment. */
/* Try versioning if:
! 1) cost model is not VECT_COST_MODEL_CHEAP
2) optimize loop for speed
3) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
*************** vect_enhance_data_refs_alignment (loop_v
*** 1751,1757 ****
5) the number of runtime alignment checks is within reason. */
do_versioning =
! flag_tree_vect_loop_version
&& optimize_loop_nest_for_speed_p (loop)
&& (!loop->inner); /* FORNOW */
--- 1753,1759 ----
5) the number of runtime alignment checks is within reason. */
do_versioning =
! loop_vinfo->cost_model != VECT_COST_MODEL_CHEAP
&& optimize_loop_nest_for_speed_p (loop)
&& (!loop->inner); /* FORNOW */
Index: trunk/gcc/tree-vect-loop.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop.c 2013-05-15 13:22:16.000000000 +0200
--- trunk/gcc/tree-vect-loop.c 2013-05-15 13:24:32.144871077 +0200
*************** vect_analyze_loop (struct loop *loop)
*** 1761,1766 ****
--- 1761,1768 ----
return NULL;
}
+ loop_vinfo->cost_model = vectorizer_cost_model ();
+
if (vect_analyze_loop_2 (loop_vinfo))
{
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
*************** vect_estimate_min_profitable_iters (loop
*** 2634,2640 ****
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
! if (!flag_vect_cost_model)
{
dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.");
*ret_min_profitable_niters = 0;
--- 2636,2642 ----
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
! if (loop_vinfo->cost_model == VECT_COST_MODEL_UNLIMITED)
{
dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.");
*ret_min_profitable_niters = 0;
Index: trunk/gcc/tree-vect-slp.c
===================================================================
*** trunk.orig/gcc/tree-vect-slp.c 2013-05-15 13:22:16.000000000 +0200
--- trunk/gcc/tree-vect-slp.c 2013-05-15 13:24:32.145871088 +0200
*************** vect_slp_analyze_bb_1 (basic_block bb)
*** 1992,1997 ****
--- 1992,2001 ----
if (!bb_vinfo)
return NULL;
+ /* For BB vectorization it only matters whether the cost model is
+ enabled or disabled. */
+ bb_vinfo->cost_model = vectorizer_cost_model ();
+
if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
{
if (dump_enabled_p ())
*************** vect_slp_analyze_bb_1 (basic_block bb)
*** 2093,2099 ****
}
/* Cost model: check if the vectorization is worthwhile. */
! if (flag_vect_cost_model
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
{
if (dump_enabled_p ())
--- 2097,2103 ----
}
/* Cost model: check if the vectorization is worthwhile. */
! if (bb_vinfo->cost_model != VECT_COST_MODEL_UNLIMITED
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
{
if (dump_enabled_p ())
Index: trunk/gcc/tree-vectorizer.c
===================================================================
*** trunk.orig/gcc/tree-vectorizer.c 2013-05-15 13:22:16.000000000 +0200
--- trunk/gcc/tree-vectorizer.c 2013-05-15 14:33:38.359862554 +0200
*************** LOC vect_location;
*** 73,78 ****
--- 73,93 ----
/* Vector mapping GIMPLE stmt to stmt_vec_info. */
vec<vec_void_p> stmt_vec_info_vec;
+ /* Return the active vectorizer cost model. */
+
+ enum vect_cost_model
+ vectorizer_cost_model (void)
+ {
+ if (flag_vect_cost_model != VECT_COST_MODEL_DEFAULT)
+ return flag_vect_cost_model;
+ /* If -ftree-vectorize is specified explicitly or enabled by using -O3
+ then use the dynamic model, otherwise the cheap one. */
+ if (global_options_set.x_flag_tree_vectorize
+ || (flag_tree_vectorize != 0 && optimize == 3))
+ return VECT_COST_MODEL_DYNAMIC;
+ else
+ return VECT_COST_MODEL_CHEAP;
+ }
/* Function vectorize_loops.
*************** execute_vect_slp (void)
*** 191,200 ****
static bool
gate_vect_slp (void)
{
! /* Apply SLP either if the vectorizer is on and the user didn't specify
! whether to run SLP or not, or if the SLP flag was set by the user. */
! return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0)
! || flag_tree_slp_vectorize == 1);
}
struct gimple_opt_pass pass_slp_vectorize =
--- 206,220 ----
static bool
gate_vect_slp (void)
{
! /* Apply SLP either according to whether the user specified whether to
! run SLP or not, or according to whether the user specified whether
! to do vectorization or not. */
! if (global_options_set.x_flag_tree_slp_vectorize)
! return flag_tree_slp_vectorize != 0;
! if (global_options_set.x_flag_tree_vectorize)
! return flag_tree_vectorize != 0;
! /* And if vectorization was enabled by default run SLP only at -O3. */
! return flag_tree_vectorize != 0 && optimize == 3;
}
struct gimple_opt_pass pass_slp_vectorize =
Index: trunk/gcc/tree-vectorizer.h
===================================================================
*** trunk.orig/gcc/tree-vectorizer.h 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/tree-vectorizer.h 2013-05-15 13:24:32.146871099 +0200
*************** typedef struct _loop_vec_info {
*** 314,319 ****
--- 314,322 ----
fix it up. */
bool operands_swapped;
+ /* The cost model to be used for this loop. */
+ enum vect_cost_model cost_model;
+
} *loop_vec_info;
/* Access Functions. */
*************** typedef struct _bb_vec_info {
*** 391,396 ****
--- 394,402 ----
/* Cost data used by the target cost model. */
void *target_cost_data;
+ /* The cost model to be used for this BB. */
+ enum vect_cost_model cost_model;
+
} *bb_vec_info;
#define BB_VINFO_BB(B) (B)->bb
*************** void vect_pattern_recog (loop_vec_info,
*** 1010,1014 ****
--- 1016,1021 ----
/* In tree-vectorizer.c. */
unsigned vectorize_loops (void);
+ enum vect_cost_model vectorizer_cost_model (void);
#endif /* GCC_TREE_VECTORIZER_H */
Index: trunk/gcc/doc/invoke.texi
===================================================================
*** trunk.orig/gcc/doc/invoke.texi 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/doc/invoke.texi 2013-05-15 13:24:32.149871132 +0200
*************** Objective-C and Objective-C++ Dialects}.
*** 419,428 ****
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
-ftree-switch-conversion -ftree-tail-merge @gol
! -ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
! -fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol
-fwhole-program -fwpa -fuse-ld=@var{linker} -fuse-linker-plugin @gol
--param @var{name}=@var{value}
-O -O0 -O1 -O2 -O3 -Os -Ofast -Og}
--- 419,428 ----
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
-ftree-switch-conversion -ftree-tail-merge @gol
! -ftree-ter -ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
! -fvariable-expansion-in-unroller -fvect-cost-model=@var{model} -fvpt -fweb @gol
-fwhole-program -fwpa -fuse-ld=@var{linker} -fuse-linker-plugin @gol
--param @var{name}=@var{value}
-O -O0 -O1 -O2 -O3 -Os -Ofast -Og}
*************** Optimize yet more. @option{-O3} turns o
*** 6649,6655 ****
by @option{-O2} and also turns on the @option{-finline-functions},
@option{-funswitch-loops}, @option{-fpredictive-commoning},
@option{-fgcse-after-reload}, @option{-ftree-vectorize},
- @option{-fvect-cost-model},
@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options.
@item -O0
--- 6649,6654 ----
*************** optimizations designed to reduce code si
*** 6666,6672 ****
@option{-Os} disables the following optimization flags:
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
! -fprefetch-loop-arrays -ftree-vect-loop-version}
@item -Ofast
@opindex Ofast
--- 6665,6671 ----
@option{-Os} disables the following optimization flags:
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
! -fprefetch-loop-arrays}
@item -Ofast
@opindex Ofast
*************** Perform loop vectorization on trees. Thi
*** 7907,7925 ****
Perform basic block vectorization on trees. This flag is enabled by default at
@option{-O3} and when @option{-ftree-vectorize} is enabled.
! @item -ftree-vect-loop-version
! @opindex ftree-vect-loop-version
! Perform loop versioning when doing loop vectorization on trees. When a loop
! appears to be vectorizable except that data alignment or data dependence cannot
! be determined at compile time, then vectorized and non-vectorized versions of
! the loop are generated along with run-time checks for alignment or dependence
! to control which version is executed. This option is enabled by default
! except at level @option{-Os} where it is disabled.
!
! @item -fvect-cost-model
@opindex fvect-cost-model
! Enable cost model for vectorization. This option is enabled by default at
! @option{-O3}.
@item -ftree-vrp
@opindex ftree-vrp
--- 7906,7925 ----
Perform basic block vectorization on trees. This flag is enabled by default at
@option{-O3} and when @option{-ftree-vectorize} is enabled.
! @item -fvect-cost-model=@var{model}
@opindex fvect-cost-model
! Alter the cost model used for vectorization. The @var{model} argument
! should be one of @code{unlimited}, @code{dynamic} or @code{cheap}.
! With the @code{unlimited} model, the vectorized code-path is assumed
! to be profitable, while with the @code{dynamic} model a runtime check
! will guard the vectorized code-path to enable it only for iteration
! counts that will likely execute faster than when executing the original
! scalar loop. The @code{cheap} model will disable vectorization of
! loops where doing so would be cost prohibitive, for example due to
! required runtime checks for data dependence or alignment, but otherwise
! is equal to the @code{dynamic} model.
! The default cost model depends on other optimization flags and is
! either @code{dynamic} or @code{cheap}.
@item -ftree-vrp
@opindex ftree-vrp
*************** constraints. The default value is 0.
*** 9325,9337 ****
@item vect-max-version-for-alignment-checks
The maximum number of run-time checks that can be performed when
! doing loop versioning for alignment in the vectorizer. See option
! @option{-ftree-vect-loop-version} for more information.
@item vect-max-version-for-alias-checks
The maximum number of run-time checks that can be performed when
! doing loop versioning for alias in the vectorizer. See option
! @option{-ftree-vect-loop-version} for more information.
@item max-iterations-to-track
The maximum number of iterations of a loop the brute-force algorithm
--- 9325,9335 ----
@item vect-max-version-for-alignment-checks
The maximum number of run-time checks that can be performed when
! doing loop versioning for alignment in the vectorizer.
@item vect-max-version-for-alias-checks
The maximum number of run-time checks that can be performed when
! doing loop versioning for alias in the vectorizer.
@item max-iterations-to-track
The maximum number of iterations of a loop the brute-force algorithm
Index: trunk/gcc/Makefile.in
===================================================================
*** trunk.orig/gcc/Makefile.in 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/Makefile.in 2013-05-15 13:24:32.150871143 +0200
*************** tree-nested.o: tree-nested.c $(CONFIG_H)
*** 2428,2434 ****
tree-if-conv.o: tree-if-conv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_H) $(FLAGS_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) \
$(CFGLOOP_H) $(TREE_DATA_REF_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
! $(DBGCNT_H) $(GIMPLE_PRETTY_PRINT_H)
tree-iterator.o : tree-iterator.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \
coretypes.h $(GGC_H) tree-iterator.h $(GIMPLE_H) gt-tree-iterator.h
tree-dfa.o : tree-dfa.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
--- 2428,2434 ----
tree-if-conv.o: tree-if-conv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_H) $(FLAGS_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) \
$(CFGLOOP_H) $(TREE_DATA_REF_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
! $(DBGCNT_H) $(GIMPLE_PRETTY_PRINT_H) $(TREE_VECTORIZER_H)
tree-iterator.o : tree-iterator.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \
coretypes.h $(GGC_H) tree-iterator.h $(GIMPLE_H) gt-tree-iterator.h
tree-dfa.o : tree-dfa.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
Index: trunk/gcc/tree-if-conv.c
===================================================================
*** trunk.orig/gcc/tree-if-conv.c 2013-05-15 13:21:54.000000000 +0200
--- trunk/gcc/tree-if-conv.c 2013-05-15 14:42:47.816099456 +0200
*************** along with GCC; see the file COPYING3.
*** 95,100 ****
--- 95,101 ----
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "dbgcnt.h"
+ #include "tree-vectorizer.h"
/* List of basic blocks in if-conversion-suitable order. */
static basic_block *ifc_bbs;
*************** main_tree_if_conversion (void)
*** 1848,1856 ****
static bool
gate_tree_if_conversion (void)
{
! return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0)
! || flag_tree_loop_if_convert == 1
! || flag_tree_loop_if_convert_stores == 1);
}
struct gimple_opt_pass pass_if_conversion =
--- 1849,1865 ----
static bool
gate_tree_if_conversion (void)
{
! /* If the option was explicitly specified, enable the pass according
! to that. */
! if (global_options_set.x_flag_tree_loop_if_convert
! || global_options_set.x_flag_tree_loop_if_convert_stores)
! return flag_tree_loop_if_convert || flag_tree_loop_if_convert_stores;
! /* Otherwise, when vectorization was enabled/disabled explicitly,
! enable according to that. */
! if (global_options_set.x_flag_tree_vectorize)
! return flag_tree_vectorize != 0;
! /* And if vectorization was enabled by default, run only at -O3. */
! return flag_tree_vectorize != 0 && optimize == 3;
}
struct gimple_opt_pass pass_if_conversion =