Update probabilities in predict.def to match reality
Jan Hubicka
hubicka@ucw.cz
Wed Jun 8 12:35:00 GMT 2016
> On 06/08/2016 12:21 PM, Andreas Schwab wrote:
> > Jan Hubicka <hubicka@ucw.cz> writes:
> >
> >> Bootstrapped/regtested x86_64-linux, will commit it later today.
> >
> > FAIL: gcc.dg/tree-ssa/slsr-8.c scan-tree-dump-times optimized " w?\\\\* " 7
> >
> > Andreas.
> >
>
> Hi.
>
> It's caused by a different probabilities for BB 2:
>
> @@ -11,11 +11,11 @@
> ;; 3 succs { 4 }
> ;; 4 succs { 1 }
> Predictions for bb 2
> - DS theory heuristics: 78.4%
> - first match heuristics (ignored): 85.0%
> - combined heuristics: 78.4%
> - pointer (on trees) heuristics: 85.0%
> - early return (on trees) heuristics: 39.0%
> + DS theory heuristics: 66.5%
> + first match heuristics (ignored): 70.0%
> + combined heuristics: 66.5%
> + pointer (on trees) heuristics: 70.0%
> + early return (on trees) heuristics: 46.0%
I see this is because sinking is done when PARAM_SINK_FREQUENCY_THRESHOLD
is met and that is 75% which seems quite ambitious for guessed profiles
that tends to be flat. (also the code should use counts where available).
For some optimizers we have two thresholds - one for guessed profile and one
for FDO. Perhaps it would make sense to benchmark how decreasing this threshold
affect performance & code size.
What are the downsides of sinking? Increased register pressure? For non-loop
branches it is bit iffy to rely on static branch prediction to even give the
right direction of the branch. It happens in about 65% of cases (where perfect
predictor would do 85%) so we may try to come with heuristics that does not
fully rely on the profile.
We could probably fix the testcase by adding --param sink-frequency-threshold=55
Honza
>
> Which leads to a different decision made by tree-ssa-sink:
>
> +++ /tmp/sl-new/slsr-8.c.127t.sink 2016-06-08 14:07:59.747958332 +0200
> @@ -21,6 +21,16 @@
> from bb 2 to bb 3
> Sinking a3_17 = s_11(D) * 6;
> from bb 2 to bb 3
> +Sinking x2_16 = c_13(D) + _6;
> + from bb 2 to bb 5
> +Sinking _6 = -_5;
> + from bb 2 to bb 5
> +Sinking _5 = _4 * 4;
> + from bb 2 to bb 5
> +Sinking _4 = (long unsigned int) a2_15;
> + from bb 2 to bb 5
> +Sinking a2_15 = s_11(D) * 4;
> + from bb 2 to bb 5
> f (int s, int * c)
> {
> int * x3;
> @@ -46,17 +56,17 @@
> _2 = _1 * 4;
> _3 = -_2;
> x1_14 = c_13(D) + _3;
> - a2_15 = s_11(D) * 4;
> - _4 = (long unsigned int) a2_15;
> - _5 = _4 * 4;
> - _6 = -_5;
> - x2_16 = c_13(D) + _6;
> if (x1_14 != 0B)
> goto <bb 5>;
> else
> goto <bb 3>;
>
> <bb 5>:
> + a2_15 = s_11(D) * 4;
> + _4 = (long unsigned int) a2_15;
> + _5 = _4 * 4;
> + _6 = -_5;
> + x2_16 = c_13(D) + _6;
> goto <bb 4>;
>
> <bb 3>:
>
> That eventually leads to 9 occurrences of the scanned pattern. However, I'm not sure if the test-case makes
> sense any longer?
>
> Thanks,
> Martin
>
> ;; Function f (f, funcdef_no=0, decl_uid=1747, cgraph_uid=0, symbol_order=0)
>
> ;; 1 loops found
> ;;
> ;; Loop 0
> ;; header 0, latch 1
> ;; depth 0, outer -1
> ;; nodes: 0 1 2 5 3 4
> ;; 2 succs { 5 3 }
> ;; 5 succs { 4 }
> ;; 3 succs { 4 }
> ;; 4 succs { 1 }
> Sinking x3_18 = c_13(D) + _9;
> from bb 2 to bb 3
> Sinking _9 = -_8;
> from bb 2 to bb 3
> Sinking _8 = _7 * 4;
> from bb 2 to bb 3
> Sinking _7 = (long unsigned int) a3_17;
> from bb 2 to bb 3
> Sinking a3_17 = s_11(D) * 6;
> from bb 2 to bb 3
> Sinking x2_16 = c_13(D) + _6;
> from bb 2 to bb 5
> Sinking _6 = -_5;
> from bb 2 to bb 5
> Sinking _5 = _4 * 4;
> from bb 2 to bb 5
> Sinking _4 = (long unsigned int) a2_15;
> from bb 2 to bb 5
> Sinking a2_15 = s_11(D) * 4;
> from bb 2 to bb 5
> f (int s, int * c)
> {
> int * x3;
> int * x2;
> int * x1;
> int a3;
> int a2;
> int a1;
> long unsigned int _1;
> long unsigned int _2;
> sizetype _3;
> long unsigned int _4;
> long unsigned int _5;
> sizetype _6;
> long unsigned int _7;
> long unsigned int _8;
> sizetype _9;
> int * iftmp.0_10;
>
> <bb 2>:
> a1_12 = s_11(D) * 2;
> _1 = (long unsigned int) a1_12;
> _2 = _1 * 4;
> _3 = -_2;
> x1_14 = c_13(D) + _3;
> if (x1_14 != 0B)
> goto <bb 5>;
> else
> goto <bb 3>;
>
> <bb 5>:
> a2_15 = s_11(D) * 4;
> _4 = (long unsigned int) a2_15;
> _5 = _4 * 4;
> _6 = -_5;
> x2_16 = c_13(D) + _6;
> goto <bb 4>;
>
> <bb 3>:
> a3_17 = s_11(D) * 6;
> _7 = (long unsigned int) a3_17;
> _8 = _7 * 4;
> _9 = -_8;
> x3_18 = c_13(D) + _9;
>
> <bb 4>:
> # iftmp.0_10 = PHI <x2_16(5), x3_18(3)>
> return iftmp.0_10;
>
> }
>
>
More information about the Gcc-patches
mailing list