Update probabilities in predict.def to match reality

Jan Hubicka hubicka@ucw.cz
Wed Jun 8 12:35:00 GMT 2016


> On 06/08/2016 12:21 PM, Andreas Schwab wrote:
> > Jan Hubicka <hubicka@ucw.cz> writes:
> > 
> >> Bootstrapped/regtested x86_64-linux, will commit it later today.
> > 
> > FAIL: gcc.dg/tree-ssa/slsr-8.c scan-tree-dump-times optimized " w?\\\\* " 7
> > 
> > Andreas.
> > 
> 
> Hi.
> 
> It's caused by a different probabilities for BB 2:
> 
> @@ -11,11 +11,11 @@
>  ;; 3 succs { 4 }
>  ;; 4 succs { 1 }
>  Predictions for bb 2
> -  DS theory heuristics: 78.4%
> -  first match heuristics (ignored): 85.0%
> -  combined heuristics: 78.4%
> -  pointer (on trees) heuristics: 85.0%
> -  early return (on trees) heuristics: 39.0%
> +  DS theory heuristics: 66.5%
> +  first match heuristics (ignored): 70.0%
> +  combined heuristics: 66.5%
> +  pointer (on trees) heuristics: 70.0%
> +  early return (on trees) heuristics: 46.0%

I see this is because sinking is done when PARAM_SINK_FREQUENCY_THRESHOLD
is met and that is 75% which seems quite ambitious for guessed profiles
that tends to be flat.  (also the code should use counts where available).
For some optimizers we have two thresholds - one for guessed profile and one
for FDO. Perhaps it would make sense to benchmark how decreasing this threshold
affect performance & code size.

What are the downsides of sinking? Increased register pressure? For non-loop
branches it is bit iffy to rely on static branch prediction to even give the
right direction of the branch. It happens in about 65% of cases (where perfect
predictor would do 85%) so we may try to come with heuristics that does not
fully rely on the profile.

We could probably fix the testcase by adding --param sink-frequency-threshold=55

Honza
> 
> Which leads to a different decision made by tree-ssa-sink:
> 
> +++ /tmp/sl-new/slsr-8.c.127t.sink	2016-06-08 14:07:59.747958332 +0200
> @@ -21,6 +21,16 @@
>   from bb 2 to bb 3
>  Sinking a3_17 = s_11(D) * 6;
>   from bb 2 to bb 3
> +Sinking x2_16 = c_13(D) + _6;
> + from bb 2 to bb 5
> +Sinking _6 = -_5;
> + from bb 2 to bb 5
> +Sinking _5 = _4 * 4;
> + from bb 2 to bb 5
> +Sinking _4 = (long unsigned int) a2_15;
> + from bb 2 to bb 5
> +Sinking a2_15 = s_11(D) * 4;
> + from bb 2 to bb 5
>  f (int s, int * c)
>  {
>    int * x3;
> @@ -46,17 +56,17 @@
>    _2 = _1 * 4;
>    _3 = -_2;
>    x1_14 = c_13(D) + _3;
> -  a2_15 = s_11(D) * 4;
> -  _4 = (long unsigned int) a2_15;
> -  _5 = _4 * 4;
> -  _6 = -_5;
> -  x2_16 = c_13(D) + _6;
>    if (x1_14 != 0B)
>      goto <bb 5>;
>    else
>      goto <bb 3>;
>  
>    <bb 5>:
> +  a2_15 = s_11(D) * 4;
> +  _4 = (long unsigned int) a2_15;
> +  _5 = _4 * 4;
> +  _6 = -_5;
> +  x2_16 = c_13(D) + _6;
>    goto <bb 4>;
>  
>    <bb 3>:
> 
> That eventually leads to 9 occurrences of the scanned pattern. However, I'm not sure if the test-case makes
> sense any longer?
> 
> Thanks,
> Martin

> 
> ;; Function f (f, funcdef_no=0, decl_uid=1747, cgraph_uid=0, symbol_order=0)
> 
> ;; 1 loops found
> ;;
> ;; Loop 0
> ;;  header 0, latch 1
> ;;  depth 0, outer -1
> ;;  nodes: 0 1 2 5 3 4
> ;; 2 succs { 5 3 }
> ;; 5 succs { 4 }
> ;; 3 succs { 4 }
> ;; 4 succs { 1 }
> Sinking x3_18 = c_13(D) + _9;
>  from bb 2 to bb 3
> Sinking _9 = -_8;
>  from bb 2 to bb 3
> Sinking _8 = _7 * 4;
>  from bb 2 to bb 3
> Sinking _7 = (long unsigned int) a3_17;
>  from bb 2 to bb 3
> Sinking a3_17 = s_11(D) * 6;
>  from bb 2 to bb 3
> Sinking x2_16 = c_13(D) + _6;
>  from bb 2 to bb 5
> Sinking _6 = -_5;
>  from bb 2 to bb 5
> Sinking _5 = _4 * 4;
>  from bb 2 to bb 5
> Sinking _4 = (long unsigned int) a2_15;
>  from bb 2 to bb 5
> Sinking a2_15 = s_11(D) * 4;
>  from bb 2 to bb 5
> f (int s, int * c)
> {
>   int * x3;
>   int * x2;
>   int * x1;
>   int a3;
>   int a2;
>   int a1;
>   long unsigned int _1;
>   long unsigned int _2;
>   sizetype _3;
>   long unsigned int _4;
>   long unsigned int _5;
>   sizetype _6;
>   long unsigned int _7;
>   long unsigned int _8;
>   sizetype _9;
>   int * iftmp.0_10;
> 
>   <bb 2>:
>   a1_12 = s_11(D) * 2;
>   _1 = (long unsigned int) a1_12;
>   _2 = _1 * 4;
>   _3 = -_2;
>   x1_14 = c_13(D) + _3;
>   if (x1_14 != 0B)
>     goto <bb 5>;
>   else
>     goto <bb 3>;
> 
>   <bb 5>:
>   a2_15 = s_11(D) * 4;
>   _4 = (long unsigned int) a2_15;
>   _5 = _4 * 4;
>   _6 = -_5;
>   x2_16 = c_13(D) + _6;
>   goto <bb 4>;
> 
>   <bb 3>:
>   a3_17 = s_11(D) * 6;
>   _7 = (long unsigned int) a3_17;
>   _8 = _7 * 4;
>   _9 = -_8;
>   x3_18 = c_13(D) + _9;
> 
>   <bb 4>:
>   # iftmp.0_10 = PHI <x2_16(5), x3_18(3)>
>   return iftmp.0_10;
> 
> }
> 
> 



More information about the Gcc-patches mailing list