Improve -fprofile-report

Jan Hubicka hubicka@kam.mff.cuni.cz
Sat Nov 27 15:56:32 GMT 2021


Hi,
Profile-report was never properly updated after the switch to the new profile
representation.  This patch fixes the way profile mismatches are
calculated: we used to collect count and freq mismatches separately,
while now we have only counts and probabilities.  So we verify

 - in count: that total count of incoming edges is close to actual count of
   the BB
 - out prob: that total sum of outgoing edge probabilities is close
   to 1 (except for BB containing noreturn calls or EH).

Moreover I added dumping of absolute data which is useful to plot them:
with Martin Liska we plan to set up regular testing so we keep optimizers'
profile updates a bit under control.

Finally I added both static and dynamic stats about mismatches - the static
one is simply the number of inconsistencies in the cfg while the dynamic one is
scaled by the profile - I think in order to keep an eye on optimizers the
first number is quite relevant, while when tracking why code quality
regressed the second number matters more.

The output on exchange2 benchmark with FDO is currently:

Profile consistency report:

Pass name                        |static mismatch            |dynamic mismatch                                     |overall                                       |
                                 |in count     |out prob     |in count                  |out prob                  |size               |time                      |
cp                               |      9    +9|     52   +52|    222697491   +222697491|            0             |    19336          |  86295742108             |
inline                           |      6    -3|     52      |    224325864     +1628373|            0             |    26811    +38.7%|  80149710330        -7.1%|
fixup_cfg                        |     19   +13|     57    +5|     65581029   -158744835|            0             |    34292    +27.9%|  73900655012        -7.8%|
adjust_alignment                 |     19      |     57      |     65581029             |            0             |    34292          |  73900655012             |
ccp                              |     19      |     57      |     65581029             |            0             |    29929    -12.7%|  72799142820        -1.5%|
objsz                            |    216  +197|     46   -11|    161980247    +96399218|            0             |    25566    -14.6%|  71697630628        -1.5%|
cunrolli                         |    216      |     46      |    161980247             |            0             |    26184     +2.4%|  69645569278        -2.9%|
backprop                         |    151   -65|     46      |    137177274    -24802973|            0             |    26802     +2.4%|  67593507928        -2.9%|
phiprop                          |    151      |     46      |    137177274             |            0             |    26802          |  67593507928             |
forwprop                         |    151      |     46      |    137177274             |            0             |    26801     -0.0%|  67593507371        -0.0%|
alias                            |    151      |     46      |    137177274             |            0             |    26800     -0.0%|  67593506814        -0.0%|
retslot                          |    151      |     46      |    137177274             |            0             |    26800          |  67593506814             |
fre                              |    151      |     46      |    137177274             |            0             |    26302     -1.9%|  65529460956        -3.1%|
mergephi                         |    151      |     44    -2|    138917934     +1740660|            0             |    25804     -1.9%|  63465415098        -3.1%|
threadfull                       |    145    -6|     44      |    131849778     -7068156|            0             |    25820     +0.1%|  63461133128        -0.0%|
vrp                              |    150    +5|     44      |    132104333      +254555|            0             |    25835     +0.1%|  63456821751        -0.0%|
dse                              |    149    -1|     37    -7|    132082245       -22088|            0             |    25604     -0.9%|  63320841739        -0.2%|
dce                              |    149      |     37      |    132082245             |            0             |    25361     -0.9%|  63139421822        -0.3%|
stdarg                           |    149      |     37      |    132082245             |            0             |    25348     -0.1%|  63093952510        -0.1%|
cdce                             |    149      |     37      |    132082245             |            0             |    25348          |  63093952510             |
cselim                           |    149      |     37      |    132082245             |            0             |    25352     +0.0%|  63094375700        +0.0%|
copyprop                         |    149      |     34    -3|    132082245             |            0             |    25356     +0.0%|  63094798890        +0.0%|
ifcombine                        |    149      |     34      |    132082245             |            0             |    25373     +0.1%|  63160748697        +0.1%|
mergephi                         |    149      |     28    -6|    132082244           -1|            0             |    25390     +0.1%|  63226698504        +0.1%|
phiopt                           |    149      |     28      |    132082244             |            0             |    25372     -0.1%|  63217606291        -0.0%|
tailr                            |    149      |     28      |    132082244             |            0             |    25354     -0.1%|  63208514078        -0.0%|
ch                               |    149      |     28      |    132082244             |            0             |    25621     +1.1%|  63162780009        -0.1%|
cplxlower                        |    633  +484|     24    -4|    353799598   +221717354|            0             |    25888     +1.0%|  63117045940        -0.1%|
sra                              |    633      |     24      |    353799598             |            0             |    25875     -0.1%|  63116622750        -0.0%|
thread                           |    633      |     24      |    353799598             |            0             |    25882     +0.0%|  63033386004        -0.1%|
dom                              |    709   +76|     24      |    499035933   +145236335|            0             |    25731     -0.6%|  62384968706        -1.0%|
copyprop                         |    633   -76|     24      |    863914623   +364878690|            0             |    25550     -0.7%|  61816384882        -0.9%|
isolate-paths                    |    631    -2|     24      |    863680922      -233701|            0             |    25540     -0.0%|  61813404800        -0.0%|
reassoc                          |    631      |     24      |    863680922             |            0             |    25540          |  61813404800             |
dce                              |    631      |     24      |    863680922             |            0             |    25506     -0.1%|  61760371819        -0.1%|
forwprop                         |    631      |     24      |    863680922             |            0             |    25452     -0.2%|  61650701991        -0.2%|
phiopt                           |    631      |     24      |    863680922             |            0             |    25432     -0.1%|  61594065144        -0.1%|
ccp                              |    631      |     24      |    863680922             |            0             |    25424     -0.0%|  61594065144             |
sincos                           |    631      |     24      |    863680922             |            0             |    25416     -0.0%|  61594065144             |
bswap                            |    631      |     24      |    863680922             |            0             |    25416          |  61594065144             |
laddress                         |    631      |     24      |    863680922             |            0             |    25456     +0.2%|  61643648277        +0.1%|
lim                              |    631      |     24      |    863680922             |            0             |    25509     +0.2%|  60793374141        -1.4%|
walloca                          |    630    -1|     24      |    863531218      -149704|            0             |    25522     +0.1%|  59893516872        -1.5%|
pre                              |    630      |     24      |    863531218             |            0             |    25394     -0.5%|  59867081918        -0.0%|
sink                             |    614   -16|     24      |    861553401     -1977817|            0             |    25233     -0.6%|  59723883109        -0.2%|
dse                              |    612    -2|     24      |    861403844      -149557|            0             |    25191     -0.2%|  59598412538        -0.2%|
dce                              |    612      |     24      |    861403844             |            0             |    25182     -0.0%|  59589705822        -0.0%|
fix_loops                        |    612      |     24      |    861403844             |            0             |    25182          |  59589705822             |
loop                             |    612      |     24      |    861403844             |            0             |    25182          |  59589705822             |
loopinit                         |    612      |     24      |    861403844             |            0             |    25168     -0.1%|  59589705754        -0.0%|
unswitch                         |    612      |     24      |    861403844             |            0             |    25168          |  59361533623        -0.4%|
sccp                             |    612      |     24      |    861403844             |            0             |    25170     +0.0%|  59133758932        -0.4%|
lsplit                           |    612      |     24      |    861403844             |            0             |    25172     +0.0%|  59134156372        +0.0%|
lversion                         |    612      |     24      |    861403844             |            0             |    25522     +1.4%|  58540917176        -1.0%|
unrolljam                        |    617    +5|     24      |    861403844             |            0             |    25872     +1.4%|  57947677980        -1.0%|
cddce                            |    617      |     24      |    861403844             |            0             |    25853     -0.1%|  57882768038        -0.1%|
ivcanon                          |    617      |     24      |    861403844             |            0             |    26336     +1.9%|  58192620262        +0.5%|
ldist                            |    611    -6|     24      |    860204342     -1199502|            0             |    26822     +1.8%|  58416549908        +0.4%|
linterchange                     |    604    -7|     25    +1|    857866959     -2337383|      1449193     +1449193|    26806     -0.1%|  58265717388        -0.3%|
copyprop                         |    604      |     25      |    857866959             |      1449193             |    26806          |  58265717388             |
ch_vect                          |    604      |     25      |    857866959             |      1449193             |    26892     +0.3%|  58264532568        -0.0%|
ifcvt                            |    610    +6|     25      |    858265815      +398856|      1449193             |    29465     +9.6%|  68707093181       +17.9%|
vect                             |    763  +153|    238  +213|   1205331319   +347065504|    175363929   +173914736|    30720     +4.3%|  74875644651        +9.0%|
dce                              |    761    -2|     25  -213|   1465465605   +260134286|      2581981   -172781948|    29275     -4.7%|  68459671713        -8.6%|
pcom                             |    760    -1|     25      |   1464785932      -679673|      2581981             |    29062     -0.7%|  66318892738        -3.1%|
cunroll                          |    760      |     25      |   1464785932             |      2581981             |    33220    +14.3%|  57947562020       -12.6%|
slp                              |    782   +22|     26    +1|    938458273   -526327659|      2199665      -382316|    37256    +12.1%|  49430320053       -14.7%|
ivopts                           |    782      |     26      |    938458273             |      2199665             |    37624     +1.0%|  47538359282        -3.8%|
lim                              |    782      |     26      |    938458273             |      2199665             |    38115     +1.3%|  43911316219        -7.6%|
loopdone                         |    782      |     26      |    938458273             |      2199665             |    38116     +0.0%|  42030322678        -4.3%|
no_loop                          |    780    -2|     26      |    938219393      -238880|      2199665             |    38130     +0.0%|  42030322746        +0.0%|
slp                              |      0  -780|      0   -26|            0   -938219393|            0     -2199665|       14   -100.0%|           68      -100.0%|
veclower2                        |    780  +780|     26   +26|    938219393   +938219393|      2199665     +2199665|    38130 +272257.1%|  42030322746 +61809298055.9%|
switchlower                      |    780      |     26      |    938219393             |      2199665             |    38129     -0.0%|  42030406045        +0.0%|
recip                            |    782    +2|     26      |    938302687       +83294|      2199665             |    38128     -0.0%|  42030489344        +0.0%|
reassoc                          |    782      |     26      |    938302687             |      2199665             |    38153     +0.1%|  42072927600        +0.1%|
slsr                             |    782      |     26      |    938302687             |      2199665             |    37565     -1.5%|  42062311851        -0.0%|
split-paths                      |    782      |     26      |    938302687             |      2199665             |    36952     -1.6%|  42009257846        -0.1%|
tracer                           |    782      |     26      |    938302687             |      2199665             |    37091     +0.4%|  42017839334        +0.0%|
fre                              |    782      |     26      |    884236883    -54065804|      2199665             |    35938     -3.1%|  41523235663        -1.2%|
thread                           |    792   +10|     26      |    966537048    +82300165|      2199665             |    34784     -3.2%|  40970278410        -1.3%|
dom                              |    800    +8|     26      |    966740391      +203343|      2199665             |    34878     +0.3%|  40883668999        -0.2%|
strlen                           |    806    +6|     26      |    971963312     +5222921|      2199665             |    34834     -0.1%|  40846831682        -0.1%|
threadfull                       |    806      |     26      |    971963312             |      2199665             |    35023     +0.5%|  40783019349        -0.2%|
vrp                              |    832   +26|     26      |    983081423    +11118111|      2199665             |    35205     +0.5%|  40777250162        -0.0%|
ccp                              |    835    +3|     26      |    995679581    +12598158|      2199665             |    35198     -0.0%|  40835293308        +0.1%|
wrestrict                        |    834    -1|     26      |    995436090      -243491|      2199665             |    35198          |  40835293308             |
dse                              |    834      |     26      |    995436090             |      2199665             |    34548     -1.8%|  40287490581        -1.3%|
cddce                            |    825    -9|     26      |    993555689     -1880401|      2199665             |    33848     -2.0%|  39686775972        -1.5%|
forwprop                         |    828    +3|     26      |    994360564      +804875|      2199665             |    33804     -0.1%|  39689166731        +0.0%|
phiopt                           |    829    +1|     26      |    997327283     +2966719|      2199665             |    33783     -0.1%|  39729029151        +0.1%|
fab                              |    829      |     26      |    997327283             |      2199665             |    33756     -0.1%|  39713588930        -0.0%|
widening_mul                     |    829      |     26      |    997327283             |      2199665             |    33756          |  39713588930             |
sink                             |    829      |     26      |    997327283             |      2199665             |    33746     -0.0%|  39672082343        -0.1%|
store-merging                    |    829      |     26      |    997327283             |      2199665             |    33691     -0.2%|  39556068692        -0.3%|
tailc                            |    829      |     26      |    997327283             |      2199665             |    33646     -0.1%|  39481561628        -0.2%|
dce                              |    829      |     26      |    997327283             |      2199665             |    33646          |  39481561628             |
crited                           |    827    -2|     26      |    997245974       -81309|      2199665             |    33646          |  39481561628             |
local-pure-const                 |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
modref                           |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
uncprop                          |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
nrv                              |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
isel                             |    827      |     26      |    997245974             |      2199665             |    33514     -0.4%|  39225113838        -0.6%|
optimized                        |    827      |     26      |    997245974             |      2199665             |    33382     -0.4%|  38968666048        -0.7%|
waccess                          |    817   -10|     26      |    994609654     -2636320|      2199665             |    33382          |  38968666048             |
expand                           |    817      |     26      |    994609654             |      2199665             |-------------------|--------------------------|
vregs                            |    792   -25|     26      |    982479501    -12130153|      2199665             |   248936    +76.4%| 289884265130       +76.4%|
into_cfglayout                   |    792      |     26      |    982479501             |      2199665             |   245988     -1.2%| 287195553058        -0.9%|
jump                             |    788    -4|     22    -4|    980690179     -1789322|       750472     -1449193|   242260     -1.5%| 284023632362        -1.1%|
subreg1                          |    788      |     22      |    980690179             |       750472             |   241560     -0.3%| 283737224554        -0.1%|
dfinit                           |    788      |     22      |    980690179             |       750472             |   241560          | 283737224554             |
cse1                             |    788      |     22      |    980690179             |       750472             |   241530     -0.0%| 283542440179        -0.1%|
fwprop1                          |    788      |     22      |    980690179             |       750472             |   236348     -2.1%| 277839977964        -2.0%|
cprop                            |    788      |     22      |    980690179             |       750472             |   229356     -3.0%| 271526534608        -2.3%|
rtl pre                          |    788      |     22      |    980690179             |       750472             |   228890     -0.2%| 271458511024        -0.0%|
cprop                            |    788      |     22      |    980690179             |       750472             |   228432     -0.2%| 270488229100        -0.4%|
cse_local                        |    788      |     22      |    980690179             |       750472             |   226190     -1.0%| 268346544636        -0.8%|
ce1                              |    788      |     22      |    980690179             |       750472             |   224736     -0.6%| 266742600856        -0.6%|
reginfo                          |    784    -4|     22      |    970723385     -9966794|       750472             |   223692     -0.5%| 265572317684        -0.4%|
loop2                            |    784      |     22      |    970723385             |       750472             |   223692          | 265572317684             |
loop2_init                       |    784      |     22      |    970723385             |       750472             |   223692          | 265572317684             |
loop2_invariant                  |    784      |     22      |    970723385             |       750472             |   226952     +1.5%| 265012451525        -0.2%|
loop2_unroll                     |    784      |     22      |    970723385             |       750472             |   238388     +5.0%| 263251671395        -0.7%|
loop2_done                       |    804   +20|     22      |   1068548117    +97824732|       750472             |   246564     +3.4%| 262050757638        -0.5%|
cprop                            |    796    -8|     22      |   1060536046     -8012071|       750472             |   243056     -1.4%| 259215526324        -1.1%|
stv                              |    796      |     22      |   1060536046             |       750472             |   239560     -1.4%| 256380295176        -1.1%|
cse2                             |    796      |     22      |   1060536046             |       750472             |   239166     -0.2%| 256198425332        -0.1%|
dse1                             |    796      |     22      |   1060536046             |       750472             |   238756     -0.2%| 256016466788        -0.1%|
fwprop2                          |    796      |     22      |   1060536046             |       750472             |   238716     -0.0%| 256013475076        -0.0%|
init-regs                        |    796      |     22      |   1060536046             |       750472             |   238680     -0.0%| 256010571684        -0.0%|
ud_dce                           |    796      |     22      |   1060536046             |       750472             |   238640     -0.0%| 255949424552        -0.0%|
combine                          |    796      |     22      |   1060536046             |       750472             |   234341     -1.8%| 250210614407        -2.2%|
rpad                             |    796      |     22      |   1060536046             |       750472             |   230082     -1.8%| 244532951394        -2.3%|
stv                              |    796      |     22      |   1060536046             |       750472             |   230082          | 244532951394             |
ce2                              |    796      |     22      |   1060536046             |       750472             |   230082          | 244532951394             |
jump_after_combine               |    796      |     22      |   1060536046             |       750472             |   230082          | 244532951394             |
bbpart                           |    796      |     22      |   1060536046             |       750472             |   230150     +0.0%| 244532951394             |
outof_cfglayout                  |    796      |     22      |   1060536046             |       750472             |   233098     +1.3%| 245953693762        +0.6%|
split1                           |    796      |     22      |   1060536046             |       750472             |   236330     +1.4%| 247637967034        +0.7%|
subreg3                          |    796      |     22      |   1060536046             |       750472             |   236682     +0.1%| 247901497938        +0.1%|
mode_sw                          |    796      |     22      |   1060536046             |       750472             |   236682          | 247901497938             |
asmcons                          |    796      |     22      |   1060536046             |       750472             |   236682          | 247901497938             |
ira                              |    796      |     22      |   1060536046             |       750472             |   276850    +17.0%| 259550769994        +4.7%|
reload                           |    796      |     22      |   1060536046             |       750472             |   314040    +13.4%| 279667802131        +7.8%|
vzeroupper                       |    795    -1|     22      |   1060252399      -283647|       750472             |   311062     -0.9%| 288135562212        +3.0%|
postreload                       |    795      |     22      |   1060252399             |       750472             |   310594     -0.2%| 287983292675        -0.1%|
gcse2                            |    795      |     22      |   1060252399             |       750472             |   310134     -0.1%| 287816349702        -0.1%|
split2                           |    795      |     22      |   1060252399             |       750472             |   310606     +0.2%| 288424437654        +0.2%|
ree                              |    795      |     22      |   1060252399             |       750472             |   311154     +0.2%| 289062128214        +0.2%|
cmpelim                          |    795      |     22      |   1060252399             |       750472             |   311230     +0.0%| 289076923160        +0.0%|
ro_and_epilogue                  |    795      |     22      |   1060252399             |       750472             |   312318     +0.3%| 290230761620        +0.4%|
dse2                             |    795      |     22      |   1060252399             |       750472             |   313414     +0.4%| 291384734306        +0.4%|
csa                              |    795      |     22      |   1060252399             |       750472             |   313414          | 291384734306             |
jump2                            |    795      |     22      |   1060252399             |       750472             |   311606     -0.6%| 291487635182        +0.0%|
compgotos                        |    799    +4|     22      |   1078875358    +18622959|       750472             |   309798     -0.6%| 291590536058        +0.0%|
peephole2                        |    799      |     22      |   1078875358             |       750472             |   310221     +0.1%| 291470973464        -0.0%|
ce3                              |    799      |     22      |   1078875358             |       750472             |   310644     +0.1%| 291343685638        -0.0%|
cprop_hardreg                    |    799      |     22      |   1078875358             |       750472             |   310344     -0.1%| 291274241846        -0.0%|
rtl_dce                          |    799      |     22      |   1078875358             |       750472             |   310036     -0.1%| 291212389842        -0.0%|
bbro                             |    799      |     22      |   1078875358             |       750472             |   310712     +0.2%| 290584977970        -0.2%|
split3                           |    792    -7|     22      |   1077517775     -1357583|       750472             |   311396     +0.2%| 289957699542        -0.2%|
sched2                           |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
stack                            |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
zero_call_used_regs              |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
alignments                       |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |

Martin: I think we want to track, for a start

 1) fixup_cfg                        |     19   +13|     57    +5|     65581029   -158744835|            0             |    34292    +27.9%|  73900655012        -7.8%|
 2) loop                             |    612      |     24      |    861403844             |            0             |    25182          |  59589705822             |
 3) waccess                          |    817   -10|     26      |    994609654     -2636320|      2199665             |    33382          |  38968666048             |
 4) into_cfglayout                   |    792      |     26      |    982479501             |      2199665             |   245988     -1.2%| 287195553058        -0.9%|
 5) alignments                       |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |

1) is the situation after IPA passes, 2) is just before loop optimizations, 3) is the end of the gimple optimization queue, 4) is just after expansion and 5) is the end of RTL.
For each of them we can track

	For this we can record:
	  - in count mismatches: 19
	  - out probability mismatches: 57
	  - dynamic in count mismatches: 65581029
	  - dynamic out probability mismatches: 0
	  - overall size: 34292
	  - overall time: 73900655012
(values are from fixup_cfg stats).
Would that be reasonable?

There is some nonsense in the data, for example:
no_loop                          |    780    -2|     26      |    938219393      -238880|      2199665             |    38130     +0.0%|  42030322746        +0.0%|
slp                              |      0  -780|      0   -26|            0   -938219393|            0     -2199665|       14   -100.0%|           68      -100.0%|
veclower2                        |    780  +780|     26   +26|    938219393   +938219393|      2199665     +2199665|    38130 +272257.1%|  42030322746 +61809298055.9%|

Which is related to the fact that we re-run slp only sometimes.  I will try to improve
this incrementally.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza

gcc/ChangeLog:

2021-11-27  Jan Hubicka  <hubicka@ucw.cz>

	* cfghooks.c: Include sreal.h, profile.h.
	(profile_record_check_consistency): Fix checking of count consistency;
	record also dynamic mismatches.
	* cfgrtl.c (rtl_account_profile_record): Similarly.
	* tree-cfg.c (gimple_account_profile_record): Likewise.
	* cfghooks.h (struct profile_record): Remove num_mismatched_freq_in,
	num_mismatched_freq_out, turn time to double, add
	dyn_mismatched_prob_out, dyn_mismatched_count_in,
	num_mismatched_prob_out; remove num_mismatched_count_out.
	* passes.c (pass_manager::dump_profile_report):

diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index fa2dae21a03..6a72ca37e3b 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfganal.h"
 #include "tree-ssa.h"
 #include "cfgloop.h"
+#include "sreal.h"
+#include "profile.h"
 
 /* Disable warnings about missing quoting in GCC diagnostics.  */
 #if __GNUC__ >= 10
@@ -1467,35 +1469,67 @@ profile_record_check_consistency (profile_record *record)
   FOR_ALL_BB_FN (bb, cfun)
    {
       if (bb != EXIT_BLOCK_PTR_FOR_FN (cfun)
-	  && profile_status_for_fn (cfun) != PROFILE_ABSENT)
+	  && profile_status_for_fn (cfun) != PROFILE_ABSENT
+	  && EDGE_COUNT (bb->succs))
 	{
-	  profile_probability sum = profile_probability::never ();
-	  FOR_EACH_EDGE (e, ei, bb->succs)
-	    sum += e->probability;
-	  if (EDGE_COUNT (bb->succs)
-	      && sum.differs_from_p (profile_probability::always ()))
-	    record->num_mismatched_freq_out++;
-	  profile_count lsum = profile_count::zero ();
+	  sreal sum = 0;
+	  bool found = false;
 	  FOR_EACH_EDGE (e, ei, bb->succs)
-	    lsum += e->count ();
-	  if (EDGE_COUNT (bb->succs) && (lsum.differs_from_p (bb->count)))
-	    record->num_mismatched_count_out++;
+	    {
+	      if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
+		found = true;
+	      sum += e->probability.to_sreal ();
+	    }
+	  double dsum = sum.to_double ();
+	  if (found && (dsum < 0.9 || dsum > 1.1))
+	    {
+	      record->num_mismatched_prob_out++;
+	      dsum = dsum > 1 ? dsum - 1 : 1 - dsum;
+	      if (profile_info)
+		{
+		  if (ENTRY_BLOCK_PTR_FOR_FN
+			 (cfun)->count.ipa ().initialized_p ()
+		      && ENTRY_BLOCK_PTR_FOR_FN
+			 (cfun)->count.ipa ().nonzero_p ()
+		      && bb->count.ipa ().initialized_p ())
+		    record->dyn_mismatched_prob_out
+			+= dsum * bb->count.ipa ().to_gcov_type ();
+		}
+	      else if (bb->count.initialized_p ())
+		record->dyn_mismatched_prob_out
+		    += dsum * bb->count.to_sreal_scale
+			(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
+	    }
 	}
       if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 	  && profile_status_for_fn (cfun) != PROFILE_ABSENT)
 	{
-	  profile_probability sum = profile_probability::never ();
 	  profile_count lsum = profile_count::zero ();
 	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    lsum += e->count ();
+	  if (lsum.differs_from_p (bb->count))
 	    {
-	      sum += e->probability;
-	      lsum += e->count ();
+	      record->num_mismatched_count_in++;
+	      profile_count max;
+	      if (lsum < bb->count)
+		max = bb->count;
+	      else
+		max = lsum;
+	      if (profile_info)
+		{
+		  if (ENTRY_BLOCK_PTR_FOR_FN
+			 (cfun)->count.ipa ().initialized_p ()
+		      && ENTRY_BLOCK_PTR_FOR_FN
+			 (cfun)->count.ipa ().nonzero_p ()
+		      && max.ipa ().initialized_p ())
+		    record->dyn_mismatched_count_in
+			+= max.ipa ().to_gcov_type ();
+		}
+	      else if (bb->count.initialized_p ())
+		record->dyn_mismatched_prob_out
+		    += max.to_sreal_scale
+			(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
 	    }
-	  if (EDGE_COUNT (bb->preds)
-	      && sum.differs_from_p (profile_probability::always ()))
-	    record->num_mismatched_freq_in++;
-	  if (lsum.differs_from_p (bb->count))
-	    record->num_mismatched_count_in++;
 	}
       if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)
 	  || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
diff --git a/gcc/cfghooks.h b/gcc/cfghooks.h
index 6df651f128e..684e6319d26 100644
--- a/gcc/cfghooks.h
+++ b/gcc/cfghooks.h
@@ -36,22 +36,25 @@ along with GCC; see the file COPYING3.  If not see
    and one CFG hook per CFG mode.  */
 struct profile_record
 {
-  /* The number of basic blocks where sum(freq) of the block's predecessors
-     doesn't match reasonably well with the incoming frequency.  */
-  int num_mismatched_freq_in;
-  /* Likewise for a basic block's successors.  */
-  int num_mismatched_freq_out;
+  /* A weighted cost of the run-time of the function body.  */
+  double time;
+  /* Frequency of execution of basic blocks where sum(prob) of the block's
+     successors doesn't reasonably match probability 1.  */
+  double dyn_mismatched_prob_out;
+  /* Frequency of execution of basic blocks where sum(count) of the block's
+     predecessors doesn't match reasonably well with the incoming frequency.  */
+  double dyn_mismatched_count_in;
+  /* The number of basic blocks where sum(prob) of the block's successors
+     doesn't reasonably match probability 1.  */
+  int num_mismatched_prob_out;
   /* The number of basic blocks where sum(count) of the block's predecessors
      doesn't match reasonably well with the incoming frequency.  */
   int num_mismatched_count_in;
-  /* Likewise for a basic block's successors.  */
-  int num_mismatched_count_out;
-  /* A weighted cost of the run-time of the function body.  */
-  gcov_type_unsigned time;
   /* A weighted cost of the size of the function body.  */
   int size;
   /* True iff this pass actually was run.  */
   bool run;
+  bool fdo;
 };
 
 typedef int_hash <unsigned short, 0> dependence_hash;
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index c7ba9006b4e..3744adcc2ba 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -63,6 +63,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "print-rtl.h"
 #include "rtl-iter.h"
 #include "gimplify.h"
+#include "profile.h"
+#include "sreal.h"
 
 /* Disable warnings about missing quoting in GCC diagnostics.  */
 #if __GNUC__ >= 10
@@ -5264,12 +5266,22 @@ rtl_account_profile_record (basic_block bb, struct profile_record *record)
     if (INSN_P (insn))
       {
 	record->size += insn_cost (insn, false);
-	if (bb->count.initialized_p ())
-	  record->time
-	    += insn_cost (insn, true) * bb->count.to_gcov_type ();
-	else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
+	if (profile_info)
+	  {
+	    if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().initialized_p ()
+		&& ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
+		&& bb->count.ipa ().initialized_p ())
+	      record->time
+		+= insn_cost (insn, true) * bb->count.ipa ().to_gcov_type ();
+	  }
+	else if (bb->count.initialized_p ()
+		 && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
 	  record->time
-	    += insn_cost (insn, true) * bb->count.to_frequency (cfun);
+	    += insn_cost (insn, true)
+	       * bb->count.to_sreal_scale
+		      (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
+	else
+	  record->time += insn_cost (insn, true);
       }
 }
 
diff --git a/gcc/passes.c b/gcc/passes.c
index 64550b00b43..5a7da687b0b 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -1893,10 +1893,11 @@ dump_profile_report (void)
 void
 pass_manager::dump_profile_report () const
 {
-  int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
-  gcov_type last_time = 0, last_size = 0;
+  int last_count_in = 0, last_prob_out = 0;
+  double last_dyn_count_in = 0, last_dyn_prob_out = 0;
+  double last_time = 0;
+  int last_size = 0;
   double rel_time_change, rel_size_change;
-  int last_reported = 0;
 
   if (!profile_record)
     return;
@@ -1906,16 +1907,21 @@ pass_manager::dump_profile_report () const
     dump_file = stderr;
 
   fprintf (dump_file, "Profile consistency report:\n\n");
-  fprintf (dump_file, "                                 |mismatch     |mismatch     |                     |\n");
-  fprintf (dump_file, "Pass name                        |IN    |IN    |OUT   |OUT   |overall              |\n");
-  fprintf (dump_file, "                                 |freq  |count |freq  |count |size      |time      |\n");
+  fprintf (dump_file,
+	   "Pass name                        |static mismatch            "
+	   "|dynamic mismatch                                     "
+	   "|overall                                       |\n");
+  fprintf (dump_file,
+	   "                                 |in count     |out prob     "
+	   "|in count                  |out prob                  "
+	   "|size               |time                      |\n");
 	   
   for (int i = 1; i < passes_by_id_size; i++)
     if (profile_record[i].run)
       {
 	if (last_time)
 	  rel_time_change = (profile_record[i].time
-			     - (double)last_time) * 100 / (double)last_time;
+			     - last_time) * 100 / last_time;
 	else
 	  rel_time_change = 0;
 	if (last_size)
@@ -1924,65 +1930,62 @@ pass_manager::dump_profile_report () const
 	else
 	  rel_size_change = 0;
 
-	if (profile_record[i].num_mismatched_freq_in != last_freq_in
-	    || profile_record[i].num_mismatched_freq_out != last_freq_out
-	    || profile_record[i].num_mismatched_count_in != last_count_in
-	    || profile_record[i].num_mismatched_count_out != last_count_out
-	    || rel_time_change || rel_size_change)
+	fprintf (dump_file, "%-33s| %6i", passes_by_id[i]->name,
+		 profile_record[i].num_mismatched_count_in);
+	if (profile_record[i].num_mismatched_count_in != last_count_in)
+	  fprintf (dump_file, " %+5i",
+		   profile_record[i].num_mismatched_count_in
+		   - last_count_in);
+	else
+	  fprintf (dump_file, "      ");
+	fprintf (dump_file, "| %6i",
+		 profile_record[i].num_mismatched_prob_out);
+	if (profile_record[i].num_mismatched_prob_out != last_prob_out)
+	  fprintf (dump_file, " %+5i",
+		   profile_record[i].num_mismatched_prob_out
+		   - last_prob_out);
+	else
+	  fprintf (dump_file, "      ");
+
+	fprintf (dump_file, "| %12.0f",
+		 profile_record[i].dyn_mismatched_count_in);
+	if (profile_record[i].dyn_mismatched_count_in != last_dyn_count_in)
+	  fprintf (dump_file, " %+12.0f",
+		   profile_record[i].dyn_mismatched_count_in
+		   - last_dyn_count_in);
+	else
+	  fprintf (dump_file, "             ");
+	fprintf (dump_file, "| %12.0f",
+		 profile_record[i].dyn_mismatched_prob_out);
+	if (profile_record[i].dyn_mismatched_prob_out != last_dyn_prob_out)
+	  fprintf (dump_file, " %+12.0f",
+		   profile_record[i].dyn_mismatched_prob_out
+		   - last_dyn_prob_out);
+	else
+	  fprintf (dump_file, "             ");
+
+	/* Size/time units change across gimple and RTL.  */
+	if (i == pass_expand_1->static_pass_number)
+	  fprintf (dump_file,
+		   "|-------------------|--------------------------");
+	else
 	  {
-	    last_reported = i;
-	    fprintf (dump_file, "%-33s", passes_by_id[i]->name);
-	    if (profile_record[i].num_mismatched_freq_in != last_freq_in)
-	      fprintf (dump_file, "| %+5i",
-		       profile_record[i].num_mismatched_freq_in
-		       - last_freq_in);
+	    fprintf (dump_file, "| %8i", profile_record[i].size);
+	    if (rel_size_change)
+	      fprintf (dump_file, " %+8.1f%%", rel_size_change);
 	    else
-	      fprintf (dump_file, "|      ");
-	    if (profile_record[i].num_mismatched_count_in != last_count_in)
-	      fprintf (dump_file, "| %+5i",
-		       profile_record[i].num_mismatched_count_in
-		       - last_count_in);
+	      fprintf (dump_file, "          ");
+	    fprintf (dump_file, "| %12.0f", profile_record[i].time);
+	    if (rel_time_change)
+	      fprintf (dump_file, " %+11.1f%%", rel_time_change);
 	    else
-	      fprintf (dump_file, "|      ");
-	    if (profile_record[i].num_mismatched_freq_out != last_freq_out)
-	      fprintf (dump_file, "| %+5i",
-		       profile_record[i].num_mismatched_freq_out
-		       - last_freq_out);
-	    else
-	      fprintf (dump_file, "|      ");
-	    if (profile_record[i].num_mismatched_count_out != last_count_out)
-	      fprintf (dump_file, "| %+5i",
-		       profile_record[i].num_mismatched_count_out
-		       - last_count_out);
-	    else
-	      fprintf (dump_file, "|      ");
-
-	    /* Size/time units change across gimple and RTL.  */
-	    if (i == pass_expand_1->static_pass_number)
-	      fprintf (dump_file, "|----------|----------");
-	    else
-	      {
-		if (rel_size_change)
-		  fprintf (dump_file, "| %+8.1f%%", rel_size_change);
-		else
-		  fprintf (dump_file, "|          ");
-		if (rel_time_change)
-		  fprintf (dump_file, "| %+8.1f%%", rel_time_change);
-		else
-		  fprintf (dump_file, "|          ");
-	      }
-	    fprintf (dump_file, "|\n");
-	    last_freq_in = profile_record[i].num_mismatched_freq_in;
-	    last_freq_out = profile_record[i].num_mismatched_freq_out;
-	    last_count_in = profile_record[i].num_mismatched_count_in;
-	    last_count_out = profile_record[i].num_mismatched_count_out;
-	  }
-	else if (last_reported != i)
-	  {
-	    last_reported = i;
-	    fprintf (dump_file, "%-20s ------------|      |      |      |      |          |          |\n",
-		     passes_by_id[i]->name);
+	      fprintf (dump_file, "             ");
 	  }
+	fprintf (dump_file, "|\n");
+	last_prob_out = profile_record[i].num_mismatched_prob_out;
+	last_count_in = profile_record[i].num_mismatched_count_in;
+	last_dyn_prob_out = profile_record[i].dyn_mismatched_prob_out;
+	last_dyn_count_in = profile_record[i].dyn_mismatched_count_in;
 	last_time = profile_record[i].time;
 	last_size = profile_record[i].size;
       }
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 8ed8c69b5b1..ebbd894ae03 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "asan.h"
 #include "profile.h"
+#include "sreal.h"
 
 /* This file contains functions for building the Control Flow Graph (CFG)
    for a function tree.  */
@@ -9084,18 +9085,32 @@ gimple_account_profile_record (basic_block bb,
 			       struct profile_record *record)
 {
   gimple_stmt_iterator i;
-  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+  for (i = gsi_start_nondebug_after_labels_bb (bb); !gsi_end_p (i);
+       gsi_next_nondebug (&i))
     {
       record->size
 	+= estimate_num_insns (gsi_stmt (i), &eni_size_weights);
-      if (bb->count.initialized_p ())
+      if (profile_info)
+	{
+	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().initialized_p ()
+	      && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
+	      && bb->count.ipa ().initialized_p ())
+	    record->time
+	      += estimate_num_insns (gsi_stmt (i),
+				     &eni_time_weights)
+				     * bb->count.ipa ().to_gcov_type ();
+	}
+      else if (bb->count.initialized_p ()
+	       && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
 	record->time
-	  += estimate_num_insns (gsi_stmt (i),
-				 &eni_time_weights) * bb->count.to_gcov_type ();
-      else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
-	record->time
-	  += estimate_num_insns (gsi_stmt (i),
-				 &eni_time_weights) * bb->count.to_frequency (cfun);
+	  += estimate_num_insns
+		(gsi_stmt (i),
+		 &eni_time_weights)
+		 * bb->count.to_sreal_scale
+			(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
+     else
+      record->time
+	+= estimate_num_insns (gsi_stmt (i), &eni_time_weights);
     }
 }
 


More information about the Gcc-patches mailing list