Improve -fprofile-report

Martin Liška mliska@suse.cz
Mon Nov 29 09:11:37 GMT 2021


On 11/27/21 16:56, Jan Hubicka via Gcc-patches wrote:
> Hi,
> Profile-report was never properly updated after switch to new profile
> representation.  This patch fixes the way profile mismatches are
> calculated: we used to collect separately count and freq mismatches,
> while now we have only counts & probabilities.  So we verify
> 
>   - in count: that total count of incoming edges is close to actual count of
>     the BB
>   - out prob: that total sum of outgoing edge probabilities is close
>     to 1 (except for BB containing noreturn calls or EH).

Hello.

Can you please CC me when you mention me in an email?

> 
> Moreover I added dumping of absolute data which is useful to plot them:
> with Martin Liska we plan to setup regular testing so we keep optimizers
> profile updates a bit under control.
> 
> Finally I added both static and dynamic stats about mismatches - static
> one is simply number of inconsistencies in the cfg while dynamic is
> scaled by the profile - I think in order to keep eye on optimizers the
> first number is quite relevant. While when tracking why code quality
> regressed the second number matters more.
> 
> The output on exchange2 benchmark with FDO is currently:
> 
> Profile consistency report:

The version you sent is different from what was installed :)

Pass dump id and name            |static mismatch            |dynamic mismatch                                     |overall                                       |
                                  |in count     |out prob     |in count                  |out prob                  |size               |time                      |
  15t cfg                         |      0      |      0      |            0             |            0             |   165834          |       495010             |
  17t ompexp                      |      0      |      0      |            0             |            0             |   165834          |       495010             |
  18t walloca                     |      0      |      0      |            0             |            0             |   165834          |       495010             |
  19i visibility                  |      0      |      0      |            0             |            0             |   165834          |       495010             |
  20i build_ssa_passes            |      0      |      0      |            0             |            0             |   165834          |       495010             |
...

Can you please rename it to the same format we use for dump files, e.g. 018t.walloca1 ?
It would be easier for people finding the corresponding dump file.

> 
> Pass name                        |static mismatch            |dynamic mismatch                                     |overall                                       |
>                                   |in count     |out prob     |in count                  |out prob                  |size               |time                      |
> cp                               |      9    +9|     52   +52|    222697491   +222697491|            0             |    19336          |  86295742108             |
> inline                           |      6    -3|     52      |    224325864     +1628373|            0             |    26811    +38.7%|  80149710330        -7.1%|
> fixup_cfg                        |     19   +13|     57    +5|     65581029   -158744835|            0             |    34292    +27.9%|  73900655012        -7.8%|
> adjust_alignment                 |     19      |     57      |     65581029             |            0             |    34292          |  73900655012             |
> ccp                              |     19      |     57      |     65581029             |            0             |    29929    -12.7%|  72799142820        -1.5%|
> objsz                            |    216  +197|     46   -11|    161980247    +96399218|            0             |    25566    -14.6%|  71697630628        -1.5%|
> cunrolli                         |    216      |     46      |    161980247             |            0             |    26184     +2.4%|  69645569278        -2.9%|
> backprop                         |    151   -65|     46      |    137177274    -24802973|            0             |    26802     +2.4%|  67593507928        -2.9%|
> phiprop                          |    151      |     46      |    137177274             |            0             |    26802          |  67593507928             |
> forwprop                         |    151      |     46      |    137177274             |            0             |    26801     -0.0%|  67593507371        -0.0%|
> alias                            |    151      |     46      |    137177274             |            0             |    26800     -0.0%|  67593506814        -0.0%|
> retslot                          |    151      |     46      |    137177274             |            0             |    26800          |  67593506814             |
> fre                              |    151      |     46      |    137177274             |            0             |    26302     -1.9%|  65529460956        -3.1%|
> mergephi                         |    151      |     44    -2|    138917934     +1740660|            0             |    25804     -1.9%|  63465415098        -3.1%|
> threadfull                       |    145    -6|     44      |    131849778     -7068156|            0             |    25820     +0.1%|  63461133128        -0.0%|
> vrp                              |    150    +5|     44      |    132104333      +254555|            0             |    25835     +0.1%|  63456821751        -0.0%|
> dse                              |    149    -1|     37    -7|    132082245       -22088|            0             |    25604     -0.9%|  63320841739        -0.2%|
> dce                              |    149      |     37      |    132082245             |            0             |    25361     -0.9%|  63139421822        -0.3%|
> stdarg                           |    149      |     37      |    132082245             |            0             |    25348     -0.1%|  63093952510        -0.1%|
> cdce                             |    149      |     37      |    132082245             |            0             |    25348          |  63093952510             |
> cselim                           |    149      |     37      |    132082245             |            0             |    25352     +0.0%|  63094375700        +0.0%|
> copyprop                         |    149      |     34    -3|    132082245             |            0             |    25356     +0.0%|  63094798890        +0.0%|
> ifcombine                        |    149      |     34      |    132082245             |            0             |    25373     +0.1%|  63160748697        +0.1%|
> mergephi                         |    149      |     28    -6|    132082244           -1|            0             |    25390     +0.1%|  63226698504        +0.1%|
> phiopt                           |    149      |     28      |    132082244             |            0             |    25372     -0.1%|  63217606291        -0.0%|
> tailr                            |    149      |     28      |    132082244             |            0             |    25354     -0.1%|  63208514078        -0.0%|
> ch                               |    149      |     28      |    132082244             |            0             |    25621     +1.1%|  63162780009        -0.1%|
> cplxlower                        |    633  +484|     24    -4|    353799598   +221717354|            0             |    25888     +1.0%|  63117045940        -0.1%|
> sra                              |    633      |     24      |    353799598             |            0             |    25875     -0.1%|  63116622750        -0.0%|
> thread                           |    633      |     24      |    353799598             |            0             |    25882     +0.0%|  63033386004        -0.1%|
> dom                              |    709   +76|     24      |    499035933   +145236335|            0             |    25731     -0.6%|  62384968706        -1.0%|
> copyprop                         |    633   -76|     24      |    863914623   +364878690|            0             |    25550     -0.7%|  61816384882        -0.9%|
> isolate-paths                    |    631    -2|     24      |    863680922      -233701|            0             |    25540     -0.0%|  61813404800        -0.0%|
> reassoc                          |    631      |     24      |    863680922             |            0             |    25540          |  61813404800             |
> dce                              |    631      |     24      |    863680922             |            0             |    25506     -0.1%|  61760371819        -0.1%|
> forwprop                         |    631      |     24      |    863680922             |            0             |    25452     -0.2%|  61650701991        -0.2%|
> phiopt                           |    631      |     24      |    863680922             |            0             |    25432     -0.1%|  61594065144        -0.1%|
> ccp                              |    631      |     24      |    863680922             |            0             |    25424     -0.0%|  61594065144             |
> sincos                           |    631      |     24      |    863680922             |            0             |    25416     -0.0%|  61594065144             |
> bswap                            |    631      |     24      |    863680922             |            0             |    25416          |  61594065144             |
> laddress                         |    631      |     24      |    863680922             |            0             |    25456     +0.2%|  61643648277        +0.1%|
> lim                              |    631      |     24      |    863680922             |            0             |    25509     +0.2%|  60793374141        -1.4%|
> walloca                          |    630    -1|     24      |    863531218      -149704|            0             |    25522     +0.1%|  59893516872        -1.5%|
> pre                              |    630      |     24      |    863531218             |            0             |    25394     -0.5%|  59867081918        -0.0%|
> sink                             |    614   -16|     24      |    861553401     -1977817|            0             |    25233     -0.6%|  59723883109        -0.2%|
> dse                              |    612    -2|     24      |    861403844      -149557|            0             |    25191     -0.2%|  59598412538        -0.2%|
> dce                              |    612      |     24      |    861403844             |            0             |    25182     -0.0%|  59589705822        -0.0%|
> fix_loops                        |    612      |     24      |    861403844             |            0             |    25182          |  59589705822             |
> loop                             |    612      |     24      |    861403844             |            0             |    25182          |  59589705822             |
> loopinit                         |    612      |     24      |    861403844             |            0             |    25168     -0.1%|  59589705754        -0.0%|
> unswitch                         |    612      |     24      |    861403844             |            0             |    25168          |  59361533623        -0.4%|
> sccp                             |    612      |     24      |    861403844             |            0             |    25170     +0.0%|  59133758932        -0.4%|
> lsplit                           |    612      |     24      |    861403844             |            0             |    25172     +0.0%|  59134156372        +0.0%|
> lversion                         |    612      |     24      |    861403844             |            0             |    25522     +1.4%|  58540917176        -1.0%|
> unrolljam                        |    617    +5|     24      |    861403844             |            0             |    25872     +1.4%|  57947677980        -1.0%|
> cddce                            |    617      |     24      |    861403844             |            0             |    25853     -0.1%|  57882768038        -0.1%|
> ivcanon                          |    617      |     24      |    861403844             |            0             |    26336     +1.9%|  58192620262        +0.5%|
> ldist                            |    611    -6|     24      |    860204342     -1199502|            0             |    26822     +1.8%|  58416549908        +0.4%|
> linterchange                     |    604    -7|     25    +1|    857866959     -2337383|      1449193     +1449193|    26806     -0.1%|  58265717388        -0.3%|
> copyprop                         |    604      |     25      |    857866959             |      1449193             |    26806          |  58265717388             |
> ch_vect                          |    604      |     25      |    857866959             |      1449193             |    26892     +0.3%|  58264532568        -0.0%|
> ifcvt                            |    610    +6|     25      |    858265815      +398856|      1449193             |    29465     +9.6%|  68707093181       +17.9%|
> vect                             |    763  +153|    238  +213|   1205331319   +347065504|    175363929   +173914736|    30720     +4.3%|  74875644651        +9.0%|
> dce                              |    761    -2|     25  -213|   1465465605   +260134286|      2581981   -172781948|    29275     -4.7%|  68459671713        -8.6%|
> pcom                             |    760    -1|     25      |   1464785932      -679673|      2581981             |    29062     -0.7%|  66318892738        -3.1%|
> cunroll                          |    760      |     25      |   1464785932             |      2581981             |    33220    +14.3%|  57947562020       -12.6%|
> slp                              |    782   +22|     26    +1|    938458273   -526327659|      2199665      -382316|    37256    +12.1%|  49430320053       -14.7%|
> ivopts                           |    782      |     26      |    938458273             |      2199665             |    37624     +1.0%|  47538359282        -3.8%|
> lim                              |    782      |     26      |    938458273             |      2199665             |    38115     +1.3%|  43911316219        -7.6%|
> loopdone                         |    782      |     26      |    938458273             |      2199665             |    38116     +0.0%|  42030322678        -4.3%|
> no_loop                          |    780    -2|     26      |    938219393      -238880|      2199665             |    38130     +0.0%|  42030322746        +0.0%|
> slp                              |      0  -780|      0   -26|            0   -938219393|            0     -2199665|       14   -100.0%|           68      -100.0%|
> veclower2                        |    780  +780|     26   +26|    938219393   +938219393|      2199665     +2199665|    38130 +272257.1%|  42030322746 +61809298055.9%|
> switchlower                      |    780      |     26      |    938219393             |      2199665             |    38129     -0.0%|  42030406045        +0.0%|
> recip                            |    782    +2|     26      |    938302687       +83294|      2199665             |    38128     -0.0%|  42030489344        +0.0%|
> reassoc                          |    782      |     26      |    938302687             |      2199665             |    38153     +0.1%|  42072927600        +0.1%|
> slsr                             |    782      |     26      |    938302687             |      2199665             |    37565     -1.5%|  42062311851        -0.0%|
> split-paths                      |    782      |     26      |    938302687             |      2199665             |    36952     -1.6%|  42009257846        -0.1%|
> tracer                           |    782      |     26      |    938302687             |      2199665             |    37091     +0.4%|  42017839334        +0.0%|
> fre                              |    782      |     26      |    884236883    -54065804|      2199665             |    35938     -3.1%|  41523235663        -1.2%|
> thread                           |    792   +10|     26      |    966537048    +82300165|      2199665             |    34784     -3.2%|  40970278410        -1.3%|
> dom                              |    800    +8|     26      |    966740391      +203343|      2199665             |    34878     +0.3%|  40883668999        -0.2%|
> strlen                           |    806    +6|     26      |    971963312     +5222921|      2199665             |    34834     -0.1%|  40846831682        -0.1%|
> threadfull                       |    806      |     26      |    971963312             |      2199665             |    35023     +0.5%|  40783019349        -0.2%|
> vrp                              |    832   +26|     26      |    983081423    +11118111|      2199665             |    35205     +0.5%|  40777250162        -0.0%|
> ccp                              |    835    +3|     26      |    995679581    +12598158|      2199665             |    35198     -0.0%|  40835293308        +0.1%|
> wrestrict                        |    834    -1|     26      |    995436090      -243491|      2199665             |    35198          |  40835293308             |
> dse                              |    834      |     26      |    995436090             |      2199665             |    34548     -1.8%|  40287490581        -1.3%|
> cddce                            |    825    -9|     26      |    993555689     -1880401|      2199665             |    33848     -2.0%|  39686775972        -1.5%|
> forwprop                         |    828    +3|     26      |    994360564      +804875|      2199665             |    33804     -0.1%|  39689166731        +0.0%|
> phiopt                           |    829    +1|     26      |    997327283     +2966719|      2199665             |    33783     -0.1%|  39729029151        +0.1%|
> fab                              |    829      |     26      |    997327283             |      2199665             |    33756     -0.1%|  39713588930        -0.0%|
> widening_mul                     |    829      |     26      |    997327283             |      2199665             |    33756          |  39713588930             |
> sink                             |    829      |     26      |    997327283             |      2199665             |    33746     -0.0%|  39672082343        -0.1%|
> store-merging                    |    829      |     26      |    997327283             |      2199665             |    33691     -0.2%|  39556068692        -0.3%|
> tailc                            |    829      |     26      |    997327283             |      2199665             |    33646     -0.1%|  39481561628        -0.2%|
> dce                              |    829      |     26      |    997327283             |      2199665             |    33646          |  39481561628             |
> crited                           |    827    -2|     26      |    997245974       -81309|      2199665             |    33646          |  39481561628             |
> local-pure-const                 |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
> modref                           |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
> uncprop                          |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
> nrv                              |    827      |     26      |    997245974             |      2199665             |    33646          |  39481561628             |
> isel                             |    827      |     26      |    997245974             |      2199665             |    33514     -0.4%|  39225113838        -0.6%|
> optimized                        |    827      |     26      |    997245974             |      2199665             |    33382     -0.4%|  38968666048        -0.7%|
> waccess                          |    817   -10|     26      |    994609654     -2636320|      2199665             |    33382          |  38968666048             |
> expand                           |    817      |     26      |    994609654             |      2199665             |-------------------|--------------------------|
> vregs                            |    792   -25|     26      |    982479501    -12130153|      2199665             |   248936    +76.4%| 289884265130       +76.4%|
> into_cfglayout                   |    792      |     26      |    982479501             |      2199665             |   245988     -1.2%| 287195553058        -0.9%|
> jump                             |    788    -4|     22    -4|    980690179     -1789322|       750472     -1449193|   242260     -1.5%| 284023632362        -1.1%|
> subreg1                          |    788      |     22      |    980690179             |       750472             |   241560     -0.3%| 283737224554        -0.1%|
> dfinit                           |    788      |     22      |    980690179             |       750472             |   241560          | 283737224554             |
> cse1                             |    788      |     22      |    980690179             |       750472             |   241530     -0.0%| 283542440179        -0.1%|
> fwprop1                          |    788      |     22      |    980690179             |       750472             |   236348     -2.1%| 277839977964        -2.0%|
> cprop                            |    788      |     22      |    980690179             |       750472             |   229356     -3.0%| 271526534608        -2.3%|
> rtl pre                          |    788      |     22      |    980690179             |       750472             |   228890     -0.2%| 271458511024        -0.0%|
> cprop                            |    788      |     22      |    980690179             |       750472             |   228432     -0.2%| 270488229100        -0.4%|
> cse_local                        |    788      |     22      |    980690179             |       750472             |   226190     -1.0%| 268346544636        -0.8%|
> ce1                              |    788      |     22      |    980690179             |       750472             |   224736     -0.6%| 266742600856        -0.6%|
> reginfo                          |    784    -4|     22      |    970723385     -9966794|       750472             |   223692     -0.5%| 265572317684        -0.4%|
> loop2                            |    784      |     22      |    970723385             |       750472             |   223692          | 265572317684             |
> loop2_init                       |    784      |     22      |    970723385             |       750472             |   223692          | 265572317684             |
> loop2_invariant                  |    784      |     22      |    970723385             |       750472             |   226952     +1.5%| 265012451525        -0.2%|
> loop2_unroll                     |    784      |     22      |    970723385             |       750472             |   238388     +5.0%| 263251671395        -0.7%|
> loop2_done                       |    804   +20|     22      |   1068548117    +97824732|       750472             |   246564     +3.4%| 262050757638        -0.5%|
> cprop                            |    796    -8|     22      |   1060536046     -8012071|       750472             |   243056     -1.4%| 259215526324        -1.1%|
> stv                              |    796      |     22      |   1060536046             |       750472             |   239560     -1.4%| 256380295176        -1.1%|
> cse2                             |    796      |     22      |   1060536046             |       750472             |   239166     -0.2%| 256198425332        -0.1%|
> dse1                             |    796      |     22      |   1060536046             |       750472             |   238756     -0.2%| 256016466788        -0.1%|
> fwprop2                          |    796      |     22      |   1060536046             |       750472             |   238716     -0.0%| 256013475076        -0.0%|
> init-regs                        |    796      |     22      |   1060536046             |       750472             |   238680     -0.0%| 256010571684        -0.0%|
> ud_dce                           |    796      |     22      |   1060536046             |       750472             |   238640     -0.0%| 255949424552        -0.0%|
> combine                          |    796      |     22      |   1060536046             |       750472             |   234341     -1.8%| 250210614407        -2.2%|
> rpad                             |    796      |     22      |   1060536046             |       750472             |   230082     -1.8%| 244532951394        -2.3%|
> stv                              |    796      |     22      |   1060536046             |       750472             |   230082          | 244532951394             |
> ce2                              |    796      |     22      |   1060536046             |       750472             |   230082          | 244532951394             |
> jump_after_combine               |    796      |     22      |   1060536046             |       750472             |   230082          | 244532951394             |
> bbpart                           |    796      |     22      |   1060536046             |       750472             |   230150     +0.0%| 244532951394             |
> outof_cfglayout                  |    796      |     22      |   1060536046             |       750472             |   233098     +1.3%| 245953693762        +0.6%|
> split1                           |    796      |     22      |   1060536046             |       750472             |   236330     +1.4%| 247637967034        +0.7%|
> subreg3                          |    796      |     22      |   1060536046             |       750472             |   236682     +0.1%| 247901497938        +0.1%|
> mode_sw                          |    796      |     22      |   1060536046             |       750472             |   236682          | 247901497938             |
> asmcons                          |    796      |     22      |   1060536046             |       750472             |   236682          | 247901497938             |
> ira                              |    796      |     22      |   1060536046             |       750472             |   276850    +17.0%| 259550769994        +4.7%|
> reload                           |    796      |     22      |   1060536046             |       750472             |   314040    +13.4%| 279667802131        +7.8%|
> vzeroupper                       |    795    -1|     22      |   1060252399      -283647|       750472             |   311062     -0.9%| 288135562212        +3.0%|
> postreload                       |    795      |     22      |   1060252399             |       750472             |   310594     -0.2%| 287983292675        -0.1%|
> gcse2                            |    795      |     22      |   1060252399             |       750472             |   310134     -0.1%| 287816349702        -0.1%|
> split2                           |    795      |     22      |   1060252399             |       750472             |   310606     +0.2%| 288424437654        +0.2%|
> ree                              |    795      |     22      |   1060252399             |       750472             |   311154     +0.2%| 289062128214        +0.2%|
> cmpelim                          |    795      |     22      |   1060252399             |       750472             |   311230     +0.0%| 289076923160        +0.0%|
> ro_and_epilogue                  |    795      |     22      |   1060252399             |       750472             |   312318     +0.3%| 290230761620        +0.4%|
> dse2                             |    795      |     22      |   1060252399             |       750472             |   313414     +0.4%| 291384734306        +0.4%|
> csa                              |    795      |     22      |   1060252399             |       750472             |   313414          | 291384734306             |
> jump2                            |    795      |     22      |   1060252399             |       750472             |   311606     -0.6%| 291487635182        +0.0%|
> compgotos                        |    799    +4|     22      |   1078875358    +18622959|       750472             |   309798     -0.6%| 291590536058        +0.0%|
> peephole2                        |    799      |     22      |   1078875358             |       750472             |   310221     +0.1%| 291470973464        -0.0%|
> ce3                              |    799      |     22      |   1078875358             |       750472             |   310644     +0.1%| 291343685638        -0.0%|
> cprop_hardreg                    |    799      |     22      |   1078875358             |       750472             |   310344     -0.1%| 291274241846        -0.0%|
> rtl_dce                          |    799      |     22      |   1078875358             |       750472             |   310036     -0.1%| 291212389842        -0.0%|
> bbro                             |    799      |     22      |   1078875358             |       750472             |   310712     +0.2%| 290584977970        -0.2%|
> split3                           |    792    -7|     22      |   1077517775     -1357583|       750472             |   311396     +0.2%| 289957699542        -0.2%|
> sched2                           |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
> stack                            |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
> zero_call_used_regs              |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
> alignments                       |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
> 
> Maritnj: I think we want to track, for start

You likely mean me, right?

> 
>   1) fixup_cfg                        |     19   +13|     57    +5|     65581029   -158744835|            0             |    34292    +27.9%|  73900655012        -7.8%|
>   2) loop                             |    612      |     24      |    861403844             |            0             |    25182          |  59589705822             |
>   3) waccess                          |    817   -10|     26      |    994609654     -2636320|      2199665             |    33382          |  38968666048             |
>   4) into_cfglayout                   |    792      |     26      |    982479501             |      2199665             |   245988     -1.2%| 287195553058        -0.9%|
>   5) alignments                       |    792      |     22      |   1077517775             |       750472             |   311396          | 289957699542             |
> 
> 1) is situation after IPA passes, 2) just before loop optimizations, 3) is end of gimple optimization queue, 4) is just after expansion and 5) is end of RTL.
> For each of it we can track
> 
> 	For this we can record:
> 	  - in count mismatches: 19
> 	  - out probability mismatches: 57
> 	  - dynamic in count mismatches: 65581029
> 	  - dynamic out probability mismatches: 0
> 	  - overall size: 34292
> 	  - overall time: 73900655012
> (values are from fixup_cfg stats).
> Would that be reasonable?

Yes, I'm going to add that.

Martin

> 
> There is some nonsense in the data, for example:
> no_loop                          |    780    -2|     26      |    938219393      -238880|      2199665             |    38130     +0.0%|  42030322746        +0.0%|
> slp                              |      0  -780|      0   -26|            0   -938219393|            0     -2199665|       14   -100.0%|           68      -100.0%|
> veclower2                        |    780  +780|     26   +26|    938219393   +938219393|      2199665     +2199665|    38130 +272257.1%|  42030322746 +61809298055.9%|
> 
> Which is related to the fact we re-run slp only sometimes.  I will try to improve
> this incrementally.
> 
> Bootstrapped/regtested x86_64-linux, will commit it shortly.
> 
> Honza
> 
> gcc/ChangeLog:
> 
> 2021-11-27  Jan Hubicka  <hubicka@ucw.cz>
> 
> 	* cfghooks.c: Include sreal.h, profile.h.
> 	(profile_record_check_consistency): Fix checking of count consistency;
> 	record also dynamic mismatches.
> 	* cfgrtl.c (rtl_account_profile_record): Similarly.
> 	* tree-cfg.c (gimple_account_profile_record): Likewise.
> 	* cfghooks.h (struct profile_record): Remove num_mismatched_freq_in,
> 	num_mismatched_freq_out, turn time to double, add
> 	dyn_mismatched_prob_out, dyn_mismatched_count_in,
> 	num_mismatched_prob_out; remove num_mismatched_count_out.
> 	* passes.c (pass_manager::dump_profile_report):
> 
> diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
> index fa2dae21a03..6a72ca37e3b 100644
> --- a/gcc/cfghooks.c
> +++ b/gcc/cfghooks.c
> @@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
>   #include "cfganal.h"
>   #include "tree-ssa.h"
>   #include "cfgloop.h"
> +#include "sreal.h"
> +#include "profile.h"
>   
>   /* Disable warnings about missing quoting in GCC diagnostics.  */
>   #if __GNUC__ >= 10
> @@ -1467,35 +1469,67 @@ profile_record_check_consistency (profile_record *record)
>     FOR_ALL_BB_FN (bb, cfun)
>      {
>         if (bb != EXIT_BLOCK_PTR_FOR_FN (cfun)
> -	  && profile_status_for_fn (cfun) != PROFILE_ABSENT)
> +	  && profile_status_for_fn (cfun) != PROFILE_ABSENT
> +	  && EDGE_COUNT (bb->succs))
>   	{
> -	  profile_probability sum = profile_probability::never ();
> -	  FOR_EACH_EDGE (e, ei, bb->succs)
> -	    sum += e->probability;
> -	  if (EDGE_COUNT (bb->succs)
> -	      && sum.differs_from_p (profile_probability::always ()))
> -	    record->num_mismatched_freq_out++;
> -	  profile_count lsum = profile_count::zero ();
> +	  sreal sum = 0;
> +	  bool found = false;
>   	  FOR_EACH_EDGE (e, ei, bb->succs)
> -	    lsum += e->count ();
> -	  if (EDGE_COUNT (bb->succs) && (lsum.differs_from_p (bb->count)))
> -	    record->num_mismatched_count_out++;
> +	    {
> +	      if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
> +		found = true;
> +	      sum += e->probability.to_sreal ();
> +	    }
> +	  double dsum = sum.to_double ();
> +	  if (found && (dsum < 0.9 || dsum > 1.1))
> +	    {
> +	      record->num_mismatched_prob_out++;
> +	      dsum = dsum > 1 ? dsum - 1 : 1 - dsum;
> +	      if (profile_info)
> +		{
> +		  if (ENTRY_BLOCK_PTR_FOR_FN
> +			 (cfun)->count.ipa ().initialized_p ()
> +		      && ENTRY_BLOCK_PTR_FOR_FN
> +			 (cfun)->count.ipa ().nonzero_p ()
> +		      && bb->count.ipa ().initialized_p ())
> +		    record->dyn_mismatched_prob_out
> +			+= dsum * bb->count.ipa ().to_gcov_type ();
> +		}
> +	      else if (bb->count.initialized_p ())
> +		record->dyn_mismatched_prob_out
> +		    += dsum * bb->count.to_sreal_scale
> +			(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
> +	    }
>   	}
>         if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
>   	  && profile_status_for_fn (cfun) != PROFILE_ABSENT)
>   	{
> -	  profile_probability sum = profile_probability::never ();
>   	  profile_count lsum = profile_count::zero ();
>   	  FOR_EACH_EDGE (e, ei, bb->preds)
> +	    lsum += e->count ();
> +	  if (lsum.differs_from_p (bb->count))
>   	    {
> -	      sum += e->probability;
> -	      lsum += e->count ();
> +	      record->num_mismatched_count_in++;
> +	      profile_count max;
> +	      if (lsum < bb->count)
> +		max = bb->count;
> +	      else
> +		max = lsum;
> +	      if (profile_info)
> +		{
> +		  if (ENTRY_BLOCK_PTR_FOR_FN
> +			 (cfun)->count.ipa ().initialized_p ()
> +		      && ENTRY_BLOCK_PTR_FOR_FN
> +			 (cfun)->count.ipa ().nonzero_p ()
> +		      && max.ipa ().initialized_p ())
> +		    record->dyn_mismatched_count_in
> +			+= max.ipa ().to_gcov_type ();
> +		}
> +	      else if (bb->count.initialized_p ())
> +		record->dyn_mismatched_prob_out
> +		    += max.to_sreal_scale
> +			(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
>   	    }
> -	  if (EDGE_COUNT (bb->preds)
> -	      && sum.differs_from_p (profile_probability::always ()))
> -	    record->num_mismatched_freq_in++;
> -	  if (lsum.differs_from_p (bb->count))
> -	    record->num_mismatched_count_in++;
>   	}
>         if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)
>   	  || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
> diff --git a/gcc/cfghooks.h b/gcc/cfghooks.h
> index 6df651f128e..684e6319d26 100644
> --- a/gcc/cfghooks.h
> +++ b/gcc/cfghooks.h
> @@ -36,22 +36,25 @@ along with GCC; see the file COPYING3.  If not see
>      and one CFG hook per CFG mode.  */
>   struct profile_record
>   {
> -  /* The number of basic blocks where sum(freq) of the block's predecessors
> -     doesn't match reasonably well with the incoming frequency.  */
> -  int num_mismatched_freq_in;
> -  /* Likewise for a basic block's successors.  */
> -  int num_mismatched_freq_out;
> +  /* A weighted cost of the run-time of the function body.  */
> +  double time;
> +  /* Frequency of execution of basic blocks where sum(prob) of the block's
> +     predecessors doesn't match reasonably probability 1.  */
> +  double dyn_mismatched_prob_out;
> +  /* Frequency of execution basic blocks where sum(count) of the block's
> +     predecessors doesn't match reasonably well with the incoming frequency.  */
> +  double dyn_mismatched_count_in;
> +  /* The number of basic blocks where sum(prob) of the block's predecessors
> +     doesn't match reasonably probability 1.  */
> +  int num_mismatched_prob_out;
>     /* The number of basic blocks where sum(count) of the block's predecessors
>        doesn't match reasonably well with the incoming frequency.  */
>     int num_mismatched_count_in;
> -  /* Likewise for a basic block's successors.  */
> -  int num_mismatched_count_out;
> -  /* A weighted cost of the run-time of the function body.  */
> -  gcov_type_unsigned time;
>     /* A weighted cost of the size of the function body.  */
>     int size;
>     /* True iff this pass actually was run.  */
>     bool run;
> +  bool fdo;
>   };
>   
>   typedef int_hash <unsigned short, 0> dependence_hash;
> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
> index c7ba9006b4e..3744adcc2ba 100644
> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -63,6 +63,8 @@ along with GCC; see the file COPYING3.  If not see
>   #include "print-rtl.h"
>   #include "rtl-iter.h"
>   #include "gimplify.h"
> +#include "profile.h"
> +#include "sreal.h"
>   
>   /* Disable warnings about missing quoting in GCC diagnostics.  */
>   #if __GNUC__ >= 10
> @@ -5264,12 +5266,22 @@ rtl_account_profile_record (basic_block bb, struct profile_record *record)
>       if (INSN_P (insn))
>         {
>   	record->size += insn_cost (insn, false);
> -	if (bb->count.initialized_p ())
> -	  record->time
> -	    += insn_cost (insn, true) * bb->count.to_gcov_type ();
> -	else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
> +	if (profile_info)
> +	  {
> +	    if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().initialized_p ()
> +		&& ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
> +		&& bb->count.ipa ().initialized_p ())
> +	      record->time
> +		+= insn_cost (insn, true) * bb->count.ipa ().to_gcov_type ();
> +	  }
> +	else if (bb->count.initialized_p ()
> +		 && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
>   	  record->time
> -	    += insn_cost (insn, true) * bb->count.to_frequency (cfun);
> +	    += insn_cost (insn, true)
> +	       * bb->count.to_sreal_scale
> +		      (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
> +	else
> +	  record->time += insn_cost (insn, true);
>         }
>   }
>   
> diff --git a/gcc/passes.c b/gcc/passes.c
> index 64550b00b43..5a7da687b0b 100644
> --- a/gcc/passes.c
> +++ b/gcc/passes.c
> @@ -1893,10 +1893,11 @@ dump_profile_report (void)
>   void
>   pass_manager::dump_profile_report () const
>   {
> -  int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
> -  gcov_type last_time = 0, last_size = 0;
> +  int last_count_in = 0, last_prob_out = 0;
> +  double last_dyn_count_in = 0, last_dyn_prob_out = 0;
> +  double last_time = 0;
> +  int last_size = 0;
>     double rel_time_change, rel_size_change;
> -  int last_reported = 0;
>   
>     if (!profile_record)
>       return;
> @@ -1906,16 +1907,21 @@ pass_manager::dump_profile_report () const
>       dump_file = stderr;
>   
>     fprintf (dump_file, "Profile consistency report:\n\n");
> -  fprintf (dump_file, "                                 |mismatch     |mismatch     |                     |\n");
> -  fprintf (dump_file, "Pass name                        |IN    |IN    |OUT   |OUT   |overall              |\n");
> -  fprintf (dump_file, "                                 |freq  |count |freq  |count |size      |time      |\n");
> +  fprintf (dump_file,
> +	   "Pass name                        |static mismatch            "
> +	   "|dynamic mismatch                                     "
> +	   "|overall                                       |\n");
> +  fprintf (dump_file,
> +	   "                                 |in count     |out prob     "
> +	   "|in count                  |out prob                  "
> +	   "|size               |time                      |\n");
>   	
>     for (int i = 1; i < passes_by_id_size; i++)
>       if (profile_record[i].run)
>         {
>   	if (last_time)
>   	  rel_time_change = (profile_record[i].time
> -			     - (double)last_time) * 100 / (double)last_time;
> +			     - last_time) * 100 / last_time;
>   	else
>   	  rel_time_change = 0;
>   	if (last_size)
> @@ -1924,65 +1930,62 @@ pass_manager::dump_profile_report () const
>   	else
>   	  rel_size_change = 0;
>   
> -	if (profile_record[i].num_mismatched_freq_in != last_freq_in
> -	    || profile_record[i].num_mismatched_freq_out != last_freq_out
> -	    || profile_record[i].num_mismatched_count_in != last_count_in
> -	    || profile_record[i].num_mismatched_count_out != last_count_out
> -	    || rel_time_change || rel_size_change)
> +	fprintf (dump_file, "%-33s| %6i", passes_by_id[i]->name,
> +		 profile_record[i].num_mismatched_count_in);
> +	if (profile_record[i].num_mismatched_count_in != last_count_in)
> +	  fprintf (dump_file, " %+5i",
> +		   profile_record[i].num_mismatched_count_in
> +		   - last_count_in);
> +	else
> +	  fprintf (dump_file, "      ");
> +	fprintf (dump_file, "| %6i",
> +		 profile_record[i].num_mismatched_prob_out);
> +	if (profile_record[i].num_mismatched_prob_out != last_prob_out)
> +	  fprintf (dump_file, " %+5i",
> +		   profile_record[i].num_mismatched_prob_out
> +		   - last_prob_out);
> +	else
> +	  fprintf (dump_file, "      ");
> +
> +	fprintf (dump_file, "| %12.0f",
> +		 profile_record[i].dyn_mismatched_count_in);
> +	if (profile_record[i].dyn_mismatched_count_in != last_dyn_count_in)
> +	  fprintf (dump_file, " %+12.0f",
> +		   profile_record[i].dyn_mismatched_count_in
> +		   - last_dyn_count_in);
> +	else
> +	  fprintf (dump_file, "             ");
> +	fprintf (dump_file, "| %12.0f",
> +		 profile_record[i].dyn_mismatched_prob_out);
> +	if (profile_record[i].dyn_mismatched_prob_out != last_dyn_prob_out)
> +	  fprintf (dump_file, " %+12.0f",
> +		   profile_record[i].dyn_mismatched_prob_out
> +		   - last_dyn_prob_out);
> +	else
> +	  fprintf (dump_file, "             ");
> +
> +	/* Size/time units change across gimple and RTL.  */
> +	if (i == pass_expand_1->static_pass_number)
> +	  fprintf (dump_file,
> +		   "|-------------------|--------------------------");
> +	else
>   	  {
> -	    last_reported = i;
> -	    fprintf (dump_file, "%-33s", passes_by_id[i]->name);
> -	    if (profile_record[i].num_mismatched_freq_in != last_freq_in)
> -	      fprintf (dump_file, "| %+5i",
> -		       profile_record[i].num_mismatched_freq_in
> -		       - last_freq_in);
> +	    fprintf (dump_file, "| %8i", profile_record[i].size);
> +	    if (rel_size_change)
> +	      fprintf (dump_file, " %+8.1f%%", rel_size_change);
>   	    else
> -	      fprintf (dump_file, "|      ");
> -	    if (profile_record[i].num_mismatched_count_in != last_count_in)
> -	      fprintf (dump_file, "| %+5i",
> -		       profile_record[i].num_mismatched_count_in
> -		       - last_count_in);
> +	      fprintf (dump_file, "          ");
> +	    fprintf (dump_file, "| %12.0f", profile_record[i].time);
> +	    if (rel_time_change)
> +	      fprintf (dump_file, " %+11.1f%%", rel_time_change);
>   	    else
> -	      fprintf (dump_file, "|      ");
> -	    if (profile_record[i].num_mismatched_freq_out != last_freq_out)
> -	      fprintf (dump_file, "| %+5i",
> -		       profile_record[i].num_mismatched_freq_out
> -		       - last_freq_out);
> -	    else
> -	      fprintf (dump_file, "|      ");
> -	    if (profile_record[i].num_mismatched_count_out != last_count_out)
> -	      fprintf (dump_file, "| %+5i",
> -		       profile_record[i].num_mismatched_count_out
> -		       - last_count_out);
> -	    else
> -	      fprintf (dump_file, "|      ");
> -
> -	    /* Size/time units change across gimple and RTL.  */
> -	    if (i == pass_expand_1->static_pass_number)
> -	      fprintf (dump_file, "|----------|----------");
> -	    else
> -	      {
> -		if (rel_size_change)
> -		  fprintf (dump_file, "| %+8.1f%%", rel_size_change);
> -		else
> -		  fprintf (dump_file, "|          ");
> -		if (rel_time_change)
> -		  fprintf (dump_file, "| %+8.1f%%", rel_time_change);
> -		else
> -		  fprintf (dump_file, "|          ");
> -	      }
> -	    fprintf (dump_file, "|\n");
> -	    last_freq_in = profile_record[i].num_mismatched_freq_in;
> -	    last_freq_out = profile_record[i].num_mismatched_freq_out;
> -	    last_count_in = profile_record[i].num_mismatched_count_in;
> -	    last_count_out = profile_record[i].num_mismatched_count_out;
> -	  }
> -	else if (last_reported != i)
> -	  {
> -	    last_reported = i;
> -	    fprintf (dump_file, "%-20s ------------|      |      |      |      |          |          |\n",
> -		     passes_by_id[i]->name);
> +	      fprintf (dump_file, "             ");
>   	  }
> +	fprintf (dump_file, "|\n");
> +	last_prob_out = profile_record[i].num_mismatched_prob_out;
> +	last_count_in = profile_record[i].num_mismatched_count_in;
> +	last_dyn_prob_out = profile_record[i].dyn_mismatched_prob_out;
> +	last_dyn_count_in = profile_record[i].dyn_mismatched_count_in;
>   	last_time = profile_record[i].time;
>   	last_size = profile_record[i].size;
>         }
> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> index 8ed8c69b5b1..ebbd894ae03 100644
> --- a/gcc/tree-cfg.c
> +++ b/gcc/tree-cfg.c
> @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "opts.h"
>   #include "asan.h"
>   #include "profile.h"
> +#include "sreal.h"
>   
>   /* This file contains functions for building the Control Flow Graph (CFG)
>      for a function tree.  */
> @@ -9084,18 +9085,32 @@ gimple_account_profile_record (basic_block bb,
>   			       struct profile_record *record)
>   {
>     gimple_stmt_iterator i;
> -  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
> +  for (i = gsi_start_nondebug_after_labels_bb (bb); !gsi_end_p (i);
> +       gsi_next_nondebug (&i))
>       {
>         record->size
>   	+= estimate_num_insns (gsi_stmt (i), &eni_size_weights);
> -      if (bb->count.initialized_p ())
> +      if (profile_info)
> +	{
> +	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().initialized_p ()
> +	      && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
> +	      && bb->count.ipa ().initialized_p ())
> +	    record->time
> +	      += estimate_num_insns (gsi_stmt (i),
> +				     &eni_time_weights)
> +				     * bb->count.ipa ().to_gcov_type ();
> +	}
> +      else if (bb->count.initialized_p ()
> +	       && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
>   	record->time
> -	  += estimate_num_insns (gsi_stmt (i),
> -				 &eni_time_weights) * bb->count.to_gcov_type ();
> -      else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
> -	record->time
> -	  += estimate_num_insns (gsi_stmt (i),
> -				 &eni_time_weights) * bb->count.to_frequency (cfun);
> +	  += estimate_num_insns
> +		(gsi_stmt (i),
> +		 &eni_time_weights)
> +		 * bb->count.to_sreal_scale
> +			(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
> +     else
> +      record->time
> +	+= estimate_num_insns (gsi_stmt (i), &eni_time_weights);
>       }
>   }
>   
> 



More information about the Gcc-patches mailing list