This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/21485] [4.2/4.3/4.4 Regression] missed load PRE, PRE makes i?86 suck



------- Comment #34 from rguenth at gcc dot gnu dot org  2008-10-04 15:11 -------
Fastest result on an Intel Core Duo with

gcc-4.1 -O3 -fomit-frame-pointer -fno-tree-pre -fno-inline -fschedule-insns:
1273

The interesting thing is that with the above options we if-convert

        if (array[k] < array[k + 1L]) 
          ++k; 

using setl, which reduces the burden on the branch predictor; in the worst
case (trunk) the predictor suffers quite a number of mispredicts.  The following
shows branches retired vs. mispredicted branches retired for trunk (with PRE enabled):

 * CPU: Core Solo / Duo, speed 1833 MHz (estimated)
 * Counted BR_INST_RETIRED events (number of branch instructions retired) with a
 unit mask of 0x00 (No unit mask) count 10000
 * Counted BR_MISS_PRED_RETIRED events (number of mispredicted branches retired)
 with a unit mask of 0x00 (No unit mask) count 10000

080486d0 <NumSift>: /* NumSift total: 188708 95.2681 21424 99.9953 */
   752  0.3796     0       0   : 80486d0:       push   %ebp
                               : 80486d1:       push   %edi
                               : 80486d2:       push   %esi
   824  0.4160     0       0   : 80486d3:       push   %ebx
     5  0.0025     0       0   : 80486d4:       sub    $0xc,%esp
                               : 80486d7:       mov    %ecx,(%esp)
  1541  0.7780     0       0   : 80486da:       add    $0x1,%ecx
                               : 80486dd:       mov    %ecx,0x8(%esp)
                               : 80486e1:       lea    0x0(%esi),%esi
   709  0.3579     2  0.0093   : 80486e8:       lea    (%edx,%edx,1),%ecx
  1706  0.8613     1  0.0047   : 80486eb:       cmp    (%esp),%ecx
  3083  1.5564   924  4.3127   : 80486ee:       mov    %ecx,%edi
    92  0.0464     0       0   : 80486f0:       lea    (%eax,%edx,8),%ebp
                               : 80486f3:       mov    %ebp,%ebx
   868  0.4382    13  0.0607   : 80486f5:       ja     804871d <NumSift+0x4d>
  5732  2.8938     0       0   : 80486f7:       jb     8048728 <NumSift+0x58>
     2  0.0010     0       0   : 80486f9:       mov    (%ebx),%esi
  7789  3.9322   162  0.7561   : 80486fb:       lea    (%eax,%edx,4),%ecx
 34575 17.4550  6534 30.4971   : 80486fe:       mov    0x8(%esp),%edx
  3070  1.5499  2103  9.8156   : 8048702:       mov    (%ecx),%ebp
  8244  4.1619   134  0.6254   : 8048704:       cmp    %esi,%ebp
  2322  1.1722   155  0.7235   : 8048706:       jge    80486e8 <NumSift+0x18>
  1363  0.6881   236  1.1015   : 8048708:       mov    %edi,%edx
  3578  1.8063     0       0   : 804870a:       mov    %ebp,(%ebx)
   450  0.2272   367  1.7130   : 804870c:       lea    (%eax,%edx,8),%ebp
  3797  1.9169     0       0   : 804870f:       mov    %esi,(%ecx)
  5035  2.5419    22  0.1027   : 8048711:       lea    (%edx,%edx,1),%ecx
                               : 8048714:       mov    %ebp,%ebx
   389  0.1964     0       0   : 8048716:       cmp    (%esp),%ecx
  5885  2.9710    15  0.0700   : 8048719:       mov    %ecx,%edi
     7  0.0035     0       0   : 804871b:       jbe    80486f7 <NumSift+0x27>
   416  0.2100    24  0.1120   : 804871d:       add    $0xc,%esp
  5419  2.7357  1431  6.6791   : 8048720:       pop    %ebx
   568  0.2868   275  1.2835   : 8048721:       pop    %esi
   710  0.3584    24  0.1120   : 8048722:       pop    %edi
   334  0.1686    12  0.0560   : 8048723:       pop    %ebp
   146  0.0737    91  0.4247   : 8048724:       ret
                               : 8048725:       lea    0x0(%esi),%esi
  8706  4.3952     0       0   : 8048728:       mov    0x0(%ebp),%ebx
  1536  0.7754   379  1.7690   : 804872b:       lea    0x1(%ecx),%edi
                               : 804872e:       mov    %ebx,0x4(%esp)
 14484  7.3122     9  0.0420   : 8048732:       lea    (%eax,%edi,4),%ebx
                               : 8048735:       mov    (%ebx),%esi
  2165  1.0930     6  0.0280   : 8048737:       cmp    %esi,0x4(%esp)
 19814 10.0030     1  0.0047   : 804873b:       jl     80486fb <NumSift+0x2b>
  2585  1.3050     0       0   : 804873d:       mov    0x4(%esp),%esi
 37728 19.0468  8504 39.6919   : 8048741:       mov    %ebp,%ebx
  1511  0.7628     0       0   : 8048743:       mov    %ecx,%edi
   768  0.3877     0       0   : 8048745:       jmp    80486fb <NumSift+0x2b>
                               : 8048747:       mov    %esi,%esi
                               : 8048749:       lea    0x0(%edi),%edi

while the following is what we get for the gcc 4.1 code w/o PRE:

08048670 <NumSift>: /* NumSift total: 200781 92.9938  4738 99.9156 */
  1196  0.5539     0       0   : 8048670:       push   %ebp
     1 4.6e-04     0       0   : 8048671:       push   %edi
     2 9.3e-04     0       0   : 8048672:       mov    %eax,%edi
  2084  0.9652     0       0   : 8048674:       push   %esi
     9  0.0042     0       0   : 8048675:       push   %ebx
     1 4.6e-04     0       0   : 8048676:       mov    %edx,%ebx
  1162  0.5382     0       0   : 8048678:       sub    $0x4,%esp
     6  0.0028     0       0   : 804867b:       mov    %ecx,(%esp)
  3128  1.4488     0       0   : 804867e:       xchg   %ax,%ax
  1078  0.4993     2  0.0422   : 8048680:       lea    (%ebx,%ebx,1),%edx
   577  0.2672     0       0   : 8048683:       cmp    (%esp),%edx
   202  0.0936     6  0.1265   : 8048686:       lea    (%edi,%ebx,4),%ebp
   152  0.0704     1  0.0211   : 8048689:       ja     80486ba <NumSift+0x4a>
 44618 20.6653     0       0   : 804868b:       jae    804869c <NumSift+0x2c>
  2125  0.9842    62  1.3075   : 804868d:       mov    (%edi,%ebx,8),%eax
  2932  1.3580   322  6.7904   : 8048690:       cmp    0x4(%edi,%ebx,8),%eax
 23392 10.8342   151  3.1843   : 8048694:       setl   %al
  8331  3.8586     5  0.1054   : 8048697:       movzbl %al,%eax
 11420  5.2893     0       0   : 804869a:       add    %eax,%edx
 15985  7.4036     1  0.0211   : 804869c:       lea    (%edi,%edx,4),%esi
  5171  2.3950     6  0.1265   : 804869f:       mov    0x0(%ebp),%ecx
   109  0.0505     0       0   : 80486a2:       mov    %edx,%ebx
  1129  0.5229     0       0   : 80486a4:       mov    (%esi),%eax
 16905  7.8297     0       0   : 80486a6:       cmp    %eax,%ecx
  2442  1.1310     0       0   : 80486a8:       jge    80486c2 <NumSift+0x52>
 18994  8.7973     1  0.0211   : 80486aa:       lea    (%ebx,%ebx,1),%edx
  1134  0.5252   507 10.6917   : 80486ad:       cmp    (%esp),%edx
   136  0.0630     4  0.0844   : 80486b0:       mov    %ecx,(%esi)
 19141  8.8654     0       0   : 80486b2:       mov    %eax,0x0(%ebp)
     1 4.6e-04     0       0   : 80486b5:       lea    (%edi,%ebx,4),%ebp
    36  0.0167     0       0   : 80486b8:       jbe    804868b <NumSift+0x1b>
  3202  1.4830     0       0   : 80486ba:       add    $0x4,%esp
  4369  2.0235   618 13.0325   : 80486bd:       pop    %ebx
  1842  0.8531   680 14.3399   : 80486be:       pop    %esi
  2309  1.0694   878 18.5154   : 80486bf:       pop    %edi
    54  0.0250     1  0.0211   : 80486c0:       pop    %ebp
   500  0.2316     5  0.1054   : 80486c1:       ret    
   498  0.2307     0       0   : 80486c2:       mov    (%esp),%ebx
  4407  2.0411  1487 31.3581   : 80486c5:       add    $0x1,%ebx
     1 4.6e-04     1  0.0211   : 80486c8:       jmp    8048680 <NumSift+0x10>
                               : 80486ca:       lea    0x0(%esi),%esi


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21485


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]