This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: GCC beaten by ICC in stupid trig test!


Andrew Pinski wrote:
Here is a much better benchmark to try, notice that we are doing more
work now but the point is that ICC is going to do the transformation
and it is not going to see that doit is constant so it will not pull it
out of the loop and it cannot unroll the loop into just being a constant.

Intel does *not* unroll the original code into a constant. To wit, the code generated on my system:


        # ---------------------------------------------------------------
        # main -- compiler-generated listing, 32-bit x86, AT&T/GAS syntax.
        #
        # Outline, as visible in the instructions below:
        #   1. Build a 64-byte-aligned stack frame; set bit 15 of MXCSR.
        #   2. Call vmldSin2 and vmldCos2 once each, BEFORE the loop, on
        #      the same 16-byte packed constant, then square both results.
        #   3. Loop: add both squared vectors into an xmm2 accumulator,
        #      stepping %edi by 2 until it reaches 100000000.
        #   4. Sum the accumulator's two double lanes and pass the result
        #      to printf; return 0.
        #
        # Register roles inside the loop:
        #   %edi  = loop counter      %xmm2 = running packed-double sum
        #   %xmm1 = squared vmldSin2 result
        #   %xmm0 = squared vmldCos2 result
        #
        # NOTE(review): trailing "#NN.MM" tags look like source line.column
        # markers, and "# LOE" lines look like compiler-emitted register
        # lists (presumably "live on exit") -- confirm against the
        # compiler's listing-format documentation.
        # NOTE(review): __intel_proc_init_N, vmldSin2, vmldCos2,
        # _2il0floatpacket.1 and __STRING.0 are defined outside this
        # listing; their semantics are inferred from names and usage only.
        # ---------------------------------------------------------------
        .globl main
main:
..B1.1:                         # Preds ..B1.0
        # Prologue: %ebx keeps the incoming %esp so the epilogue can undo
        # the alignment; andl $-64 rounds %esp down to a 64-byte boundary.
        # The movapd stack accesses below require 16-byte alignment.
        pushl     %ebx                                          #14.1
        movl      %esp, %ebx                                    #14.1
        andl      $-64, %esp                                    #14.1
        # 4 bytes (push) + 60 bytes (sub) = 64, so %esp stays 64-aligned.
        pushl     %edi                                          #14.1
        subl      $60, %esp                                     #14.1
        # Presumably Intel runtime processor initialisation -- not defined
        # in this listing; TODO confirm.
        call      __intel_proc_init_N                           #14.1
        # Read-modify-write of MXCSR through a scratch stack slot:
        # the first push saves %eax, the second reserves the slot.
        # 32768 = 0x8000 = MXCSR bit 15 (flush-to-zero).
        pushl     %eax                                          #14.1
        pushl     %eax                                          #14.1
        stmxcsr   (%esp)                                        #14.1
        popl      %eax                                          #14.1
        orl       $32768, %eax                                  #14.1
        pushl     %eax                                          #14.1
        ldmxcsr   (%esp)                                        #14.1
        # First pop discards the scratch slot, second restores %eax.
        popl      %eax                                          #14.1
        popl      %eax                                          #14.1
        # Load the 16-byte packed argument (value not shown here) into
        # %xmm0 for the first call.
        movapd    _2il0floatpacket.1, %xmm0                     #18.14
        # %edi = 0: loop counter. 16(%esp) = packed zero: initial value of
        # the accumulator that ..B1.8 later loads into %xmm2.
        xorl      %edi, %edi                                    #17.5
        pxor      %xmm1, %xmm1                                  #
        movapd    %xmm1, 16(%esp)                               #
        # Presumably a two-lane double-precision sine from Intel's vector
        # math library, argument and result in %xmm0 -- TODO confirm.
        call      vmldSin2                                      #18.14
                                # LOE ebp esi edi xmm0
..B1.7:                         # Preds ..B1.1
        # Spill the vmldSin2 result to 32(%esp), reload the same constant,
        # and make the second call (presumably the matching cosine).
        movapd    %xmm0, 32(%esp)                               #18.14
        movapd    _2il0floatpacket.1, %xmm0                     #18.14
        call      vmldCos2                                      #18.14
                                # LOE ebp esi edi xmm0
..B1.8:                         # Preds ..B1.7
        # Square both results lane-wise, once, outside the loop:
        #   %xmm0 = (vmldCos2 result)^2, %xmm1 = (vmldSin2 result)^2.
        mulpd     %xmm0, %xmm0                                  #18.14
        movapd    32(%esp), %xmm1                               #18.14
        mulpd     %xmm1, %xmm1                                  #18.14
        # %xmm2 = accumulator, starting from the zero stored at 16(%esp).
        movapd    16(%esp), %xmm2                               #18.14
        # Store followed by an immediate reload of the same value: %xmm1
        # is unchanged by this pair.
        movapd    %xmm1, 32(%esp)                               #18.14
        movapd    32(%esp), %xmm1                               #18.14
        # Align the loop head; padding bytes are 0x90 (x86 NOP).
        .align    4,0x90
                                # LOE ebp esi edi xmm0 xmm1 xmm2
..B1.2:                         # Preds ..B1.8 ..B1.2
        # Loop body contains only the two packed adds: the sin/cos calls
        # and squarings were done once above, but the summation itself is
        # still executed at run time rather than folded into a constant.
        addpd     %xmm1, %xmm2                                  #18.9
        addpd     %xmm0, %xmm2                                  #18.14
        # Counter advances by 2 per trip (presumably two source-loop
        # iterations, one per double lane); jb = unsigned "below", so the
        # loop repeats while %edi < 100000000.
        addl      $2, %edi                                      #17.5
        cmpl      $100000000, %edi                              #17.5
        jb        ..B1.2        # Prob 100%                     #17.5
                                # LOE ebp esi edi xmm0 xmm1 xmm2
..B1.3:                         # Preds ..B1.2
        # Horizontal sum of the two accumulator lanes:
        # unpckhpd copies the high lane of %xmm0 into both lanes, then
        # addsd leaves low + high in the low lane of %xmm1.
        movapd    %xmm2, 16(%esp)                               #
        movapd    16(%esp), %xmm1                               #17.5
        movapd    %xmm1, %xmm0                                  #17.5
        unpckhpd  %xmm0, %xmm0                                  #17.5
        addsd     %xmm0, %xmm1                                  #
        # Stack-passed printf arguments: format string pointer at (%esp),
        # the 8-byte double sum at 4(%esp).
        movl      $__STRING.0, (%esp)                           #20.12
        movsd     %xmm1, 4(%esp)                                #20.23
        call      printf                                        #20.5
                                # LOE ebp esi
..B1.4:                         # Preds ..B1.3
        # Epilogue: return value 0 in %eax, release the 60-byte frame,
        # restore %edi, restore the pre-alignment %esp from %ebx, restore
        # %ebx.
        xorl      %eax, %eax                                    #21.12
        addl      $60, %esp                                     #21.12
        popl      %edi                                          #21.12
        movl      %ebx, %esp                                    #21.12
        popl      %ebx                                          #21.12
        ret                                                     #21.12


I made some minor mention of this in the original post, but it was likely too vague for you.


--
Scott Robert Ladd
Coyote Gulch Productions (http://www.coyotegulch.com)
Software Invention for High-Performance Computing


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]