GCC beaten by ICC in stupid trig test!

Scott Robert Ladd coyote@coyotegulch.com
Mon Mar 15 02:36:00 GMT 2004


Andrew Pinski wrote:
> Here is a much better benchmark to try, notice that we are doing more
>  work now but the point is that ICC is going to be the transformation
>  and it not going to see that doit is constant so it will not pull it
>  out of the loop and it cannot unroll the loop into just being a 
> constant.

Intel does *not* unroll the original code into a constant. To wit, the 
code generated on my system:

         # main -- compiler-generated listing (32-bit x86, AT&T syntax) for the
         # trig benchmark discussed in this message.
         #
         # Shape of the code: the two vector math routines (vmldSin2, vmldCos2)
         # are each called exactly once, before the loop.  The loop at ..B1.2
         # then only accumulates the two squared results.  The loop itself is
         # still executed; it is not folded into a constant.
         #
         # Register roles inside the loop (see the LOE comment above ..B1.2):
         #   edi  = loop counter, starts at 0, stepped by 2
         #   xmm0 = squared vmldCos2 result (two packed doubles)
         #   xmm1 = squared vmldSin2 result (two packed doubles)
         #   xmm2 = running packed-double accumulator, starts at 0.0
         #
         # NOTE(review): the trailing "#NN.MM" annotations look like source
         # line.column markers emitted by the compiler -- confirm against the
         # original C source, which is not shown here.
         .globl main
main:
..B1.1:                         # Preds ..B1.0
         # Prologue: keep the incoming %esp in %ebx, then force %esp down to a
         # 64-byte boundary.  The following push (4 bytes) plus the 60-byte
         # reservation total 64 bytes, so %esp stays 64-byte aligned and the
         # movapd accesses at 16(%esp)/32(%esp) are 16-byte aligned.
         pushl     %ebx                                          #14.1
         movl      %esp, %ebx                                    #14.1
         andl      $-64, %esp                                    #14.1
         pushl     %edi                                          #14.1
         subl      $60, %esp                                     #14.1
         # NOTE(review): presumably an Intel runtime processor-initialisation
         # hook; its behaviour is not visible in this listing.
         call      __intel_proc_init_N                           #14.1
         # Read-modify-write of MXCSR through a scratch stack slot.  The first
         # push preserves %eax, the second creates the slot.  32768 = 0x8000
         # sets bit 15 of MXCSR, which is the flush-to-zero (FTZ) control bit.
         # The final two pops discard the slot and restore %eax.
         pushl     %eax                                          #14.1
         pushl     %eax                                          #14.1
         stmxcsr   (%esp)                                        #14.1
         popl      %eax                                          #14.1
         orl       $32768, %eax                                  #14.1
         pushl     %eax                                          #14.1
         ldmxcsr   (%esp)                                        #14.1
         popl      %eax                                          #14.1
         popl      %eax                                          #14.1
         # Load the 16-byte packed argument for the first math call.  The
         # contents of _2il0floatpacket.1 are not shown in this listing.
         movapd    _2il0floatpacket.1, %xmm0                     #18.14
         # edi = 0: loop counter used by ..B1.2.
         xorl      %edi, %edi                                    #17.5
         # Zero the accumulator and park it at 16(%esp) across the calls; it
         # is reloaded into %xmm2 just before the loop.
         pxor      %xmm1, %xmm1                                  #
         movapd    %xmm1, 16(%esp)                               #
         # NOTE(review): by its name, presumably a vector sine over two packed
         # doubles, taking its argument and returning its result in %xmm0.
         call      vmldSin2                                      #18.14
                                 # LOE ebp esi edi xmm0
..B1.7:                         # Preds ..B1.1
         # Save the first call's result at 32(%esp), reload the same packed
         # argument, and make the second math call.
         movapd    %xmm0, 32(%esp)                               #18.14
         movapd    _2il0floatpacket.1, %xmm0                     #18.14
         # NOTE(review): by its name, presumably the matching vector cosine.
         call      vmldCos2                                      #18.14
                                 # LOE ebp esi edi xmm0
..B1.8:                         # Preds ..B1.7
         # Square both results element-wise, once, outside the loop:
         #   xmm0 = (vmldCos2 result)^2, xmm1 = (vmldSin2 result)^2.
         mulpd     %xmm0, %xmm0                                  #18.14
         movapd    32(%esp), %xmm1                               #18.14
         mulpd     %xmm1, %xmm1                                  #18.14
         # xmm2 = accumulator (the zero stored at 16(%esp) above).
         movapd    16(%esp), %xmm2                               #18.14
         # Store-then-reload of %xmm1 through 32(%esp); the reload reads back
         # the value just written, so %xmm1 is unchanged by this pair.
         movapd    %xmm1, 32(%esp)                               #18.14
         movapd    32(%esp), %xmm1                               #18.14
         # Pad with 0x90 (nop) bytes so the loop head below is aligned.
         .align    4,0x90
                                 # LOE ebp esi edi xmm0 xmm1 xmm2
..B1.2:                         # Preds ..B1.8 ..B1.2
         # Loop body: accumulate both loop-invariant squared vectors, then
         # advance the counter by 2.  jb is an unsigned compare, so the loop
         # runs while edi < 100000000, i.e. 50,000,000 iterations.
         addpd     %xmm1, %xmm2                                  #18.9
         addpd     %xmm0, %xmm2                                  #18.14
         addl      $2, %edi                                      #17.5
         cmpl      $100000000, %edi                              #17.5
         jb        ..B1.2        # Prob 100%                     #17.5
                                 # LOE ebp esi edi xmm0 xmm1 xmm2
..B1.3:                         # Preds ..B1.2
         # Horizontal reduction of the two accumulator lanes: spill xmm2,
         # reload it as xmm1, copy to xmm0, broadcast the high lane of xmm0
         # into its low lane, then add that to the low lane of xmm1.
         movapd    %xmm2, 16(%esp)                               #
         movapd    16(%esp), %xmm1                               #17.5
         movapd    %xmm1, %xmm0                                  #17.5
         unpckhpd  %xmm0, %xmm0                                  #17.5
         addsd     %xmm0, %xmm1                                  #
         # Stack-based printf call: pointer to __STRING.0 at (%esp), followed
         # by the 8-byte double sum at 4(%esp).  The string's contents are not
         # shown in this listing.
         movl      $__STRING.0, (%esp)                           #20.12
         movsd     %xmm1, 4(%esp)                                #20.23
         call      printf                                        #20.5
                                 # LOE ebp esi
..B1.4:                         # Preds ..B1.3
         # Epilogue: return 0 in %eax and undo the prologue in reverse order
         # (release the 60 bytes, restore %edi, restore the pre-alignment
         # %esp from %ebx, restore %ebx).
         xorl      %eax, %eax                                    #21.12
         addl      $60, %esp                                     #21.12
         popl      %edi                                          #21.12
         movl      %ebx, %esp                                    #21.12
         popl      %ebx                                          #21.12
         ret                                                     #21.12


I made some minor mention of this in the original post, but it was 
likely too vague for you.

-- 
Scott Robert Ladd
Coyote Gulch Productions (http://www.coyotegulch.com)
Software Invention for High-Performance Computing



More information about the Gcc mailing list