simple example -O3 optimizer problem?

Dennis Luehring dl.soluz@gmx.net
Fri Jul 18 07:30:00 GMT 2014


I tested the following code on

http://gcc.godbolt.org/
using these compilers:
g++-4.8 (Ubuntu 4.8.1.2ubuntu1~12.04) 4.8.1
g++ (GCC) 4.9.0 20130909 (experimental)

and the code generated with -O3 and USE_ITER defined seems to be a little bit long

----------

// Adds `a` to the accumulator `dummy`, which the caller passes by reference.
static void foo(int a, int& dummy)
{
   dummy += a;
}

#define USE_ITER

// Sums ten ints starting at the address of the `argv` parameter and returns
// the total. With USE_ITER defined the loop walks a pointer up to an end
// pointer; otherwise it indexes 0..9. Both forms visit the same ten elements.
int main(int argc, char** argv)
{
   //--
   //anti-optimizer
   // Presumably the data is taken from the address of a parameter so that the
   // compiler cannot fold the sum to a constant.
   // NOTE(review): `array` points at the single `char**` object `argv`, but
   // the loops below read 10 ints through it. That reads past the object and
   // is undefined behavior; it is left unchanged because this is the reported
   // test case.
   int dummy = 0;
   int* array = (int*)&argv;
   //--

#if defined(USE_ITER)
   // pointer / end-pointer form of the loop
   int* pend = &array[10];
   for(int* p = &array[0]; p < pend; ++p) foo(*p, dummy);
#else
   // index form of the loop
   for(int i = 0; i < 10; ++i) foo(array[i], dummy);
#endif

   return dummy;
}

---------

with -O2,
with or without USE_ITER defined (one listing is shown for both variants)

result:

main:
     # -O2 listing: a plain pointer loop over ten ints.
     #   rdx = p    = address of the stack slot holding argv (&array[0])
     #   rcx = pend = rdx + 40 bytes (&array[10])
     #   eax = running sum (dummy), also the return value
     lea    rdx, [rsp-8]
     lea    rcx, [rsp+32]
     # store the incoming argv (rsi) into the slot below rsp that p points at
     mov    QWORD PTR [rsp-8], rsi
     xor    eax, eax
.L3:
     # dummy += *p; ++p; repeat while p < pend (unsigned compare)
     add    eax, DWORD PTR [rdx]
     add    rdx, 4
     cmp    rdx, rcx
     jb    .L3
     rep; ret

with -O3

without USE_ITER defined

main:
     # -O3 listing, index loop: fully unrolled into ten additions, no loop left.
     # The first two ints are the low and high 32-bit halves of argv, taken
     # straight from the register rsi: eax = (argv >> 32) + (low 32 bits).
     mov    rax, rsi
     shr    rax, 32
     add    eax, esi
     # the remaining eight ints are read from [rsp] .. [rsp+28]
     add    eax, DWORD PTR [rsp]
     add    eax, DWORD PTR [rsp+4]
     add    eax, DWORD PTR [rsp+8]
     add    eax, DWORD PTR [rsp+12]
     add    eax, DWORD PTR [rsp+16]
     add    eax, DWORD PTR [rsp+20]
     add    eax, DWORD PTR [rsp+24]
     add    eax, DWORD PTR [rsp+28]
     ret

with USE_ITER defined

main:
     # -O3 listing, pointer loop: scalar chain + SSE2 vector part + scalar tail.
     # Register roles as set up below:
     #   rdi = p    = &array[0], the stack slot [rsp-16] that receives argv
     #   r8  = pend = rdi + 40 bytes (&array[10])
     #   rax = trip count derived from the two pointers; eax later holds the sum
     #   rcx = number of iterations to execute
     #   rdx = number of iterations handled one at a time before the vector part
     lea    rdi, [rsp-16]
     lea    rax, [rsp+27]
     # store the incoming argv (rsi) into the slot that p points at
     mov    QWORD PTR [rsp-16], rsi
     lea    r8, [rsp+24]
     mov    ecx, 1
     lea    rdx, [rdi+4]
     lea    rsi, [rdi+1]
     # rax = (((pend + 3) - (p + 4)) >> 2) + 1, i.e. the loop trip count
     sub    rax, rdx
     mov    rdx, rdi
     shr    rax, 2
     add    rax, 1
     # rcx = trip count if p + 1 <= pend (unsigned), otherwise 1
     cmp    rsi, r8
     cmovbe    rcx, rax
     # rdx = min((p & 15) >> 2, rcx)
     # NOTE(review): presumably a peel count derived from the 16-byte alignment
     # of p; the listing itself does not say.
     and    edx, 15
     shr    rdx, 2
     cmp    rcx, rdx
     cmovbe    rdx, rcx
     # trip count > 8: let .L30 decide how to start
     cmp    rax, 8
     ja    .L30
.L2:
     # run every iteration through the scalar chain below
     mov    rdx, rcx
.L11:
     # Scalar chain: eax = array[0], then add array[1], array[2], ... in turn.
     # After k elements, if rdx == k, jump to the stub that sets
     # rsi = &array[k] and rejoins at .L4.
     cmp    rdx, 1
     mov    eax, DWORD PTR [rsp-16]
     je    .L13
     add    eax, DWORD PTR [rsp-12]
     cmp    rdx, 2
     je    .L14
     add    eax, DWORD PTR [rsp-8]
     cmp    rdx, 3
     je    .L15
     add    eax, DWORD PTR [rsp-4]
     cmp    rdx, 4
     je    .L16
     add    eax, DWORD PTR [rsp]
     cmp    rdx, 5
     je    .L17
     add    eax, DWORD PTR [rsp+4]
     cmp    rdx, 6
     je    .L18
     add    eax, DWORD PTR [rsp+8]
     cmp    rdx, 7
     je    .L19
     # eight elements summed; rsi = &array[8]
     add    eax, DWORD PTR [rsp+12]
     lea    rsi, [rsp+16]
.L4:
     # if the scalar chain already covered all rcx iterations, return
     cmp    rcx, rdx
     je    .L23
.L3:
     # rcx = iterations still to do
     # r9  = rcx >> 2 = number of 4-int vectors
     # r10 = r9 * 4   = elements covered by the vector part
     sub    rcx, rdx
     mov    r9, rcx
     shr    r9, 2
     lea    r10, [0+r9*4]
     # no complete vector: go to the scalar tail
     test    r10, r10
     je    .L6
     # rdx = p + rdx * 4 bytes, the first element not yet summed;
     # accumulate up to three unaligned 16-byte loads into xmm0
     lea    rdx, [rdi+rdx*4]
     cmp    r9, 1
     movdqu    xmm0, XMMWORD PTR [rdx]
     jbe    .L7
     movdqu    xmm1, XMMWORD PTR [rdx+16]
     cmp    r9, 2
     paddd    xmm0, xmm1
     je    .L7
     movdqu    xmm1, XMMWORD PTR [rdx+32]
     paddd    xmm0, xmm1
.L7:
     # Reduce the four 32-bit lanes of xmm0 to one value (shift by 8 bytes and
     # add, then by 4 bytes and add) and add the low lane to eax. rsi is
     # advanced by r10 * 4 bytes along the way.
     movdqa    xmm2, xmm0
     lea    rsi, [rsi+r10*4]
     psrldq    xmm2, 8
     paddd    xmm0, xmm2
     movdqa    xmm3, xmm0
     psrldq    xmm3, 4
     paddd    xmm0, xmm3
     movd    edx, xmm0
     add    eax, edx
     # nothing left after the vector part: return
     cmp    rcx, r10
     je    .L23
.L6:
     # scalar tail: add up to three more ints starting at rsi, returning as
     # soon as the next address reaches pend (r8)
     lea    rdx, [rsi+4]
     add    eax, DWORD PTR [rsi]
     cmp    r8, rdx
     jbe    .L23
     lea    rdx, [rsi+8]
     add    eax, DWORD PTR [rsi+4]
     cmp    r8, rdx
     jbe    .L31
     add    eax, DWORD PTR [rsi+8]
     ret
.L23:
     # common return; eax holds the sum
     rep; ret
.L30:
     # Trip count > 8. If p + 1 > pend, fall back to .L2. Otherwise start with
     # eax = 0 and rsi = p: go straight to the vector part (.L3) when rdx == 0,
     # else through the scalar chain (.L11).
     cmp    rsi, r8
     ja    .L2
     xor    eax, eax
     test    rdx, rdx
     mov    rsi, rdi
     je    .L3
     jmp    .L11
.L31:
     ret
     # Stubs for the scalar chain: each sets rsi to the address of the next
     # unsummed element ([rsp-16] is array[0]) and rejoins at .L4.
.L16:
     mov    rsi, rsp
     jmp    .L4
.L17:
     lea    rsi, [rsp+4]
     jmp    .L4
.L18:
     lea    rsi, [rsp+8]
     jmp    .L4
.L19:
     lea    rsi, [rsp+12]
     jmp    .L4
.L14:
     lea    rsi, [rsp-8]
     jmp    .L4
.L15:
     lea    rsi, [rsp-4]
     jmp    .L4
.L13:
     lea    rsi, [rdi+4]
     jmp    .L4




More information about the Gcc mailing list