simple example -O3 optimizer problem?
Dennis Luehring
dl.soluz@gmx.net
Fri Jul 18 07:30:00 GMT 2014
I tested the following code with
http://gcc.godbolt.org/
tested with
g++-4.8 (Ubuntu 4.8.1.2ubuntu1~12.04) 4.8.1
g++ (GCC) 4.9.0 20130909 (experimental)
and the result with -O3 and USE_ITER defined seems to be a little bit long
----------
// Adds `a` into the caller's accumulator `dummy`, which is passed by reference.
static void foo(int a, int& dummy)
{
dummy += a;
}
#define USE_ITER
// Test case: sums ten ints read from the address of the `argv` parameter and
// returns the sum. With USE_ITER defined the loop advances a pointer up to
// `pend`; otherwise it indexes the same ten elements with an int counter.
int main(int argc, char** argv)
{
//--
//anti-optimizer
// The loop input is read from memory at &argv instead of from constants.
// NOTE(review): `argv` is a single pointer object, so reading ten ints from
// its address goes past that object. Presumably deliberate for this test
// case, but it is not well-defined C++.
int dummy = 0;
int* array = (int*)&argv;
//--
#if defined(USE_ITER)
// Pointer form: p runs from &array[0] up to, but not including, pend.
int* pend = &array[10];
for(int* p = &array[0]; p < pend; ++p) foo(*p, dummy);
#else
// Index form: i runs over 0..9.
for(int i = 0; i < 10; ++i) foo(array[i], dummy);
#endif
return dummy;
}
---------
with -O2
with or without USE_ITER defined
result:
# main as emitted at -O2: one scalar loop that adds ten consecutive dwords
# into eax. The call to foo does not appear; its add is inline in the loop.
main:
lea rdx, [rsp-8]              # rdx = cursor, starting at rsp-8
lea rcx, [rsp+32]             # rcx = end address, 40 bytes (10 dwords) past the start
mov QWORD PTR [rsp-8], rsi    # store rsi (2nd integer argument; presumably argv under SysV) at the start address, below rsp
xor eax, eax                  # eax = running sum = 0
.L3:
add eax, DWORD PTR [rdx]      # sum += dword at cursor
add rdx, 4                    # advance the cursor by one dword
cmp rdx, rcx
jb .L3                        # unsigned compare: repeat while cursor < end
rep; ret                      # result is returned in eax
with -O3
without USE_ITER defined
# main as emitted at -O3 without USE_ITER: no loop remains, only ten
# additions. Two terms come from the halves of rsi, eight from [rsp]..[rsp+28].
main:
mov rax, rsi
shr rax, 32                   # rax = upper 32 bits of rsi
add eax, esi                  # eax = upper dword + lower dword of rsi
add eax, DWORD PTR [rsp]      # the remaining eight dwords are read at rsp+0 .. rsp+28
add eax, DWORD PTR [rsp+4]
add eax, DWORD PTR [rsp+8]
add eax, DWORD PTR [rsp+12]
add eax, DWORD PTR [rsp+16]
add eax, DWORD PTR [rsp+20]
add eax, DWORD PTR [rsp+24]
add eax, DWORD PTR [rsp+28]
ret                           # result is returned in eax
with USE_ITER defined
# main as emitted at -O3 with USE_ITER defined.
# Visible structure:
#   setup     - computes the element count and how many elements the scalar
#               chain should take
#   .L11      - chain of scalar adds with one exit per element count (1..7),
#               falling through after the eighth
#   .L3/.L7   - up to three 16-byte unaligned loads added together, then
#               reduced to one dword
#   .L6       - scalar tail of up to three elements
# Register roles after setup: rdi = start address (rsp-16), r8 = end address
# (rsp+24), rcx = element count, rdx = elements for the scalar chain,
# eax = running sum, rsi = address of the next element still to be added.
main:
lea rdi, [rsp-16]             # rdi = start address
lea rax, [rsp+27]
mov QWORD PTR [rsp-16], rsi   # store the incoming rsi at the start address
lea r8, [rsp+24]              # r8 = end address = start + 40 bytes (10 dwords)
mov ecx, 1
lea rdx, [rdi+4]
lea rsi, [rdi+1]              # rsi = start + 1 byte; rsi is scratch from here on
sub rax, rdx                  # rax = (rsp+27) - (rsp-12) = 39
mov rdx, rdi
shr rax, 2                    # 39 >> 2 = 9
add rax, 1                    # rax = 10 = element count
cmp rsi, r8
cmovbe rcx, rax               # rcx = rax when start+1 <= end (unsigned), otherwise it stays 1
and edx, 15
shr rdx, 2                    # rdx = (start & 15) >> 2; presumably the vectorizer's alignment-peel count (TODO confirm)
cmp rcx, rdx
cmovbe rdx, rcx               # rdx = min(rdx, rcx), unsigned
cmp rax, 8
ja .L30                       # count > 8: go to the path that can reach the 16-byte block
.L2:
mov rdx, rcx                  # ask the scalar chain for all rcx elements
.L11:
# Scalar chain: add one element at a time and leave once rdx elements are summed.
cmp rdx, 1
mov eax, DWORD PTR [rsp-16]   # sum = first element (mov leaves the flags from cmp intact)
je .L13
add eax, DWORD PTR [rsp-12]
cmp rdx, 2
je .L14
add eax, DWORD PTR [rsp-8]
cmp rdx, 3
je .L15
add eax, DWORD PTR [rsp-4]
cmp rdx, 4
je .L16
add eax, DWORD PTR [rsp]
cmp rdx, 5
je .L17
add eax, DWORD PTR [rsp+4]
cmp rdx, 6
je .L18
add eax, DWORD PTR [rsp+8]
cmp rdx, 7
je .L19
add eax, DWORD PTR [rsp+12]   # eighth element
lea rsi, [rsp+16]             # next element to add is the ninth
.L4:
cmp rcx, rdx
je .L23                       # scalar chain covered every element: return
.L3:
sub rcx, rdx                  # rcx = elements still to add
mov r9, rcx
shr r9, 2                     # r9 = number of whole 4-dword groups
lea r10, [0+r9*4]             # r10 = elements covered by those groups
test r10, r10
je .L6                        # no whole group: scalar tail only
lea rdx, [rdi+rdx*4]          # rdx = address of the first element not yet added
cmp r9, 1
movdqu xmm0, XMMWORD PTR [rdx]        # first group (unaligned load)
jbe .L7
movdqu xmm1, XMMWORD PTR [rdx+16]     # second group
cmp r9, 2
paddd xmm0, xmm1
je .L7
movdqu xmm1, XMMWORD PTR [rdx+32]     # third group
paddd xmm0, xmm1
.L7:
# Reduce the four dword lanes of xmm0 to a single sum in lane 0.
movdqa xmm2, xmm0
lea rsi, [rsi+r10*4]          # advance rsi past the r10 elements just loaded
psrldq xmm2, 8                # move the upper 8 bytes down
paddd xmm0, xmm2              # lanes 0 and 1 now hold pairwise sums
movdqa xmm3, xmm0
psrldq xmm3, 4                # move lane 1 down to lane 0
paddd xmm0, xmm3              # lane 0 = sum of all four lanes
movd edx, xmm0
add eax, edx                  # fold the vector total into the running sum
cmp rcx, r10
je .L23                       # the groups covered everything that was left: return
.L6:
# Scalar tail: up to three more elements, checking against the end address after each.
lea rdx, [rsi+4]
add eax, DWORD PTR [rsi]
cmp r8, rdx
jbe .L23                      # end <= rsi+4: done
lea rdx, [rsi+8]
add eax, DWORD PTR [rsi+4]
cmp r8, rdx
jbe .L31                      # end <= rsi+8: done
add eax, DWORD PTR [rsi+8]
ret
.L23:
rep; ret
.L30:
# Reached when the element count is above 8.
cmp rsi, r8
ja .L2                        # start+1 > end (unsigned): use the all-scalar path
xor eax, eax                  # sum = 0
test rdx, rdx
mov rsi, rdi                  # next element to add = start (mov keeps the flags from test)
je .L3                        # nothing for the scalar chain: go straight to the 16-byte block
jmp .L11
.L31:
ret
# Exit stubs of the scalar chain: each sets rsi to the address of the first
# element the chain did not add, then rejoins at .L4.
.L16:
mov rsi, rsp                  # after 4 elements
jmp .L4
.L17:
lea rsi, [rsp+4]              # after 5 elements
jmp .L4
.L18:
lea rsi, [rsp+8]              # after 6 elements
jmp .L4
.L19:
lea rsi, [rsp+12]             # after 7 elements
jmp .L4
.L14:
lea rsi, [rsp-8]              # after 2 elements
jmp .L4
.L15:
lea rsi, [rsp-4]              # after 3 elements
jmp .L4
.L13:
lea rsi, [rdi+4]              # after 1 element (rdi+4 = rsp-12)
jmp .L4
More information about the Gcc
mailing list