This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: simple example -O3 optimizer problem?


Am 18.07.2014 11:14, schrieb Andrew Haley:
On 07/18/2014 09:40 AM, Dennis Luehring wrote:
> Am 18.07.2014 10:29, schrieb Andrew Haley:
>> On 18/07/14 08:30, Dennis Luehring wrote:
>>>    int* array = (int*)&argv;
>>
>> This looks like undefined behaviour.  Don't you get a warning?
>
> no warning - it's a validly typed pointer to the stack and I don't care what
> the values are
> it's just an anti-optimizer trick, nothing more

And if you give it the correct type, does that make any difference?



// Adds `a` to the caller's accumulator `dummy` (passed by reference).
// Declared `static`, so it has internal linkage; none of the listings
// further down contains a call instruction, i.e. it ends up inlined.
static void foo(int a, int& dummy)
{
  dummy += a; // the only effect: the caller's variable grows by `a`
}

// Build-time switches for the experiment in main():
// USE_CHAR defined  -> T is char and the data pointer is argv[0];
// USE_CHAR missing  -> T is int and the data pointer is (int*)&argv.
#define USE_CHAR
// USE_ITER defined  -> pointer-iterator loop (p < pend);
// USE_ITER missing  -> index loop (i < 10).
#define USE_ITER

// Reproducer: sums ten elements of type T starting at `array` into `dummy`
// via foo() and returns that sum. Which T, which data pointer and which loop
// form are used is selected by USE_CHAR / USE_ITER.
int main(int argc, char** argv)
{
  //--
  // anti-optimizer: the data comes from argv, so the sum is not a
  // compile-time constant
  int dummy = 0;
#if defined(USE_CHAR)
  typedef char T;
  // NOTE(review): ten chars are read below, but nothing here checks that
  // argv[0] is at least that long.
  T* array = argv[0];
#else
  typedef int T;
  // Views the storage of the `argv` parameter itself (a char**) as an int
  // array. Only sizeof(char**) bytes belong to that object, while the loops
  // below read ten ints starting there.
  T* array = (int*)&argv; // undefined?
#endif
  //--

#if defined(USE_ITER)
  // pointer-iterator form: walk the half-open range [array, array + 10)
  T* pend = &array[10];
  for(T* p = &array[0]; p < pend; ++p) foo(*p, dummy);
#else
  // index form of the same ten-element loop
  for(int i = 0; i < 10; ++i) foo(array[i], dummy);
#endif

  return dummy;
}

with USE_CHAR, with/without USE_ITER, -O3 gives:

# T = char: the ten-iteration loop is fully unrolled and foo() is inlined
# (no call, no branch). Assuming the SysV AMD64 convention, rsi holds argv
# on entry and the int result is returned in eax.
main:
    # rdx = argv[0]
    mov    rdx, QWORD PTR [rsi]
    # every movsx sign-extends one char to 32 bits; eax accumulates the sum
    movsx    ecx, BYTE PTR [rdx]
    movsx    eax, BYTE PTR [rdx+1]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+2]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+3]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+4]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+5]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+6]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+7]
    add    eax, ecx
    movsx    ecx, BYTE PTR [rdx+8]
    # the last load reuses rdx as its own destination; the pointer is dead afterwards
    movsx    edx, BYTE PTR [rdx+9]
    add    eax, ecx
    add    eax, edx
    ret

without USE_CHAR, without USE_ITER, -O3 gives:

# T = int, index loop: fully unrolled, no branch, and argv is never written
# to memory. Assuming the SysV AMD64 convention, rsi holds argv on entry.
main:
    # first two ints = the high and low 32-bit halves of the argv pointer
    # value, taken directly from the register
    mov    rax, rsi
    shr    rax, 32
    add    eax, esi
    # the remaining eight ints are read from the stack at [rsp] .. [rsp+28]
    add    eax, DWORD PTR [rsp]
    add    eax, DWORD PTR [rsp+4]
    add    eax, DWORD PTR [rsp+8]
    add    eax, DWORD PTR [rsp+12]
    add    eax, DWORD PTR [rsp+16]
    add    eax, DWORD PTR [rsp+20]
    add    eax, DWORD PTR [rsp+24]
    add    eax, DWORD PTR [rsp+28]
    ret

without USE_CHAR, with USE_ITER, -O3 gives

# T = int, pointer-iterator loop: the loop is vectorized here, which produces
# three phases: a scalar peel (.L11), a body using 16-byte vectors (.L3/.L7)
# and a scalar remainder (.L6). Assuming the SysV AMD64 convention, rsi holds
# argv on entry and the int result is returned in eax.
main:
    # rdi = rsp-16: the stack slot argv is stored to below, i.e. &array[0]
    lea    rdi, [rsp-16]
    lea    rax, [rsp+27]
    mov    QWORD PTR [rsp-16], rsi
    # r8 = rsp+24 = rdi + 40 bytes, i.e. pend = &array[10]
    lea    r8, [rsp+24]
    mov    ecx, 1
    lea    rdx, [rdi+4]
    lea    rsi, [rdi+1]
    # rax = ((pend + 3) - (array + 4)) / 4 + 1 = element count of the loop
    sub    rax, rdx
    mov    rdx, rdi
    shr    rax, 2
    add    rax, 1
    # rcx = that count if array+1 <= pend (byte addresses), otherwise 1
    cmp    rsi, r8
    cmovbe    rcx, rax
    # rdx = min(rcx, (array & 15) >> 2): a count derived from the start
    # address modulo 16; presumably the number of scalar iterations to peel
    # before the vector body
    and    edx, 15
    shr    rdx, 2
    cmp    rcx, rdx
    cmovbe    rdx, rcx
    # more than eight elements: .L30 decides whether the vector path is used
    cmp    rax, 8
    ja    .L30
.L2:
    # handle all rcx elements in the scalar peel
    mov    rdx, rcx
.L11:
    # scalar peel: add one element at a time starting at [rsp-16]; leave after
    # rdx elements (1..7) through an exit stub, or fall through after eight.
    # mov does not alter flags, so the je below still tests "rdx == 1".
    cmp    rdx, 1
    mov    eax, DWORD PTR [rsp-16]
    je    .L13
    add    eax, DWORD PTR [rsp-12]
    cmp    rdx, 2
    je    .L14
    add    eax, DWORD PTR [rsp-8]
    cmp    rdx, 3
    je    .L15
    add    eax, DWORD PTR [rsp-4]
    cmp    rdx, 4
    je    .L16
    add    eax, DWORD PTR [rsp]
    cmp    rdx, 5
    je    .L17
    add    eax, DWORD PTR [rsp+4]
    cmp    rdx, 6
    je    .L18
    add    eax, DWORD PTR [rsp+8]
    cmp    rdx, 7
    je    .L19
    add    eax, DWORD PTR [rsp+12]
    # eight elements consumed; rsi = address of the next unread element
    lea    rsi, [rsp+16]
.L4:
    # peel covered every element: done
    cmp    rcx, rdx
    je    .L23
.L3:
    # rcx = elements left; r9 = number of whole 4-int vectors among them;
    # r10 = number of elements those vectors cover
    sub    rcx, rdx
    mov    r9, rcx
    shr    r9, 2
    lea    r10, [0+r9*4]
    test    r10, r10
    je    .L6
    # rdx = address of the first element for the vector body
    lea    rdx, [rdi+rdx*4]
    # up to three unaligned 16-byte loads, summed lane-wise into xmm0
    cmp    r9, 1
    movdqu    xmm0, XMMWORD PTR [rdx]
    jbe    .L7
    movdqu    xmm1, XMMWORD PTR [rdx+16]
    cmp    r9, 2
    paddd    xmm0, xmm1
    je    .L7
    movdqu    xmm1, XMMWORD PTR [rdx+32]
    paddd    xmm0, xmm1
.L7:
    # horizontal sum of the four 32-bit lanes of xmm0, then add it to eax
    movdqa    xmm2, xmm0
    lea    rsi, [rsi+r10*4]
    psrldq    xmm2, 8
    paddd    xmm0, xmm2
    movdqa    xmm3, xmm0
    psrldq    xmm3, 4
    paddd    xmm0, xmm3
    movd    edx, xmm0
    add    eax, edx
    # nothing left after the vector part: done
    cmp    rcx, r10
    je    .L23
.L6:
    # scalar remainder: at most three further elements, bounded by pend (r8)
    lea    rdx, [rsi+4]
    add    eax, DWORD PTR [rsi]
    cmp    r8, rdx
    jbe    .L23
    lea    rdx, [rsi+8]
    add    eax, DWORD PTR [rsi+4]
    cmp    r8, rdx
    jbe    .L31
    add    eax, DWORD PTR [rsi+8]
    ret
.L23:
    rep; ret
.L30:
    # count > 8: if array+1 > pend go back to the all-scalar path (.L2);
    # otherwise start with sum = 0 and rsi = array, skipping the peel when
    # its count (rdx) is zero. mov does not alter flags, so je tests rdx.
    cmp    rsi, r8
    ja    .L2
    xor    eax, eax
    test    rdx, rdx
    mov    rsi, rdi
    je    .L3
    jmp    .L11
.L31:
    ret
    # Exit stubs of the scalar peel: each sets rsi to the address of the first
    # element the peel did not consume, then continues at .L4.
.L16:
    mov    rsi, rsp
    jmp    .L4
.L17:
    lea    rsi, [rsp+4]
    jmp    .L4
.L18:
    lea    rsi, [rsp+8]
    jmp    .L4
.L19:
    lea    rsi, [rsp+12]
    jmp    .L4
.L14:
    lea    rsi, [rsp-8]
    jmp    .L4
.L15:
    lea    rsi, [rsp-4]
    jmp    .L4
.L13:
    lea    rsi, [rdi+4]
    jmp    .L4

But I still don't get what is undefined about (int*)&argv. I understand that the values are undefined (the pointer points somewhere into the stack), but why should the -O3 optimization generate such an amount of code because of this "undefined behavior"?

Strangely, clang 3.4.1 behaves differently:

with USE_CHAR, without USE_ITER, -O3 gives:

# T = char, index loop (AT&T syntax): fully unrolled, foo() inlined.
# Assuming the SysV AMD64 convention, %rsi holds argv on entry and the int
# result is returned in %eax.
main:                                   # @main
    # rax = argv[0]
    movq    (%rsi), %rax
    # each movsbl sign-extends one char; the running sum alternates between
    # %edx and %ecx and ends up in %eax
    movsbl    (%rax), %ecx
    movsbl    1(%rax), %edx
    addl    %ecx, %edx
    movsbl    2(%rax), %ecx
    addl    %edx, %ecx
    movsbl    3(%rax), %edx
    addl    %ecx, %edx
    movsbl    4(%rax), %ecx
    addl    %edx, %ecx
    movsbl    5(%rax), %edx
    addl    %ecx, %edx
    movsbl    6(%rax), %ecx
    addl    %edx, %ecx
    movsbl    7(%rax), %edx
    addl    %ecx, %edx
    movsbl    8(%rax), %ecx
    addl    %edx, %ecx
    # the last load overwrites the pointer register; it is dead afterwards
    movsbl    9(%rax), %eax
    addl    %ecx, %eax
    ret

with USE_CHAR, with USE_ITER, -O3 gives:

# T = char, pointer-iterator loop (AT&T syntax): vectorized, eight chars per
# vector iteration, followed by a scalar remainder loop. Assuming the SysV
# AMD64 convention, %rsi holds argv on entry and the result is in %eax.
main:                                   # @main
    # r14 and rbx are callee-saved and used as scratch below
    pushq    %r14
    pushq    %rbx
    # rdx = p = argv[0]; r11 = p + 10 = pend
    movq    (%rsi), %rdx
    leaq    10(%rdx), %r11
    # r10 = max(p + 1, pend) - p = number of loop iterations
    leaq    1(%rdx), %r10
    cmpq    %r10, %r11
    cmovaq    %r11, %r10
    subq    %rdx, %r10
    # r8 = r14 = iteration count rounded down to a multiple of 8; r9 = 0
    movq    %r10, %r8
    andq    $-8, %r8
    xorl    %r9d, %r9d
    movq    %r10, %r14
    andq    $-8, %r14
    # pxor does not touch flags, so je tests the andq result:
    # no full group of eight -> skip the vector body
    pxor    %xmm0, %xmm0
    je    .LBB0_1
    # r8 = address at which the scalar remainder will start
    addq    %rdx, %r8
    pxor    %xmm0, %xmm0
    xorl    %eax, %eax
    pxor    %xmm1, %xmm1
.LBB0_3:                                # %vector.body
    # xmm2/xmm3 keep the previous partial sums (of xmm1/xmm0)
    movdqa    %xmm1, %xmm2
    movdqa    %xmm0, %xmm3
    # load two groups of four chars
    movl    (%rdx,%rax), %edi
    movl    4(%rdx,%rax), %ecx
    # first group: the shift/pinsrw sequence places each char in the top byte
    # of its own 32-bit lane, then psrad $24 sign-extends it to an int
    movl    %edi, %esi
    shll    $8, %esi
    movd    %edi, %xmm0
    pextrw    $1, %xmm0, %ebx
    pinsrw    $1, %esi, %xmm0
    pinsrw    $3, %edi, %xmm0
    movl    %ebx, %esi
    shll    $8, %esi
    pinsrw    $5, %esi, %xmm0
    pinsrw    $7, %ebx, %xmm0
    psrad    $24, %xmm0
    # second group: same widening, into xmm1
    movl    %ecx, %esi
    shll    $8, %esi
    movd    %ecx, %xmm1
    pextrw    $1, %xmm1, %edi
    pinsrw    $1, %esi, %xmm1
    pinsrw    $3, %ecx, %xmm1
    movl    %edi, %ecx
    shll    $8, %ecx
    pinsrw    $5, %ecx, %xmm1
    pinsrw    $7, %edi, %xmm1
    psrad    $24, %xmm1
    # accumulate into the two four-lane partial sums
    paddd    %xmm3, %xmm0
    paddd    %xmm2, %xmm1
    # rax = byte offset, advanced by eight chars per iteration
    addq    $8, %rax
    cmpq    %rax, %r14
    jne    .LBB0_3
    # rdx = start of the remainder, r9 = number of chars already summed
    movq    %r8, %rdx
    movq    %r14, %r9
    jmp    .LBB0_5
.LBB0_1:
    pxor    %xmm1, %xmm1
.LBB0_5:                                # %middle.block
    # merge both partial sums and reduce the four lanes to one int in eax
    paddd    %xmm1, %xmm0
    movdqa    %xmm0, %xmm1
    movhlps    %xmm1, %xmm1            # xmm1 = xmm1[1,1]
    paddd    %xmm0, %xmm1
    pshufd    $1, %xmm1, %xmm0        # xmm0 = xmm1[1,0,0,0]
    paddd    %xmm1, %xmm0
    movd    %xmm0, %eax
    # all iterations were covered by the vector body: no remainder
    cmpq    %r9, %r10
    je    .LBB0_7
.LBB0_6:                                # %scalar.ph
    # scalar remainder: one sign-extended char per iteration until rdx reaches pend
    movsbl    (%rdx), %ecx
    addl    %ecx, %eax
    incq    %rdx
    cmpq    %r11, %rdx
    jb    .LBB0_6
.LBB0_7:                                # %._crit_edge
    # restore the callee-saved registers in reverse push order
    popq    %rbx
    popq    %r14
    ret

without USE_CHAR, with USE_ITER, -O3 gives

# T = int, pointer-iterator loop (AT&T syntax): left as a plain scalar loop,
# neither unrolled nor vectorized. Assuming the SysV AMD64 convention, %rsi
# holds argv on entry and the int result is returned in %eax.
main:                                   # @main
    # store argv to the stack slot at rsp-8; that slot is &array[0]
    movq    %rsi, -8(%rsp)
    # rcx = rsp+32 = slot + 40 bytes, i.e. pend = &array[10]
    leaq    32(%rsp), %rcx
    # rdx = &array[1]; on the first pass the element comes from %esi, which
    # still holds the low 32 bits of argv (the value of array[0])
    leaq    -4(%rsp), %rdx
    xorl    %eax, %eax
    jmp    .LBB0_1
.LBB0_2:                                # %._crit_edge4
    # fetch the next element and advance the pointer
    movl    (%rdx), %esi
    addq    $4, %rdx
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
    # eax = previous sum + current element (esi)
    movl    %eax, %edi
    movl    %esi, %eax
    addl    %edi, %eax
    # continue while the next-element pointer is below pend
    cmpq    %rcx, %rdx
    jb    .LBB0_2
    ret

without USE_CHAR, without USE_ITER, -O3 gives

no result on gcc.godbolt.org

btw: I always receive three identical posts from you








Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]