Bug 39093 - inline code optimized wrong with -O3
inline code optimized wrong with -O3
Status: RESOLVED DUPLICATE of bug 21920
Product: gcc
Classification: Unclassified
Component: c
Version: 4.3.2
Importance: P3 major
Target Milestone: ---
Assigned To: Not yet assigned to anyone
:
Depends on:
Blocks:
  Show dependency treegraph
 
Reported: 2009-02-04 03:49 UTC by Ricardo Telichevesky
Modified: 2009-02-04 09:33 UTC (History)
29 users (show)

See Also:
Host:
Target: x86_64-suse-linux
Build: gcc version 4.3.2 [gcc-4_3-branch revision 141291] (SUSE Linux)
Known to work:
Known to fail:
Last reconfirmed:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Ricardo Telichevesky 2009-02-04 03:49:57 UTC
The problem happens when I try to optimize this procedure:
------------------------------------------------
/*
 * Compare-and-exchange on a generic pointer slot.
 *
 * Reads *destination; if it equals `compare`, stores `exchange` into
 * *destination.  The value read is returned in either case, so the caller
 * can tell whether the store happened by comparing the result to `compare`.
 *
 * Despite the name this is a plain load/compare/store sequence, not an
 * atomic operation.
 *
 * `destination` must point to an object that really has type `void *`.
 * Passing the address of some other pointer type through a cast
 * (e.g. `(void **) &char_ptr`) breaks the C aliasing rules, and an
 * optimizing compiler may then keep using the stale value of that object.
 *
 * Declared `static inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, so a call that is not inlined would
 * fail to link.
 */
static inline void *
bcAtomCompareExchange(void **destination,
                      void  *exchange,
                      void  *compare)
{
    void *previous = *destination;

    if (previous == compare) {
        *destination = exchange;
    }

    return previous;
}
-------------------------------------------------
When old == compare, *destination changes, but the optimizer keeps using the old value. Here is the source code (it includes only stdio.h). Yes, I know the "Atom" in the names is irrelevant here and the code is not thread-safe, but that is not the issue: there are no threads here, just straight C code.
------------- test.c ---------------------
#include <stdio.h>

/*
 * Compare-and-exchange on a generic pointer slot.
 *
 * Reads *destination; if it equals `compare`, stores `exchange` into
 * *destination.  The value read is returned in either case, so the caller
 * can tell whether the store happened by comparing the result to `compare`.
 *
 * Despite the name this is a plain load/compare/store sequence, not an
 * atomic operation.
 *
 * `destination` must point to an object that really has type `void *`.
 * Passing the address of some other pointer type through a cast
 * (e.g. `(void **) &char_ptr`) breaks the C aliasing rules, and an
 * optimizing compiler may then keep using the stale value of that object.
 *
 * Declared `static inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, so a call that is not inlined would
 * fail to link.
 */
static inline void *
bcAtomCompareExchange(void **destination,
                      void  *exchange,
                      void  *compare)
{
    void *previous = *destination;

    if (previous == compare) {
        *destination = exchange;
    }

    return previous;
}


/*
 * Unconditional exchange on a generic pointer slot.
 *
 * Stores `exchange` into *target and returns the value *target held
 * before the store.
 *
 * Despite the name this is a plain load followed by a store, not an
 * atomic operation.
 *
 * `target` must point to an object that really has type `void *`.
 * Passing the address of some other pointer type through a cast
 * (e.g. `(void **) &char_ptr`) breaks the C aliasing rules.
 *
 * Declared `static inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, so a call that is not inlined would
 * fail to link.
 */
static inline void *
bcAtomExchange(void **target,
               void  *exchange)
{
    void *previous = *target;

    *target = exchange;
    return previous;
}


/*
 * Reproducer driver: prints three string pointers, then applies
 * bcAtomCompareExchange() and bcAtomExchange() to px, printing the
 * pointer values after each step.  Always returns 0.
 *
 * NOTE(review): px is declared `char *` but both helper calls receive
 * `(void **) &px`, so the helpers read and write px through an lvalue of
 * type `void *`.  Per the resolution of this report that violates the C
 * aliasing rules; declaring px as `void *` is the stated fix.  The code is
 * kept exactly as submitted because it is the test case the report's
 * outputs and disassembly refer to.
 */
int
main()
{

    /* The literals are only ever read (printed with %s); the casts below
       merely drop the const qualifier. */
    const char* first = "uno";
    const char* second = "duo";
    const char* third = "tre";
    char *px = (char*) first, *py = (char*) second, *pz = (char*) third;
    fprintf(stdout, "px= %s  py= %s pz= %s\n", px, py, pz);

    /* Make the comparand equal to px so the compare-exchange takes the
       store path: px is meant to become py, and pw receives the previous
       px.  This is the call whose effect on px is lost in the -O3 output
       quoted in the report. */
    pz = px; 
    char* pw = (char*) bcAtomCompareExchange((void**) &px, py, pz);
    fprintf(stdout, "pz= px; pw= bcAtomCompareExchange(&px, py, pz); px= %s  py= %s pz= %s pw= %s\n", 
            px, py, pz, pw);
    
    /* Unconditional exchange: px is meant to receive `third`, and py is
       overwritten with the value px held before the call. */
    py = (char*) third; py = (char*) bcAtomExchange((void**) &px, py);
    fprintf(stdout, "py= tre; py= bcAtomExchange(&px, py); px= %s  py= %s pz= %s pw= %s\n", 
            px, py, pz, pw);
    
    return 0;

}
--------------------end of test.c ----------------------
without optimization the output is correct:
px= uno  py= duo pz= tre
pz= px; pw= bcAtomCompareExchange(&px, py, pz); px= duo  py= duo pz= uno pw= uno
py= tre; py= bcAtomExchange(&px, py); px= tre  py= duo pz= uno pw= uno

but with optimization (-O3) the output is WRONG!
px= uno  py= duo pz= tre
pz= px; pw= bcAtomCompareExchange(&px, py, pz); px= uno  py= duo pz= uno pw= uno
py= tre; py= bcAtomExchange(&px, py); px= duo  py= duo pz= uno pw= uno

To help, I looked at the generated object code and spotted the source of the problem, as explained in the following:
----------------- relevant parts of the disassembly using objdump ----
Contents of section .rodata:
 400818   ....uno.duo.tre.      # uno is 40081c  (initial px)
 400828   px= %s  py= %s p      # duo is 400820  (initial py)
 400838   z= %s...pz= px;       # tre is 400824  (initial pz)
 400848   pw= bcAtomCompar      # msg1 is 400828
 400858   eExchange(&px, p      # msg2 is 400840
 400868   y, pz); px= %s        # msg3 is 400890
 400878   py= %s pz= %s pw
 400888   = %s....py= tre;      
 400898    py= bcAtomExcha
 4008a8   nge(&px, py); px
 4008b8   = %s  py= %s pz=
 4008c8    %s pw= %s......

0000000000400660 <main>:
main():
  400660:	mov    %rbx,-0x18(%rsp)
  400665:	mov    %rbp,-0x10(%rsp)
  40066a:	xor    %eax,%eax
  40066c:	mov    %r12,-0x8(%rsp)
  400671:	sub    $0x28,%rsp
  400675:	mov    0x2009b4(%rip),%rdi    # 601030 <stdout@@GLIBC_2.2.5>
  40067c:	mov    $0x400824,%r8d         # %r8 gets pz (5th arg ABI)
  400682:	mov    $0x400820,%ecx         # %rcx gets py (4th arg ABI)
  400687:	mov    $0x40081c,%edx         # %rdx gets px (3rd arg ABI)
  40068c:	mov    $0x400828,%esi         # %esi gets msg1 (2nd arg ABI)
  400691:	movq   $0x40081c,0x8(%rsp)    # 0x8(%rsp) is &px 
  400698:	
  40069a:	lea    0x8(%rsp),%r12
  40069f:	callq  400560 <fprintf@plt>   # print first message fine
  4006a4:	mov    0x8(%rsp),%rbx         # %rbx gets px (old = *dest)
  4006a9:	cmp    %rbx,%rbx              # inline the comparison old == comp
  4006ac:	mov    %rbx,%rbp              # inline return pw = old
  4006af:	je     400718 <main+0xb8>     # jump executed, back next line
  4006b1:	mov    0x200978(%rip),%rdi    # 601030 <stdout@@GLIBC_2.2.5>
  4006b8:	mov    %rbp,%r9               # %r9 gets pw (6th arg ABI)
  4006bb:	mov    %rbx,%r8               # %r8 gets pz (5th arg ABI)
  4006be:	mov    %rbx,%rdx              # << ERROR >> (3rd arg ABI)
                mov    0x8(%rsp),%rdx  <<<<<<<< should be that with updated px
                                       <<<<<<<< but instead it ignored the fact
                                       <<<<<<<< that *destination changed !! 
  4006c1:	mov    $0x400820,%ecx         # %rcx untouched py (4th arg ABI)
  4006c6:	mov    $0x400840,%esi         # %esi gets msg2 (2nd arg ABI)
  4006cb:	xor    %eax,%eax                  
  4006cd:	callq  400560 <fprintf@plt>   # message prints WRONG!!!!!
  4006d2:	mov    (%r12),%rcx
  4006d6:	mov    0x8(%rsp),%rdx
  4006db:	mov    %rbp,%r9
  4006de:	mov    0x20094b(%rip),%rdi    # 601030 <stdout@@GLIBC_2.2.5>
  4006e5:	mov    %rbx,%r8
  4006e8:	mov    $0x400890,%esi
  4006ed:	xor    %eax,%eax
  4006ef:	movq   $0x400824,(%r12)
  4006f6:	
  4006f7:	callq  400560 <fprintf@plt>
  4006fc:	xor    %eax,%eax
  4006fe:	mov    0x10(%rsp),%rbx
  400703:	mov    0x18(%rsp),%rbp
  400708:	mov    0x20(%rsp),%r12
  40070d:	add    $0x28,%rsp
  400711:	retq   
  400712:	nopw   0x0(%rax,%rax,1)
  400718:	movq   $0x400820,0x8(%rsp)       # px reference is correct but not used
  40071f:	
  400721:	jmp    4006b1 <main+0x51>
  400723:	nop    
------------------------------------------------------------------------------
I think I have a workaround: instead of using void* as the generic pointer, use a typedef such as
typedef char * addr_t, as in the old days when void did not exist. However, I am not sure under what conditions the compiler will generate wrong code.
Thanks,

Ricardo
Comment 1 Andreas Schwab 2009-02-04 09:33:57 UTC
>     char* pw = (char*) bcAtomCompareExchange((void**) &px, py, pz);

This is violating the C aliasing rules.  Declare px as void* to fix that.


*** This bug has been marked as a duplicate of 21920 ***