This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: GCC does not optimize out functions without side effects with asm statements inside loop even if return value is ignored


On Sat, Oct 7, 2017 at 8:39 AM, Saldyrkine, Mikhail
<Mikhail.Saldyrkine@gs.com> wrote:
> g++ (GCC) 6.3.1 20170216 (Red Hat 6.3.1-3)
>
> In the case below, compile_test_asm_inside_loop invokes test_asm_inside_loop and ignores the result.
> The call to test_asm_inside_loop is expected to be eliminated, since the return value is not used and there is no side effect.
> The call elimination works fine without the asm and without the loop.
> It does not work with the asm inside the loop.

That is because the loop could be an infinite loop, and GCC does not know
how many times the inline-asm is going to be executed or whether there are
other side effects.

Let's look at the function:
inline uint64_t test_asm_inside_loop(uint64_t idx) {
    while(true)
    {
        uint64_t result;
        asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
        if( result > 128 )
            return result;
        ++idx;
    }
}

The loop is exited only when result is > 128, and result is produced by
the inline-asm, so the output of the inline-asm is what controls the exit
from the loop.  GCC therefore cannot assume that the loop terminates.

Thanks,
Andrew

>
> TEST CODE
>
> #include <iostream>
> #include <assert.h>
>
> using namespace std;
> constexpr static size_t capacity = 1024;
> uint64_t objects[capacity];
>
> // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED
> inline uint64_t test_noloop(uint64_t idx) {
>     uint64_t result;
>     asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
>     if( result > 128 )
>         return result;
>     return 0;
> }
>
> // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED
> inline uint64_t test_noasm(uint64_t idx) {
>     while(true)
>     {
>         if( objects[idx] > 128 )
>             return objects[idx];
>         ++idx;
>     }
> }
>
> // THE FUNCTION IS KEPT EVEN IF THE RESULT IS NOT USED - ASM INSIDE LOOP CAUSING THE ISSUE
> inline uint64_t test_asm_inside_loop(uint64_t idx) {
>     while(true)
>     {
>         uint64_t result;
>         asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
>         if( result > 128 )
>             return result;
>         ++idx;
>     }
> }
>
> void init() {
>     srand(time(nullptr));
>     for( size_t i = 0; i < capacity - 1; ++i )
>         objects[i] = random() % 256;
>     objects[capacity-1] = 255;
> }
>
> // TEST THAT test_noasm AND test_asm_inside_loop PRODUCE SAME RESULT
> void sanity_test() {
>     for( size_t i = 0; i < capacity; ++i ) {
>         assert( test_noasm(i) == test_asm_inside_loop(i));
>     }
> }
>
> void compile_test_noasm() {
>     test_noasm(0);
> }
>
> void compile_test_noloop() {
>     test_noloop(0);
> }
>
> void compile_test_asm_inside_loop() {
>     test_asm_inside_loop(0);
> }
>
> int main( int argc, char* argv[] ) {
>     init();
>     sanity_test();
>     compile_test_noasm();
>     compile_test_noloop();
>     compile_test_asm_inside_loop();
> }
>
> COMPILATION AND DISASSEMBLER RESULTS:
>
> /opt/rh/devtoolset-6//root/bin/g++  -O3 -funroll-loops  loop_optimization.cpp; gdb -batch -ex "file a.out" -ex "disas compile_test_noasm" -ex "disas compile_test_noloop" -ex "disas compile_test_asm_inside_loop"
> Dump of assembler code for function _Z18compile_test_noasmv:
>    0x0000000000400970 <+0>:     repz retq
> End of assembler dump.
> Dump of assembler code for function _Z19compile_test_noloopv:
>    0x0000000000400980 <+0>:     repz retq
> End of assembler dump.
> Dump of assembler code for function _Z28compile_test_asm_inside_loopv:
>    0x0000000000400990 <+0>:     xor    %edx,%edx
>    0x0000000000400992 <+2>:     mov    $0x601080,%ecx
>    0x0000000000400997 <+7>:     xor    %eax,%eax
>    0x0000000000400999 <+9>:     mov    (%rcx,%rdx,8),%rsi
>    0x000000000040099d <+13>:    cmp    $0x80,%rsi
>    0x00000000004009a4 <+20>:    ja     0x4009c1 <_Z28compile_test_asm_inside_loopv+49>
>    0x00000000004009a6 <+22>:    nopw   %cs:0x0(%rax,%rax,1)
>    0x00000000004009b0 <+32>:    add    $0x1,%rax
>    0x00000000004009b4 <+36>:    mov    (%rcx,%rax,8),%rdi
>    0x00000000004009b8 <+40>:    cmp    $0x80,%rdi
>    0x00000000004009bf <+47>:    jbe    0x4009b0 <_Z28compile_test_asm_inside_loopv+32>
>    0x00000000004009c1 <+49>:    repz retq
> End of assembler dump.
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]