GCC does not optimize out functions without side effects with asm statements inside loop even if return value is ignored
Andrew Pinski
pinskia@gmail.com
Sat Oct 7 19:04:00 GMT 2017
On Sat, Oct 7, 2017 at 8:39 AM, Saldyrkine, Mikhail
<Mikhail.Saldyrkine@gs.com> wrote:
> g++ (GCC) 6.3.1 20170216 (Red Hat 6.3.1-3)
>
> In the below case compile_test_asm_inside_loop invokes test_asm_inside_loop and ignores results.
> The call into test_asm_inside_loop is expected to be eliminated since return value is not used and there is no side effect
> The call elimination works fine without asm and without loop
> It does not work with asm inside loop
Because the loop could be an infinite loop, and GCC does not know how
many times the inline-asm is going to be executed or whether it has
other side effects.
Let's look at the function:
// Starting at index `idx`, repeatedly loads an element of `objects` through
// an inline-asm `movq` and returns the first loaded value that is greater
// than 128. There is no other exit from the loop and no bound check on `idx`
// in this function.
inline uint64_t test_asm_inside_loop(uint64_t idx) {
while(true)
{
uint64_t result;
// AT&T operand order: load the 8-byte value at (%1 + %2*8), i.e.
// objects + idx*8, into %0 (`result`). The asm is not marked volatile.
asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
// The only loop exit depends on the value produced by the asm above.
if( result > 128 )
return result;
++idx;
}
}
The loop exits only when result is > 128, and result is produced by the
inline-asm, so the output of the inline-asm controls whether the loop terminates.
Thanks,
Andrew
>
> TEST CODE
>
> #include <iostream>
> #include <assert.h>
>
> using namespace std;
> constexpr static size_t capacity = 1024;
> uint64_t objects[capacity];
>
> // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED
> inline uint64_t test_noloop(uint64_t idx) {
> uint64_t result;
> asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
> if( result > 128 )
> return result;
> return 0;
> }
>
> // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED
> inline uint64_t test_noasm(uint64_t idx) {
> while(true)
> {
> if( objects[idx] > 128 )
> return objects[idx];
> ++idx;
> }
> }
>
> // THE FUNCTION IS KEEPT EVEN WHEN IF RESULT IS NOT USED - ASM INSIDE LOOP CAUSING THE ISSUE
> inline uint64_t test_asm_inside_loop(uint64_t idx) {
> while(true)
> {
> uint64_t result;
> asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
> if( result > 128 )
> return result;
> ++idx;
> }
> }
>
> void init() {
> srand(time(nullptr));
> for( size_t i = 0; i < capacity - 1; ++i )
> objects[i] = random() % 256;
> objects[capacity-1] = 255;
> }
>
> // TETS THAT test_noasm AND test_asm_inside_loop PRODUCE SAME RESULT
> void sanity_test() {
> for( size_t i = 0; i < capacity; ++i ) {
> assert( test_noasm(i) == test_asm_inside_loop(i));
> }
> }
>
> void compile_test_noasm() {
> test_noasm(0);
> }
>
> void compile_test_noloop() {
> test_noloop(0);
> }
>
> void compile_test_asm_inside_loop() {
> test_asm_inside_loop(0);
> }
>
> int main( int argc, char* argv[] ) {
> init();
> sanity_test();
> compile_test_noasm();
> compile_test_noloop();
> compile_test_asm_inside_loop();
> }
>
> COMPILATION AND DISASSEMBLER RESULTS:
>
> /opt/rh/devtoolset-6//root/bin/g++ -O3 -funroll-loops loop_optimization.cpp; gdb -batch -ex "file a.out" -ex "disas compile_test_noasm" -ex "disas compile_test_noloop" -ex "disas compile_test_asm_inside_loop"
> Dump of assembler code for function _Z18compile_test_noasmv:
> 0x0000000000400970 <+0>: repz retq
> End of assembler dump.
> Dump of assembler code for function _Z19compile_test_noloopv:
> 0x0000000000400980 <+0>: repz retq
> End of assembler dump.
> Dump of assembler code for function _Z28compile_test_asm_inside_loopv:
> 0x0000000000400990 <+0>: xor %edx,%edx
> 0x0000000000400992 <+2>: mov $0x601080,%ecx
> 0x0000000000400997 <+7>: xor %eax,%eax
> 0x0000000000400999 <+9>: mov (%rcx,%rdx,8),%rsi
> 0x000000000040099d <+13>: cmp $0x80,%rsi
> 0x00000000004009a4 <+20>: ja 0x4009c1 <_Z28compile_test_asm_inside_loopv+49>
> 0x00000000004009a6 <+22>: nopw %cs:0x0(%rax,%rax,1)
> 0x00000000004009b0 <+32>: add $0x1,%rax
> 0x00000000004009b4 <+36>: mov (%rcx,%rax,8),%rdi
> 0x00000000004009b8 <+40>: cmp $0x80,%rdi
> 0x00000000004009bf <+47>: jbe 0x4009b0 <_Z28compile_test_asm_inside_loopv+32>
> 0x00000000004009c1 <+49>: repz retq
> End of assembler dump.
>
>
More information about the Gcc-bugs
mailing list