cat input.c void foo(void); void bar(void); static char a, h; static int b, c, d, k; static int **e; static int ***f = &e; static short g; static int *j, *l = &c; static char m(int *i) { if (*i) return 0; for (; c;) ; if (i == &b) bar(); return 1; } int main() { int *n = &b; for (; k; ++k) m(n); h = a + 10; if (h) { int *o = &d; for (; g; ++g) { *e = l; j = **f; o = j; } if (!(o == &c || o == &d)) foo(); } } In the above piece of code the calls to foo and bar are both dead. gcc-trunk at -O3 generates the following code: main: subq $8, %rsp movl k(%rip), %ecx testl %ecx, %ecx je .L7 .L2: movl c(%rip), %edx testl %edx, %edx je .L5 .L6: jmp .L6 .L5: call bar addl $1, k(%rip) jne .L2 .L7: cmpw $0, g(%rip) je .L4 movq e(%rip), %rax movq $c, (%rax) xorl %eax, %eax movw %ax, g(%rip) .L4: xorl %eax, %eax addq $8, %rsp ret It eliminates the call to foo but not the call to bar. If I try to "help" the compiler by replacing bar() with __builtin_unreachable(), it generates worse code and also fails to eliminate the dead call to foo: void foo(void); static char a, h; static int b, c, d, k; static int **e; static int ***f = &e; static short g; static int *j, *l = &c; static char m(int *i) { if (*i) return 0; for (; c;) ; if (i == &b) __builtin_unreachable(); // <- the call to bar was here return 1; } int main() { int *n = &b; for (; k; ++k) m(n); h = a + 10; if (h) { int *o = &d; for (; g; ++g) { *e = l; j = **f; o = j; } if (!(o == &c || o == &d)) foo(); } } gcc-trunk -O3 output: main: movl k(%rip), %ecx testl %ecx, %ecx je .L22 .L2: jmp .L2 .L22: movq e(%rip), %rsi movzwl g(%rip), %eax xorl %ecx, %ecx movl $d, %edx jmp .L3 .L4: movq $c, (%rsi) addl $1, %eax movl $1, %ecx movl $c, %edx .L3: testw %ax, %ax jne .L4 testb %cl, %cl je .L5 movw $0, g(%rip) .L5: cmpq $d, %rdx je .L18 cmpq $c, %rdx je .L18 pushq %rax call foo xorl %eax, %eax popq %rdx ret .L18: xorl %eax, %eax ret This is also an old regression. 
For the version with __builtin_unreachable, gcc-7.4 at -O3 generates: main: movl k(%rip), %eax testl %eax, %eax je .L7 .L2: jmp .L2 .L7: cmpw $0, g(%rip) je .L4 movq e(%rip), %rax movw $0, g(%rip) movq $c, (%rax) .L4: xorl %eax, %eax ret
The __builtin_unreachable missed optimization started with r8-4670-gb00ff621f6eea4.
This is interesting because the C++ front-end produces slightly different IR and, with __builtin_unreachable, still generates decent code at -O3.
(In reply to Andrew Pinski from comment #2) > This is interesting because the C++ front-end causes slightly different IR > and with __builtin_unreachable produces decent code at -O3 still. The difference is due to -ffinite-loops, which GCC enables by default at -O2 and above for C++11 and later but not for C.