Bug 109513 - Missed Dead Code Elimination when using __builtin_unreachable
Summary: Missed Dead Code Elimination when using __builtin_unreachable
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 13.0
Importance: P3 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2023-04-14 13:15 UTC by Theodoros Theodoridis
Modified: 2023-04-17 07:31 UTC (History)
3 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2023-04-17 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Theodoros Theodoridis 2023-04-14 13:15:42 UTC
cat input.c

/* External functions; this test case declares them but does not define them. */
void foo(void);
void bar(void);

/* File-scope statics; those without an initializer start out zero/null. */
static char a, h;
static int b, c, d, k;
static int **e;
static int ***f = &e; /* f points at e */
static short g;
static int *j, *l = &c; /* l points at c; j has no initializer */

/*
 * Returns 0 when *i is nonzero.  Otherwise spins for as long as c is
 * nonzero (the loop body is empty), calls bar() if i is the address of b,
 * and returns 1.
 */
static char m(int *i) {
  if (*i)
    return 0;
  for (; c;)
    ; /* empty body: nothing inside the loop changes c */
  if (i == &b)
    bar(); /* external call the report describes as dead */
  return 1;
}
/*
 * Test-case entry point.  Calls m(&b) while k is nonzero, stores a + 10
 * into h, and, if h is nonzero, runs the g loop and then calls foo() only
 * when o points at neither c nor d.
 */
int main() {
  int *n = &b;
  for (; k; ++k) /* this ++k is the only write to k in the test case */
    m(n);
  h = a + 10;
  if (h) {
    int *o = &d;
    for (; g; ++g) { /* this ++g is the only write to g in the test case */
      *e = l;  /* store l (initialized to &c) through e */
      j = **f; /* f is initialized to &e, so this reads *e back */
      o = j;
    }
    if (!(o == &c || o == &d))
      foo(); /* external call the report describes as dead */
  }
}

In the above piece of code, the calls to foo and bar are both dead.

gcc-trunk at -O3 generates the following code:
main:
        subq    $8, %rsp
        movl    k(%rip), %ecx
        testl   %ecx, %ecx
        je      .L7
.L2:
        movl    c(%rip), %edx
        testl   %edx, %edx
        je      .L5
.L6:
        jmp     .L6
.L5:
        call    bar
        addl    $1, k(%rip)
        jne     .L2
.L7:
        cmpw    $0, g(%rip)
        je      .L4
        movq    e(%rip), %rax
        movq    $c, (%rax)
        xorl    %eax, %eax
        movw    %ax, g(%rip)
.L4:
        xorl    %eax, %eax
        addq    $8, %rsp
        ret

It eliminates the call to foo but not the call to bar. If I try to "help" the compiler by replacing bar() with __builtin_unreachable(), it generates worse code and also fails to eliminate the dead call to foo:

/* External function; this test case declares it but does not define it. */
void foo(void);

/* File-scope statics; those without an initializer start out zero/null. */
static char a, h;
static int b, c, d, k;
static int **e;
static int ***f = &e; /* f points at e */
static short g;
static int *j, *l = &c; /* l points at c; j has no initializer */

/*
 * Returns 0 when *i is nonzero.  Otherwise spins for as long as c is
 * nonzero (the loop body is empty), reaches __builtin_unreachable() if i
 * is the address of b, and returns 1.
 */
static char m(int *i) {
  if (*i)
    return 0;
  for (; c;)
    ; /* empty body: nothing inside the loop changes c */
  if (i == &b)
    __builtin_unreachable(); // <- the call to bar was here
  return 1;
}

/*
 * Test-case entry point.  Calls m(&b) while k is nonzero, stores a + 10
 * into h, and, if h is nonzero, runs the g loop and then calls foo() only
 * when o points at neither c nor d.
 */
int main() {
  int *n = &b;
  for (; k; ++k) /* this ++k is the only write to k in the test case */
    m(n);
  h = a + 10;
  if (h) {
    int *o = &d;
    for (; g; ++g) { /* this ++g is the only write to g in the test case */
      *e = l;  /* store l (initialized to &c) through e */
      j = **f; /* f is initialized to &e, so this reads *e back */
      o = j;
    }
    if (!(o == &c || o == &d))
      foo(); /* external call the report describes as dead */
  }
}

gcc-trunk -O3 output:

main:
        movl    k(%rip), %ecx
        testl   %ecx, %ecx
        je      .L22
.L2:
        jmp     .L2
.L22:
        movq    e(%rip), %rsi
        movzwl  g(%rip), %eax
        xorl    %ecx, %ecx
        movl    $d, %edx
        jmp     .L3
.L4:
        movq    $c, (%rsi)
        addl    $1, %eax
        movl    $1, %ecx
        movl    $c, %edx
.L3:
        testw   %ax, %ax
        jne     .L4
        testb   %cl, %cl
        je      .L5
        movw    $0, g(%rip)
.L5:
        cmpq    $d, %rdx
        je      .L18
        cmpq    $c, %rdx
        je      .L18
        pushq   %rax
        call    foo
        xorl    %eax, %eax
        popq    %rdx
        ret
.L18:
        xorl    %eax, %eax
        ret

This is also an old regression. gcc-7.4 at -O3 generates the following code for the version with __builtin_unreachable:

main:
        movl    k(%rip), %eax
        testl   %eax, %eax
        je      .L7
.L2:
        jmp     .L2
.L7:
        cmpw    $0, g(%rip)
        je      .L4
        movq    e(%rip), %rax
        movw    $0, g(%rip)
        movq    $c, (%rax)
.L4:
        xorl    %eax, %eax
        ret
Comment 1 Martin Liška 2023-04-17 07:11:35 UTC
The __builtin_unreachable missed optimization started with r8-4670-gb00ff621f6eea4.
Comment 2 Andrew Pinski 2023-04-17 07:20:26 UTC
This is interesting because the C++ front-end causes slightly different IR and with __builtin_unreachable produces decent code at -O3 still.
Comment 3 Andrew Pinski 2023-04-17 07:31:15 UTC
(In reply to Andrew Pinski from comment #2)
> This is interesting because the C++ front-end causes slightly different IR
> and with __builtin_unreachable produces decent code at -O3 still.

The difference is due to -ffinite-loops, which GCC enables by default at -O2 and above for C++ (C++11 or later) but not for C.