Bug 111012 - [14 Regression] Dead Code Elimination Regression at -O3 since r14-573-g69f1a8af45d
Summary: [14 Regression] Dead Code Elimination Regression at -O3 since r14-573-g69f1a8...
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 14.0
Importance: P3 normal
Target Milestone: 14.0
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2023-08-14 12:17 UTC by Scherrer Svenja
Modified: 2024-01-10 14:36 UTC (History)
4 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2023-08-14 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Scherrer Svenja 2023-08-14 12:17:50 UTC
static int b, c;
static char d;
static short e = -1L;
static int *j = &c;
void foo(void);
void bar150_(void);
void bar173_(void);
static char(a)(char k, char l) { return k + l; }
static void g(unsigned k, int l) {
  if (l)
    if (!k)
      foo();
  if (k)
    bar150_();
}
static const unsigned char h();
static char i(int k) {
  if (k)
    bar173_();
  c <= 0 >= b;
  if (k)
    return c;
  return c;
}
static void f(char k, unsigned) {
  char m = h(8 != c);
  g(m && 8, k);
}
static const unsigned char h(int k) {
  d = i(c);
  *j = a(e, d < k < k && c) ^ k;
  b = 0;
  return c;
}
int main() { f(c, b); }

gcc-9ec5d6de735 (trunk) -O3 cannot eliminate the call to foo but gcc-releases/gcc-13.1.0 -O3 can.
-----------------------------------------------------------------------
gcc-9ec5d6de7355c15b3811150d1581dab5bd489966 -O3 case.c -S -o case.s
--------- OUTPUT ---------
main:
.LFB5:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	subq	$8, %rsp
	.cfi_def_cfa_offset 32
	movl	c(%rip), %ebx
	cmpl	$8, %ebx
	setne	%bpl
	testl	%ebx, %ebx
	jne	.L9
	movl	$-2, c(%rip)
	xorl	%eax, %eax
	movl	%eax, b(%rip)
.L4:
	call	bar150_
.L6:
	addq	$8, %rsp
	.cfi_remember_state
	.cfi_def_cfa_offset 24
	xorl	%eax, %eax
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
	.p2align 4,,10
	.p2align 3
.L9:
	.cfi_restore_state
	call	bar173_
	movl	c(%rip), %edx
	movzbl	%bpl, %ebp
	movl	$0, b(%rip)
	movsbl	%dl, %eax
	cmpl	%eax, %ebp
	setg	%al
	movzbl	%al, %eax
	cmpl	%eax, %ebp
	setg	%al
	testl	%edx, %edx
	setne	%dl
	andl	%edx, %eax
	subl	$1, %eax
	movsbl	%al, %eax
	xorl	%ebp, %eax
	movl	%eax, c(%rip)
	testb	%bl, %bl
	je	.L3
	testb	%al, %al
	jne	.L4
	call	foo
	jmp	.L6
.L3:
	testb	%al, %al
	je	.L6
	jmp	.L4
---------- END OUTPUT ---------

-----------------------------------------------------------------------
gcc-2b98cc24d6af0432a74f6dad1c722ce21c1f7458 -O3 case.c -S -o case.s
--------- OUTPUT ---------
main:
.LFB5:
	.cfi_startproc
	movl	c(%rip), %eax
	pushq	%rbx
	.cfi_def_cfa_offset 16
	.cfi_offset 3, -16
	xorl	%ebx, %ebx
	cmpl	$8, %eax
	setne	%bl
	testl	%eax, %eax
	jne	.L10
.L2:
	cmpl	%eax, %ebx
	setg	%al
	movzbl	%al, %eax
	cmpl	%eax, %ebx
	jg	.L3
	notl	%ebx
.L4:
	movl	%ebx, c(%rip)
	movl	$0, b(%rip)
	call	bar150_
	xorl	%eax, %eax
	popq	%rbx
	.cfi_remember_state
	.cfi_def_cfa_offset 8
	ret
.L3:
	.cfi_restore_state
	cmpl	$1, c(%rip)
	sbbb	%bl, %bl
	xorl	$1, %ebx
	movsbl	%bl, %ebx
	jmp	.L4
.L10:
	call	bar173_
	movsbl	c(%rip), %eax
	jmp	.L2
---------- END OUTPUT ---------

-----------------------------------------------------------------------
Bisects to r14-573-g69f1a8af45d
Comment 1 Andrew Pinski 2023-08-14 15:43:51 UTC
So the difference here is basically doing the following manually:
```
#if 0
static char i(int k) {
  if (k)
    bar173_();
  c <= 0 >= b;
  if (k)
    return c;
  return c;
}
#else
static char i(int k) {
  if (k)
    bar173_();
  c <= 0 >= b;
  int t;
  if (k)
    t = c;
  else
    t = c;
  return t;
}
#endif
```
and we previously did some jump threading such that the load of `c` ended up in the `if (k)` branch after the call to `bar173_()`. The only pass that moves the load like that is PRE, which probably runs later, but too late for jump threading to enable the subsequent optimizations.

I have not looked into why this makes a difference either. Maybe it is because there is another jump threading opportunity where we know c is 0 ...

Confirmed.
Comment 2 Andrew Pinski 2023-11-24 04:35:33 UTC
Looks like this is fixed on the trunk ...
Comment 3 Martin Jambor 2024-01-10 14:36:49 UTC
This has been fixed with Richi's r14-3982-g9ea74d235c7e78 (better DCE after forwprop).  Given the title of the patch I guess it's safe to declare this fixed.