Bug 108419 - [13/14/15/14 Regression] Dead Code Elimination Regression at -O2 since r13-440-g98e475a8f58
Summary: [13/14/15/14 Regression] Dead Code Elimination Regression at -O2 since r13-44...
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 13.0
: P2 normal
Target Milestone: 14.2
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks: VRP
  Show dependency treegraph
 
Reported: 2023-01-16 15:18 UTC by Yann Girsberger
Modified: 2024-05-07 07:40 UTC (History)
5 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2023-02-21 00:00:00


Attachments
case as file (252 bytes, text/plain)
2023-01-16 15:18 UTC, Yann Girsberger
Details

Note You need to log in before you can comment on or make changes to this bug.
Description Yann Girsberger 2023-01-16 15:18:44 UTC
Created attachment 54281 [details]
case as file

cat case.c #1565
static int b = 6, c;
long d;
short h;
short i;
short j;
char k;
void foo();
short(a)(short l, short m) { return l + m; }
short f();
short g(unsigned short, int, char, long);
static signed char e() {
  unsigned n = -10;
  for (; n >= 14; n = a(n, 8)) {
    i = g(b, 0, c, b);
    j = f(i, b, d < j, 5, 7, 9, 5);
    k = 200 + n;
    h = k % 5;
    if (h)
      ;
    else
      foo();
  }
  return n;
}
int main() {
  b || e();
  b = 1;
}

`gcc-f99d7d669eaa2830eb5878df4da67e77ec791522 (trunk) -O2` cannot eliminate `foo` but `gcc-releases/gcc-12.2.0 -O2` can.

`gcc-f99d7d669eaa2830eb5878df4da67e77ec791522 (trunk) -O2 -S -o /dev/stdout case.c`
--------- OUTPUT ---------
main:
.LFB2:
	.cfi_startproc
	movl	b(%rip), %esi
	testl	%esi, %esi
	je	.L12
	movl	$1, b(%rip)
	xorl	%eax, %eax
	ret
.L12:
	pushq	%r12
	.cfi_def_cfa_offset 16
	.cfi_offset 12, -16
	movl	$-10, %r12d
	pushq	%rbp
	.cfi_def_cfa_offset 24
	.cfi_offset 6, -24
	movl	$2, %ebp
	pushq	%rbx
	.cfi_def_cfa_offset 32
	.cfi_offset 3, -32
	movl	$5, %ebx
.L6:
	movslq	b(%rip), %rcx
	xorl	%edx, %edx
	xorl	%esi, %esi
	movzwl	%cx, %edi
	call	g
	movl	b(%rip), %esi
	movl	$5, %ecx
	movswq	j(%rip), %rdx
	movw	%ax, i(%rip)
	cmpq	d(%rip), %rdx
	movswl	%ax, %edi
	movl	$9, %r9d
	pushq	%rax
	.cfi_def_cfa_offset 40
	setg	%dl
	movl	$7, %r8d
	xorl	%eax, %eax
	pushq	$5
	.cfi_def_cfa_offset 48
	movzbl	%dl, %edx
	call	f
	movw	%ax, j(%rip)
	leal	-56(%r12), %eax
	movb	%al, k(%rip)
	cbtw
	idivb	%bl
	movl	%eax, %edx
	sarw	$8, %dx
	shrw	$8, %ax
	movw	%dx, h(%rip)
	popq	%rdx
	.cfi_def_cfa_offset 40
	popq	%rcx
	.cfi_def_cfa_offset 32
	je	.L13
.L5:
	addl	$8, %r12d
	subl	$1, %ebp
	movswl	%r12w, %r12d
	je	.L14
	movl	$1, %ebp
	jmp	.L6
.L14:
	movl	$1, b(%rip)
	popq	%rbx
	.cfi_remember_state
	.cfi_def_cfa_offset 24
	xorl	%eax, %eax
	popq	%rbp
	.cfi_def_cfa_offset 16
	popq	%r12
	.cfi_def_cfa_offset 8
	ret
.L13:
	.cfi_restore_state
	xorl	%eax, %eax
	call	foo
	jmp	.L5
---------- END OUTPUT ---------


`gcc-releases/gcc-12.2.0 -O2 -S -o /dev/stdout case.c`
--------- OUTPUT ---------
main:
.LFB2:
	.cfi_startproc
	movl	b(%rip), %r9d
	testl	%r9d, %r9d
	je	.L11
	movl	$1, b(%rip)
	xorl	%eax, %eax
	ret
.L11:
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movl	$-66, %ebp
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	movl	$5, %ebx
	pushq	%r8
	.cfi_def_cfa_offset 32
.L5:
	movslq	b(%rip), %rcx
	xorl	%edx, %edx
	xorl	%esi, %esi
	movzwl	%cx, %edi
	call	g
	movswq	j(%rip), %rdx
	cmpq	d(%rip), %rdx
	movl	$9, %r9d
	pushq	%rcx
	.cfi_def_cfa_offset 40
	setg	%dl
	movl	b(%rip), %esi
	movswl	%ax, %edi
	pushq	$5
	.cfi_def_cfa_offset 48
	movzbl	%dl, %edx
	movl	$7, %r8d
	movl	$5, %ecx
	movw	%ax, i(%rip)
	xorl	%eax, %eax
	call	f
	popq	%rsi
	.cfi_def_cfa_offset 40
	popq	%rdi
	.cfi_def_cfa_offset 32
	movb	%bpl, k(%rip)
	movw	%ax, j(%rip)
	movsbw	%bpl, %ax
	idivb	%bl
	sarw	$8, %ax
	movw	%ax, h(%rip)
	cmpb	$-58, %bpl
	je	.L12
	movl	$-58, %ebp
	jmp	.L5
.L12:
	movl	$1, b(%rip)
	xorl	%eax, %eax
	popq	%rdx
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
---------- END OUTPUT ---------


Bisects to: r13-440-g98e475a8f58

commit 98e475a8f58ca3ba6e9bd5c9276efce4236f5d26
Author: Andrew MacLeod <amacleod@redhat.com>
Date:   Fri Mar 18 11:50:33 2022 -0400

    Fix return value in ranger_cache::get_global_range.
    
    The "is_current" status is returned by parameter, but was being returned by the
    function as well instead of true if NAME had a global range, and FALSE
    if it did not.
    
            * gimple-range-cache.cc (ranger_cache::get_global_range): Return the
            had_global value instead.
Comment 1 Richard Biener 2023-02-21 13:09:12 UTC
Re-confirmed but still without analysis.
Comment 2 Andrew Pinski 2023-02-21 22:11:26 UTC
Hmm, the first difference between the trunk and GCC 12.2.0 is inside IV-OPTs. But I don't see why that would make a difference ...
Comment 3 Jakub Jelinek 2023-03-17 17:34:08 UTC
Slightly cleaned up testcase:
static int b = 6, c;
long d;
short h, i, j;
signed char k;
void foo (void);
short baz (int, int, int, int, int, int, int);
short qux (unsigned short, int, char, long);

short
bar (short l, short m)
{
  return l + m;
}

static signed char
corge (void)
{
  unsigned n;
  for (n = -10U; n >= 14; n = bar (n, 8))
    {
      i = qux (b, 0, c, b);
      j = baz (i, b, d < j, 5, 7, 9, 5);
      k = 200 + n;
      h = k % 5;
      if (!h)
	foo ();
    }
  return n;
}

int
main ()
{
  b || corge ();
  b = 1;
}

The loop iterates twice, with n = -10U and n = -2U; in the third iteration n is 6U, which fails the n >= 14 condition.
In GCC 12 as well as in r13-439 and r13-440 the loop IV is
  # ivtmp.30_42 = PHI <ivtmp.30_34(8), 190(7)>
and loop condition is
  ivtmp.30_34 = ivtmp.30_42 + 8;
  if (ivtmp.30_34 != 206)
while trunk has 2 IVs:
  # RANGE [irange] unsigned int [38, 32767][4294934528, +INF] NONZERO 0xfffffffe
  # n_32 = PHI <n_30(8), 4294967286(7)>
  # RANGE [irange] unsigned int [1, +INF]
  # ivtmp_2 = PHI <ivtmp_43(8), 2(7)>
and
  # RANGE [irange] unsigned short [30, +INF] NONZERO 0xfffe
  l.0_27 = (unsigned short) n_32;
  # RANGE [irange] unsigned short [0, 7][38, +INF] NONZERO 0xfffe
  _28 = l.0_27 + 8;
  # RANGE [irange] short int [-INF, 7][38, +INF] NONZERO 0xfffe
  _29 = (short int) _28;
  # RANGE [irange] unsigned int [0, 7][38, 32767][4294934528, +INF] NONZERO 0xfffffffe
  n_30 = (unsigned int) _29;
  ivtmp_43 = ivtmp_2 - 1;
  if (ivtmp_43 != 0)

The r13-440 regression is in vrp2, previously we were able to determine that because ivtmp.30_42 is [190, 190][198, 198]
then (signed char) of that is [-66, -66][-58, -58] and that % 5 is [-3, -3][-1, -1].
 Folding statement: _24 = (signed char) ivtmp.30_42;
-   Loops range found for ivtmp.30_42: unsigned char [190, 198] and calculated range :unsigned char [190, 190][198, 205]
-Global Exported: _24 = signed char [-66, -58] ...  irange was : signed char [-66, -66][-58, -58]
+Global Exported: _24 = signed char [-66, -58]
 Not folded
 Folding statement: k = _24;
 Not folded
 Folding statement: _25 = _24 % 5;
-Global Exported: _25 = signed char [-3, -1] ...  irange was : signed char [-3, -3][-1, -1]
+Global Exported: _25 = signed char [-4, 0]
is the first difference in the vrp2 dump between r13-439 and r13-440.
The other major change is starting with r13-3486-g4c5b1160776382772 when ivopts uses the 2 IVs rather than one
and the convoluted increment by 8.  It is actually only a normal increment by 8 if n_32 is in [0, 32759][-32777U, -1U]
but that is actually the case here.

I think the ranger doesn't iterate, right?  So is there any way that it would figure out the exact range for the IV?
Comment 4 Andrew Macleod 2023-03-17 18:17:09 UTC
Not easily. I have started working on a phi analyzer for ranger to assist with various issues like this. 

It will analyze phi patterns to weed out the ssa-names that are just copies, and then allow the few remaining ssa-names that actually change the values in the phi sequence to be looked at more easily.

It will help develop better initial ranges, so cases where the non-phi names are a starting value and an increment or decrement can start with something better than VARYING. This will help with cases like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107986

It can help identify cases where values will have restricted smallish ranges, such as https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107822.  

Cases like this one with the second IV are trickier as there isn't a relation to discover between the two IVs...  at least not easily.  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107639 is a case like that too I think.

When there are 2 IVs, I don't suppose there is any way to indicate there is a connection?  Then if we can end up counting one, we might be able to do something with the other.
Comment 5 Richard Biener 2023-03-27 11:11:09 UTC
Ranger relies on SCEV only, while the old VRP pass iterated (very few times) to derive ranges for cases SCEV cannot handle.

I've postponed a related bug to GCC 14, it's not realistic to fix this for GCC 13(.1) unfortunately.
Comment 6 Richard Biener 2024-05-07 07:40:07 UTC
GCC 14.1 is being released, retargeting bugs to GCC 14.2.