% gcc-tk -v Using built-in specs. COLLECT_GCC=gcc-tk COLLECT_LTO_WRAPPER=/zdata/shaoli/compilers/ccbuilder-compilers/gcc-9f0d4adabe2035886a1aa8d2ca990a90de000613/libexec/gcc/x86_64-pc-linux-gnu/13.0.0/lto-wrapper Target: x86_64-pc-linux-gnu Configured with: ../configure --disable-multilib --disable-bootstrap --enable-languages=c,c++ --prefix=/zdata/shaoli/compilers/ccbuilder-compilers/gcc-9f0d4adabe2035886a1aa8d2ca990a90de000613 Thread model: posix Supported LTO compression algorithms: zlib gcc version 13.0.0 20221013 (experimental) (GCC) % % gcc-tk -w -O0 a.c && ./a.out 2 % gcc-tk -w -O2 a.c && ./a.out 0 % % cat a.c l; m; static signed char n(); s() { n(); } signed char n(u) { for (; u <= 2; u++) { l = 2; for (; l ; l--) { m = 2; for (; m; m--) if (u) break; } } } main() { s(2); printf("%d\n", m); } % Compiler explorer: https://godbolt.org/z/z5Mhxaz9E
A few notes about reduction: please don't over-reduce the test case (by removing the variable and function types). Moreover, if possible, please use C-Vise, which does not rename identifiers by default. Anyway, thanks for the test case!
... > static signed char n(); > s() { n(); } This seems fishy, as you are calling 'n' without an argument. > signed char n(u) { > for (; u <= 2; u++) { ...
Indeed: the test case apparently expects the argument register set up for s(2) to still be intact for the call to n(), but that expectation is bogus, since n() is called without an argument.
Hi, Sorry for my previous over-reduced test case. Here is a well-formed reduction: ``` void printf(); int a, b, c; int d[3]; int e(int f, int g, int k, int l, int m, int n) { int h = 4 * f + 2 * (g + l + n) + k + m, j = h >> 4; return j; } void o(int f) { int i = 0; for (; i < 3; i++) d[i] = 4279432140; for (; - 72 + f - -72 <= 2; f++) { a = 2; for (; a >= 0; a--) { b = 2; for (; b >= 0; b--) { int p = !(d[b] > 0 != e(10, 29, 42, 74, 89, -68) + f - 15); if (p) break; } if (f) break; } } } void q() { int i = 0; o(0); if (c) for (; i < 1;) for (; i < 1; i++) ; } int main() { q(); printf("%d\n", b); } ``` Compiler explorer: https://godbolt.org/z/bjjW5q17r
Thanks, a bit modified: cat pr107257.c int a, b, c; int d[3]; int e(int f, int g, int k, int l, int m, int n) { int h = 4 * f + 2 * (g + l + n) + k + m, j = h >> 4; return j; } void o(int f) { int i = 0; for (; i < 3; i++) d[i] = 4279432140; for (; - 72 + f - -72 <= 2; f++) { a = 2; for (; a >= 0; a--) { b = 2; for (; b >= 0; b--) { int p = !(d[b] > 0 != e(10, 29, 42, 74, 89, -68) + f - 15); if (p) break; } if (f) break; } } } void q() { int i = 0; o(0); if (c) for (; i < 1;) for (; i < 1; i++) ; } int main() { q(); __builtin_printf("%d\n", b); if (b != -1) __builtin_abort (); } Started with r13-857-gf1652e3343b1ec47.
With -fno-tree-slp-vectorize the failure goes away. diff in optimized: __attribute__((noipa, noinline, noclone, no_icf)) void q () { - <bb 2> [local count: 1073742492]: - MEM <unsigned long> [(int *)&d] = 18380021091030725580; + int b_lsm.15; + int f; + _Bool _5; + + <bb 2> [local count: 1073741824]: + MEM <vector(2) int> [(int *)&d] = { -15535156, -15535156 }; d[2] = -15535156; + + <bb 3> [local count: 8687551919]: + # f_30 = PHI <f_25(5), 0(2)> + if (f_30 == 0) + goto <bb 5>; [1.43%] + else + goto <bb 4>; [98.57%] + + <bb 4> [local count: 4698759900]: + if (f_30 != 2) + goto <bb 5>; [79.66%] + else + goto <bb 6>; [20.34%] + + <bb 5> [local count: 7731921808]: + # f_25 = PHI <2(4), 1(3)> + goto <bb 3>; [100.00%] + + <bb 6> [local count: 1073742492]: a = 2; - b = -1; + _5 = f_30 == 0; + b_lsm.15_33 = _5 ? 2 : -1; + b = b_lsm.15_33; return; } both variants look OK so instead RTL expansion looks fishy(?) ;; b = b_lsm.15_33; (insn 28 26 29 (parallel [ (set (reg:CCC 17 flags) (ne:CCC (reg/v:SI 83 [ f ]) (const_int 0 [0]))) (set (reg:SI 89) (neg:SI (reg/v:SI 83 [ f ]))) ]) -1 (nil)) (insn 29 28 30 (parallel [ (set (reg:SI 90) (neg:SI (ltu:SI (reg:CCC 17 flags) (const_int 0 [0])))) (clobber (reg:CC 17 flags)) ]) -1 (nil)) (insn 30 29 31 (parallel [ (set (reg:SI 90) (ior:SI (reg:SI 90) (const_int 2 [0x2]))) (clobber (reg:CC 17 flags)) ]) -1 (nil)) (insn 31 30 32 (set (reg:SI 88 [ b_lsm.15 ]) (reg:SI 90)) -1 (nil)) (insn 32 31 0 (set (mem/c:SI (symbol_ref:DI ("b") [flags 0x2] <var_decl 0x7ffff6526d80 b>) [1 b+0 S4 A32]) (reg:SI 88 [ b_lsm.15 ])) -1 (nil)) and we eventually optimize this to q: .LFB2: .cfi_startproc movq .LC0(%rip), %rax movl $-15535156, d+8(%rip) movl $2, a(%rip) movq %rax, d(%rip) movl $2, b(%rip) ret
(set (reg:CCC 17 flags) (ne:CCC (reg/v:SI 83 [ f ]) (const_int 0 [0]))) Yes, this is broken, which means this is a dup of bug 107172. *** This bug has been marked as a duplicate of bug 107172 ***
This bug is not a dup of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107172: that bug has been fixed, but the trunk still miscompiles this test case.
Sorry, this is indeed a dup. *** This bug has been marked as a duplicate of bug 107172 ***