Here is a short program for gcc 7.1.1 gives different output with "-O1 -fpeephole2 -m64" and "-O1 -m64". int main() { unsigned long long _discard = 0, zero = 0, maxull = 0; unsigned char zero1 = __builtin_ia32_addcarryx_u64(0, 0, 0, &_discard); unsigned char zero2 = __builtin_ia32_addcarryx_u64(zero1, 0, 0, &zero); __builtin_ia32_sbb_u64(0x0, 2, -1, &_discard); unsigned char one = __builtin_ia32_sbb_u64(0, zero, 1, &maxull); unsigned long long x = __builtin_ia32_sbb_u64(one, zero2, 0, &_discard); unsigned long long z1 = 0; __asm__ ("movq %1, %0;" :"+r"(z1) :"r"(x)); unsigned long long z2 = 3; __asm__ ("movq %1, %0;" :"+r"(z2) :"r"(x)); return 1-(z1 | z2); } Without -fpeephole2, the exit code is 0. With -fpeephole2, the exit code is 1. I think this program should be deterministic, so I am tentatively attributing the difference to a flaw in the peephole2 optimizations. Disassembling the compiled code indeed shows that one of the SBB intrinsics has been dropped... of course this by itself isn't evidence of anything going wrong as the whole program could in principle be constant-propagated away, but what is going on looks off to me. Annotated side-by-side diff of relevant disassembly: http://web.mit.edu/~andreser/Public/O1-fpeephole2.diff.html The same disassembly for email-users' convenience. O1: 0000000000000000 <main>: 0: bf 00 00 00 00 mov $0x0,%edi 5: b8 00 00 00 00 mov $0x0,%eax a: ba 00 00 00 00 mov $0x0,%edx f: 80 c2 ff add $0xff,%dl 12: 48 89 c1 mov %rax,%rcx 15: 48 11 c1 adc %rax,%rcx 18: 0f 92 c2 setb %dl ; dl = 0 1b: be 01 00 00 00 mov $0x1,%esi 20: 40 80 c7 ff add $0xff,%dil 24: 48 19 f1 sbb %rsi,%rcx ; rcx-rsi = 0 - 1 = 0xff...ff, CF = 1 27: 0f 92 c1 setb %cl ; cl = 1 2a: 0f b6 d2 movzbl %dl,%edx 2d: 80 c1 ff add $0xff,%cl ; cl = 0; CF = 1 30: 48 19 c2 sbb %rax,%rdx ; rdx = -1; CF = 1 33: 0f 92 c1 setb %cl 36: 0f b6 c9 movzbl %cl,%ecx 39: 48 89 c8 mov %rcx,%rax 3c: ba 03 00 00 00 mov $0x3,%edx 41: 48 89 ca mov %rcx,%rdx 44: 09 d0 or %edx,%eax 46: ba 01 00 00 00 mov $0x1,%edx 4b: 29 c2 sub %eax,%edx 4d: 89 d0 mov %edx,%eax 4f: c3 retq With -fpeephole2: 0000000000000000 <main>: 0: 31 c0 xor %eax,%eax 2: 31 d2 xor %edx,%edx 4: 80 c2 ff add $0xff,%dl 7: 48 89 c1 mov %rax,%rcx a: 48 11 c1 adc %rax,%rcx d: 0f 92 c2 setb %dl ; dl = 0 ; cl = 0 10: 0f b6 d2 movzbl %dl,%edx 13: 31 c9 xor %ecx,%ecx ; cl = 0; CF = 0 15: 48 19 c2 sbb %rax,%rdx ; rdx = 0; CF = 0 18: 0f 92 c1 setb %cl 1b: 48 89 c8 mov %rcx,%rax 1e: ba 03 00 00 00 mov $0x3,%edx 23: 48 89 ca mov %rcx,%rdx 26: 09 d0 or %edx,%eax 27: ba 01 00 00 00 mov $0x1,%edx 2d: 29 c2 sub %eax,%edx 2f: 89 d0 mov %edx,%eax 31: c3 retq
Confirmed. A couple of peephole2 patterns require additional check that FLAGS_REG is dead at the first converted pattern. Untested patch: --cut here-- diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 40a20d0..da0f7c2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -11754,7 +11754,8 @@ (zero_extend (match_dup 1)))] "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) - && ! reg_overlap_mentioned_p (operands[3], operands[0])" + && ! reg_overlap_mentioned_p (operands[3], operands[0]) + && peep2_regno_dead_p (0, FLAGS_REG)" [(set (match_dup 4) (match_dup 0)) (set (strict_low_part (match_dup 5)) (match_dup 2))] @@ -11775,7 +11776,8 @@ "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) - && ! reg_set_p (operands[3], operands[4])" + && ! reg_set_p (operands[3], operands[4]) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 5) (match_dup 0)) (match_dup 4)]) (set (strict_low_part (match_dup 6)) @@ -11797,7 +11799,8 @@ (and:SI (match_dup 3) (const_int 255))) (clobber (reg:CC FLAGS_REG))])] "REGNO (operands[1]) == REGNO (operands[3]) - && ! reg_overlap_mentioned_p (operands[3], operands[0])" + && ! reg_overlap_mentioned_p (operands[3], operands[0]) + && peep2_regno_dead_p (0, FLAGS_REG)" [(set (match_dup 4) (match_dup 0)) (set (strict_low_part (match_dup 5)) (match_dup 2))] @@ -11819,7 +11822,8 @@ "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) - && ! reg_set_p (operands[3], operands[4])" + && ! reg_set_p (operands[3], operands[4]) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 5) (match_dup 0)) (match_dup 4)]) (set (strict_low_part (match_dup 6)) --cut here--
Author: uros Date: Tue Jul 4 20:52:32 2017 New Revision: 249977 URL: https://gcc.gnu.org/viewcvs?rev=249977&root=gcc&view=rev Log: 2017-07-04 Uros Bizjak <ubizjak@gmail.com> PR target/81300 * config/i386/i386.md (setcc + movzbl/and to xor + setcc peepholes): Require dead FLAGS_REG at the beginning of a peephole. testsuite/ChangeLog: PR target/81300 * gcc.target/i386/pr81300.c: New test. Added: trunk/gcc/testsuite/gcc.target/i386/pr81300.c Modified: trunk/gcc/ChangeLog trunk/gcc/config/i386/i386.md trunk/gcc/testsuite/ChangeLog
Author: uros Date: Tue Jul 4 21:05:17 2017 New Revision: 249978 URL: https://gcc.gnu.org/viewcvs?rev=249978&root=gcc&view=rev Log: PR target/81300 * config/i386/i386.md (setcc + movzbl/and to xor + setcc peepholes): Require dead FLAGS_REG at the beginning of a peephole. PR target/81294 * config/i386/adxintrin.h (_subborrow_u32): Swap _X and _Y arguments in the call to __builtin_ia32_sbb_u32. (_subborrow_u64): Swap _X and _Y arguments in the call to __builtin_ia32_sbb_u64. testsuite/ChangeLog: PR target/81300 * gcc.target/i386/pr81300.c: New test. PR target/81294 * gcc.target/i386/adx-addcarryx32-2.c (adx_test): Swap x and y arguments in the call to _subborrow_u32. * gcc.target/i386/adx-addcarryx64-2.c (adx_test): Swap x and y arguments in the call to _subborrow_u64. * gcc.target/i386/pr81294-1.c: New test. * gcc.target/i386/pr81294-2.c: Ditto. Added: branches/gcc-7-branch/gcc/testsuite/gcc.target/i386/pr81294-1.c branches/gcc-7-branch/gcc/testsuite/gcc.target/i386/pr81294-2.c branches/gcc-7-branch/gcc/testsuite/gcc.target/i386/pr81300.c Modified: branches/gcc-7-branch/gcc/ChangeLog branches/gcc-7-branch/gcc/config/i386/adxintrin.h branches/gcc-7-branch/gcc/config/i386/i386.md branches/gcc-7-branch/gcc/testsuite/ChangeLog branches/gcc-7-branch/gcc/testsuite/gcc.target/i386/adx-addcarryx32-2.c branches/gcc-7-branch/gcc/testsuite/gcc.target/i386/adx-addcarryx64-2.c
Author: uros Date: Fri Jul 14 05:30:58 2017 New Revision: 250196 URL: https://gcc.gnu.org/viewcvs?rev=250196&root=gcc&view=rev Log: Backport from mainline 2017-07-10 Uros Bizjak <ubizjak@gmail.com> PR target/81375 * config/i386/i386.md (divsf3): Add TARGET_SSE to TARGET_SSE_MATH. (rcpps): Ditto. (*rsqrtsf2_sse): Ditto. (rsqrtsf2): Ditto. (div<mode>3): Macroize insn from divdf3 and divsf3 using MODEF mode iterator. Backport from mainline 2017-07-04 Uros Bizjak <ubizjak@gmail.com> PR target/81300 * config/i386/i386.md (setcc + movzbl/and to xor + setcc peepholes): Require dead FLAGS_REG at the beginning of a peephole. testsuite/ChangeLog: Backport from mainline 2017-07-10 Uros Bizjak <ubizjak@gmail.com> PR target/81375 * gcc.target/i386/pr81375.c: New test. Backport from mainline 2017-07-04 Uros Bizjak <ubizjak@gmail.com> PR target/81300 * gcc.target/i386/pr81300.c: New test. Added: branches/gcc-6-branch/gcc/testsuite/gcc.target/i386/pr81300.c branches/gcc-6-branch/gcc/testsuite/gcc.target/i386/pr81375.c Modified: branches/gcc-6-branch/gcc/ChangeLog branches/gcc-6-branch/gcc/config/i386/i386.md branches/gcc-6-branch/gcc/testsuite/ChangeLog
Author: uros Date: Fri Jul 14 17:19:30 2017 New Revision: 250211 URL: https://gcc.gnu.org/viewcvs?rev=250211&root=gcc&view=rev Log: Backport from mainline 2017-07-10 Uros Bizjak <ubizjak@gmail.com> PR target/81375 * config/i386/i386.md (divsf3): Add TARGET_SSE to TARGET_SSE_MATH. (rcpps): Ditto. (*rsqrtsf2_sse): Ditto. (rsqrtsf2): Ditto. (div<mode>3): Macroize insn from divdf3 and divsf3 using MODEF mode iterator. Backport from mainline 2017-07-04 Uros Bizjak <ubizjak@gmail.com> PR target/81300 * config/i386/i386.md (setcc + movzbl/and to xor + setcc peepholes): Require dead FLAGS_REG at the beginning of a peephole. testsuite/ChangeLog: Backport from mainline 2017-07-10 Uros Bizjak <ubizjak@gmail.com> PR target/81375 * gcc.target/i386/pr81375.c: New test. Backport from mainline 2017-07-04 Uros Bizjak <ubizjak@gmail.com> PR target/81300 * gcc.target/i386/pr81300.c: New test. Added: branches/gcc-5-branch/gcc/testsuite/gcc.target/i386/pr81300.c branches/gcc-5-branch/gcc/testsuite/gcc.target/i386/pr81375.c Modified: branches/gcc-5-branch/gcc/ChangeLog branches/gcc-5-branch/gcc/config/i386/i386.md branches/gcc-5-branch/gcc/testsuite/ChangeLog
Fixed.
*** Bug 87139 has been marked as a duplicate of this bug. ***