Created attachment 44475 [details] .ii file generated from t4.cpp The following program produces an incorrect result on GCC 8.1 and 8.2. It succeeds on 7.2. AFAICT, the problem is that the comparison happens before the object has been copied. The compiler could compare using the temporary stored in rdx, but it instead compares using the value at [rsp+8], which isn't yet written to. This happens when using -march=x86-64, but does not happen on -march=core2 or newer. In these cases, memcpy is used to do the copy, and the comparison happens after the call to memcpy. Moreover, GCC misses an opportunity to optimize away the copy altogether. Clang 6.0.0 does optimize away the copy. The misplaced comparison happens at -O2 or higher; at -O1, the comparison is done after the copy. #include <cstdint> #include <cassert> #include <time.h> struct ID { uint64_t value; }; uint64_t value(ID id) { return id.value; } uint64_t gen { 1000 }; struct Msg { uint64_t time; ID id; }; struct V { V() { } V(Msg const & msg) : msg(msg) { } Msg & get() { return msg; } Msg msg; char pad[237 - sizeof(Msg)]; }; struct T : V { using V::V; }; Msg init_msg() { Msg msg; timespec t; clock_gettime(CLOCK_REALTIME, &t); msg.time = t.tv_sec + t.tv_nsec; msg.id.value = ++gen; return msg; } int main() { T t; t = init_msg(); assert(value(t.get().id) == 1001); } $ g++-8.1 -std=c++14 -O2 -Wall -Werror -march=x86-64 -save-temps -v t4.cpp Using built-in specs. COLLECT_GCC=/usr/local/bin/g++-8.1 COLLECT_LTO_WRAPPER=/opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/lto-wrapper Target: x86_64-linux-gnu Configured with: /home/pbrannan/git/theme_infra/packaging/gcc-8.1.0/configure --prefix=/opt/gcc/8.1 --enable-languages=c,c++ --with-pkgversion='Thesys GCC 8.1.0 for Ubuntu 16.04' --enable-shared --enable-gnu-unique-object --enable-threads=posix --enable-checking=release --disable-vtable-verify --enable-lto --with-ab i=m64 --enable-multiarch --disable-multilib --with-build-config=bootstrap-lto --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix gcc version 8.1.0 (Thesys GCC 8.1.0 for Ubuntu 16.04) COLLECT_GCC_OPTIONS='-std=c++14' '-O2' '-Wall' '-Werror' '-march=x86-64' '-save-temps' '-v' '-shared-libgcc' /opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE t4.cpp -march=x86-64 -std=c++14 -Wall -Werror -O2 -fpch-preprocess -o t4.ii ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu" ignoring nonexistent directory "/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../../x86_64-linux-gnu/include" #include "..." search starts here: #include <...> search starts here: /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../../include/c++/8.1.0 /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../../include/c++/8.1.0/x86_64-linux-gnu /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../../include/c++/8.1.0/backward /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/include /usr/local/include /opt/gcc/8.1/include /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/include-fixed /usr/include/x86_64-linux-gnu /usr/include End of search list. COLLECT_GCC_OPTIONS='-std=c++14' '-O2' '-Wall' '-Werror' '-march=x86-64' '-save-temps' '-v' '-shared-libgcc' /opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/cc1plus -fpreprocessed t4.ii -quiet -dumpbase t4.cpp -march=x86-64 -auxbase t4 -O2 -Wall -Werror -std=c++14 -version -o t4.s GNU C++14 (Thesys GCC 8.1.0 for Ubuntu 16.04) version 8.1.0 (x86_64-linux-gnu) compiled by GNU C version 8.1.0, GMP version 6.1.0, MPFR version 3.1.4, MPC version 1.0.3, isl version isl-0.16.1-GMP GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 GNU C++14 (Thesys GCC 8.1.0 for Ubuntu 16.04) version 8.1.0 (x86_64-linux-gnu) compiled by GNU C version 8.1.0, GMP version 6.1.0, MPFR version 3.1.4, MPC version 1.0.3, isl version isl-0.16.1-GMP GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 Compiler executable checksum: 6724e7d270573c4346dc08c31ad9ce91 COLLECT_GCC_OPTIONS='-std=c++14' '-O2' '-Wall' '-Werror' '-march=x86-64' '-save-temps' '-v' '-shared-libgcc' as -v --64 -o t4.o t4.s GNU assembler version 2.26.1 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.26.1 COMPILER_PATH=/opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/:/opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/:/opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/:/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/:/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/ LIBRARY_PATH=/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/:/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../../lib64/:/lib/x86_64-linux-gnu/:/lib/../lib64/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib64/:/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../:/lib/:/usr/lib/ COLLECT_GCC_OPTIONS='-std=c++14' '-O2' '-Wall' '-Werror' '-march=x86-64' '-save-temps' '-v' '-shared-libgcc' /opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/collect2 -plugin /opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/liblto_plugin.so -plugin-opt=/opt/gcc/8.1/libexec/gcc/x86_64-linux-gnu/8.1.0/lto-wrapper -plugin-opt=-fresolution=t4.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pas s-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/crtbegin.o -L/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1 .0 -L/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../../../lib64 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib64 -L/opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/../../.. t4.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /opt/gcc/8.1/lib/gcc/x86_64-linux-gnu/8.1.0/crtend.o /usr /lib/x86_64-linux-gnu/crtn.o COLLECT_GCC_OPTIONS='-std=c++14' '-O2' '-Wall' '-Werror' '-march=x86-64' '-save-temps' '-v' '-shared-libgcc'
gimple is the same for -O vs -O2, so the issue is either RTL or target.
This is caused by r255510.
RTL expansion shows different alignment of some stack vars, -O1 -fstrict-aliasing is also broken. Assembler difference -O1 vs. -O2 is main: @@ -45,24 +48,24 @@ subq $488, %rsp .cfi_def_cfa_offset 496 call _Z8init_msgv - movq %rax, (%rsp) - movq %rdx, 8(%rsp) - leaq 240(%rsp), %rdi - movq %rsp, %rsi + movq %rsp, %rdi movl $29, %ecx + leaq 240(%rsp), %rsi + movq %rax, 240(%rsp) + cmpq $1001, 8(%rsp) + movq %rdx, 248(%rsp) rep movsq movl (%rsi), %eax movl %eax, (%rdi) movzbl 4(%rsi), %eax movb %al, 4(%rdi) - cmpq $1001, 248(%rsp) - jne .L7 - movl $0, %eax + jne .L8 + xorl %eax, %eax addq $488, %rsp .cfi_remember_state .cfi_def_cfa_offset 8 ret -.L7: +.L8: .cfi_restore_state see how the compare is moved very far away from the branch possibly across flag clobbering insns (rep movsq?). Target sounds good.
(In reply to Richard Biener from comment #3) > see how the compare is moved very far away from the branch possibly across > flag clobbering insns (rep movsq?). Target sounds good. No, rep movsq doesn't clobber flags [1]. [1] http://www.felixcloutier.com/x86/MOVS:MOVSB:MOVSW:MOVSD:MOVSQ.html
(In reply to Richard Biener from comment #3) > see how the compare is moved very far away from the branch possibly across > flag clobbering insns (rep movsq?). Target sounds good. Scheduler (sched2) pass is moving the compare: (insn 21 17 22 2 (set (reg:CCZ 17 flags) (compare:CCZ (mem/c:DI (plus:DI (reg/f:DI 7 sp) (const_int 8 [0x8])) [1 MEM[(struct Msg &)&t].id+0 S8 A64]) (const_int 1001 [0x3e9]))) "t4.cpp":32 12 {*cmpdi_1} (nil)) in front of rep movsd: (insn 11 10 12 2 (parallel [ (set (reg:DI 2 cx [92]) (const_int 0 [0])) (set (reg/f:DI 5 di [90]) (plus:DI (ashift:DI (reg:DI 2 cx [92]) (const_int 3 [0x3])) (reg/f:DI 5 di [90]))) (set (reg/f:DI 4 si [91]) (plus:DI (ashift:DI (reg:DI 2 cx [92]) (const_int 3 [0x3])) (reg/f:DI 4 si [91]))) (set (mem/c:BLK (reg/f:DI 5 di [90]) [7 MEM[(struct T *)&t]+0 S232 A128]) (mem/c:BLK (reg/f:DI 4 si [91]) [7 MEM[(struct T *)&D.2891]+0 S232 A128])) (use (reg:DI 2 cx [92])) ]) "t4.cpp":31 993 {*rep_movdi_rex64} (expr_list:REG_UNUSED (reg:DI 2 cx [92]) (nil))) Scheduler doesn't notice that rep movsd target overlaps compare operand. Looks like rtl-optimization (scheduler) to me.
(In reply to Uroš Bizjak from comment #5) > (In reply to Richard Biener from comment #3) > > see how the compare is moved very far away from the branch possibly across > > flag clobbering insns (rep movsq?). Target sounds good. > > Scheduler (sched2) pass is moving the compare: > > (insn 21 17 22 2 (set (reg:CCZ 17 flags) > (compare:CCZ (mem/c:DI (plus:DI (reg/f:DI 7 sp) > (const_int 8 [0x8])) [1 MEM[(struct Msg &)&t].id+0 S8 > A64]) > (const_int 1001 [0x3e9]))) "t4.cpp":32 12 {*cmpdi_1} > (nil)) > > in front of rep movsd: > > (insn 11 10 12 2 (parallel [ > (set (reg:DI 2 cx [92]) > (const_int 0 [0])) > (set (reg/f:DI 5 di [90]) > (plus:DI (ashift:DI (reg:DI 2 cx [92]) > (const_int 3 [0x3])) > (reg/f:DI 5 di [90]))) > (set (reg/f:DI 4 si [91]) > (plus:DI (ashift:DI (reg:DI 2 cx [92]) > (const_int 3 [0x3])) > (reg/f:DI 4 si [91]))) > (set (mem/c:BLK (reg/f:DI 5 di [90]) [7 MEM[(struct T *)&t]+0 > S232 A128]) > (mem/c:BLK (reg/f:DI 4 si [91]) [7 MEM[(struct T > *)&D.2891]+0 S232 A128])) > (use (reg:DI 2 cx [92])) > ]) "t4.cpp":31 993 {*rep_movdi_rex64} > (expr_list:REG_UNUSED (reg:DI 2 cx [92]) > (nil))) > > Scheduler doesn't notice that rep movsd target overlaps compare operand. > > Looks like rtl-optimization (scheduler) to me. I notice that -O -fstrict-aliasing -fschedule-insns2 fails and -O2 -fno-strict-aliasing succeeds. The above shows the block move uses alias-set 7 and the compare alias-set 1. Alias set 1 is a subset of alias set 7 but not the other way around - this suggests that sched-deps is querying the oracle in a wrong way. Huh, no. Breakpoint 2, true_dependence_1 (mem=0x7ffff6a5ec00, mem_mode=E_QImode, mem_addr=0x3106258, x=0x7ffff6a5edb0, x_addr=0x7ffff6a61780, mem_canonicalized=true) at /space/rguenther/src/svn/gcc-8-branch/gcc/alias.c:2905 2905 gcc_checking_assert (mem_canonicalized ? (mem_addr != NULL_RTX) (mem/c:QI (reg/f:DI 90) [7 MEM[(struct T *)&t]+236 S1 A32]) $27 = void (mem/c:DI (plus:DI (reg/f:DI 20 frame) (const_int -232 [0xffffffffffffff18])) [1 MEM[(struct Msg &)&t].id+0 S8 A64]) $28 = void (gdb) fin Run till exit from #0 true_dependence_1 (mem=0x7ffff6a5ec00, mem_mode=E_QImode, mem_addr=0x3106258, x=0x7ffff6a5edb0, x_addr=0x7ffff6a61780, mem_canonicalized=true) at /space/rguenther/src/svn/gcc-8-branch/gcc/alias.c:2905 canon_true_dependence (mem=0x7ffff6a5ec00, mem_mode=E_QImode, mem_addr=0x3106258, x=0x7ffff6a5edb0, x_addr=0x7ffff6a61780) at /space/rguenther/src/svn/gcc-8-branch/gcc/alias.c:2998 2998 } Value returned is $29 = 0 so the oracle says the load cannot alias the store. We enter with (gdb) p debug_rtx (mem_addr) (value:DI 10:17131 @0x3106258/0x30e6310) (gdb) p debug_rtx (x_addr) (plus:DI (reg/f:DI 20 frame) (const_int -232 [0xffffffffffffff18])) and find_base_term (x_addr) returns (address:DI -3) (same as mem_base) Hmm. alias_sets_conflict_p returns false?! Ah, alias_set_subset_of returns true because of has_zero_child. alias-set 7 has no children, it looks like struct T has no members but it does have a member of type V. So we do record_component_aliases arriving at that field and get_alias_set on this field returns zero due to TYPE_TYPELESS_STORAGE (which is new in GCC 8 which explains why there may be a regression). So we record the ->has_zero_child relationship (instead of previously recording a distinct alias-set). Now - some earlier change - improvement to alias_sets_conflict_p in fact, made us not consider ->has_zero_child there basically to avoid making struct X { int i; }; and struct Y { float f; char x[4]; } conflict. But this is exactly what we are trying to allow with TYPE_TYPELESS_STORAGE :/ So TYPE_TYPELESS_STORAGE doesn't work as desired and introduces wrong-code.
Also fails with GCC 7 (not with -O2 but with -O -fstrict-aliasing -fschedule-insns2), same reason.
So the question is why T doesn't get TYPE_TYPELESS_STORAGE set even though we propagate that flag from children upwards in place_field. That's what is missing here and why things go wrong. Ah, so we go build_base_field (rli=0x2ea1780, binfo=0x7ffff6a2f120, offsets=0x2ea2280, next_field=0x7ffff6a285c8) at /space/rguenther/src/svn/gcc-8-branch/gcc/cp/class.c:4244 4244 tree t = rli->t; (gdb) p basetype->type_common.typeless_storage $30 = 1 OK 4262 decl = build_base_field_1 (t, basetype, next_field); (gdb) p decl->typed.type->type_common.typeless_storage $31 = 0 looks like the as-base type doesn't inherit this flag. If I fix that up here where it matters rather than tracking down the gazillion places the C++ FE seems to set CLASSTYPE_AS_BASE and where I'm unsure at that time layout is finished the bug is fixed: Index: gcc/cp/class.c =================================================================== --- gcc/cp/class.c (revision 263209) +++ gcc/cp/class.c (working copy) @@ -4202,6 +4202,8 @@ build_base_field_1 (tree t, tree basetyp { /* Create the FIELD_DECL. */ gcc_assert (CLASSTYPE_AS_BASE (basetype)); + TYPE_TYPELESS_STORAGE (CLASSTYPE_AS_BASE (basetype)) + = TYPE_TYPELESS_STORAGE (basetype); tree decl = build_decl (input_location, FIELD_DECL, NULL_TREE, CLASSTYPE_AS_BASE (basetype)); DECL_ARTIFICIAL (decl) = 1; C++ folks?
The following seems to work, will test. Index: gcc/cp/class.c =================================================================== --- gcc/cp/class.c (revision 263209) +++ gcc/cp/class.c (working copy) @@ -6243,6 +6243,7 @@ layout_class_type (tree t, tree *virtual bitsize_int (BITS_PER_UNIT))); SET_TYPE_ALIGN (base_t, rli->record_align); TYPE_USER_ALIGN (base_t) = TYPE_USER_ALIGN (t); + TYPE_TYPELESS_STORAGE (base_t) = TYPE_TYPELESS_STORAGE (t); /* Copy the non-static data members of T. This will include its direct non-virtual bases & vtable. */
Author: rguenth Date: Thu Aug 2 14:25:57 2018 New Revision: 263261 URL: https://gcc.gnu.org/viewcvs?rev=263261&root=gcc&view=rev Log: 2018-08-02 Richard Biener <rguenther@suse.de> PR c++/86763 * class.c (layout_class_type): Copy TYPE_TYPELESS_STORAGE to the CLASSTYPE_AS_BASE. * g++.dg/torture/pr86763.C: New testcase. Added: trunk/gcc/testsuite/g++.dg/torture/pr86763.C Modified: trunk/gcc/ChangeLog trunk/gcc/cp/class.c trunk/gcc/testsuite/ChangeLog
Fixed on trunk sofar.
(In reply to Richard Biener from comment #9) > The following seems to work, will test. > > Index: gcc/cp/class.c > =================================================================== > --- gcc/cp/class.c (revision 263209) > +++ gcc/cp/class.c (working copy) > @@ -6243,6 +6243,7 @@ layout_class_type (tree t, tree *virtual > bitsize_int (BITS_PER_UNIT))); > SET_TYPE_ALIGN (base_t, rli->record_align); > TYPE_USER_ALIGN (base_t) = TYPE_USER_ALIGN (t); > + TYPE_TYPELESS_STORAGE (base_t) = TYPE_TYPELESS_STORAGE (t); > > /* Copy the non-static data members of T. This will include its > direct non-virtual bases & vtable. */ This patch fixes the failure in my original (non-reduced test case).
Author: rguenth Date: Fri Aug 17 11:51:48 2018 New Revision: 263617 URL: https://gcc.gnu.org/viewcvs?rev=263617&root=gcc&view=rev Log: 2018-08-17 Richard Biener <rguenther@suse.de> Backport from mainline 2018-08-02 Richard Biener <rguenther@suse.de> PR c++/86763 * class.c (layout_class_type): Copy TYPE_TYPELESS_STORAGE to the CLASSTYPE_AS_BASE. * g++.dg/torture/pr86763.C: New testcase. Added: branches/gcc-8-branch/gcc/testsuite/g++.dg/torture/pr86763.C Modified: branches/gcc-8-branch/gcc/cp/ChangeLog branches/gcc-8-branch/gcc/cp/class.c branches/gcc-8-branch/gcc/testsuite/ChangeLog
Author: rguenth Date: Fri Aug 17 14:17:10 2018 New Revision: 263621 URL: https://gcc.gnu.org/viewcvs?rev=263621&root=gcc&view=rev Log: 2018-08-17 Richard Biener <rguenther@suse.de> Backport from mainline 2018-08-02 Richard Biener <rguenther@suse.de> PR c++/86763 * class.c (layout_class_type): Copy TYPE_TYPELESS_STORAGE to the CLASSTYPE_AS_BASE. * g++.dg/torture/pr86763.C: New testcase. Added: branches/gcc-7-branch/gcc/testsuite/g++.dg/torture/pr86763.C Modified: branches/gcc-7-branch/gcc/cp/ChangeLog branches/gcc-7-branch/gcc/cp/class.c branches/gcc-7-branch/gcc/testsuite/ChangeLog
Fixed.