(My first time filing a gcc bug, apologies if I screw it up) I have a loop with a high multiply and some arithmetic. gcc generates code that computes the wrong value as the result, under -O3 on x86-64. It computes the correct value under -O2. The key difference appears to be that the loop is vectorized under -O3. To reproduce: compile the following as 'gcc -O3 test.c' When run, it will abort. Now compile as 'gcc -O2 test.c' and it will not abort. #include <stdlib.h> #include <stdint.h> #define ITERS 8 __attribute__((noinline)) uint32_t sum(const uint32_t *vals) { uint32_t result = 0; int i; for (i=0; i < ITERS; i++) { uint32_t val = vals[i]; uint32_t q = (val * 613566757LLU) >> 32; uint32_t t = ((val + q) >> 1); result += t; } return result; } int main(void) { uint32_t vals[ITERS] = {}; vals[1] = 7; if (sum(vals) != 4) abort(); return 0; } > gcc -v Using built-in specs. COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.8/lto-wrapper Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.8.2-19ubuntu1' --with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.8 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-libmudflap --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 
--with-abi=m64 --with-multilib-list=m32,m64,mx32 --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)
The tree level looks fine at least to me: vect_val_8.15_127 = MEM[(const uint32_t *)vectp_vals.14_85]; vect_patt_47.16_128 = WIDEN_MULT_EVEN_EXPR <vect_val_8.15_127, { 613566757, 613566757, 613566757, 613566757 }>; vect_patt_47.16_129 = WIDEN_MULT_ODD_EXPR <vect_val_8.15_127, { 613566757, 613566757, 613566757, 613566757 }>; vect__11.18_130 = vect_patt_47.16_128 >> 32; vect__11.18_131 = vect_patt_47.16_129 >> 32; vect_q_12.19_132 = VEC_PACK_TRUNC_EXPR <vect__11.18_130, vect__11.18_131>; vect__13.20_133 = vect_q_12.19_132 + vect_val_8.15_127; vect_t_14.21_134 = vect__13.20_133 >> 1; But the assembly code looks broken: leaq (%rdi,%r8,4), %r8 movdqa .LC0(%rip), %xmm1 cmpl $2, %r10d movdqa (%r8), %xmm2 movdqa %xmm2, %xmm0 movdqa %xmm2, %xmm3 pmuludq %xmm1, %xmm0 psrlq $32, %xmm0 psrlq $32, %xmm3 <---- where did this come from? pmuludq %xmm1, %xmm3 psrlq $32, %xmm3 shufps $136, %xmm3, %xmm0 paddd %xmm2, %xmm0 psrld $1, %xmm0
The problem looks related to WIDEN_MULT_EVEN_EXPR/WIDEN_MULT_ODD_EXPR. If we add an assignment from t to an array, we get WIDEN_MULT_LO_EXPR/WIDEN_MULT_HI_EXPR, which works. So in the end this is a target issue, since the code is correct at the tree level and it looks like it is just the expansion of those expressions that is causing the issue.
Started with r188959, fixed by r209138.
__attribute__ ((noinline)) unsigned int sum (const unsigned int *x) { unsigned int r = 0; int i; for (i = 0; i < 8; i++) { unsigned int v = x[i]; unsigned int q = (v * 613566757ULL) >> 32; unsigned int t = ((v + q) >> 1); r += t; } return r; } int main (void) { unsigned int x[8] = { 0, 7, 0, 0, 0, 0, 0, 0 }; if (sum (x) != 4) __builtin_abort (); return 0; } So, I guess we want to backport the PR60656 fix to the 4.8 branch (both r209138 and r209363), and add this new testcase to 4.8/4.9/trunk.
GCC 4.8.3 is being released, adjusting target milestone.
GCC 4.8.4 has been released.
Duplicate (I'm currently backporting the fix for the duplicate). *** This bug has been marked as a duplicate of bug 60656 ***