This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug inline-asm/42881] New: SSE2 intrinsics miscompiled at -O0 -march=k8


A simple test program, a.c:

#include <stdio.h>
#include <emmintrin.h>
/* Reproducer: fill both lanes of an SSE2 double vector with 3.0, store the
   vector to a[], and print the two lanes.  A correct build prints
   "3.000000 3.000000".  Kept minimal on purpose; do not restructure. */
int main(void) {                                                                
  double a[2];                                                                  
  /* Set both double-precision lanes of x to 3.0. */
  __m128d x = _mm_set1_pd(3);                                                   
  /* Unaligned 128-bit store of x into a[0] and a[1]. */
  _mm_storeu_pd(a,x);                                                           
  printf("%f %f\n",a[0],a[1]);                                                  
  return 0;                                                                     
}

$ gcc-4.5 -O0 -march=k8 a.c && ./a.out  # broken
0.000000 0.000000
$ gcc-4.5 -O1 -march=k8 a.c && ./a.out  # good
3.000000 3.000000
$ gcc-4.5 -O0 -march=core2 a.c && ./a.out # good
3.000000 3.000000

$ gcc-4.5 -O0 -march=k8 -c a.c && objdump -d -M intel --prefix-addresses a.o | grep main
0000000000000000 <main> push   rbp
0000000000000001 <main+0x1> mov    rbp,rsp
0000000000000004 <main+0x4> sub    rsp,0x40
0000000000000008 <main+0x8> mov    rax,0x4008000000000000
0000000000000012 <main+0x12> mov    QWORD PTR [rbp-0x8],rax
0000000000000016 <main+0x16> movsd  xmm2,xmm1
000000000000001a <main+0x1a> unpcklpd xmm2,xmm2
000000000000001e <main+0x1e> movapd xmm0,xmm2
0000000000000022 <main+0x22> movlpd xmm1,QWORD PTR [rbp-0x8]
0000000000000027 <main+0x27> movaps XMMWORD PTR [rbp-0x40],xmm0
000000000000002b <main+0x2b> movapd xmm0,XMMWORD PTR [rbp-0x40]
0000000000000030 <main+0x30> lea    rax,[rbp-0x30]
0000000000000034 <main+0x34> mov    QWORD PTR [rbp-0x10],rax
0000000000000038 <main+0x38> movaps XMMWORD PTR [rbp-0x20],xmm0
000000000000003c <main+0x3c> mov    rax,QWORD PTR [rbp-0x10]
0000000000000040 <main+0x40> movapd xmm0,XMMWORD PTR [rbp-0x20]
0000000000000045 <main+0x45> movupd XMMWORD PTR [rax],xmm0
0000000000000049 <main+0x49> movlpd xmm1,QWORD PTR [rbp-0x28]
000000000000004e <main+0x4e> movlpd xmm0,QWORD PTR [rbp-0x30]
0000000000000053 <main+0x53> mov    eax,0x0
0000000000000058 <main+0x58> mov    rdi,rax
000000000000005b <main+0x5b> mov    eax,0x2
0000000000000060 <main+0x60> call   0000000000000065 <main+0x65>
0000000000000065 <main+0x65> mov    eax,0x0
000000000000006a <main+0x6a> leave  
000000000000006b <main+0x6b> ret

$ gcc-4.5 -O0 -march=core2 -c a.c && objdump -d -M intel --prefix-addresses a.o | grep main
0000000000000000 <main> push   rbp
0000000000000001 <main+0x1> mov    rbp,rsp
0000000000000004 <main+0x4> sub    rsp,0x40
0000000000000008 <main+0x8> mov    rax,0x4008000000000000
0000000000000012 <main+0x12> mov    QWORD PTR [rbp-0x8],rax
0000000000000016 <main+0x16> movddup xmm0,QWORD PTR [rbp-0x8]
000000000000001b <main+0x1b> movapd XMMWORD PTR [rbp-0x40],xmm0
0000000000000020 <main+0x20> movapd xmm0,XMMWORD PTR [rbp-0x40]
0000000000000025 <main+0x25> lea    rax,[rbp-0x30]
0000000000000029 <main+0x29> mov    QWORD PTR [rbp-0x10],rax
000000000000002d <main+0x2d> movapd XMMWORD PTR [rbp-0x20],xmm0
0000000000000032 <main+0x32> mov    rax,QWORD PTR [rbp-0x10]
0000000000000036 <main+0x36> movapd xmm0,XMMWORD PTR [rbp-0x20]
000000000000003b <main+0x3b> movupd XMMWORD PTR [rax],xmm0
000000000000003f <main+0x3f> mov    rdx,QWORD PTR [rbp-0x28]
0000000000000043 <main+0x43> movsd  xmm0,QWORD PTR [rbp-0x30]
0000000000000048 <main+0x48> mov    eax,0x0
000000000000004d <main+0x4d> movq   xmm1,rdx
0000000000000052 <main+0x52> mov    rdi,rax
0000000000000055 <main+0x55> mov    eax,0x2
000000000000005a <main+0x5a> call   000000000000005f <main+0x5f>
000000000000005f <main+0x5f> mov    eax,0x0
0000000000000064 <main+0x64> leave  
0000000000000065 <main+0x65> ret

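The incorrect bit is the following sequence: xmm1 is read (movsd xmm2,xmm1)
before it is loaded from [rbp-0x8], so whatever xmm1 happened to hold on entry
is duplicated into xmm0 and stored, instead of 3.0: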

0000000000000016 <main+0x16> movsd  xmm2,xmm1
000000000000001a <main+0x1a> unpcklpd xmm2,xmm2
000000000000001e <main+0x1e> movapd xmm0,xmm2
0000000000000022 <main+0x22> movlpd xmm1,QWORD PTR [rbp-0x8]
0000000000000027 <main+0x27> movaps XMMWORD PTR [rbp-0x40],xmm0

With -march=core2, the same step is instead compiled correctly to

0000000000000016 <main+0x16> movddup xmm0,QWORD PTR [rbp-0x8]
000000000000001b <main+0x1b> movapd XMMWORD PTR [rbp-0x40],xmm0

Of course, all the redundant stores are collapsed at any positive optimization
level, and the result becomes correct regardless of -march.  Unfortunately, the
bug is in the generic x86-64 target, so it's highly visible.  This bug is not
present in GCC 4.4.2.

$ gcc-4.5 -v
Using built-in specs.
COLLECT_GCC=gcc-4.5
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-unknown-linux-gnu/4.5.0/lto-wrapper
Target: x86_64-unknown-linux-gnu
Configured with: ../configure --prefix=/usr --enable-languages=c,c++,fortran
--enable-gold --enable-plugin --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu --enable-lto --enable-gnu-unique-object --disable-multilib
--disable-libstdcxx-pch --with-tune=generic --with-system-zlib --with-ppl
--with-cloog --libdir=/usr/lib --libexecdir=/usr/lib --mandir=/usr/share/man
--infodir=/usr/share/info --disable-werror --enable-checking=release
--program-suffix=-4.5 --enable-version-specific-runtime-libs : (reconfigured)
../configure --prefix=/usr --enable-languages=c,c++,fortran --enable-gold
--enable-plugin --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu --enable-lto --enable-gnu-unique-object --disable-multilib
--disable-libstdcxx-pch --with-system-zlib --with-ppl --with-cloog
--libdir=/usr/lib --libexecdir=/usr/lib --mandir=/usr/share/man
--infodir=/usr/share/info --disable-werror --enable-checking=release
--program-suffix=-4.5 --enable-version-specific-runtime-libs
Thread model: posix
gcc version 4.5.0 20100121 (experimental) (GCC)


-- 
           Summary: SSE2 intrinsics miscompiled at -O0 -march=k8
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Severity: major
          Priority: P3
         Component: inline-asm
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: bugs at 59A2 dot org
 GCC build triplet: x86_64-unknown-linux-gnu
  GCC host triplet: x86_64-unknown-linux-gnu
GCC target triplet: x86_64-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42881


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]