[Bug tree-optimization/49457] New: integer comparison does not vectorize

vincenzo.innocente at cern dot ch gcc-bugzilla@gcc.gnu.org
Fri Jun 17 17:05:00 GMT 2011


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49457

           Summary: integer comparison does not vectorize
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: vincenzo.innocente@cern.ch


// Global test arrays: 1024 floats each, with 16-byte alignment requested via
// the aligned attribute. The loops in this reproducer read from a[] and
// write to b[].
float __attribute__ ((aligned(16))) a[1024];
float __attribute__ ((aligned(16))) b[1024];

// does not vectorize
//
// For each of the 1024 elements: convert a[i] to int, compare that int
// against 0, and store a[i] into b[i] when it is zero, otherwise -a[i].
// The select condition is an *integer* comparison; this is the variant the
// report says is not vectorized (the listing for tVi() in this report shows
// a scalar loop using movss/cvttss2si and a conditional branch).
void tVi() {
  for (int i=0; i!=1024; ++i) {
    int j = a[i];  // float -> int conversion
    b[i] = (j==0) ?  a[i] : - a[i];  // condition tested on the int value
  }
}

// nicely vectorize...
//
// Same loop as tVi(), except that the converted int is converted back to
// float before the comparison, so the select condition is a *float*
// comparison. The report shows this variant being vectorized (the listing
// for tVf() in this report uses packed movaps/cvttps2dq/cvtdq2ps/cmpps and
// an and/andn/or blend, advancing 16 bytes per iteration).
void tVf() {
  for (int i=0; i!=1024; ++i) {
    int j = a[i];  // float -> int conversion
    float z = j;   // int -> float conversion, so the test below is on floats
    b[i] = (z==0) ?  a[i] : - a[i];
  }
}


c++ -Ofast -c testVectCond.cpp -msse4.2

 otool -V -t testVectCond.o | c++filt
testVectCond.o:
(__TEXT,__text) section
tVi():
0000000000000000    xorl    %eax,%eax
0000000000000002    movss    0x00000096(%rip),%xmm2
000000000000000a    leaq    _a(%rip),%rcx
0000000000000011    nopl    tVi()(%rax)
0000000000000018    nopl    tVi()(%rax,%rax)
0000000000000020    movss    (%rcx,%rax),%xmm0
0000000000000025    cvttss2si    %xmm0,%edx
0000000000000029    movaps    %xmm0,%xmm1
000000000000002c    xorps    %xmm2,%xmm1
000000000000002f    testl    %edx,%edx
0000000000000031    je    0x00000036
0000000000000033    movaps    %xmm1,%xmm0
0000000000000036    leaq    _b(%rip),%rdx
000000000000003d    movss    %xmm0,(%rdx,%rax)
0000000000000042    addq    $0x04,%rax
0000000000000046    cmpq    $0x00001000,%eax
000000000000004c    jne    0x00000020
000000000000004e    repz/ret
tVf():
0000000000000050    movaps    0x00000059(%rip),%xmm4
0000000000000057    xorl    %eax,%eax
0000000000000059    xorps    %xmm3,%xmm3
000000000000005c    leaq    _a(%rip),%rcx
0000000000000063    leaq    _b(%rip),%rdx
000000000000006a    nopw    tVi()(%rax,%rax)
0000000000000070    movaps    (%rcx,%rax),%xmm2
0000000000000074    cvttps2dq    %xmm2,%xmm0
0000000000000078    cvtdq2ps    %xmm0,%xmm0
000000000000007b    cmpps    $0x4,%xmm3,%xmm0
000000000000007f    movaps    %xmm2,%xmm1
0000000000000082    xorps    %xmm4,%xmm1
0000000000000085    andps    %xmm0,%xmm1
0000000000000088    andnps    %xmm2,%xmm0
000000000000008b    orps    %xmm1,%xmm0
000000000000008e    movaps    %xmm0,(%rdx,%rax)
0000000000000092    addq    $0x10,%rax
0000000000000096    cmpq    $0x00001000,%eax
000000000000009c    jne    0x00000070
000000000000009e    repz/ret

I'm using
g++ -v
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-apple-darwin10.7.0/4.7.0/lto-wrapper
Target: x86_64-apple-darwin10.7.0
Configured with: ./configure --enable-languages=c,c++,fortran --enable-lto
--with-build-config=bootstrap-lto CFLAGS='-O2 -ftree-vectorize -fPIC'
CXXFLAGS='-O2 -fPIC -ftree-vectorize -fvisibility-inlines-hidden'
Thread model: posix
gcc version 4.7.0 20110528 (experimental) (GCC)



More information about the Gcc-bugs mailing list