[Bug tree-optimization/49457] New: integer comparison does not vectorize
vincenzo.innocente at cern dot ch
gcc-bugzilla@gcc.gnu.org
Fri Jun 17 17:05:00 GMT 2011
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49457
Summary: integer comparison does not vectorize
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
AssignedTo: unassigned@gcc.gnu.org
ReportedBy: vincenzo.innocente@cern.ch
// Global test arrays for the reproducer: 1024 floats each, declared with
// 16-byte alignment. Both loops below read from a[] and write to b[].
float __attribute__ ((aligned(16))) a[1024];
float __attribute__ ((aligned(16))) b[1024];
// Integer-comparison variant: reported as NOT vectorized
// (gcc 4.7.0 20110528, -Ofast -msse4.2).
//
// For every element, a[i] is converted to int and the result is compared
// against integer zero: b[i] receives a[i] unchanged when the converted
// value is 0, and -a[i] otherwise.
void tVi() {
for (int i=0; i!=1024; ++i) {
// float -> int conversion; the comparison below is done on this int
int j = a[i];
b[i] = (j==0) ? a[i] : - a[i];
}
}
// Float-comparison variant: reported as vectorizing nicely with the same
// compiler and flags.
//
// Same selection as tVi(), except that the converted integer is turned
// back into a float and the comparison against zero is done on that float.
void tVf() {
for (int i=0; i!=1024; ++i) {
// float -> int conversion, as in tVi()
int j = a[i];
// int -> float round trip: the only source-level difference from tVi()
float z = j;
b[i] = (z==0) ? a[i] : - a[i];
}
}
c++ -Ofast -c testVectCond.cpp -msse4.2
otool -V -t testVectCond.o | c++filt
testVectCond.o:
(__TEXT,__text) section
tVi():
0000000000000000 xorl %eax,%eax
0000000000000002 movss 0x00000096(%rip),%xmm2
000000000000000a leaq _a(%rip),%rcx
0000000000000011 nopl tVi()(%rax)
0000000000000018 nopl tVi()(%rax,%rax)
0000000000000020 movss (%rcx,%rax),%xmm0
0000000000000025 cvttss2si %xmm0,%edx
0000000000000029 movaps %xmm0,%xmm1
000000000000002c xorps %xmm2,%xmm1
000000000000002f testl %edx,%edx
0000000000000031 je 0x00000036
0000000000000033 movaps %xmm1,%xmm0
0000000000000036 leaq _b(%rip),%rdx
000000000000003d movss %xmm0,(%rdx,%rax)
0000000000000042 addq $0x04,%rax
0000000000000046 cmpq $0x00001000,%eax
000000000000004c jne 0x00000020
000000000000004e repz/ret
tVf():
0000000000000050 movaps 0x00000059(%rip),%xmm4
0000000000000057 xorl %eax,%eax
0000000000000059 xorps %xmm3,%xmm3
000000000000005c leaq _a(%rip),%rcx
0000000000000063 leaq _b(%rip),%rdx
000000000000006a nopw tVi()(%rax,%rax)
0000000000000070 movaps (%rcx,%rax),%xmm2
0000000000000074 cvttps2dq %xmm2,%xmm0
0000000000000078 cvtdq2ps %xmm0,%xmm0
000000000000007b cmpps $0x4,%xmm3,%xmm0
000000000000007f movaps %xmm2,%xmm1
0000000000000082 xorps %xmm4,%xmm1
0000000000000085 andps %xmm0,%xmm1
0000000000000088 andnps %xmm2,%xmm0
000000000000008b orps %xmm1,%xmm0
000000000000008e movaps %xmm0,(%rdx,%rax)
0000000000000092 addq $0x10,%rax
0000000000000096 cmpq $0x00001000,%eax
000000000000009c jne 0x00000070
000000000000009e repz/ret
I'm using the following compiler:
g++ -v
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-apple-darwin10.7.0/4.7.0/lto-wrapper
Target: x86_64-apple-darwin10.7.0
Configured with: ./configure --enable-languages=c,c++,fortran --enable-lto
--with-build-config=bootstrap-lto CFLAGS='-O2 -ftree-vectorize -fPIC'
CXXFLAGS='-O2 -fPIC -ftree-vectorize -fvisibility-inlines-hidden'
Thread model: posix
gcc version 4.7.0 20110528 (experimental) (GCC)
More information about the Gcc-bugs
mailing list