This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug target/79355] New: poor code for AVX vector compare
- From: "mirq-gccboogs at rere dot qmqm.pl" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Fri, 03 Feb 2017 11:14:58 +0000
- Subject: [Bug target/79355] New: poor code for AVX vector compare
- Auto-submitted: auto-generated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79355
Bug ID: 79355
Summary: poor code for AVX vector compare
Product: gcc
Version: 6.3.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: mirq-gccboogs at rere dot qmqm.pl
Target Milestone: ---
gcc-6.2 (and previous versions) generates very inefficient code for AVX when
comparing 32-byte vectors:
$ cat a.c
#include <x86intrin.h>
__v8su eq2(__v8su a, __v8su b)
{
return a == b;
}
$ gcc -S -Ofast -mavx a.c -o -
.file "a.c"
.text
.p2align 4,,15
.globl eq2
.type eq2, @function
eq2:
.LFB4856:
.cfi_startproc
vmovd %xmm0, %edx
vmovd %xmm1, %eax
leaq 8(%rsp), %r10
.cfi_def_cfa 10, 0
vpextrd $1, %xmm0, %ecx
andq $-32, %rsp
cmpl %eax, %edx
[... extracting and comparing every element here ...]
vpinsrd $1, %r11d, %xmm5, %xmm1
vpinsrd $1, %r9d, %xmm7, %xmm0
popq %r10
.cfi_def_cfa 10, 0
vpunpcklqdq %xmm3, %xmm0, %xmm0
vpunpcklqdq %xmm2, %xmm1, %xmm1
popq %rbp
leaq -8(%r10), %rsp
.cfi_def_cfa 7, 8
vinsertf128 $0x1, %xmm1, %ymm0, %ymm0
ret
Instead, it could generate the following (i.e. split each vector in half,
compare the halves, and combine the results afterwards):
vextractf128 $0x1, %ymm0, %xmm2
vextractf128 $0x1, %ymm1, %xmm3
vpcmpeqd %xmm1, %xmm0, %xmm0
vpcmpeqd %xmm3, %xmm2, %xmm2
vinsertf128 $0x1, %xmm2, %ymm0, %ymm0
ret
$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/6/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 6.3.0-5'
--with-bugurl=file:///usr/share/doc/gcc-6/README.Bugs
--enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-6
--program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-libmpx --enable-plugin --enable-default-pie
--with-system-zlib --disable-browser-plugin --enable-java-awt=gtk
--enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-6-amd64/jre
--enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-6-amd64
--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-6-amd64
--with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
--with-target-system-zlib --enable-objc-gc=auto --enable-multiarch
--with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32
--enable-multilib --with-tune=generic --enable-checking=release
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 6.3.0 20170124 (Debian 6.3.0-5)