This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/79355] New: poor code for AVX vector compare


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79355

            Bug ID: 79355
           Summary: poor code for AVX vector compare
           Product: gcc
           Version: 6.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: mirq-gccboogs at rere dot qmqm.pl
  Target Milestone: ---

gcc-6.2 (and previous versions) generates very inefficient code for AVX when
comparing 32-byte vectors:

$ cat a.c
#include <x86intrin.h>

__v8su eq2(__v8su a, __v8su b)
{
        return a == b;
}

$ gcc -S -Ofast -mavx a.c -o -
        .file   "a.c"
        .text   
        .p2align 4,,15
        .globl  eq2
        .type   eq2, @function
eq2:
.LFB4856:
        .cfi_startproc
        vmovd   %xmm0, %edx
        vmovd   %xmm1, %eax
        leaq    8(%rsp), %r10
        .cfi_def_cfa 10, 0
        vpextrd $1, %xmm0, %ecx
        andq    $-32, %rsp
        cmpl    %eax, %edx
[... extracting and comparing every element here ...]
        vpinsrd $1, %r11d, %xmm5, %xmm1
        vpinsrd $1, %r9d, %xmm7, %xmm0
        popq    %r10
        .cfi_def_cfa 10, 0
        vpunpcklqdq     %xmm3, %xmm0, %xmm0
        vpunpcklqdq     %xmm2, %xmm1, %xmm1
        popq    %rbp
        leaq    -8(%r10), %rsp
        .cfi_def_cfa 7, 8
        vinsertf128     $0x1, %xmm1, %ymm0, %ymm0
        ret

It could instead generate the following (i.e. split each vector in half,
compare the halves, and combine the results afterwards):

        vextractf128    $0x1, %ymm0, %xmm2
        vextractf128    $0x1, %ymm1, %xmm3
        vpcmpeqd        %xmm1, %xmm0, %xmm0
        vpcmpeqd        %xmm3, %xmm2, %xmm2
        vinsertf128     $0x1, %xmm2, %ymm0, %ymm0
        ret

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/6/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 6.3.0-5'
--with-bugurl=file:///usr/share/doc/gcc-6/README.Bugs
--enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
--program-suffix=-6
--program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-libmpx --enable-plugin --enable-default-pie
--with-system-zlib --disable-browser-plugin --enable-java-awt=gtk
--enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-6-amd64/jre
--enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-6-amd64
--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-6-amd64
--with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
--with-target-system-zlib --enable-objc-gc=auto --enable-multiarch
--with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32
--enable-multilib --with-tune=generic --enable-checking=release
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 6.3.0 20170124 (Debian 6.3.0-5)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]