This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug middle-end/71927] New: stack alignment prologue not optimized out when no local variables remain


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71927

            Bug ID: 71927
           Summary: stack alignment prologue not optimized out when no
                    local variables remain
           Product: gcc
           Version: 5.4.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: mirq-gccboogs at rere dot qmqm.pl
  Target Milestone: ---

Created attachment 38925
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=38925&action=edit
test case

GCC 5.4 optimizes out the local array, but does not remove the stack-alignment code.

x() has a 2-element __m256i array - it gets optimized out correctly
y() has a 3-element __m256i array - the stack alignment code is left in place

For the y() case at -O2, the loop is additionally not unrolled (unlike in x()).

$ gcc -mavx2 -O3 -S -o - a.c
[...]
x:
.LFB4854:
        .cfi_startproc
        vmovdqa (%rdi), %ymm2
        vpminud %ymm1, %ymm2, %ymm3
        vpmaxud %ymm1, %ymm2, %ymm1
        vmovdqa %ymm1, (%rdi)
        vmovdqa 32(%rdi), %ymm1
        vpminud %ymm0, %ymm1, %ymm2
        vpmaxud %ymm0, %ymm1, %ymm0
        vmovdqa %ymm0, 32(%rdi)
        vzeroupper
        ret
        .cfi_endproc
[...]
y:
.LFB4855:
        .cfi_startproc
        leaq    8(%rsp), %r10
        .cfi_def_cfa 10, 0
        andq    $-32, %rsp
        pushq   -8(%r10)
        pushq   %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0   
        movq    %rsp, %rbp
        pushq   %r10
        .cfi_escape 0xf,0x3,0x76,0x78,0x6 
        vmovdqa (%rdi), %ymm0
        vpminud %ymm0, %ymm3, %ymm4
        vpmaxud %ymm0, %ymm3, %ymm0
        vmovdqa %ymm0, (%rdi)
        vmovdqa 32(%rdi), %ymm0
        vpminud %ymm0, %ymm2, %ymm3
        vpmaxud %ymm0, %ymm2, %ymm0
        vmovdqa %ymm0, 32(%rdi)
        vmovdqa 64(%rdi), %ymm0
        vpminud %ymm0, %ymm1, %ymm2
        vpmaxud %ymm0, %ymm1, %ymm0
        vmovdqa %ymm0, 64(%rdi)
        vzeroupper
        popq    %r10
        .cfi_def_cfa 10, 0
        popq    %rbp
        leaq    -8(%r10), %rsp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc

$ gcc -mavx2 -O2 -S -o - a.c
[... x() - same as -O3]
y:
.LFB4855:
        .cfi_startproc
        leaq    8(%rsp), %r10
        .cfi_def_cfa 10, 0
        andq    $-32, %rsp
        pushq   -8(%r10)
        pushq   %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movq    %rsp, %rbp
        pushq   %r10
        .cfi_escape 0xf,0x3,0x76,0x78,0x6
        vmovdqa %ymm2, -112(%rbp)
        xorl    %eax, %eax
        vmovdqa %ymm1, -80(%rbp)
        vmovdqa %ymm0, -48(%rbp)
.L3:
        vmovdqa (%rdi,%rax), %ymm0
        vmovdqa -112(%rbp,%rax), %ymm1
        vpminud %ymm0, %ymm1, %ymm2
        vpmaxud %ymm0, %ymm1, %ymm0
        vmovdqa %ymm2, -112(%rbp,%rax)
        vmovdqa %ymm0, (%rdi,%rax)
        addq    $32, %rax
        cmpq    $96, %rax
        jne     .L3
        vmovdqa -48(%rbp), %ymm2
        vmovdqa -80(%rbp), %ymm1
        vmovdqa -112(%rbp), %ymm0
        vzeroupper
        popq    %r10
        .cfi_def_cfa 10, 0
        popq    %rbp
        leaq    -8(%r10), %rsp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 5.4.0-6'
--with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs
--enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
--program-suffix=-5 --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib
--disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo
--with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home
--with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64
--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64
--with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
--enable-objc-gc --enable-multiarch --with-arch-32=i686 --with-abi=m64
--with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic
--enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu
--target=x86_64-linux-gnu
Thread model: posix
gcc version 5.4.0 20160609 (Debian 5.4.0-6)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]