This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug middle-end/71927] New: stack alignment prologue not optimized out when no local variables remain
- From: "mirq-gccboogs at rere dot qmqm.pl" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Tue, 19 Jul 2016 06:37:03 +0000
- Subject: [Bug middle-end/71927] New: stack alignment prologue not optimized out when no local variables remain
- Auto-submitted: auto-generated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71927
Bug ID: 71927
Summary: stack alignment prologue not optimized out when no
local variables remain
Product: gcc
Version: 5.4.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: mirq-gccboogs at rere dot qmqm.pl
Target Milestone: ---
Created attachment 38925
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=38925&action=edit
test case
gcc 5.4 optimizes out the local array, but does not remove the stack-alignment code.
x() has a 2-element __m256i array - it gets optimized out correctly
y() has a 3-element __m256i array - the stack alignment is left in place
For the y() case at -O2, the loop is additionally not unrolled (unlike in x()).
$ gcc -mavx2 -O3 -S -o - a.c
[...]
x:
.LFB4854:
.cfi_startproc
vmovdqa (%rdi), %ymm2
vpminud %ymm1, %ymm2, %ymm3
vpmaxud %ymm1, %ymm2, %ymm1
vmovdqa %ymm1, (%rdi)
vmovdqa 32(%rdi), %ymm1
vpminud %ymm0, %ymm1, %ymm2
vpmaxud %ymm0, %ymm1, %ymm0
vmovdqa %ymm0, 32(%rdi)
vzeroupper
ret
.cfi_endproc
[...]
y:
.LFB4855:
.cfi_startproc
leaq 8(%rsp), %r10
.cfi_def_cfa 10, 0
andq $-32, %rsp
pushq -8(%r10)
pushq %rbp
.cfi_escape 0x10,0x6,0x2,0x76,0
movq %rsp, %rbp
pushq %r10
.cfi_escape 0xf,0x3,0x76,0x78,0x6
vmovdqa (%rdi), %ymm0
vpminud %ymm0, %ymm3, %ymm4
vpmaxud %ymm0, %ymm3, %ymm0
vmovdqa %ymm0, (%rdi)
vmovdqa 32(%rdi), %ymm0
vpminud %ymm0, %ymm2, %ymm3
vpmaxud %ymm0, %ymm2, %ymm0
vmovdqa %ymm0, 32(%rdi)
vmovdqa 64(%rdi), %ymm0
vpminud %ymm0, %ymm1, %ymm2
vpmaxud %ymm0, %ymm1, %ymm0
vmovdqa %ymm0, 64(%rdi)
vzeroupper
popq %r10
.cfi_def_cfa 10, 0
popq %rbp
leaq -8(%r10), %rsp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
$ gcc -mavx2 -O2 -S -o - a.c
[... x() - same as -O3]
y:
.LFB4855:
.cfi_startproc
leaq 8(%rsp), %r10
.cfi_def_cfa 10, 0
andq $-32, %rsp
pushq -8(%r10)
pushq %rbp
.cfi_escape 0x10,0x6,0x2,0x76,0
movq %rsp, %rbp
pushq %r10
.cfi_escape 0xf,0x3,0x76,0x78,0x6
vmovdqa %ymm2, -112(%rbp)
xorl %eax, %eax
vmovdqa %ymm1, -80(%rbp)
vmovdqa %ymm0, -48(%rbp)
.L3:
vmovdqa (%rdi,%rax), %ymm0
vmovdqa -112(%rbp,%rax), %ymm1
vpminud %ymm0, %ymm1, %ymm2
vpmaxud %ymm0, %ymm1, %ymm0
vmovdqa %ymm2, -112(%rbp,%rax)
vmovdqa %ymm0, (%rdi,%rax)
addq $32, %rax
cmpq $96, %rax
jne .L3
vmovdqa -48(%rbp), %ymm2
vmovdqa -80(%rbp), %ymm1
vmovdqa -112(%rbp), %ymm0
vzeroupper
popq %r10
.cfi_def_cfa 10, 0
popq %rbp
leaq -8(%r10), %rsp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 5.4.0-6'
--with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs
--enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
--program-suffix=-5 --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib
--disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo
--with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home
--with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64
--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64
--with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
--enable-objc-gc --enable-multiarch --with-arch-32=i686 --with-abi=m64
--with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic
--enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu
--target=x86_64-linux-gnu
Thread model: posix
gcc version 5.4.0 20160609 (Debian 5.4.0-6)