[Bug rtl-optimization/91994] [10 Regression] r276327 breaks -mvzeroupper
hjl.tools at gmail dot com
gcc-bugzilla@gcc.gnu.org
Fri Oct 4 22:42:00 GMT 2019
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91994
H.J. Lu <hjl.tools at gmail dot com> changed:
What       |Removed                          |Added
----------------------------------------------------------------------------
Summary    |[10 Regression] r276327          |[10 Regression] r276327
           |miscompiled 557.xz_r in          |breaks -mvzeroupper
           |SPEC CPU 2017                    |
--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> ---
[hjl@gnu-skx-1 gcc]$ cat bad.c
#include <stdlib.h>
#include <immintrin.h>
__m256i x1, x2, x3;
__attribute__ ((noinline))
static void
foo (void)
{
x1 = x2;
}
void
bar (void)
{
__m256i x = x1;
foo ();
x3 = x;
}
__attribute__ ((noinline))
int
main (void)
{
__m256i x = _mm256_set1_epi8 (3);
x1 = x;
bar ();
if (__builtin_memcmp (&x3, &x, sizeof (x)))
abort ();
return 0;
}
[hjl@gnu-skx-1 gcc]$ ./xgcc -B./ -march=skylake -O2 bad.c
[hjl@gnu-skx-1 gcc]$ ./a.out
Aborted
[hjl@gnu-skx-1 gcc]$ ./xgcc -B./ -march=skylake -O2 bad.c -S
[hjl@gnu-skx-1 gcc]$ cat bad.s
.file "bad.c"
.text
.p2align 4
.type foo, @function
foo:
.LFB5339:
.cfi_startproc
vmovdqa x2(%rip), %ymm0
vmovdqa %ymm0, x1(%rip)
vzeroupper <<< Clobbers the upper 128 bits of YMM1, which the callers (bar, main) keep live across the call to foo.
ret
.cfi_endproc
.LFE5339:
.size foo, .-foo
.p2align 4
.globl bar
.type bar, @function
bar:
.LFB5340:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
vmovdqa x1(%rip), %ymm1
movq %rsp, %rbp
.cfi_def_cfa_register 6
andq $-32, %rsp
call foo
vmovdqa %ymm1, x3(%rip)
vzeroupper
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE5340:
.size bar, .-bar
.section .text.startup,"ax",@progbits
.p2align 4
.globl main
.type main, @function
main:
.LFB5341:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movabsq $217020518514230019, %rax
movq %rsp, %rbp
.cfi_def_cfa_register 6
andq $-32, %rsp
subq $32, %rsp
vmovdqa .LC0(%rip), %ymm1
vmovdqa %ymm1, (%rsp)
vmovdqa %ymm1, x1(%rip)
call foo
vmovdqa %ymm1, x3(%rip)
movq x3+8(%rip), %rdx
xorq (%rsp), %rax
xorq 8(%rsp), %rdx
orq %rax, %rdx
jne .L6
movq x3+24(%rip), %rdx
movq x3+16(%rip), %rax
xorq 24(%rsp), %rdx
xorq 16(%rsp), %rax
orq %rax, %rdx
je .L9
.L6:
vzeroupper
call abort
.p2align 4,,10
.p2align 3
.L9:
xorl %eax, %eax
vzeroupper
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE5341:
.size main, .-main
.comm x3,32,32
.comm x2,32,32
.comm x1,32,32
.section .rodata.cst32,"aM",@progbits,32
.align 32
.LC0:
.quad 217020518514230019
.quad 217020518514230019
.quad 217020518514230019
.quad 217020518514230019
.ident "GCC: (GNU) 10.0.0 20191003 (experimental)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-skx-1 gcc]$
More information about the Gcc-bugs mailing list