This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug target/66866] [miscompile] incorrect load address on manual vector shuffle
- From: "rguenth at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Tue, 14 Jul 2015 12:14:53 +0000
- Subject: [Bug target/66866] [miscompile] incorrect load address on manual vector shuffle
- Auto-submitted: auto-generated
- References: <bug-66866-4 at http dot gcc dot gnu dot org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66866
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Last reconfirmed| |2015-07-14
Ever confirmed|0 |1
--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
The gimple looks ok to me:
__m128i shuf(__m128i) (const __m128i v)
{
__m128i r;
__m128i _16;
...
<bb 2>:
_43 = MEM[(const A * {ref-all})&v + 2B];
MEM[(A & {ref-all})&r] = _43;
_47 = MEM[(const A * {ref-all})&v];
MEM[(A & {ref-all})&r + 2] = _47;
_57 = MEM[(const A * {ref-all})&v + 6B];
MEM[(A & {ref-all})&r + 4] = _57;
_61 = MEM[(const A * {ref-all})&v + 4B];
MEM[(A & {ref-all})&r + 6] = _61;
_73 = MEM[(const A * {ref-all})&v + 10B];
MEM[(A & {ref-all})&r + 8] = _73;
_77 = MEM[(const A * {ref-all})&v + 8B];
MEM[(A & {ref-all})&r + 10] = _77;
_87 = MEM[(const A * {ref-all})&v + 14B];
MEM[(A & {ref-all})&r + 12] = _87;
_91 = MEM[(const A * {ref-all})&v + 12B];
MEM[(A & {ref-all})&r + 14] = _91;
_16 = r;
r ={v} {CLOBBER};
return _16;
The generated assembly is slightly convoluted:
_Z4shufDv2_x:
.LFB527:
.cfi_startproc
movaps %xmm0, -24(%rsp)
movq %xmm0, %rax
movd %xmm0, %edx
pxor %xmm0, %xmm0
sarl $16, %edx
pinsrw $0, %edx, %xmm0
movq %rax, %rdx
sarq $48, %rdx
salq $16, %rax
sarq $48, %rax
pinsrw $1, -24(%rsp), %xmm0
pinsrw $2, %edx, %xmm0
pinsrw $3, %eax, %xmm0
movq -16(%rsp), %rax
movl %eax, %edx
sarl $16, %edx
pinsrw $4, %edx, %xmm0
movq %rax, %rdx
salq $16, %rax
sarq $48, %rdx
sarq $48, %rax
pinsrw $5, -24(%rsp), %xmm0
pinsrw $6, %edx, %xmm0
pinsrw $7, %eax, %xmm0
ret
but the duplicate -24(%rsp) source in the two pinsrw instructions (the ones
inserting elements 1 and 5) looks suspicious indeed.
Manually unrolling only inlines the function; the function itself is
still miscompiled the same way.
Reduced testcase:
#include <xmmintrin.h>
typedef short A __attribute__((__may_alias__));
// Read-only accessor: views the vector `d` as an array of `A` (the may_alias
// short typedef) and returns element `index` by value. No bounds checking.
short extr(const __m128i &d, int index) { return reinterpret_cast<const A
*>(&d)[index]; }
// Mutable accessor: views the vector `d` as an array of `A` and returns a
// reference to element `index`, so the caller can assign to it. No bounds
// checking.
A &extr(__m128i &d, int index) { return reinterpret_cast<A *>(&d)[index]; }
// Copies element 4 of `v` into element 5 of a local vector `r` and returns `r`.
// Only element 5 of `r` is written; its other elements are left uninitialized.
// NOTE(review): noinline presumably keeps this as an out-of-line function so
// that the miscompiled body is the one actually executed -- confirm.
__m128i __attribute__((noinline))
shuf(const __m128i v)
{
__m128i r;
extr(r, 5) = extr(v, 4);
return r;
}
// Test driver: passes the vector (0, 1, ..., 7) to shuf, stores the result in
// a 16-byte-aligned array of shorts, and aborts unless element 5 equals 4
// (the value of input element 4). Returns 0 when the check passes.
int main()
{
__attribute__((aligned(16))) short mem[8];
*reinterpret_cast<__m128i *>(mem) = shuf(_mm_setr_epi16(0, 1, 2, 3, 4, 5, 6,
7));
if (mem[5] != 4)
abort ();
return 0;
}