This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/66866] [miscompile] incorrect load address on manual vector shuffle


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66866

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2015-07-14
     Ever confirmed|0                           |1

--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
The gimple looks ok to me:

__m128i shuf(__m128i) (const __m128i v)
{
  __m128i r;
  __m128i _16;
...
  <bb 2>:
  _43 = MEM[(const A * {ref-all})&v + 2B];
  MEM[(A & {ref-all})&r] = _43;
  _47 = MEM[(const A * {ref-all})&v];
  MEM[(A & {ref-all})&r + 2] = _47;
  _57 = MEM[(const A * {ref-all})&v + 6B];
  MEM[(A & {ref-all})&r + 4] = _57;
  _61 = MEM[(const A * {ref-all})&v + 4B];
  MEM[(A & {ref-all})&r + 6] = _61;
  _73 = MEM[(const A * {ref-all})&v + 10B];
  MEM[(A & {ref-all})&r + 8] = _73;
  _77 = MEM[(const A * {ref-all})&v + 8B];
  MEM[(A & {ref-all})&r + 10] = _77;
  _87 = MEM[(const A * {ref-all})&v + 14B];
  MEM[(A & {ref-all})&r + 12] = _87;
  _91 = MEM[(const A * {ref-all})&v + 12B];
  MEM[(A & {ref-all})&r + 14] = _91;
  _16 = r;
  r ={v} {CLOBBER};
  return _16;

The assembler is slightly convoluted:

# Generated code for shuf(__m128i).  The GIMPLE above swaps each adjacent
# pair of 16-bit elements: r[0]=v[1], r[1]=v[0], r[2]=v[3], r[3]=v[2], ...
# The comments below trace which 16-bit word of v each instruction handles.
_Z4shufDv2_x:
.LFB527:
        .cfi_startproc
        # Spill xmm0 (presumably the incoming argument v) to the stack:
        # words 0-3 live at -24(%rsp), words 4-7 at -16(%rsp).
        movaps  %xmm0, -24(%rsp)
        # rax = low 64 bits (words 0-3), edx = low 32 bits (words 0-1).
        movq    %xmm0, %rax
        movd    %xmm0, %edx
        # Clear xmm0; it is rebuilt lane by lane with pinsrw below.
        pxor    %xmm0, %xmm0
        # edx = word 1 -> lane 0.
        sarl    $16, %edx
        pinsrw  $0, %edx, %xmm0
        # rdx = word 3 (top 16 bits of the low quadword).
        movq    %rax, %rdx
        sarq    $48, %rdx
        # rax = word 2 (shift word 3 out, then shift word 2 down).
        salq    $16, %rax
        sarq    $48, %rax
        # Lane 1 <- word 0, read from the spill slot at byte offset 0.
        pinsrw  $1, -24(%rsp), %xmm0
        # Lane 2 <- word 3, lane 3 <- word 2.
        pinsrw  $2, %edx, %xmm0
        pinsrw  $3, %eax, %xmm0
        # rax = high 64 bits (words 4-7) reloaded from the spill slot.
        movq    -16(%rsp), %rax
        # edx = word 5 -> lane 4.
        movl    %eax, %edx
        sarl    $16, %edx
        pinsrw  $4, %edx, %xmm0
        # rdx = word 7, rax = word 6.
        movq    %rax, %rdx
        salq    $16, %rax
        sarq    $48, %rdx
        sarq    $48, %rax
        # NOTE(review): lane 5 should receive word 4, which lives at
        # -16(%rsp) (byte offset 8 of v); this instead re-reads word 0 from
        # -24(%rsp).  This is the suspicious load address.
        pinsrw  $5, -24(%rsp), %xmm0
        # Lane 6 <- word 7, lane 7 <- word 6.
        pinsrw  $6, %edx, %xmm0
        pinsrw  $7, %eax, %xmm0
        ret

but the duplicate -24(%rsp) source in the pinsrw $1 and pinsrw $5
instructions does indeed look suspicious.

Manually unrolling only inlines the function; the function itself is
still miscompiled the same way.

Reduced testcase:

#include <xmmintrin.h>
typedef short A __attribute__((__may_alias__));
// Read-only accessor: returns by value the index-th 16-bit element of d,
// accessed through a pointer to the may_alias typedef A so that the
// type-punned read is permitted.  No bounds check is done on index.
short extr(const __m128i &d, int index) { return reinterpret_cast<const A
*>(&d)[index]; }
// Mutable overload: returns a reference to the index-th 16-bit element of d
// (viewed as an array of A), so callers can assign to a single lane.
// No bounds check is done on index.
A &extr(__m128i &d, int index) { return reinterpret_cast<A *>(&d)[index]; }
// Minimal reproducer for the bad load address: copies 16-bit element 4 of v
// into element 5 of the result.  Marked noinline so the function is compiled
// on its own rather than being folded into main.
__m128i __attribute__((noinline))
shuf(const __m128i v)
{
  // Only element 5 of r is ever written; the remaining elements are left
  // uninitialized and are not inspected by the caller in this testcase.
  __m128i r;
  extr(r, 5) = extr(v, 4);
  return r;
}
// Driver: runs shuf on the vector {0, 1, 2, 3, 4, 5, 6, 7} and aborts if
// element 5 of the result is not 4 (the value of input element 4).
// Returns 0 when the check passes.
int main()
{
  // 16-byte aligned so the buffer can be written through an __m128i pointer.
  __attribute__((aligned(16))) short mem[8];
  *reinterpret_cast<__m128i *>(mem) = shuf(_mm_setr_epi16(0, 1, 2, 3, 4, 5, 6,
7));
  // Only lane 5 is checked; it is the only lane shuf assigns.
  if (mem[5] != 4)
    abort ();
  return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]