[Bug tree-optimization/95201] New: Some x86 vector-extend patterns are not exercised.

ubizjak at gmail dot com gcc-bugzilla@gcc.gnu.org
Tue May 19 09:52:26 GMT 2020


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95201

            Bug ID: 95201
           Summary: Some x86 vector-extend patterns are not exercised.
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ubizjak at gmail dot com
  Target Milestone: ---

Some of the x86 vector-extend patterns are not exercised by the middle end.
Currently, the corresponding tests are XFAILed in
gcc.target/i386/pr92658-*.c:


pr92658-avx2.c:/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
pr92658-sse4.c:/* { dg-final { scan-assembler-times "pmovzxbd" 2 { xfail *-*-* } } } */
pr92658-sse4.c:/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
pr92658-sse4.c:/* { dg-final { scan-assembler-times "pmovzxwq" 2 { xfail *-*-* } } } */

These correspond to the following testcases.

The first testcase requires -O2 -ftree-vectorize -mavx2:

--cut here--
/* 32-byte GCC vector types with unsigned char, unsigned short, unsigned int
   and unsigned long long elements, respectively.  */
typedef unsigned char v32qi __attribute__((vector_size (32)));
typedef unsigned short v16hi __attribute__((vector_size (32)));
typedef unsigned int v8si __attribute__((vector_size (32)));
typedef unsigned long long v4di __attribute__((vector_size (32)));

/* Pointer-source form: convert elements 0..3 of the unsigned char vector
   *SRC to unsigned long long and store them to DST[0] as a single v4di.
   Only the first four source elements are read.  */
void
foo_u8_u64 (v4di * dst, v32qi * __restrict src)
{
  unsigned long long tem[4];
  tem[0] = (*src)[0];
  tem[1] = (*src)[1];
  tem[2] = (*src)[2];
  tem[3] = (*src)[3];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v4di *) tem;
}

/* By-value form: same conversion as the pointer-source variant, but SRC is
   passed as a vector argument.  Elements 0..3 of SRC are converted to
   unsigned long long and stored to DST[0] as a single v4di.  */
void
bar_u8_u64 (v4di * dst, v32qi src)
{
  unsigned long long tem[4];
  tem[0] = src[0];
  tem[1] = src[1];
  tem[2] = src[2];
  tem[3] = src[3];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v4di *) tem;
}

/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
--cut here--

The following testcases require -O2 -ftree-vectorize -msse4.1:

--cut here--
/* 16-byte GCC vector types used by the SSE4.1 testcases in this snippet,
   with unsigned char, unsigned short, unsigned int and unsigned long long
   elements, respectively.  They are the 128-bit counterparts of the 32-byte
   typedefs in the AVX2 snippet and make this snippet self-contained.  */
typedef unsigned char v16qi __attribute__((vector_size (16)));
typedef unsigned short v8hi __attribute__((vector_size (16)));
typedef unsigned int v4si __attribute__((vector_size (16)));
typedef unsigned long long v2di __attribute__((vector_size (16)));

/* Pointer-source form: convert elements 0..3 of the unsigned char vector
   *SRC to unsigned int and store them to DST[0] as a single v4si.
   Only the first four source elements are read.  */
void
foo_u8_u32 (v4si * dst, v16qi * __restrict src)
{
  unsigned int tem[4];
  tem[0] = (*src)[0];
  tem[1] = (*src)[1];
  tem[2] = (*src)[2];
  tem[3] = (*src)[3];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v4si *) tem;
}

/* By-value form: elements 0..3 of the vector argument SRC are converted to
   unsigned int and stored to DST[0] as a single v4si.
   NOTE(review): v4si and v16qi are not declared in this snippet; presumably
   16-byte unsigned-element vector types -- confirm against pr92658-sse4.c.  */
void
bar_u8_u32 (v4si * dst, v16qi src)
{
  unsigned int tem[4];
  tem[0] = src[0];
  tem[1] = src[1];
  tem[2] = src[2];
  tem[3] = src[3];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v4si *) tem;
}

/* { dg-final { scan-assembler-times "pmovzxbd" 2 { xfail *-*-* } } } */

/* Pointer-source form: elements 0 and 1 of *SRC are converted to
   unsigned long long and stored to DST[0] as a single v2di.
   NOTE(review): v2di and v16qi are not declared in this snippet; presumably
   16-byte unsigned-element vector types -- confirm against pr92658-sse4.c.  */
void
foo_u8_u64 (v2di * dst, v16qi * __restrict src)
{
  unsigned long long tem[2];
  tem[0] = (*src)[0];
  tem[1] = (*src)[1];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v2di *) tem;
}

/* By-value form: elements 0 and 1 of the vector argument SRC are converted
   to unsigned long long and stored to DST[0] as a single v2di.
   NOTE(review): v2di and v16qi are not declared in this snippet; presumably
   16-byte unsigned-element vector types -- confirm against pr92658-sse4.c.  */
void
bar_u8_u64 (v2di * dst, v16qi src)
{
  unsigned long long tem[2];
  tem[0] = src[0];
  tem[1] = src[1];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v2di *) tem;
}

/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */

/* Pointer-source form: elements 0 and 1 of *SRC are converted to
   unsigned long long and stored to DST[0] as a single v2di.
   NOTE(review): v2di and v8hi are not declared in this snippet; presumably
   16-byte unsigned-element vector types -- confirm against pr92658-sse4.c.  */
void
foo_u16_u64 (v2di * dst, v8hi * __restrict src)
{
  unsigned long long tem[2];
  tem[0] = (*src)[0];
  tem[1] = (*src)[1];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v2di *) tem;
}

/* By-value form: elements 0 and 1 of the vector argument SRC are converted
   to unsigned long long and stored to DST[0] as a single v2di.
   NOTE(review): v2di and v8hi are not declared in this snippet; presumably
   16-byte unsigned-element vector types -- confirm against pr92658-sse4.c.  */
void
bar_u16_u64 (v2di * dst, v8hi src)
{
  unsigned long long tem[2];
  tem[0] = src[0];
  tem[1] = src[1];
  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v2di *) tem;
}

/* { dg-final { scan-assembler-times "pmovzxwq" 2 { xfail *-*-* } } } */

--cut here--

Please note that these testcases also fail to vectorize in their loop forms,
e.g.:

--cut here--
/* Loop form, pointer source: copies elements 0..3 of *SRC into an
   unsigned long long array with a counted loop instead of four separate
   assignments, then stores the array to DST[0] as a single v4di.
   NOTE(review): v4di and v32qi are presumably the 32-byte typedefs from the
   AVX2 snippet -- they are not redeclared here.  */
void
foo_u8_u64 (v4di * dst, v32qi * __restrict src)
{
  unsigned long long tem[4];

  for (int i = 0; i < 4; i++)
    tem[i] = (*src)[i];

  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v4di *) tem;
}

/* Loop form, by-value source: copies elements 0..3 of the vector argument
   SRC into an unsigned long long array with a counted loop instead of four
   separate assignments, then stores the array to DST[0] as a single v4di.
   NOTE(review): v4di and v32qi are presumably the 32-byte typedefs from the
   AVX2 snippet -- they are not redeclared here.  */
void
bar_u8_u64 (v4di * dst, v32qi src)
{
  unsigned long long tem[4];

  for (int i = 0; i < 4; i++)
    tem[i] = src[i];

  /* Reinterpret the scalar array as one vector for the store.  */
  dst[0] = *(v4di *) tem;
}
--cut here--

Please see also PR 92658#c8 for some analysis.


More information about the Gcc-bugs mailing list