[Bug tree-optimization/95201] New: Some x86 vector-extend patterns are not exercised.
ubizjak at gmail dot com
gcc-bugzilla@gcc.gnu.org
Tue May 19 09:52:26 GMT 2020
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95201
Bug ID: 95201
Summary: Some x86 vector-extend patterns are not exercised.
Product: gcc
Version: 11.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: ubizjak at gmail dot com
Target Milestone: ---
Some of the x86 vector-extend patterns are not exercised by the middle end. Currently,
they are XFAILed in gcc.target/i386/pr92658-*.c:
pr92658-avx2.c:/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
pr92658-sse4.c:/* { dg-final { scan-assembler-times "pmovzxbd" 2 { xfail *-*-* } } } */
pr92658-sse4.c:/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
pr92658-sse4.c:/* { dg-final { scan-assembler-times "pmovzxwq" 2 { xfail *-*-* } } } */
These correspond to the testcases below.
For the AVX2 testcase, -O2 -ftree-vectorize -mavx2 is required:
--cut here--
/* 32-byte GCC vector types with unsigned elements; the type names follow
   the 32 x char, 16 x short, 8 x int, 4 x long long element split used on
   x86.  */
typedef unsigned char v32qi __attribute__((vector_size (32)));
typedef unsigned short v16hi __attribute__((vector_size (32)));
typedef unsigned int v8si __attribute__((vector_size (32)));
typedef unsigned long long v4di __attribute__((vector_size (32)));
/* Widen elements 0..3 of the unsigned char vector *SRC to unsigned long long
   and store them to DST[0] as one v4di.  This is the memory-source variant:
   SRC is a __restrict pointer and is dereferenced for every element read.  */
void
foo_u8_u64 (v4di * dst, v32qi * __restrict src)
{
/* Scalar staging buffer; each assignment below converts one unsigned char
   element to unsigned long long.  */
unsigned long long tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
/* Reinterpret the four widened scalars as a single v4di and store it.  */
dst[0] = *(v4di *) tem;
}
/* Widen elements 0..3 of the unsigned char vector SRC to unsigned long long
   and store them to DST[0] as one v4di.  This is the by-value variant: SRC
   is passed as a vector argument rather than through a pointer.  */
void
bar_u8_u64 (v4di * dst, v32qi src)
{
/* Scalar staging buffer; each assignment below converts one unsigned char
   element to unsigned long long.  */
unsigned long long tem[4];
tem[0] = src[0];
tem[1] = src[1];
tem[2] = src[2];
tem[3] = src[3];
/* Reinterpret the four widened scalars as a single v4di and store it.  */
dst[0] = *(v4di *) tem;
}
/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
--cut here--
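For the SSE4.1 testcases, -O2 -ftree-vectorize -msse4.1 is required (the
v16qi, v8hi, v4si and v2di typedefs used below are not shown in this snippet;
they are presumably the 16-byte analogues of the typedefs above):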
--cut here--
/* Copy elements 0..3 of the vector *SRC into an unsigned int array and store
   that array to DST[0] as one v4si.  Memory-source variant: SRC is a
   __restrict pointer dereferenced for every element read.
   NOTE(review): v16qi and v4si are not declared in this snippet; presumably
   16-byte vectors of unsigned char and unsigned int -- confirm against
   pr92658-sse4.c.  */
void
foo_u8_u32 (v4si * dst, v16qi * __restrict src)
{
/* Scalar staging buffer; each assignment converts one source element to
   unsigned int.  */
unsigned int tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
/* Reinterpret the four scalars as a single v4si and store it.  */
dst[0] = *(v4si *) tem;
}
/* Copy elements 0..3 of the vector SRC into an unsigned int array and store
   that array to DST[0] as one v4si.  By-value variant: SRC is passed as a
   vector argument rather than through a pointer.
   NOTE(review): v16qi and v4si are not declared in this snippet; presumably
   16-byte vectors of unsigned char and unsigned int -- confirm against
   pr92658-sse4.c.  */
void
bar_u8_u32 (v4si * dst, v16qi src)
{
/* Scalar staging buffer; each assignment converts one source element to
   unsigned int.  */
unsigned int tem[4];
tem[0] = src[0];
tem[1] = src[1];
tem[2] = src[2];
tem[3] = src[3];
/* Reinterpret the four scalars as a single v4si and store it.  */
dst[0] = *(v4si *) tem;
}
/* { dg-final { scan-assembler-times "pmovzxbd" 2 { xfail *-*-* } } } */
/* Copy elements 0..1 of the vector *SRC into an unsigned long long array and
   store that array to DST[0] as one v2di.  Memory-source variant: SRC is a
   __restrict pointer dereferenced for every element read.
   NOTE(review): v16qi and v2di are not declared in this snippet; presumably
   16-byte vectors of unsigned char and unsigned long long -- confirm against
   pr92658-sse4.c.  */
void
foo_u8_u64 (v2di * dst, v16qi * __restrict src)
{
/* Scalar staging buffer; each assignment converts one source element to
   unsigned long long.  */
unsigned long long tem[2];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
/* Reinterpret the two scalars as a single v2di and store it.  */
dst[0] = *(v2di *) tem;
}
/* Copy elements 0..1 of the vector SRC into an unsigned long long array and
   store that array to DST[0] as one v2di.  By-value variant: SRC is passed
   as a vector argument rather than through a pointer.
   NOTE(review): v16qi and v2di are not declared in this snippet; presumably
   16-byte vectors of unsigned char and unsigned long long -- confirm against
   pr92658-sse4.c.  */
void
bar_u8_u64 (v2di * dst, v16qi src)
{
/* Scalar staging buffer; each assignment converts one source element to
   unsigned long long.  */
unsigned long long tem[2];
tem[0] = src[0];
tem[1] = src[1];
/* Reinterpret the two scalars as a single v2di and store it.  */
dst[0] = *(v2di *) tem;
}
/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
/* Copy elements 0..1 of the vector *SRC into an unsigned long long array and
   store that array to DST[0] as one v2di.  Memory-source variant: SRC is a
   __restrict pointer dereferenced for every element read.
   NOTE(review): v8hi and v2di are not declared in this snippet; presumably
   16-byte vectors of unsigned short and unsigned long long -- confirm
   against pr92658-sse4.c.  */
void
foo_u16_u64 (v2di * dst, v8hi * __restrict src)
{
/* Scalar staging buffer; each assignment converts one source element to
   unsigned long long.  */
unsigned long long tem[2];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
/* Reinterpret the two scalars as a single v2di and store it.  */
dst[0] = *(v2di *) tem;
}
/* Copy elements 0..1 of the vector SRC into an unsigned long long array and
   store that array to DST[0] as one v2di.  By-value variant: SRC is passed
   as a vector argument rather than through a pointer.
   NOTE(review): v8hi and v2di are not declared in this snippet; presumably
   16-byte vectors of unsigned short and unsigned long long -- confirm
   against pr92658-sse4.c.  */
void
bar_u16_u64 (v2di * dst, v8hi src)
{
/* Scalar staging buffer; each assignment converts one source element to
   unsigned long long.  */
unsigned long long tem[2];
tem[0] = src[0];
tem[1] = src[1];
/* Reinterpret the two scalars as a single v2di and store it.  */
dst[0] = *(v2di *) tem;
}
/* { dg-final { scan-assembler-times "pmovzxwq" 2 { xfail *-*-* } } } */
Please note that these testcases also fail to vectorize in their loop forms,
e.g.:
--cut here--
/* Loop form of the pointer-source testcase: widen elements 0..3 of the
   unsigned char vector *SRC to unsigned long long and store them to DST[0]
   as one v4di.  */
void
foo_u8_u64 (v4di * dst, v32qi * __restrict src)
{
/* Scalar staging buffer filled by the loop below.  */
unsigned long long tem[4];
/* Fixed four-iteration loop; each iteration converts one unsigned char
   element to unsigned long long.  */
for (int i = 0; i < 4; i++)
tem[i] = (*src)[i];
/* Reinterpret the four widened scalars as a single v4di and store it.  */
dst[0] = *(v4di *) tem;
}
/* Loop form of the by-value testcase: widen elements 0..3 of the unsigned
   char vector SRC to unsigned long long and store them to DST[0] as one
   v4di.  */
void
bar_u8_u64 (v4di * dst, v32qi src)
{
/* Scalar staging buffer filled by the loop below.  */
unsigned long long tem[4];
/* Fixed four-iteration loop; each iteration converts one unsigned char
   element to unsigned long long.  */
for (int i = 0; i < 4; i++)
tem[i] = src[i];
/* Reinterpret the four widened scalars as a single v4di and store it.  */
dst[0] = *(v4di *) tem;
}
--cut here--
Please see also PR 92658#c8 for some analysis.
More information about the Gcc-bugs
mailing list