This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, x86] Fix pblendv expand.


On Wed, Dec 10, 2014 at 12:03 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Dec 10, 2014 at 02:37:13AM +0300, Evgeny Stupachenko wrote:
>> 2014-12-10  Evgeny Stupachenko  <evstupac@gmail.com>
>
> I went ahead and filed a PR, so we have something to refer to in the
> ChangeLog and name the testcases.

Thanks!

>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -47546,6 +47546,7 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
>>      dcopy.op0 = dcopy.op1 = d->op1;
>>    else
>>      dcopy.op0 = dcopy.op1 = d->op0;
>> +  dcopy.target = gen_reg_rtx (vmode);
>>    dcopy.one_operand_p = true;
>>
>>    for (i = 0; i < nelt; ++i)
>
> This is incorrect if d->testing_p is true (can happen e.g. on the testcase
> below; generally, when testing_p is true, we should not use gen_reg_rtx
> because it can be called from GIMPLE optimizers before init_emit is called.
> See PR57896 for details.

Yes, this was also my concern. We've had some nasty failures occurring
when vReg was generated after reload.

> If d->testing_p is true, target is never the same as any of the operands,
> all 3 are virtual registers, so there is no overlap (and even if there would
> be, it should not matter).
>
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/blend.c
>> @@ -0,0 +1,63 @@
>> +/* Test correctness of size 3 store groups permutation.  */
>> +/* { dg-do run } */
>
> The testcase as is does not fail even with unfixed compiler.  I had to add
> some dg-additional-options.
>
>> +  bar(2, q);
>> +  for (i = 0; i < N; i++)
>> +    if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1)
>> +      return 1;
>
> Furthermore, tests in the testsuite should fail through abort ()
> / __builtin_abort () instead of just returning non-zero exit code.
>
> So, here is complete updated patch I'm going to bootstrap/regtest.
>
> 2014-12-10  Jakub Jelinek  <jakub@redhat.com>
>             Evgeny Stupachenko  <evstupac@gmail.com>
>
>         * config/i386/i386.c (expand_vec_perm_pblendv): If not testing_p,
>         set dcopy.target to a new pseudo.
>
>         * gcc.dg/vect/pr64252.c: New test.
>         * gcc.dg/pr64252.c: New test.
>         * gcc.target/i386/avx2-pr64252.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.c.jj   2014-12-10 09:45:15.000000000 +0100
> +++ gcc/config/i386/i386.c      2014-12-10 11:38:12.530795610 +0100
> @@ -47554,6 +47554,8 @@ expand_vec_perm_pblendv (struct expand_v
>      dcopy.op0 = dcopy.op1 = d->op1;
>    else
>      dcopy.op0 = dcopy.op1 = d->op0;
> +  if (!d->testing_p)
> +    dcopy.target = gen_reg_rtx (vmode);
>    dcopy.one_operand_p = true;
>
>    for (i = 0; i < nelt; ++i)
> --- gcc/testsuite/gcc.dg/vect/pr64252.c.jj      2014-12-10 11:42:47.669991028 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr64252.c 2014-12-10 11:47:43.895818223 +0100
> @@ -0,0 +1,66 @@
> +/* PR target/64252 */
> +/* Test correctness of size 3 store groups permutation.  */
> +/* { dg-do run } */
> +/* { dg-additional-options "-O3" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +
> +#include "tree-vect.h"
> +
> +#define N 50
> +
> +enum num3
> +{
> +  a, b, c
> +};
> +
> +struct flags
> +{
> +  enum num3 f;
> +  unsigned int c;
> +  unsigned int p;
> +};
> +
> +struct flagsN
> +{
> +  struct flags a[N];
> +};
> +
> +void
> +bar (int n, struct flagsN *ff)
> +{
> +  struct flagsN *fc;
> +  for (fc = ff + 1; fc < (ff + n); fc++)
> +    {
> +      int i;
> +      for (i = 0; i < N; ++i)
> +       {
> +         ff->a[i].f = 0;
> +         ff->a[i].c = i;
> +         ff->a[i].p = -1;
> +       }
> +      for (i = 0; i < n; i++)
> +       {
> +         int j;
> +         for (j = 0; j < N - n; ++j)
> +           {
> +             fc->a[i + j].f = 0;
> +             fc->a[i + j].c = j + i;
> +             fc->a[i + j].p = -1;
> +           }
> +       }
> +    }
> +}
> +
> +struct flagsN q[2];
> +
> +int main()
> +{
> +  int i;
> +  check_vect ();
> +  bar(2, q);
> +  for (i = 0; i < N; i++)
> +    if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1)
> +      abort ();
> +  return 0;
> +}
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/pr64252.c.jj   2014-12-10 11:26:05.649467180 +0100
> +++ gcc/testsuite/gcc.dg/pr64252.c      2014-12-10 11:25:27.057139759 +0100
> @@ -0,0 +1,30 @@
> +/* PR target/64252 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +typedef unsigned int V __attribute__((vector_size (32)));
> +
> +__attribute__((noinline, noclone)) void
> +foo (V *a, V *b, V *c, V *d, V *e)
> +{
> +  V t = __builtin_shuffle (*a, *b, *c);
> +  V v = __builtin_shuffle (t, (V) { ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U }, (V) { 0, 1, 8, 3, 4, 5, 9, 7 });
> +  v = v + *d;
> +  *e = v;
> +}
> +
> +int
> +main ()
> +{
> +  V a, b, c, d, e;
> +  int i;
> +  a = (V) { 1, 2, 3, 4, 5, 6, 7, 8 };
> +  b = (V) { 9, 10, 11, 12, 13, 14, 15, 16 };
> +  c = (V) { 1, 3, 5, 7, 9, 11, 13, 15 };
> +  d = (V) { 0, 0, 0, 0, 0, 0, 0, 0 };
> +  foo (&a, &b, &c, &d, &e);
> +  for (i = 0; i < 8; i++)
> +    if (e[i] != ((i == 2 || i == 6) ? ~0U : 2 + 2 * i))
> +      __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-pr64252.c.jj     2014-12-10 11:27:36.868877426 +0100
> +++ gcc/testsuite/gcc.target/i386/avx2-pr64252.c        2014-12-10 11:30:49.500520279 +0100
> @@ -0,0 +1,15 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-require-effective-target avx2 } */
> +
> +#include "avx2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/pr64252.c"
> +
> +static void
> +avx2_test (void)
> +{
> +  do_main ();
> +}
>
>
>         Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]