This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC, RFH PATCH, i386] Fix gcc.target/i386/pr61403.c FAIL with -mavx2


On Thu, Oct 02, 2014 at 08:34:40PM +0200, Uros Bizjak wrote:
> Index: i386.c
> ===================================================================
> --- i386.c      (revision 215802)
> +++ i386.c      (working copy)
> @@ -43407,8 +43407,10 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d
>       AVX and AVX2 as they require more than 2 instructions.  */
>    if (d->one_operand_p)
>      return false;
> -  if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
> +  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
>      ;
> +  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
> +    ;
>    else
>      return false;
> 
> --cut here--
> 
> The comment above expand_vec_perm_pblendv claims that:
> 
>   /* Use the same checks as in expand_vec_perm_blend, but skipping
>      AVX and AVX2 as they require more than 2 instructions.  */

The comment is mostly right though, I'd say "as they sometimes require
more than 2 instructions".

> BTW: I have no access to avx2 target, so I can't test the patch with a
> runtime tests. OTOH, it doesn't ICE for "GCC_TEST_RUN_EXPENSIVE=1 make
> check-gcc RUNTESTFLAGS='--target_board=unix/-mavx2
> dg-torture.exp=vshuf*.c'".

Even the expensive testsuite has very limited coverage.
As I wanted to prove your patch will ICE, I wrote the following generator:

/* Generator of random two-operand __builtin_shuffle tests for 32-byte
   vectors.  Build with -DTYPE=<element type>; add -DITYPE=<type> to use a
   different element type for the shuffle mask (e.g. -DTYPE=float
   -DITYPE=int).  The generated C source is written to stdout.  */

#include <stdio.h>
#include <stdlib.h>

/* The mask vector uses the data element type unless told otherwise.  */
#ifndef ITYPE
#define ITYPE TYPE
#endif
/* Two-level stringification, so that TYPE and ITYPE are macro-expanded
   before being turned into string literals.  */
#define S2(X) #X
#define S(X) S2(X)

int
main (void)
{
  /* Number of TYPE elements in one 32-byte vector.  The masking with
     nelt - 1 below relies on this being a power of two.  */
  int i, j, nelt = 32 / sizeof (TYPE);

  /* Prologue of the generated file: vector typedefs, the operands and the
     macros that expand each S (__LINE__, { ... }) line into a function
     fooN performing one shuffle.  */
  printf (
"typedef " S(TYPE) " V __attribute__ ((vector_size (32)));\n"
"typedef " S(ITYPE) " VI __attribute__ ((vector_size (32)));\n"
"V a, b, c;\n"
"\n"
"#define T(n, m...) void foo##n (void) { c = __builtin_shuffle (a, b, (VI) m); }\n"
"#define S(n, m...) T(n, m)\n");

  /* Emit 100000 random masks, one per line.  */
  for (i = 0; i < 100000; i++)
    {
      printf ("S (__LINE__, { ");
      for (j = 0; j < nelt; j++)
	{
	  int k = random () & 3;
	  /* By default element J stays in place (index J of the first
	     operand).  */
	  int v = j;
	  /* When bit 0 of K is set, pick a random element instead: bit 1
	     of K selects the second operand (indices nelt .. 2*nelt-1)
	     rather than the first (indices 0 .. nelt-1).  */
	  if (k & 1)
	    v = ((k & 2) ? nelt : 0) + (random () & (nelt - 1));
	  printf ("%d%s", v, j < (nelt - 1) ? ", " : " })\n");
	}
    }

  return 0;
}

which can be compiled e.g. with
-DTYPE=char
-DTYPE=short
-DTYPE=int
-DTYPE=long
-DTYPE=float -DITYPE=int
-DTYPE=double -DITYPE=long
and then in each case generate 100000 tests (sort -u on it plus manual fixup
can decrease that, for the V4DI/V4DF cases substantially).  The first one
triggered almost immediately an ICE, added to vshuf-32.inc (non-expensive).

With the following updated patch all those generated testcases don't ICE
(-mavx2 for the first four, -mavx for the last two).

Also tested with
GCC_TEST_RUN_EXPENSIVE=1 make check-gcc RUNTESTFLAGS='--target_board=unix/-mavx2 dg-torture.exp=vshuf*.c'

The pr61403.c testcase can be simplified into:
typedef float V __attribute__ ((vector_size (32)));
typedef int VI __attribute__ ((vector_size (32)));
V a, b, c;

#define T(n, m...) void foo##n (void) { c = __builtin_shuffle (a, b, (VI) m); }
T (0, { 0, 1, 2, 3, 4, 5, 10, 13 })
T (1, { 0, 1, 2, 3, 4, 8, 11, 14 })
T (2, { 0, 1, 2, 3, 4, 9, 12, 15 })
T (3, { 0, 13, 2, 3, 14, 5, 6, 15 })
T (4, { 0, 1, 8, 3, 4, 9, 6, 7 })
T (5, { 0, 3, 11, 0, 4, 12, 0, 5 })
T (6, { 0, 3, 6, 9, 12, 15, 0, 0 })
T (7, { 0, 8, 0, 1, 9, 0, 2, 10 })
T (8, { 10, 1, 2, 11, 4, 5, 12, 7 })
T (9, { 13, 0, 6, 14, 0, 7, 15, 0 })
T (10, { 1, 4, 7, 10, 13, 0, 0, 0 })
T (11, { 2, 5, 8, 11, 14, 0, 0, 0 })
permutations, where both your and my patch optimize
foo{0,1,2,3,4,8}.

2014-10-03  Jakub Jelinek  <jakub@redhat.com>
	    Uros Bizjak  <ubizjak@gmail.com>

	PR tree-optimization/61403
	* config/i386/i386.c (expand_vec_perm_pblendv): Fix a spelling
	error in comment.  Also optimize 256-bit vectors for AVX2
	or AVX (floating vectors only), provided the first permutation
	can be performed in one insn.

	* gcc.dg/torture/vshuf-32.inc: Add a new test 29.

--- gcc/config/i386/i386.c.jj	2014-10-03 09:26:14.000000000 +0200
+++ gcc/config/i386/i386.c	2014-10-03 12:39:24.040185310 +0200
@@ -43422,7 +43422,7 @@ expand_vec_perm_palignr (struct expand_v
 
 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
    the permutation using the SSE4_1 pblendv instruction.  Potentially
-   reduces permutaion from 2 pshufb and or to 1 pshufb and pblendv.  */
+   reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */
 
 static bool
 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
@@ -43432,11 +43432,14 @@ expand_vec_perm_pblendv (struct expand_v
   enum machine_mode vmode = d->vmode;
   bool ok;
 
-  /* Use the same checks as in expand_vec_perm_blend, but skipping
-     AVX and AVX2 as they require more than 2 instructions.  */
+  /* Use the same checks as in expand_vec_perm_blend.  */
   if (d->one_operand_p)
     return false;
-  if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
+  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
+    ;
+  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
+    ;
+  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
     ;
   else
     return false;
@@ -43458,7 +43461,7 @@ expand_vec_perm_pblendv (struct expand_v
      respective lanes and 8 >= 8, but 2 not.  */
   if (which != 1 && which != 2)
     return false;
-  if (d->testing_p)
+  if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
     return true;
 
   /* First we apply one operand permutation to the part where
@@ -43474,7 +43477,12 @@ expand_vec_perm_pblendv (struct expand_v
     dcopy.perm[i] = d->perm[i] & (nelt - 1);
 
   ok = expand_vec_perm_1 (&dcopy);
-  gcc_assert (ok);
+  if (GET_MODE_SIZE (vmode) != 16 && !ok)
+    return false;
+  else
+    gcc_assert (ok);
+  if (d->testing_p)
+    return true;
 
   /* Next we put permuted elements into their positions.  */
   dcopy1 = *d;
--- gcc/testsuite/gcc.dg/torture/vshuf-32.inc.jj	2014-10-03 09:26:14.000000000 +0200
+++ gcc/testsuite/gcc.dg/torture/vshuf-32.inc	2014-10-03 12:34:26.780882721 +0200
@@ -28,7 +28,8 @@ T (24,	0, 3, 6, 9, 12, 15, 18, 21, 24, 2
 T (25,	11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42) \
 T (26,	21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52) \
 T (27,	22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53) \
-T (28,	38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 16, 17, 18, 19, 20, 21)
+T (28,	38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 16, 17, 18, 19, 20, 21) \
+T (29,	0, 43, 2, 3, 57, 5, 6, 7, 8, 53, 40, 11, 12, 13, 42, 15, 16, 40, 18, 19, 20, 21, 22, 23, 24, 25, 36, 58, 36, 29, 30, 31)
 #define EXPTESTS \
 T (116,	13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \
 T (117,	39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]