[PATCH] Fix vectorizable_mask_load_store (PR tree-optimization/59591)

Richard Biener rguenther@suse.de
Mon Dec 30 16:54:00 GMT 2013


Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>This patch fixes masked gather load vectorization if the narrowing (or
>widening) gather needs to be used (one where either the index is 32-bit
>and loaded type 64-bit or index is 64-bit and loaded type 32-bit).
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, Kyrill has also
>kindly tested the testcases on Haswell CPU.  Ok for trunk?

Ok.

Thanks,
Richard.

>2013-12-30  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/59591
>	* tree-vect-stmts.c (vectorizable_mask_load_store): Fix up handling
>	of modifier = NARROW masked gathers.
>	(permute_vec_elements): Use gimple_get_lhs instead of
>	gimple_assign_lhs.
>
>	* gcc.dg/vect/pr59591-1.c: New test.
>	* gcc.dg/vect/pr59591-2.c: New test.
>	* gcc.target/i386/pr59591-1.c: New test.
>	* gcc.target/i386/pr59591-2.c: New test.
>
>--- gcc/tree-vect-stmts.c.jj	2013-12-27 19:24:33.000000000 +0100
>+++ gcc/tree-vect-stmts.c	2013-12-30 13:10:24.366030631 +0100
>@@ -1855,14 +1855,24 @@ vectorizable_mask_load_store (gimple stm
>       tree vec_oprnd0 = NULL_TREE, op;
>       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
>       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
>-      tree ptr, vec_mask = NULL_TREE, mask_op, var, scale;
>+      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
>       tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
>+      tree mask_perm_mask = NULL_TREE;
>       edge pe = loop_preheader_edge (loop);
>       gimple_seq seq;
>       basic_block new_bb;
>       enum { NARROW, NONE, WIDEN } modifier;
>       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
> 
>+      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
>+      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>+      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>+      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>+      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>+      scaletype = TREE_VALUE (arglist);
>+      gcc_checking_assert (types_compatible_p (srctype, rettype)
>+			   && types_compatible_p (srctype, masktype));
>+
>       if (nunits == gather_off_nunits)
> 	modifier = NONE;
>       else if (nunits == gather_off_nunits / 2)
>@@ -1888,19 +1898,14 @@ vectorizable_mask_load_store (gimple stm
> 	  perm_mask = vect_gen_perm_mask (vectype, sel);
> 	  gcc_assert (perm_mask != NULL_TREE);
> 	  ncopies *= 2;
>+	  for (i = 0; i < nunits; ++i)
>+	    sel[i] = i | gather_off_nunits;
>+	  mask_perm_mask = vect_gen_perm_mask (masktype, sel);
>+	  gcc_assert (mask_perm_mask != NULL_TREE);
> 	}
>       else
> 	gcc_unreachable ();
> 
>-      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
>-      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>-      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>-      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>-      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>-      scaletype = TREE_VALUE (arglist);
>-      gcc_checking_assert (types_compatible_p (srctype, rettype)
>-			   && types_compatible_p (srctype, masktype));
>-
>       vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
> 
>       ptr = fold_convert (ptrtype, gather_base);
>@@ -1940,28 +1945,35 @@ vectorizable_mask_load_store (gimple stm
> 	      op = var;
> 	    }
> 
>-	  if (j == 0)
>-	    vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
>+	  if (mask_perm_mask && (j & 1))
>+	    mask_op = permute_vec_elements (mask_op, mask_op,
>+					    mask_perm_mask, stmt, gsi);
> 	  else
> 	    {
>-	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
>-				  &def, &dt);
>-	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
>-	    }
>+	      if (j == 0)
>+		vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
>+	      else
>+		{
>+		  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
>+				      &def_stmt, &def, &dt);
>+		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
>+		}
> 
>-	  mask_op = vec_mask;
>-	  if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
>-	    {
>-	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
>-			  == TYPE_VECTOR_SUBPARTS (masktype));
>-	      var = vect_get_new_vect_var (masktype, vect_simple_var, NULL);
>-	      var = make_ssa_name (var, NULL);
>-	      mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
>-	      new_stmt
>-		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
>-						mask_op, NULL_TREE);
>-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
>-	      mask_op = var;
>+	      mask_op = vec_mask;
>+	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
>+		{
>+		  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
>+			      == TYPE_VECTOR_SUBPARTS (masktype));
>+		  var = vect_get_new_vect_var (masktype, vect_simple_var,
>+					       NULL);
>+		  var = make_ssa_name (var, NULL);
>+		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
>+		  new_stmt
>+		    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
>+						    mask_op, NULL_TREE);
>+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
>+		  mask_op = var;
>+		}
> 	    }
> 
> 	  new_stmt
>@@ -5446,7 +5458,7 @@ permute_vec_elements (tree x, tree y, tr
>   tree perm_dest, data_ref;
>   gimple perm_stmt;
> 
>-  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
>+  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
>   data_ref = make_ssa_name (perm_dest, NULL);
> 
>   /* Generate the permute statement.  */
>--- gcc/testsuite/gcc.dg/vect/pr59591-1.c.jj	2013-12-30 12:59:27.012435290 +0100
>+++ gcc/testsuite/gcc.dg/vect/pr59591-1.c	2013-12-30 13:08:40.386562796 +0100
>@@ -0,0 +1,55 @@
>+/* PR tree-optimization/59591 */
>+/* { dg-do run } */
>+/* { dg-additional-options "-fopenmp-simd" } */
>+
>+#ifndef CHECK_H
>+#include "tree-vect.h"
>+#endif
>+
>+extern void abort (void);
>+
>+int p[256], q[256], r[256], t[256];
>+
>+__attribute__((noinline, noclone)) void
>+foo (void)
>+{
>+  int i;
>+  #pragma omp simd safelen(64)
>+  for (i = 0; i < 256; i++)
>+    if (r[i] > 32)
>+      t[i] = p[q[i] * 3L + 2L];
>+}
>+
>+__attribute__((noinline, noclone)) void
>+bar (void)
>+{
>+  int i;
>+  for (i = 0; i < 256; i++)
>+    {
>+      r[i] = ((i >> 2) & (1 << (i & 3))) ? 32 + i : 32 - i;
>+      q[i] = r[i] > 32 ? ((i * 7) % 84) : 99 + i;
>+      p[i] = i * 11;
>+      t[i] = i * 13;
>+    }
>+  foo ();
>+  for (i = 0; i < 256; i++)
>+    if ((i >> 2) & (1 << (i & 3)))
>+      {
>+	if (t[i] != (((i * 7) % 84) * 3 + 2) * 11)
>+	  abort ();
>+      }
>+    else if (t[i] != i * 13)
>+      abort ();
>+}
>+
>+#ifndef CHECK_H
>+int
>+main ()
>+{
>+  check_vect ();
>+  bar ();
>+  return 0;
>+}
>+#endif
>+
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/pr59591-2.c.jj	2013-12-30 12:59:35.249391492 +0100
>+++ gcc/testsuite/gcc.dg/vect/pr59591-2.c	2013-12-30 13:08:58.791467078 +0100
>@@ -0,0 +1,56 @@
>+/* PR tree-optimization/59591 */
>+/* { dg-do run } */
>+/* { dg-additional-options "-fopenmp-simd" } */
>+
>+#ifndef CHECK_H
>+#include "tree-vect.h"
>+#endif
>+
>+extern void abort (void);
>+
>+long long int p[256], r[256], t[256];
>+int q[256];
>+
>+__attribute__((noinline, noclone)) void
>+foo (void)
>+{
>+  int i;
>+  #pragma omp simd safelen(64)
>+  for (i = 0; i < 256; i++)
>+    if (r[i] > 32LL)
>+      t[i] = p[q[i]];
>+}
>+
>+__attribute__((noinline, noclone)) void
>+bar (void)
>+{
>+  int i;
>+  for (i = 0; i < 256; i++)
>+    {
>+      r[i] = ((i >> 2) & (1 << (i & 3))) ? 32 + i : 32 - i;
>+      q[i] = r[i] > 32 ? ((i * 7) % 256) : 258 + i;
>+      p[i] = i * 11;
>+      t[i] = i * 13;
>+    }
>+  foo ();
>+  for (i = 0; i < 256; i++)
>+    if ((i >> 2) & (1 << (i & 3)))
>+      {
>+	if (t[i] != ((i * 7) % 256) * 11)
>+	  abort ();
>+      }
>+    else if (t[i] != i * 13)
>+      abort ();
>+}
>+
>+#ifndef CHECK_H
>+int
>+main ()
>+{
>+  check_vect ();
>+  bar ();
>+  return 0;
>+}
>+#endif
>+
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.target/i386/pr59591-1.c.jj	2013-12-30 13:05:51.243440518 +0100
>+++ gcc/testsuite/gcc.target/i386/pr59591-1.c	2013-12-30 13:09:16.100377227 +0100
>@@ -0,0 +1,17 @@
>+/* PR tree-optimization/59591 */
>+/* { dg-do run } */
>+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fno-vect-cost-model" } */
>+/* { dg-require-effective-target avx2 } */
>+
>+#define CHECK_H "avx2-check.h"
>+#define TEST avx2_test
>+
>+#include "../../gcc.dg/vect/pr59591-1.c"
>+
>+#include CHECK_H
>+
>+static void
>+TEST (void)
>+{
>+  bar ();
>+}
>--- gcc/testsuite/gcc.target/i386/pr59591-2.c.jj	2013-12-30 13:06:13.000328094 +0100
>+++ gcc/testsuite/gcc.target/i386/pr59591-2.c	2013-12-30 13:09:24.093319805 +0100
>@@ -0,0 +1,17 @@
>+/* PR tree-optimization/59591 */
>+/* { dg-do run } */
>+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fno-vect-cost-model" } */
>+/* { dg-require-effective-target avx2 } */
>+
>+#define CHECK_H "avx2-check.h"
>+#define TEST avx2_test
>+
>+#include "../../gcc.dg/vect/pr59591-2.c"
>+
>+#include CHECK_H
>+
>+static void
>+TEST (void)
>+{
>+  bar ();
>+}
>
>	Jakub




More information about the Gcc-patches mailing list