[PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596, take 2)

Jakub Jelinek jakub@redhat.com
Mon Oct 24 17:05:00 GMT 2011


On Mon, Oct 24, 2011 at 04:09:49PM +0200, Richard Guenther wrote:
> This one bootstraps and regtests fine on x86_64-unknown-linux-gnu.
> I didn't find a good pattern to split out, eventually how we call
> the vectorizable_* routines should be re-factored a bit.

Here is an updated patch on top of what you've checked in.
I kept the explicit list of tree codes allowed in vectorizable_store,
rather than using handled_component_p, because we don't handle things
like BIT_FIELD_REF, and most likely not ARRAY_RANGE_REF either.
I have also kept the useless_type_conversion_p check and changed the
build_nonstandard_integer_type calls in the two helper routines to build
unsigned types instead of signed ones, so that no cast is needed for
char/signed char/unsigned char comparisons.

Is this ok if it passes bootstrap/regtest?

2011-10-24  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/50596
	* tree-vect-stmts.c (vect_mark_relevant): Only use
	FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
	(vectorizable_store): If is_pattern_stmt_p look through
	VIEW_CONVERT_EXPR on lhs.
	* tree-vect-patterns.c (check_bool_pattern, adjust_bool_pattern):
	Use unsigned type instead of signed.
	(vect_recog_bool_pattern): Optimize also stores into bool memory in
	addition to casts from bool to integral types.
	(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
	created, don't create it again.

	* gcc.dg/vect/vect-cond-10.c: New test.

--- gcc/tree-vect-stmts.c.jj	2011-10-24 17:48:26.000000000 +0200
+++ gcc/tree-vect-stmts.c	2011-10-24 17:57:39.000000000 +0200
@@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
           /* This use is out of pattern use, if LHS has other uses that are
              pattern uses, we should mark the stmt itself, and not the pattern
              stmt.  */
-          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
-            {
-              if (is_gimple_debug (USE_STMT (use_p)))
-                continue;
-              use_stmt = USE_STMT (use_p);
+	  if (TREE_CODE (lhs) == SSA_NAME)
+	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+	      {
+		if (is_gimple_debug (USE_STMT (use_p)))
+		  continue;
+		use_stmt = USE_STMT (use_p);
 
-              if (vinfo_for_stmt (use_stmt)
-                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
-                {
-                  found = true;
-                  break;
-                }
-            }
+		if (vinfo_for_stmt (use_stmt)
+		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+		  {
+		    found = true;
+		    break;
+		  }
+	      }
         }
 
       if (!found)
@@ -3722,6 +3723,9 @@ vectorizable_store (gimple stmt, gimple_
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
+  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
+      && is_pattern_stmt_p (stmt_info))
+    scalar_dest = TREE_OPERAND (scalar_dest, 0);
   if (TREE_CODE (scalar_dest) != ARRAY_REF
       && TREE_CODE (scalar_dest) != INDIRECT_REF
       && TREE_CODE (scalar_dest) != COMPONENT_REF
--- gcc/tree-vect-patterns.c.jj	2011-10-24 12:21:14.000000000 +0200
+++ gcc/tree-vect-patterns.c	2011-10-24 17:57:39.000000000 +0200
@@ -1617,7 +1617,7 @@ check_bool_pattern (tree var, loop_vec_i
 	    {
 	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
 	      tree itype
-		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
+		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
 	      vecitype = get_vectype_for_scalar_type (itype);
 	      if (vecitype == NULL_TREE)
 		return false;
@@ -1813,11 +1813,11 @@ adjust_bool_pattern (tree var, tree out_
     default:
       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
-	  || TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
 	{
 	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
 	  itype
-	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
+	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
 	}
       else
 	itype = TREE_TYPE (rhs1);
@@ -1933,6 +1933,44 @@ vect_recog_bool_pattern (VEC (gimple, he
       VEC_safe_push (gimple, heap, *stmts, last_stmt);
       return pattern_stmt;
     }
+  else if (rhs_code == SSA_NAME
+	   && STMT_VINFO_DATA_REF (stmt_vinfo))
+    {
+      stmt_vec_info pattern_stmt_info;
+      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+      gcc_assert (vectype != NULL_TREE);
+      if (!check_bool_pattern (var, loop_vinfo))
+	return NULL;
+
+      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
+      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
+      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+	{
+	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+	  gimple cast_stmt
+	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
+	  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
+	  rhs = rhs2;
+	}
+      pattern_stmt
+	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
+      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
+      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+      STMT_VINFO_DATA_REF (pattern_stmt_info)
+	= STMT_VINFO_DATA_REF (stmt_vinfo);
+      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
+	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
+      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
+      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
+	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
+      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
+      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
+	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
+      *type_out = vectype;
+      *type_in = vectype;
+      VEC_safe_push (gimple, heap, *stmts, last_stmt);
+      return pattern_stmt;
+    }
   else
     return NULL;
 }
@@ -1949,19 +1987,22 @@ vect_mark_pattern_stmts (gimple orig_stm
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
   gimple def_stmt;
 
-  set_vinfo_for_stmt (pattern_stmt,
-                      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
-  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
+  if (pattern_stmt_info == NULL)
+    {
+      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
+      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+    }
+  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
 
   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
-	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
+    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
   STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
-	= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
+    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
   if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
     {
       def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
--- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj	2011-10-24 17:52:16.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c	2011-10-24 17:52:16.000000000 +0200
@@ -0,0 +1,165 @@
+/* { dg-require-effective-target vect_cond_mixed } */
+
+#include "tree-vect.h"
+
+#define N 1024
+float a[N], b[N], c[N], d[N];
+_Bool k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f5 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      p[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f6 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    p[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f7 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      p[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f8 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    p[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+    }
+  for (i = 0; i < N; i++)
+    {
+      switch ((i / 9) % 3)
+	{
+	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
+	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
+	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
+	}
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f5 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f6 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f7 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f8 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */


	Jakub



More information about the Gcc-patches mailing list