This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: combine permutations in gimple


On Fri, 10 Aug 2012, Marc Glisse wrote:

this patch detects permutations of permutations and merges them. It also canonicalizes permutations a bit more.

There are several issues with this patch:

1) I am not sure we always want to combine permutations. Indeed, someone (user? vectorizer?) may have written 2 permutations to help the backend generate optimal code, whereas it will do a bad job on the complicated combined permutation. At tree level, I am not sure what can be done except possibly restrict the optimization to the case where the combined permutation is the identity. At the RTL level, we could compare what insns are generated, but that's going to be painful (doing anything with permutations at the RTL level is painful).

2) I don't understand how the cleanup works in tree-ssa-forwprop.c. I copied some fold/update/remove lines from other simplifications, but I don't know if they are right.

3) In a first version of the patch, where I had SSA_NAME_DEF_STMT instead of get_prop_source_stmt, I got an ICE in one of the torture vectorization testcases. It happened in expand_expr_real_2, because it asserts that expand_vec_perm will never fail. However, expand_vec_perm does return NULL_RTX sometimes. Here it was in V16QImode, with the permutation {0,0,2,2,4,...,14,14}. maybe_expand_insn can't handle it directly (that's ok I guess), but then expand_vec_perm sees VOIDmode and exits instead of trying other ways. I don't know if there is a latent bug or if (more likely) my patch may produce trees with wrong modes.

4) Is this the right place?

This isn't the transformation I am most interested in, but it is a first step to see if the direction is right.

Hello,


there was yet another issue with the version I posted: the testcase was trivial so I didn't notice that it didn't perform the optimization at all...

Here is a new version. It seems a bit strange to me that there are plenty of functions that check for single-use variables, but there isn't any that checks for variables used in a single statement (but possibly twice). So I may be doing something strange, but it seems to be the natural test here.

Happily passed bootstrap+regtest.

2012-08-11 Marc Glisse <marc.glisse@inria.fr>

gcc/
	* fold-const.c (fold_ternary_loc): Detect identity permutations.
	Canonicalize permutations more.
	* tree-ssa-forwprop.c (is_only_used_in): New function.
	(simplify_permutation): Likewise.
	(ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
	* gcc.dg/tree-ssa/forwprop-19.c: New testcase.

--
Marc Glisse
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c	(revision 190311)
+++ gcc/tree-ssa-forwprop.c	(working copy)
@@ -2569,20 +2569,97 @@ combine_conversions (gimple_stmt_iterato
 	      gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
 	      update_stmt (stmt);
 	      return remove_prop_source_from_use (op0) ? 2 : 1;
 	    }
 	}
     }
 
   return 0;
 }
 
+/* Return true if VAR has no nondebug use but in stmt.  */
+static bool
+is_only_used_in (const_tree var, const_gimple stmt)
+{
+  const ssa_use_operand_t *const ptr0 = &(SSA_NAME_IMM_USE_NODE (var));
+  const ssa_use_operand_t *ptr = ptr0->next;
+
+  for (; ptr != ptr0; ptr = ptr->next)
+    {
+      const_gimple use_stmt = USE_STMT (ptr);
+      if (is_gimple_debug (use_stmt))
+	continue;
+      if (use_stmt != stmt)
+	return false;
+    }
+  return true;
+}
+
+/* Combine two shuffles in a row.  Returns 1 if there were any changes
+   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
+ 
+static int
+simplify_permutation (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op0, op1, op2, op3, mask;
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  enum tree_code code2;
+  location_t loc = gimple_location (stmt);
+
+  gcc_checking_assert (code == VEC_PERM_EXPR);
+
+  op0 = gimple_assign_rhs1 (stmt);
+  op1 = gimple_assign_rhs2 (stmt);
+  op2 = gimple_assign_rhs3 (stmt);
+
+  if (TREE_CODE (op0) != SSA_NAME)
+    return 0;
+
+  if (TREE_CODE (op2) != VECTOR_CST)
+    return 0;
+
+  if (op0 != op1)
+    return 0;
+
+  def_stmt = SSA_NAME_DEF_STMT (op0);
+  if (!def_stmt || !is_gimple_assign (def_stmt)
+      || !can_propagate_from (def_stmt)
+      || !is_only_used_in (op0, stmt))
+    return 0;
+
+  /* Check that it is only used here. We cannot use has_single_use
+     since the expression is using it twice itself...  */
+
+  code2 = gimple_assign_rhs_code (def_stmt);
+
+  /* Two consecutive shuffles.  */
+  if (code2 == VEC_PERM_EXPR)
+    {
+      op3 = gimple_assign_rhs3 (def_stmt);
+      if (TREE_CODE (op3) != VECTOR_CST)
+	return 0;
+      mask = fold_build3_loc (loc, VEC_PERM_EXPR, TREE_TYPE (op3),
+			      op3, op3, op2);
+      gcc_assert (TREE_CODE (mask) == VECTOR_CST);
+      gimple_assign_set_rhs1 (stmt, gimple_assign_rhs1 (def_stmt));
+      gimple_assign_set_rhs2 (stmt, gimple_assign_rhs2 (def_stmt));
+      gimple_assign_set_rhs3 (stmt, mask);
+      fold_stmt_inplace (gsi);
+      update_stmt (stmt);
+      return remove_prop_source_from_use (op0) ? 2 : 1;
+    }
+
+  return false;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2731,20 +2808,27 @@ ssa_forward_propagate_and_combine (void)
 		  changed = associate_pointerplus (&gsi);
 		else if (CONVERT_EXPR_CODE_P (code)
 			 || code == FLOAT_EXPR
 			 || code == FIX_TRUNC_EXPR)
 		  {
 		    int did_something = combine_conversions (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
+		else if (code == VEC_PERM_EXPR)
+		  {
+		    int did_something = simplify_permutation (&gsi);
+		    if (did_something == 2)
+		      cfg_changed = true;
+		    changed = did_something != 0;
+		  }
 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 190311)
+++ gcc/fold-const.c	(working copy)
@@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t
 	return fold_build2_loc (loc, PLUS_EXPR, type,
 				const_binop (MULT_EXPR, arg0, arg1), arg2);
       if (integer_zerop (arg2))
 	return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
 
       return fold_fma (loc, type, arg0, arg1, arg2);
 
     case VEC_PERM_EXPR:
       if (TREE_CODE (arg2) == VECTOR_CST)
 	{
-	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
 	  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
 	  tree t;
 	  bool need_mask_canon = false;
+	  bool all_in_vec0 = true;
+	  bool all_in_vec1 = true;
+	  bool maybe_identity = true;
+	  bool single_arg = (op0 == op1);
+	  bool changed = false;
 
+	  mask = single_arg ? (nelts - 1) : (2 * nelts - 1);
 	  gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
 	  for (i = 0; i < nelts; i++)
 	    {
 	      tree val = VECTOR_CST_ELT (arg2, i);
 	      if (TREE_CODE (val) != INTEGER_CST)
 		return NULL_TREE;
 
-	      sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
+	      sel[i] = TREE_INT_CST_LOW (val) & mask;
 	      if (TREE_INT_CST_HIGH (val)
 		  || ((unsigned HOST_WIDE_INT)
 		      TREE_INT_CST_LOW (val) != sel[i]))
 		need_mask_canon = true;
+
+	      if (sel[i] < nelts)
+		all_in_vec1 = false;
+	      else
+		all_in_vec0 = false;
+
+	      if ((sel[i] & (nelts-1)) != i)
+		maybe_identity = false;
+	    }
+
+	  if (maybe_identity)
+	    {
+	      if (all_in_vec0)
+		return op0;
+	      if (all_in_vec1)
+		return op1;
 	    }
 
 	  if ((TREE_CODE (arg0) == VECTOR_CST
 	       || TREE_CODE (arg0) == CONSTRUCTOR)
 	      && (TREE_CODE (arg1) == VECTOR_CST
 		  || TREE_CODE (arg1) == CONSTRUCTOR))
 	    {
 	      t = fold_vec_perm (type, arg0, arg1, sel);
 	      if (t != NULL_TREE)
 		return t;
 	    }
 
+	  if (all_in_vec0)
+	    op1 = op0;
+	  else if (all_in_vec1)
+	    {
+	      op0 = op1;
+	      for (i = 0; i < nelts; i++)
+		sel[i] -= nelts;
+	      need_mask_canon = true;
+	    }
+
+	  if (op0 == op1 && !single_arg)
+	    changed = true;
+
 	  if (need_mask_canon && arg2 == op2)
 	    {
 	      tree *tsel = XALLOCAVEC (tree, nelts);
 	      tree eltype = TREE_TYPE (TREE_TYPE (arg2));
 	      for (i = 0; i < nelts; i++)
 		tsel[i] = build_int_cst (eltype, sel[i]);
-	      t = build_vector (TREE_TYPE (arg2), tsel);
-	      return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
+	      op2 = build_vector (TREE_TYPE (arg2), tsel);
+	      changed = true;
 	    }
+
+	  if (changed)
+	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
 	}
       return NULL_TREE;
 
     default:
       return NULL_TREE;
     } /* switch (code) */
 }
 
 /* Perform constant folding and related simplification of EXPR.
    The related simplifications include x*1 => x, x*0 => 0, etc.,
Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop2" } */
+
+typedef int vec __attribute__((vector_size (2 * sizeof (int))));
+void f (vec *x1, vec *x2)
+{
+  vec m = { 3, 1 };
+  vec n = { 1, 8 };
+  vec y = __builtin_shuffle (*x1, *x2, n);
+  vec z = __builtin_shuffle (y, m);
+  *x1 = z;
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0 }" "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "x2" 1 "forwprop2" } } */
+/* { dg-final { cleanup-tree-dump "forwprop2" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]