This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

combine permutations in gimple


Hello,

this patch detects permutations of permutations and merges them. It also canonicalizes permutations a bit more.

There are several issues with this patch:

1) I am not sure we always want to combine permutations. Indeed, someone (user? vectorizer?) may have written 2 permutations to help the backend generate optimal code, whereas it will do a bad job on the complicated combined permutation. At tree level, I am not sure what can be done except possibly restrict the optimization to the case where the combined permutation is the identity. At the RTL level, we could compare what insns are generated, but that's going to be painful (doing anything with permutations at the RTL level is painful).

2) I don't understand how the cleanup works in tree-ssa-forwprop.c. I copied some fold/update/remove lines from other simplifications, but I don't know if they are right.

3) In a first version of the patch, where I had SSA_NAME_DEF_STMT instead of get_prop_source_stmt, I got an ICE in one of the torture vectorization testcases. It happened in expand_expr_real_2, because it asserts that expand_vec_perm will never fail. However, expand_vec_perm does return NULL_RTX sometimes. Here it was in V16QImode, with the permutation {0,0,2,2,4,...,14,14}. maybe_expand_insn can't handle it directly (that's ok I guess), but then expand_vec_perm sees VOIDmode and exits instead of trying other ways. I don't know if there is a latent bug or if (more likely) my patch may produce trees with wrong modes.

4) Is this the right place?

This isn't the transformation I am most interested in, but it is a first step to see if the direction is right.


Happily passed bootstrap+regtest.


2012-08-10 Marc Glisse <marc.glisse@inria.fr>

gcc/
	* fold-const.c (fold_ternary_loc): Detect identity permutations.
	Canonicalize permutations more.
	* tree-ssa-forwprop.c (simplify_permutation): New function.
	(ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
	* gcc.dg/tree-ssa/forwprop-19.c: New testcase.

--
Marc Glisse
Index: testsuite/gcc.dg/tree-ssa/forwprop-19.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1" } */
+
+typedef int vec __attribute__((vector_size (2 * sizeof (int))));
+vec f (vec x1, vec x2)
+{
+  vec m = { 2, 1 };
+  vec n = { 1, 8 };
+  vec y = __builtin_shuffle (x1, x2, n);
+  vec z = __builtin_shuffle (y, m);
+  return z;
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*x1.*x1.*{ 1, 0 }" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */
+/* { dg-final { cleanup-tree-dump "forwprop1" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-19.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: fold-const.c
===================================================================
--- fold-const.c	(revision 190301)
+++ fold-const.c	(working copy)
@@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t
 	return fold_build2_loc (loc, PLUS_EXPR, type,
 				const_binop (MULT_EXPR, arg0, arg1), arg2);
       if (integer_zerop (arg2))
 	return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
 
       return fold_fma (loc, type, arg0, arg1, arg2);
 
     case VEC_PERM_EXPR:
       if (TREE_CODE (arg2) == VECTOR_CST)
 	{
-	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
 	  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
 	  tree t;
 	  bool need_mask_canon = false;
+	  bool all_in_vec0 = true;
+	  bool all_in_vec1 = true;
+	  bool maybe_identity = true;
+	  bool single_arg = (op0 == op1);
+	  bool changed = false;
 
+	  mask = single_arg ? (nelts - 1) : (2 * nelts - 1);
 	  gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
 	  for (i = 0; i < nelts; i++)
 	    {
 	      tree val = VECTOR_CST_ELT (arg2, i);
 	      if (TREE_CODE (val) != INTEGER_CST)
 		return NULL_TREE;
 
-	      sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
+	      sel[i] = TREE_INT_CST_LOW (val) & mask;
 	      if (TREE_INT_CST_HIGH (val)
 		  || ((unsigned HOST_WIDE_INT)
 		      TREE_INT_CST_LOW (val) != sel[i]))
 		need_mask_canon = true;
+
+	      if (sel[i] < nelts)
+		all_in_vec1 = false;
+	      else
+		all_in_vec0 = false;
+
+	      if ((sel[i] & (nelts-1)) != i)
+		maybe_identity = false;
+	    }
+
+	  if (maybe_identity)
+	    {
+	      if (all_in_vec0)
+		return op0;
+	      if (all_in_vec1)
+		return op1;
 	    }
 
 	  if ((TREE_CODE (arg0) == VECTOR_CST
 	       || TREE_CODE (arg0) == CONSTRUCTOR)
 	      && (TREE_CODE (arg1) == VECTOR_CST
 		  || TREE_CODE (arg1) == CONSTRUCTOR))
 	    {
 	      t = fold_vec_perm (type, arg0, arg1, sel);
 	      if (t != NULL_TREE)
 		return t;
 	    }
 
+	  if (all_in_vec0)
+	    op1 = op0;
+	  else if (all_in_vec1)
+	    {
+	      op0 = op1;
+	      for (i = 0; i < nelts; i++)
+		sel[i] -= nelts;
+	      need_mask_canon = true;
+	    }
+
+	  if (op0 == op1 && !single_arg)
+	    changed = true;
+
 	  if (need_mask_canon && arg2 == op2)
 	    {
 	      tree *tsel = XALLOCAVEC (tree, nelts);
 	      tree eltype = TREE_TYPE (TREE_TYPE (arg2));
 	      for (i = 0; i < nelts; i++)
 		tsel[i] = build_int_cst (eltype, sel[i]);
-	      t = build_vector (TREE_TYPE (arg2), tsel);
-	      return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
+	      op2 = build_vector (TREE_TYPE (arg2), tsel);
+	      changed = true;
 	    }
+
+	  if (changed)
+	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
 	}
       return NULL_TREE;
 
     default:
       return NULL_TREE;
     } /* switch (code) */
 }
 
 /* Perform constant folding and related simplification of EXPR.
    The related simplifications include x*1 => x, x*0 => 0, etc.,
Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c	(revision 190301)
+++ tree-ssa-forwprop.c	(working copy)
@@ -2569,20 +2569,74 @@ combine_conversions (gimple_stmt_iterato
 	      gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
 	      update_stmt (stmt);
 	      return remove_prop_source_from_use (op0) ? 2 : 1;
 	    }
 	}
     }
 
   return 0;
 }
 
+/* Combine two shuffles in a row.  Returns 1 if there were any changes
+   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
+ 
+static int
+simplify_permutation (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op0, op1, op2, op3, mask;
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  enum tree_code code2;
+  location_t loc = gimple_location (stmt);
+
+  gcc_checking_assert (code == VEC_PERM_EXPR);
+
+  op0 = gimple_assign_rhs1 (stmt);
+  op1 = gimple_assign_rhs2 (stmt);
+  op2 = gimple_assign_rhs3 (stmt);
+
+  if (TREE_CODE (op0) != SSA_NAME)
+    return 0;
+
+  if (TREE_CODE (op2) != VECTOR_CST)
+    return 0;
+
+  if (op0 != op1)
+    return 0;
+
+  def_stmt = get_prop_source_stmt (op0, true, NULL);
+  if (!def_stmt || !can_propagate_from (def_stmt))
+    return 0;
+
+  code2 = gimple_assign_rhs_code (def_stmt);
+
+  /* Two consecutive shuffles.  */
+  if (code2 == VEC_PERM_EXPR)
+    {
+      op3 = gimple_assign_rhs3 (def_stmt);
+      if (TREE_CODE (op3) != VECTOR_CST)
+	return 0;
+      mask = fold_build3_loc (loc, VEC_PERM_EXPR, TREE_TYPE (op3),
+			      op3, op3, op2);
+      gcc_assert (TREE_CODE (mask) == VECTOR_CST);
+      gimple_assign_set_rhs1 (stmt, gimple_assign_rhs1 (def_stmt));
+      gimple_assign_set_rhs2 (stmt, gimple_assign_rhs2 (def_stmt));
+      gimple_assign_set_rhs3 (stmt, mask);
+      fold_stmt_inplace (gsi);
+      update_stmt (stmt);
+      return remove_prop_source_from_use (op0) ? 2 : 1;
+    }
+
+  return false;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2731,20 +2785,27 @@ ssa_forward_propagate_and_combine (void)
 		  changed = associate_pointerplus (&gsi);
 		else if (CONVERT_EXPR_CODE_P (code)
 			 || code == FLOAT_EXPR
 			 || code == FIX_TRUNC_EXPR)
 		  {
 		    int did_something = combine_conversions (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
+		else if (code == VEC_PERM_EXPR)
+		  {
+		    int did_something = simplify_permutation (&gsi);
+		    if (did_something == 2)
+		      cfg_changed = true;
+		    changed = did_something != 0;
+		  }
 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]