[PATCH][RFC] Fix PR90510, VEC_PERM -> BIT_INSERT folding

Richard Biener <rguenther@suse.de>
Fri May 17 13:36:00 GMT 2019


This adds, incrementally on top of moving VEC_PERM_EXPR folding
to match.pd (though not as an incremental patch - sorry...), folding
of single-element insert permutations to BIT_INSERT_EXPR.
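
As a (made-up) example of the kind of permutation this targets - not
the PR90510 testcase - take, on a target with 128-bit vectors:

  typedef double v2df __attribute__((vector_size (16)));
  typedef long long v2di __attribute__((vector_size (16)));

  /* Made-up example, illustrative only.  */
  v2df
  set_lane_1_to_zero (v2df x)
  {
    /* Lane 0 stays in place from x, lane 1 is a single element taken
       from the constant second operand.  */
    return __builtin_shuffle (x, (v2df) { 0.0, 0.0 }, (v2di) { 0, 2 });
  }

The shuffle is a VEC_PERM_EXPR <x, { 0.0, 0.0 }, { 0, 2 }> in GIMPLE and
the new pattern should turn it into a BIT_INSERT_EXPR of 0.0 at bit
position 64 (lane 1) rather than keeping the permutation.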

Things are quite awkward with the new poly-int vec-perm stuff,
so effectively this doesn't work for SVE and is still very
ugly.  I wonder how to make it generic enough that SVE would
be happy and/or how to make the code prettier.
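
The core restriction is that the insert detection has to walk all lanes,
so it needs a compile-time constant element count and constant selector
elements.  Roughly (a sketch of the check that is open-coded in the
match.pd hunk below, only for the insert-from-op1 direction; the function
name is made up, this is not a proposed API):

  static bool
  single_insert_from_op1_p (const vec_perm_indices &sel, poly_uint64 nelts)
  {
    /* Sketch only - mirrors the open-coded check in the pattern below.  */
    unsigned HOST_WIDE_INT cnelts;
    if (!nelts.is_constant (&cnelts))
      /* Variable-length vectors (SVE) give up here.  */
      return false;
    unsigned inserted = 0;
    for (unsigned HOST_WIDE_INT i = 0; i < cnelts; ++i)
      {
        HOST_WIDE_INT idx;
        if (!sel[i].is_constant (&idx))
          return false;
        if ((unsigned HOST_WIDE_INT) idx >= cnelts)
          /* A lane taken from the second input.  */
          inserted++;
        else if ((unsigned HOST_WIDE_INT) idx != i)
          /* Lanes from the first input have to stay in place.  */
          return false;
      }
    return inserted == 1;
  }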

I also can't find a helper to read a specific vector element
from a VECTOR_CST/CONSTRUCTOR (can I even do that "generally"
with a poly-int index?!), but there surely must be one.
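
FWIW the open-coded element read done twice in the match.pd hunk below
amounts to something like the following (hypothetical helper, not an
existing GCC function, and constant indices only, so it would not answer
the poly-int question either):

  /* Hypothetical helper (sketch only): read constant element IDX from a
     VECTOR_CST or vector CONSTRUCTOR VEC, or return NULL_TREE.  */
  static tree
  vec_elt_at (tree vec, unsigned HOST_WIDE_INT idx)
  {
    if (TREE_CODE (vec) == VECTOR_CST)
      return VECTOR_CST_ELT (vec, idx);
    if (TREE_CODE (vec) == CONSTRUCTOR)
      /* Elements missing from a vector CONSTRUCTOR are implicitly zero,
         which is also what the pattern below assumes.  */
      return idx < CONSTRUCTOR_NELTS (vec)
             ? CONSTRUCTOR_ELT (vec, idx)->value
             : build_zero_cst (TREE_TYPE (TREE_TYPE (vec)));
    return NULL_TREE;
  }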

Richard - any comments / hints?  Look at the match.pd change next to
the "/* See if the permutation is performing a single element insert ... */"
comment.

Thanks,
Richard.

Index: gcc/gimple-match-head.c
===================================================================
--- gcc/gimple-match-head.c	(revision 271321)
+++ gcc/gimple-match-head.c	(working copy)
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ssa.h"
 #include "cgraph.h"
+#include "vec-perm-indices.h"
 #include "fold-const.h"
 #include "fold-const-call.h"
 #include "stor-layout.h"
Index: gcc/generic-match-head.c
===================================================================
--- gcc/generic-match-head.c	(revision 271321)
+++ gcc/generic-match-head.c	(working copy)
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ssa.h"
 #include "cgraph.h"
+#include "vec-perm-indices.h"
 #include "fold-const.h"
 #include "stor-layout.h"
 #include "tree-dfa.h"
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 271321)
+++ gcc/fold-const.c	(working copy)
@@ -9015,7 +9015,7 @@ vec_cst_ctor_to_array (tree arg, unsigne
    selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
    NULL_TREE otherwise.  */
 
-static tree
+tree
 fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
 {
   unsigned int i;
@@ -11763,7 +11763,10 @@ fold_ternary_loc (location_t loc, enum t
       return NULL_TREE;
 
     case VEC_PERM_EXPR:
-      if (TREE_CODE (arg2) == VECTOR_CST)
+      /* Perform constant folding of VEC_PERM_EXPR.  */
+      if (TREE_CODE (arg2) == VECTOR_CST
+	  && TREE_CODE (op0) == VECTOR_CST
+	  && TREE_CODE (op1) == VECTOR_CST)
 	{
 	  /* Build a vector of integers from the tree mask.  */
 	  vec_perm_builder builder;
@@ -11774,61 +11777,7 @@ fold_ternary_loc (location_t loc, enum t
 	  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
 	  bool single_arg = (op0 == op1);
 	  vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
-
-	  /* Check for cases that fold to OP0 or OP1 in their original
-	     element order.  */
-	  if (sel.series_p (0, 1, 0, 1))
-	    return op0;
-	  if (sel.series_p (0, 1, nelts, 1))
-	    return op1;
-
-	  if (!single_arg)
-	    {
-	      if (sel.all_from_input_p (0))
-		op1 = op0;
-	      else if (sel.all_from_input_p (1))
-		{
-		  op0 = op1;
-		  sel.rotate_inputs (1);
-		}
-	    }
-
-	  if ((TREE_CODE (op0) == VECTOR_CST
-	       || TREE_CODE (op0) == CONSTRUCTOR)
-	      && (TREE_CODE (op1) == VECTOR_CST
-		  || TREE_CODE (op1) == CONSTRUCTOR))
-	    {
-	      tree t = fold_vec_perm (type, op0, op1, sel);
-	      if (t != NULL_TREE)
-		return t;
-	    }
-
-	  bool changed = (op0 == op1 && !single_arg);
-
-	  /* Generate a canonical form of the selector.  */
-	  if (arg2 == op2 && sel.encoding () != builder)
-	    {
-	      /* Some targets are deficient and fail to expand a single
-		 argument permutation while still allowing an equivalent
-		 2-argument version.  */
-	      if (sel.ninputs () == 2
-		  || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
-		op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
-	      else
-		{
-		  vec_perm_indices sel2 (builder, 2, nelts);
-		  if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
-		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
-		  else
-		    /* Not directly supported with either encoding,
-		       so use the preferred form.  */
-		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
-		}
-	      changed = true;
-	    }
-
-	  if (changed)
-	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
+	  return fold_vec_perm (type, op0, op1, sel);
 	}
       return NULL_TREE;
 
Index: gcc/fold-const.h
===================================================================
--- gcc/fold-const.h	(revision 271321)
+++ gcc/fold-const.h	(working copy)
@@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
 			       tree, enum tree_code, tree, tree,
 			       tree, enum tree_code, tree, tree, tree *);
 extern tree fold_read_from_constant_string (tree);
+#if GCC_VEC_PERM_INDICES_H
+extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
+#endif
 extern bool wide_int_binop (wide_int &res, enum tree_code,
 			    const wide_int &arg1, const wide_int &arg2,
 			    signop, wi::overflow_type *);
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 271321)
+++ gcc/match.pd	(working copy)
@@ -5374,3 +5374,156 @@ (define_operator_list COND_TERNARY
       (bit_and:elt_type
        (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
        { elt; })))))))
+
+(simplify
+ (vec_perm @0 @1 VECTOR_CST@2)
+ (with
+  {
+    tree op0 = @0, op1 = @1, op2 = @2;
+
+    /* Build a vector of integers from the tree mask.  */
+    vec_perm_builder builder;
+    if (!tree_to_vec_perm_builder (&builder, op2))
+      return NULL_TREE;
+
+    /* Create a vec_perm_indices for the integer vector.  */
+    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+    bool single_arg = (op0 == op1);
+    vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
+  }
+  (if (sel.series_p (0, 1, 0, 1))
+   { op0; }
+   (if (sel.series_p (0, 1, nelts, 1))
+    { op1; }
+    (with
+     {
+       if (!single_arg)
+         {
+	   if (sel.all_from_input_p (0))
+	     op1 = op0;
+	   else if (sel.all_from_input_p (1))
+	     {
+	       op0 = op1;
+	       sel.rotate_inputs (1);
+	     }
+         }
+       gassign *def;
+       tree cop0 = op0, cop1 = op1;
+       if (TREE_CODE (op0) == SSA_NAME
+           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0)))
+	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	 cop0 = gimple_assign_rhs1 (def);
+       if (TREE_CODE (op1) == SSA_NAME
+           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1)))
+	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	 cop1 = gimple_assign_rhs1 (def);
+
+       tree t;
+    }
+    (if ((TREE_CODE (cop0) == VECTOR_CST
+	  || TREE_CODE (cop0) == CONSTRUCTOR)
+	 && (TREE_CODE (cop1) == VECTOR_CST
+	     || TREE_CODE (cop1) == CONSTRUCTOR)
+	 && (t = fold_vec_perm (type, cop0, cop1, sel)))
+     { t; }
+     (with
+      {
+	bool changed = (op0 == op1 && !single_arg);
+	tree ins = NULL_TREE;
+	unsigned at = 0;
+
+	/* See if the permutation is performing a single element
+	   insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR
+	   in that case.  */
+	unsigned HOST_WIDE_INT cnelts;
+        if ((TREE_CODE (cop0) == VECTOR_CST
+	     || TREE_CODE (cop0) == CONSTRUCTOR
+	     || TREE_CODE (cop1) == VECTOR_CST
+	     || TREE_CODE (cop1) == CONSTRUCTOR)
+	    && nelts.is_constant (&cnelts))
+          {
+	    unsigned first = 0, first_oo = 0, first_i;
+	    unsigned second = 0, second_oo = 0, second_i;
+	    HOST_WIDE_INT idx;
+	    for (unsigned HOST_WIDE_INT i = 0; i < cnelts; ++i)
+	      if (!sel[i].is_constant (&idx))
+	        {
+		  first = second = 0;
+		  break;
+		}
+	      else if ((unsigned HOST_WIDE_INT)idx < cnelts)
+	        {
+		  first_i = i;
+		  first++;
+		  first_oo += (unsigned HOST_WIDE_INT)idx != i;
+		}
+	      else
+	        {
+		  second_i = i;
+	          second++;
+		  second_oo += (unsigned HOST_WIDE_INT)idx != i + cnelts;
+		}
+	    if (first_oo == 0
+		&& second == 1
+		&& (TREE_CODE (cop1) == VECTOR_CST
+		    || TREE_CODE (cop1) == CONSTRUCTOR))
+	      {
+	        unsigned idx = sel[second_i].to_constant () - cnelts;
+	        at = second_i;
+	        if (TREE_CODE (cop1) == VECTOR_CST)
+		  ins = VECTOR_CST_ELT (cop1, idx);
+		else if (TREE_CODE (cop1) == CONSTRUCTOR)
+		  {
+		    if (CONSTRUCTOR_NELTS (cop1) <= idx)
+		      ins = build_zero_cst (TREE_TYPE (type));
+		    else
+		      ins = CONSTRUCTOR_ELT (cop1, idx)->value;
+		  }
+	      }
+	    else if (second_oo == 0
+	    	     && first == 1
+		     && (TREE_CODE (cop0) == VECTOR_CST
+			 || TREE_CODE (cop0) == CONSTRUCTOR))
+	      {
+	        unsigned idx = sel[first_i].to_constant ();
+	        at = first_i;
+	        if (TREE_CODE (cop0) == VECTOR_CST)
+		  ins = VECTOR_CST_ELT (cop0, idx);
+		else if (TREE_CODE (cop0) == CONSTRUCTOR)
+		  {
+		    if (CONSTRUCTOR_NELTS (cop0) <= idx)
+		      ins = build_zero_cst (TREE_TYPE (type));
+		    else
+		      ins = CONSTRUCTOR_ELT (cop0, idx)->value;
+		  }
+		op0 = op1;
+	      }
+	  }
+
+	/* Generate a canonical form of the selector.  */
+	if (!ins && sel.encoding () != builder)
+	  {
+	    /* Some targets are deficient and fail to expand a single
+	       argument permutation while still allowing an equivalent
+	       2-argument version.  */
+	    if (sel.ninputs () == 2
+	       || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
+	      op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
+	    else
+	      {
+	        vec_perm_indices sel2 (builder, 2, nelts);
+	        if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
+	          op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
+	        else
+	          /* Not directly supported with either encoding,
+		     so use the preferred form.  */
+		  op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
+	      }
+	    changed = true;
+	  }
+      }
+      (if (ins)
+       (bit_insert { op0; } { ins; }
+         { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
+       (if (changed)
+        (vec_perm { op0; } { op1; } { op2; }))))))))))


