This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[10/13] Rework VEC_PERM_EXPR folding


This patch reworks the VEC_PERM_EXPR folding so that more of it works
for variable-length vectors.  E.g. it means that we can now recognise
variable-length permutes that reduce to a single vector, or cases in
which a variable-length permute only needs one input.  There should be
no functional change for fixed-length vectors.


2017-12-09  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* selftest.h (selftest::vec_perm_indices_c_tests): Declare.
	* selftest-run-tests.c (selftest::run_tests): Call it.
	* vector-builder.h (vector_builder::operator ==): New function.
	(vector_builder::operator !=): Likewise.
	* vec-perm-indices.h (vec_perm_indices::series_p): Declare.
	(vec_perm_indices::all_from_input_p): New function.
	* vec-perm-indices.c (vec_perm_indices::series_p): Likewise.
	(test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise.
	* fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder
	instead of reading the VECTOR_CST directly.  Detect whether both
	vector inputs are the same before constructing the vec_perm_indices,
	and update the number of inputs argument accordingly.  Use the
	utility functions added above.  Only construct sel2 if we need to.

Index: gcc/selftest.h
===================================================================
*** gcc/selftest.h	2017-12-09 23:06:55.002855594 +0000
--- gcc/selftest.h	2017-12-09 23:21:51.517599734 +0000
*************** extern void vec_c_tests ();
*** 201,206 ****
--- 201,207 ----
  extern void wide_int_cc_tests ();
  extern void predict_c_tests ();
  extern void simplify_rtx_c_tests ();
+ extern void vec_perm_indices_c_tests ();
  
  extern int num_passes;
  
Index: gcc/selftest-run-tests.c
===================================================================
*** gcc/selftest-run-tests.c	2017-12-09 23:06:55.002855594 +0000
--- gcc/selftest-run-tests.c	2017-12-09 23:21:51.517599734 +0000
*************** selftest::run_tests ()
*** 73,78 ****
--- 73,79 ----
  
    /* Mid-level data structures.  */
    input_c_tests ();
+   vec_perm_indices_c_tests ();
    tree_c_tests ();
    gimple_c_tests ();
    rtl_tests_c_tests ();
Index: gcc/vector-builder.h
===================================================================
*** gcc/vector-builder.h	2017-12-09 23:06:55.002855594 +0000
--- gcc/vector-builder.h	2017-12-09 23:21:51.518600090 +0000
*************** #define GCC_VECTOR_BUILDER_H
*** 97,102 ****
--- 97,105 ----
    bool encoded_full_vector_p () const;
    T elt (unsigned int) const;
  
+   bool operator == (const Derived &) const;
+   bool operator != (const Derived &x) const { return !operator == (x); }
+ 
    void finalize ();
  
  protected:
*************** vector_builder<T, Derived>::new_vector (
*** 168,173 ****
--- 171,196 ----
    this->truncate (0);
  }
  
+ /* Return true if this vector and OTHER have the same elements and
+    are encoded in the same way.  */
+ 
+ template<typename T, typename Derived>
+ bool
+ vector_builder<T, Derived>::operator == (const Derived &other) const
+ {
+   if (m_full_nelts != other.m_full_nelts
+       || m_npatterns != other.m_npatterns
+       || m_nelts_per_pattern != other.m_nelts_per_pattern)
+     return false;
+ 
+   unsigned int nelts = encoded_nelts ();
+   for (unsigned int i = 0; i < nelts; ++i)
+     if (!derived ()->equal_p ((*this)[i], other[i]))
+       return false;
+ 
+   return true;
+ }
+ 
  /* Return the value of vector element I, which might or might not be
     encoded explicitly.  */
  
Index: gcc/vec-perm-indices.h
===================================================================
*** gcc/vec-perm-indices.h	2017-12-09 23:20:13.233112018 +0000
--- gcc/vec-perm-indices.h	2017-12-09 23:21:51.517599734 +0000
*************** typedef int_vector_builder<HOST_WIDE_INT
*** 62,68 ****
--- 62,70 ----
  
    element_type clamp (element_type) const;
    element_type operator[] (unsigned int i) const;
+   bool series_p (unsigned int, unsigned int, element_type, element_type) const;
    bool all_in_range_p (element_type, element_type) const;
+   bool all_from_input_p (unsigned int) const;
  
  private:
    vec_perm_indices (const vec_perm_indices &);
*************** vec_perm_indices::operator[] (unsigned i
*** 119,122 ****
--- 121,133 ----
    return clamp (m_encoding.elt (i));
  }
  
+ /* Return true if the permutation vector only selects elements from
+    input I.  */
+ 
+ inline bool
+ vec_perm_indices::all_from_input_p (unsigned int i) const
+ {
+   return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input);
+ }
+ 
  #endif
Index: gcc/vec-perm-indices.c
===================================================================
*** gcc/vec-perm-indices.c	2017-12-09 23:20:13.233112018 +0000
--- gcc/vec-perm-indices.c	2017-12-09 23:21:51.517599734 +0000
*************** Software Foundation; either version 3, o
*** 28,33 ****
--- 28,34 ----
  #include "rtl.h"
  #include "memmodel.h"
  #include "emit-rtl.h"
+ #include "selftest.h"
  
  /* Switch to a new permutation vector that selects between NINPUTS vector
     inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
*************** vec_perm_indices::rotate_inputs (int del
*** 85,90 ****
--- 86,139 ----
      m_encoding[i] = clamp (m_encoding[i] + element_delta);
  }
  
+ /* Return true if index OUT_BASE + I * OUT_STEP selects input
+    element IN_BASE + I * IN_STEP.  */
+ 
+ bool
+ vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
+ 			    element_type in_base, element_type in_step) const
+ {
+   /* Check the base value.  */
+   if (clamp (m_encoding.elt (out_base)) != clamp (in_base))
+     return false;
+ 
+   unsigned int full_nelts = m_encoding.full_nelts ();
+   unsigned int npatterns = m_encoding.npatterns ();
+ 
+   /* Calculate which multiple of OUT_STEP elements we need to get
+      back to the same pattern.  */
+   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
+ 
+   /* Check the steps.  */
+   in_step = clamp (in_step);
+   out_base += out_step;
+   unsigned int limit = 0;
+   for (;;)
+     {
+       /* Succeed if we've checked all the elements in the vector.  */
+       if (out_base >= full_nelts)
+ 	return true;
+ 
+       if (out_base >= npatterns)
+ 	{
+ 	  /* We've got to the end of the "foreground" values.  Check
+ 	     2 elements from each pattern in the "background" values.  */
+ 	  if (limit == 0)
+ 	    limit = out_base + cycle_length * 2;
+ 	  else if (out_base >= limit)
+ 	    return true;
+ 	}
+ 
+       element_type v0 = m_encoding.elt (out_base - out_step);
+       element_type v1 = m_encoding.elt (out_base);
+       if (clamp (v1 - v0) != in_step)
+ 	return false;
+ 
+       out_base += out_step;
+     }
+   return true;
+ }
+ 
  /* Return true if all elements of the permutation vector are in the range
     [START, START + SIZE).  */
  
*************** vec_perm_indices_to_rtx (machine_mode mo
*** 180,182 ****
--- 229,280 ----
      RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode));
    return gen_rtx_CONST_VECTOR (mode, v);
  }
+ 
+ #if CHECKING_P
+ 
+ namespace selftest {
+ 
+ /* Test a 12-element vector.  */
+ 
+ static void
+ test_vec_perm_12 (void)
+ {
+   vec_perm_builder builder (12, 12, 1);
+   for (unsigned int i = 0; i < 4; ++i)
+     {
+       builder.quick_push (i * 5);
+       builder.quick_push (3 + i);
+       builder.quick_push (2 + 3 * i);
+     }
+   vec_perm_indices indices (builder, 1, 12);
+   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
+   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
+   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
+   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
+   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
+ 
+   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
+   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
+ 
+   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
+   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
+ 
+   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
+   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
+ 
+   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
+   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
+   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
+ }
+ 
+ /* Run selftests for this file.  */
+ 
+ void
+ vec_perm_indices_c_tests ()
+ {
+   test_vec_perm_12 ();
+ }
+ 
+ } // namespace selftest
+ 
+ #endif
Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c	2017-12-09 23:18:12.040041251 +0000
--- gcc/fold-const.c	2017-12-09 23:21:51.517599734 +0000
*************** fold_ternary_loc (location_t loc, enum t
*** 11547,11645 ****
      case VEC_PERM_EXPR:
        if (TREE_CODE (arg2) == VECTOR_CST)
  	{
! 	  unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2;
! 	  bool need_mask_canon = false;
! 	  bool need_mask_canon2 = false;
! 	  bool all_in_vec0 = true;
! 	  bool all_in_vec1 = true;
! 	  bool maybe_identity = true;
! 	  bool single_arg = (op0 == op1);
! 	  bool changed = false;
! 
! 	  mask2 = 2 * nelts - 1;
! 	  mask = single_arg ? (nelts - 1) : mask2;
! 	  gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type));
! 	  vec_perm_builder sel (nelts, nelts, 1);
! 	  vec_perm_builder sel2 (nelts, nelts, 1);
! 	  for (i = 0; i < nelts; i++)
! 	    {
! 	      tree val = VECTOR_CST_ELT (arg2, i);
! 	      if (TREE_CODE (val) != INTEGER_CST)
! 		return NULL_TREE;
! 
! 	      /* Make sure that the perm value is in an acceptable
! 		 range.  */
! 	      wi::tree_to_wide_ref t = wi::to_wide (val);
! 	      need_mask_canon |= wi::gtu_p (t, mask);
! 	      need_mask_canon2 |= wi::gtu_p (t, mask2);
! 	      unsigned int elt = t.to_uhwi () & mask;
! 	      unsigned int elt2 = t.to_uhwi () & mask2;
! 
! 	      if (elt < nelts)
! 		all_in_vec1 = false;
! 	      else
! 		all_in_vec0 = false;
! 
! 	      if ((elt & (nelts - 1)) != i)
! 		maybe_identity = false;
! 
! 	      sel.quick_push (elt);
! 	      sel2.quick_push (elt2);
! 	    }
  
! 	  if (maybe_identity)
! 	    {
! 	      if (all_in_vec0)
! 		return op0;
! 	      if (all_in_vec1)
! 		return op1;
! 	    }
  
! 	  if (all_in_vec0)
! 	    op1 = op0;
! 	  else if (all_in_vec1)
! 	    {
! 	      op0 = op1;
! 	      for (i = 0; i < nelts; i++)
! 		sel[i] -= nelts;
! 	      need_mask_canon = true;
  	    }
  
- 	  vec_perm_indices indices (sel, 2, nelts);
  	  if ((TREE_CODE (op0) == VECTOR_CST
  	       || TREE_CODE (op0) == CONSTRUCTOR)
  	      && (TREE_CODE (op1) == VECTOR_CST
  		  || TREE_CODE (op1) == CONSTRUCTOR))
  	    {
! 	      tree t = fold_vec_perm (type, op0, op1, indices);
  	      if (t != NULL_TREE)
  		return t;
  	    }
  
! 	  if (op0 == op1 && !single_arg)
! 	    changed = true;
  
! 	  /* Some targets are deficient and fail to expand a single
! 	     argument permutation while still allowing an equivalent
! 	     2-argument version.  */
! 	  if (need_mask_canon && arg2 == op2
! 	      && !can_vec_perm_const_p (TYPE_MODE (type), indices, false)
! 	      && can_vec_perm_const_p (TYPE_MODE (type),
! 				       vec_perm_indices (sel2, 2, nelts),
! 				       false))
  	    {
! 	      need_mask_canon = need_mask_canon2;
! 	      sel.truncate (0);
! 	      sel.splice (sel2);
! 	    }
! 
! 	  if (need_mask_canon && arg2 == op2)
! 	    {
! 	      tree eltype = TREE_TYPE (TREE_TYPE (arg2));
! 	      tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1);
! 	      for (i = 0; i < nelts; i++)
! 		tsel.quick_push (build_int_cst (eltype, sel[i]));
! 	      op2 = tsel.build ();
  	      changed = true;
  	    }
  
--- 11547,11611 ----
      case VEC_PERM_EXPR:
        if (TREE_CODE (arg2) == VECTOR_CST)
  	{
! 	  /* Build a vector of integers from the tree mask.  */
! 	  vec_perm_builder builder;
! 	  if (!tree_to_vec_perm_builder (&builder, arg2))
! 	    return NULL_TREE;
  
! 	  /* Create a vec_perm_indices for the integer vector.  */
! 	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type);
! 	  bool single_arg = (op0 == op1);
! 	  vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
  
! 	  /* Check for cases that fold to OP0 or OP1 in their original
! 	     element order.  */
! 	  if (sel.series_p (0, 1, 0, 1))
! 	    return op0;
! 	  if (sel.series_p (0, 1, nelts, 1))
! 	    return op1;
! 
! 	  if (!single_arg)
! 	    {
! 	      if (sel.all_from_input_p (0))
! 		op1 = op0;
! 	      else if (sel.all_from_input_p (1))
! 		{
! 		  op0 = op1;
! 		  sel.rotate_inputs (1);
! 		}
  	    }
  
  	  if ((TREE_CODE (op0) == VECTOR_CST
  	       || TREE_CODE (op0) == CONSTRUCTOR)
  	      && (TREE_CODE (op1) == VECTOR_CST
  		  || TREE_CODE (op1) == CONSTRUCTOR))
  	    {
! 	      tree t = fold_vec_perm (type, op0, op1, sel);
  	      if (t != NULL_TREE)
  		return t;
  	    }
  
! 	  bool changed = (op0 == op1 && !single_arg);
  
! 	  /* Generate a canonical form of the selector.  */
! 	  if (arg2 == op2 && sel.encoding () != builder)
  	    {
! 	      /* Some targets are deficient and fail to expand a single
! 		 argument permutation while still allowing an equivalent
! 		 2-argument version.  */
! 	      if (sel.ninputs () == 2
! 		  || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
! 		op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
! 	      else
! 		{
! 		  vec_perm_indices sel2 (builder, 2, nelts);
! 		  if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
! 		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
! 		  else
! 		    /* Not directly supported with either encoding,
! 		       so use the preferred form.  */
! 		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
! 		}
  	      changed = true;
  	    }
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]