This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors


This patch makes can_duplicate_and_interleave_p cope with mixtures of
vector sizes, by using queries based on get_vectype_for_scalar_type
instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).

int_mode_for_size is now the first check we do for a candidate mode,
so it seemed better to restrict it to MAX_FIXED_MODE_SIZE.  This avoids
unnecessary work and avoids trying to create scalar types that the
target might not support.

This final patch in the series.  As before, each patch tested individually
on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.


2019-11-04  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
	element type rather than an element mode.
	* tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
	Use get_vectype_for_scalar_type to query the natural types
	for a given element type rather than basing everything on
	GET_MODE_SIZE (vinfo->vector_mode).  Limit int_mode_for_size
	query to MAX_FIXED_MODE_SIZE.
	(duplicate_and_interleave): Update call accordingly.
	* tree-vect-loop.c (vectorizable_reduction): Likewise.

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2019-11-05 11:08:12.521631453 +0000
+++ gcc/tree-vectorizer.h	2019-11-05 11:14:42.786884473 +0000
@@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree,
 extern bool vect_slp_bb (basic_block);
 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
-extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
-					    machine_mode,
+extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
 					    unsigned int * = NULL,
 					    tree * = NULL, tree * = NULL);
 extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	2019-11-05 11:08:12.517631481 +0000
+++ gcc/tree-vect-slp.c	2019-11-05 11:14:42.786884473 +0000
@@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st
   return -1;
 }
 
-/* Check whether it is possible to load COUNT elements of type ELT_MODE
+/* Check whether it is possible to load COUNT elements of type ELT_TYPE
    using the method implemented by duplicate_and_interleave.  Return true
    if so, returning the number of intermediate vectors in *NVECTORS_OUT
    (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
@@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st
 
 bool
 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
-				machine_mode elt_mode,
-				unsigned int *nvectors_out,
+				tree elt_type, unsigned int *nvectors_out,
 				tree *vector_type_out,
 				tree *permutes)
 {
-  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
-  poly_int64 nelts;
+  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
+  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
+    return false;
+
+  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
+  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
   unsigned int nvectors = 1;
   for (;;)
     {
       scalar_int_mode int_mode;
       poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
-      if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
-	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
+      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
 	{
+	  /* Get the natural vector type for this SLP group size.  */
 	  tree int_type = build_nonstandard_integer_type
 	    (GET_MODE_BITSIZE (int_mode), 1);
-	  tree vector_type = build_vector_type (int_type, nelts);
-	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
-	    {
+	  tree vector_type
+	    = get_vectype_for_scalar_type (vinfo, int_type, count);
+	  if (vector_type
+	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
+	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+			   GET_MODE_SIZE (base_vector_mode)))
+	    {
+	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
+		 together into elements of type INT_TYPE and using the result
+		 to build NVECTORS vectors.  */
+	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
 	      vec_perm_builder sel1 (nelts, 2, 3);
 	      vec_perm_builder sel2 (nelts, 2, 3);
 	      poly_int64 half_nelts = exact_div (nelts, 2);
@@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v
 	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
 	      && (TREE_CODE (type) == BOOLEAN_TYPE
 		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
-						      TYPE_MODE (type))))
+						      type)))
 	    {
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf
   unsigned int nvectors = 1;
   tree new_vector_type;
   tree permutes[2];
-  if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
+  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
 				       &nvectors, &new_vector_type,
 				       permutes))
     gcc_unreachable ();
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2019-11-05 10:57:41.658071173 +0000
+++ gcc/tree-vect-loop.c	2019-11-05 11:14:42.782884501 +0000
@@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st
 	 that value needs to be repeated for every instance of the
 	 statement within the initial vector.  */
       unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
-      scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
       if (!neutral_op
 	  && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
-					      elt_mode))
+					      TREE_TYPE (vectype_out)))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]