This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix PR91568


On Wed, 28 Aug 2019, Richard Sandiford wrote:

> Richard Biener <rguenther@suse.de> writes:
> > When making the SLP tree a graph I messed up and in some cases we can
> > fail to propagate a higher max_nunits upwards when re-using a subtree.
> > For the testcase, but only on the gcc-9-branch, this causes us to
> > miss a higher value of 4 completely since it comes from a
> > single widening stmt which we remember but throw out of the window
> > due to an early mismatch fixable by commutating.
> >
> > I hope TYPE_VECTOR_SUBPARTS are all ordered, at least aarch64-sve.exp
> > is clean as it gets with a cc1 cross (it doesn't seem to find
> > gcc/include/stdint.h so has more compile-time errors than necessary...).
> >
> > Richard, is that an OK-ish assumption?
> 
> Yeah, it's probably a reasonable assumption for types and sizes
> currently chosen by the target vectorisation hooks (although not
> more generally).  But the existing max_nunits code goes through
> vect_update_max_nunits, with the idea being to keep the assumption
> in a single place.  So IMO it'd be better to split that into:
> 
> static inline void
> vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
> {
>   /* All unit counts have the form current_vector_size * X for some
>      rational X, so two unit sizes must have a common multiple.
>      Everything is a multiple of the initial value of 1.  */
>   *max_nunits = force_common_multiple (*max_nunits, nunits);
> }
> 
> static inline void
> vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
> {
>   vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
> }
> 
> LGTM otherwise.  The estimated_poly_value could end up being a bit
> confusing when reading dumps, but I'll take that as a sign that you
> don't want to add poly_* printers ;-)

:P

The following is what I have applied.  Bootstrapped and tested
on x86_64-unknown-linux-gnu on trunk and branch.

Richard.

2019-08-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91568
	* tree-vectorizer.h (_slp_tree::max_nunits): Add.
	(vect_update_max_nunits): Add overload for poly_uint64.
	* tree-vect-slp.c (vect_create_new_slp_node): Initialize it.
	(vect_build_slp_tree): Record max_nunits into the subtree
	and merge it upwards.
	(vect_print_slp_tree): Print max_nunits.

	* gfortran.dg/pr91568.f: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 274983)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -129,6 +129,7 @@ vect_create_new_slp_node (vec<stmt_vec_i
   SLP_TREE_TWO_OPERATORS (node) = false;
   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
   node->refcnt = 1;
+  node->max_nunits = 1;
 
   unsigned i;
   FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
@@ -1051,15 +1052,24 @@ vect_build_slp_tree (vec_info *vinfo,
 	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
 			 *leader ? "" : "failed ", *leader);
       if (*leader)
-	(*leader)->refcnt++;
+	{
+	  (*leader)->refcnt++;
+	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
+	}
       return *leader;
     }
-  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
+  poly_uint64 this_max_nunits = 1;
+  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
+					&this_max_nunits,
 					matches, npermutes, tree_size,
 					max_tree_size, bst_map);
-  /* Keep a reference for the bst_map use.  */
   if (res)
-    res->refcnt++;
+    {
+      res->max_nunits = this_max_nunits;
+      vect_update_max_nunits (max_nunits, this_max_nunits);
+      /* Keep a reference for the bst_map use.  */
+      res->refcnt++;
+    }
   bst_map->put (stmts.copy (), res);
   return res;
 }
@@ -1463,9 +1473,10 @@ vect_print_slp_tree (dump_flags_t dump_k
 
   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
   dump_user_location_t user_loc = loc.get_user_location ();
-  dump_printf_loc (metadata, user_loc, "node%s %p\n",
+  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
-		   ? " (external)" : "", node);
+		   ? " (external)" : "", node,
+		   estimated_poly_value (node->max_nunits));
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
     dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
   if (SLP_TREE_CHILDREN (node).is_empty ())
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 274983)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -132,6 +132,9 @@ struct _slp_tree {
   unsigned int vec_stmts_size;
   /* Reference count in the SLP graph.  */
   unsigned int refcnt;
+  /* The maximum number of vector elements for the subtree rooted
+     at this node.  */
+  poly_uint64 max_nunits;
   /* Whether the scalar computations use two different operators.  */
   bool two_operators;
   /* The DEF type of this node.  */
@@ -1350,19 +1353,27 @@ vect_get_num_copies (loop_vec_info loop_
 }
 
 /* Update maximum unit count *MAX_NUNITS so that it accounts for
-   the number of units in vector type VECTYPE.  *MAX_NUNITS can be 1
-   if we haven't yet recorded any vector types.  */
+   NUNITS.  *MAX_NUNITS can be 1 if we haven't yet recorded anything.  */
 
 static inline void
-vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
+vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
 {
   /* All unit counts have the form current_vector_size * X for some
      rational X, so two unit sizes must have a common multiple.
      Everything is a multiple of the initial value of 1.  */
-  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   *max_nunits = force_common_multiple (*max_nunits, nunits);
 }
 
+/* Update maximum unit count *MAX_NUNITS so that it accounts for
+   the number of units in vector type VECTYPE.  *MAX_NUNITS can be 1
+   if we haven't yet recorded any vector types.  */
+
+static inline void
+vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
+{
+  vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
+}
+
 /* Return the vectorization factor that should be used for costing
    purposes while vectorizing the loop described by LOOP_VINFO.
    Pick a reasonable estimate if the vectorization factor isn't


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]