This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Change vectorizer SLP tree to be a graph


This does the last step (I've already changed costing, analysis and
code generation to process nodes as if it were) in making the SLP
tree a graph.  This means adjusting SLP analysis to lookup already
identified SLP nodes for a set of scalar stmts and refering to a
slp_tree from multiple parents.

This avoids blowing up during analysis and lets us vectorize
the testcase from PR87105 as well as clang does.

I'm still fighting with the necessary refcounting, but maybe this
version did the trick.

Re-bootstrap & regtest running on x86_64-unknown-linux-gnu, SPEC
CPU 2006 build is also on the way (though it looks like sth else
broke stuff there as well).

Most changes are moving and re-indenting misindented stuff (looks like the
moving part isn't necessary so I'll edit it out).

Note while costing and code generation share things cross SLP
instance the analysis part does not try to do that (I just thought
that might not be a good idea without re-vamping the whole
data structure to get rid of the idea of "separate" instances)

Richard.

>From c0e980edec39850e4f9730b02df49cdd31ced8c0 Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Wed, 24 Oct 2018 16:09:28 +0200
Subject: [PATCH] make-slp-tree-a-graph

	PR tree-optimization/87105
	* tree-vectorizer.h (_slp_tree::refcnt): New member.
	* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
	refcnt.
	(vect_create_new_slp_node): Initialize refcnt to one.
	(bst_traits): Move.
	(scalar_stmts_set_t, bst_fail): Remove.
	(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
	(vect_build_slp_tree): Add bst_map argument and lookup
	already created SLP nodes.
	(vect_print_slp_tree): Handle a SLP graph.
	(vect_analyze_slp_instance): Adjust.
	(vect_schedule_slp_instance): Add short-cut.

	* g++.dg/vect/slp-pr87105.cc: Adjust.
	* gcc.dg/torture/20181024-1.c: New testcase.

diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
index 1023d915201..949b16c848f 100644
--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
+++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
@@ -2,7 +2,7 @@
 // { dg-require-effective-target c++11 }
 // { dg-require-effective-target vect_double }
 // For MIN/MAX recognition
-// { dg-additional-options "-ffast-math -fvect-cost-model" }
+// { dg-additional-options "-ffast-math" }
 
 #include <algorithm>
 #include <cmath>
@@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
 
 // We should have if-converted everything down to straight-line code
 // { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } }
-// We fail to elide an earlier store which makes us not handle a later
-// duplicate one for vectorization.
-// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail *-*-* } } }
+// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" } }
+// It's a bit awkward to detect that all stores were vectorized but the
+// following more or less does the trick
+// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" } }
diff --git a/gcc/testsuite/gcc.dg/torture/20181024-1.c b/gcc/testsuite/gcc.dg/torture/20181024-1.c
new file mode 100644
index 00000000000..f2cfe7f6d67
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/20181024-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */
+
+typedef enum {
+ C = 0,               N, S, E, W, T, B,               NE, NW, SE, SW,               NT, NB, ST, SB,               ET, EB, WT, WB,               FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
+typedef double LBM_Grid[(130)*100*100*N_CELL_ENTRIES];
+void foo( LBM_Grid srcGrid )
+{
+  double ux , uy , uz , rho ,         ux1, uy1, uz1, rho1,         ux2, uy2, uz2, rho2,         u2, px, py;
+  int i;
+  for( i = 0;
+       i < (N_CELL_ENTRIES*( 100*100));
+       i += N_CELL_ENTRIES )
+    {
+      rho1 = + ((srcGrid)[((C)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((N)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((S)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((E)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((W)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((T)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((B)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((NE)+N_CELL_ENTRIES*( 100*100))+(i)]) 
+	  + ((srcGrid)[((NW)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((SE)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((SW)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((NT)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((NB)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((ST)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((SB)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((ET)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((EB)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((WT)+N_CELL_ENTRIES*( 100*100))+(i)])
+	  + ((srcGrid)[((WB)+N_CELL_ENTRIES*( 100*100))+(i)]);
+      rho = 2.0*rho1 - rho2;
+      px = (((i / N_CELL_ENTRIES) % 100) / (0.5*(100-1))) - 1.0;
+      uz = 0.01 * (1.0-px*px) * (1.0-py*py);
+      u2 = 1.5 * (ux*ux + uy*uy + uz*uz);
+      (((srcGrid)[((C))+(i)])) = (1.0/ 3.0)*rho*(1.0 - u2);
+      (((srcGrid)[((N))+(i)])) = (1.0/18.0)*rho*(1.0 + uy*(4.5*uy + 3.0) - u2);
+    }
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 3aae1776ef9..cc2724f72f6 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -57,6 +57,9 @@ vect_free_slp_tree (slp_tree node, bool final_p)
   int i;
   slp_tree child;
 
+  if (--node->refcnt != 0)
+    return;
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     vect_free_slp_tree (child, final_p);
 
@@ -82,7 +85,6 @@ vect_free_slp_tree (slp_tree node, bool final_p)
   free (node);
 }
 
-
 /* Free the memory allocated for the SLP instance.  FINAL_P is true if we
    have vectorized the instance or if we have made a final decision not
    to vectorize the statements in any way.  */
@@ -126,6 +128,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
   SLP_TREE_TWO_OPERATORS (node) = false;
   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
+  node->refcnt = 1;
 
   unsigned i;
   FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
@@ -195,6 +198,45 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
 }
 
 
+/* Traits for the hash_set to record failed SLP builds for a stmt set.
+   Note we never remove apart from at destruction time so we do not
+   need a special value for deleted that differs from empty.  */
+struct bst_traits
+{
+  typedef vec <stmt_vec_info> value_type;
+  typedef vec <stmt_vec_info> compare_type;
+  static inline hashval_t hash (value_type);
+  static inline bool equal (value_type existing, value_type candidate);
+  static inline bool is_empty (value_type x) { return !x.exists (); }
+  static inline bool is_deleted (value_type x) { return !x.exists (); }
+  static inline void mark_empty (value_type &x) { x.release (); }
+  static inline void mark_deleted (value_type &x) { x.release (); }
+  static inline void remove (value_type &x) { x.release (); }
+};
+inline hashval_t
+bst_traits::hash (value_type x)
+{
+  inchash::hash h;
+  for (unsigned i = 0; i < x.length (); ++i)
+    h.add_int (gimple_uid (x[i]->stmt));
+  return h.end ();
+}
+inline bool
+bst_traits::equal (value_type existing, value_type candidate)
+{
+  if (existing.length () != candidate.length ())
+    return false;
+  for (unsigned i = 0; i < existing.length (); ++i)
+    if (existing[i] != candidate[i])
+      return false;
+  return true;
+}
+
+typedef hash_map <vec <gimple *>, slp_tree,
+		  simple_hashmap_traits <bst_traits, slp_tree> >
+  scalar_stmts_to_slp_tree_map_t;
+
+
 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
    that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
    of the chain.  */
@@ -987,46 +1029,6 @@ vect_build_slp_tree_1 (unsigned char *swap,
   return true;
 }
 
-/* Traits for the hash_set to record failed SLP builds for a stmt set.
-   Note we never remove apart from at destruction time so we do not
-   need a special value for deleted that differs from empty.  */
-struct bst_traits
-{
-  typedef vec <stmt_vec_info> value_type;
-  typedef vec <stmt_vec_info> compare_type;
-  static inline hashval_t hash (value_type);
-  static inline bool equal (value_type existing, value_type candidate);
-  static inline bool is_empty (value_type x) { return !x.exists (); }
-  static inline bool is_deleted (value_type x) { return !x.exists (); }
-  static inline void mark_empty (value_type &x) { x.release (); }
-  static inline void mark_deleted (value_type &x) { x.release (); }
-  static inline void remove (value_type &x) { x.release (); }
-};
-inline hashval_t
-bst_traits::hash (value_type x)
-{
-  inchash::hash h;
-  for (unsigned i = 0; i < x.length (); ++i)
-    h.add_int (gimple_uid (x[i]->stmt));
-  return h.end ();
-}
-inline bool
-bst_traits::equal (value_type existing, value_type candidate)
-{
-  if (existing.length () != candidate.length ())
-    return false;
-  for (unsigned i = 0; i < existing.length (); ++i)
-    if (existing[i] != candidate[i])
-      return false;
-  return true;
-}
-
-typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
-static scalar_stmts_set_t *bst_fail;
-
-typedef hash_map <vec <gimple *>, slp_tree,
-		  simple_hashmap_traits <bst_traits, slp_tree> >
-  scalar_stmts_to_slp_tree_map_t;
 
 static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo,
@@ -1034,30 +1036,33 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 		       poly_uint64 *max_nunits,
 		       vec<slp_tree> *loads,
 		       bool *matches, unsigned *npermutes, unsigned *tree_size,
-		       unsigned max_tree_size);
+		       unsigned max_tree_size,
+		       scalar_stmts_to_slp_tree_map_t *bst_map);
 
 static slp_tree
 vect_build_slp_tree (vec_info *vinfo,
 		     vec<stmt_vec_info> stmts, unsigned int group_size,
 		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
 		     bool *matches, unsigned *npermutes, unsigned *tree_size,
-		     unsigned max_tree_size)
+		     unsigned max_tree_size,
+		     scalar_stmts_to_slp_tree_map_t *bst_map)
 {
-  if (bst_fail->contains (stmts))
-    return NULL;
-  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
-					loads, matches, npermutes, tree_size,
-					max_tree_size);
-  /* When SLP build fails for stmts record this, otherwise SLP build
-     can be exponential in time when we allow to construct parts from
-     scalars, see PR81723.  */
-  if (! res)
+  if (slp_tree *leader = bst_map->get (stmts))
     {
-      vec <stmt_vec_info> x;
-      x.create (stmts.length ());
-      x.splice (stmts);
-      bst_fail->add (x);
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree\n",
+			 *leader ? "" : "failed ");
+      if (*leader)
+	(*leader)->refcnt++;
+      return *leader;
     }
+  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
+					loads, matches, npermutes, tree_size,
+					max_tree_size, bst_map);
+  /* Keep a reference for the bst_map use.  */
+  if (res)
+    res->refcnt++;
+  bst_map->put (stmts.copy (), res);
   return res;
 }
 
@@ -1074,7 +1079,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 		       poly_uint64 *max_nunits,
 		       vec<slp_tree> *loads,
 		       bool *matches, unsigned *npermutes, unsigned *tree_size,
-		       unsigned max_tree_size)
+		       unsigned max_tree_size,
+		       scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   unsigned nops, i, this_tree_size = 0;
   poly_uint64 this_max_nunits = *max_nunits;
@@ -1205,7 +1211,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 					group_size, &this_max_nunits,
 					&this_loads, matches, npermutes,
 					&this_tree_size,
-					max_tree_size)) != NULL)
+					max_tree_size, bst_map)) != NULL)
 	{
 	  /* If we have all children of child built up from scalars then just
 	     throw that away and build it up this node from scalars.  */
@@ -1348,7 +1354,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 					    group_size, &this_max_nunits,
 					    &this_loads, tem, npermutes,
 					    &this_tree_size,
-					    max_tree_size)) != NULL)
+					    max_tree_size, bst_map)) != NULL)
 	    {
 	      /* ... so if successful we can apply the operand swapping
 		 to the GIMPLE IL.  This is necessary because for example
@@ -1441,21 +1447,37 @@ fail:
 
 static void
 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
-		     slp_tree node)
+		     slp_tree node, hash_set<slp_tree> &visited)
 {
   int i;
   stmt_vec_info stmt_info;
   slp_tree child;
 
-  dump_printf_loc (dump_kind, loc, "node%s\n",
+  if (visited.add (node))
+    return;
+
+  dump_printf_loc (dump_kind, loc, "node%s %p\n",
 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
-		   ? " (external)" : "");
+		   ? " (external)" : "", node);
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
     dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
+  if (SLP_TREE_CHILDREN (node).is_empty ())
+    return;
+  dump_printf_loc (dump_kind, loc, "\tchildren");
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_print_slp_tree (dump_kind, loc, child);
+    dump_printf (dump_kind, " %p", (void *)child);
+  dump_printf (dump_kind, "\n");
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    vect_print_slp_tree (dump_kind, loc, child, visited);
 }
 
+static void
+vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
+		     slp_tree node)
+{
+  hash_set<slp_tree> visited;
+  vect_print_slp_tree (dump_kind, loc, node, visited);
+}
 
 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
@@ -1889,12 +1911,18 @@ vect_analyze_slp_instance (vec_info *vinfo,
   /* Build the tree for the SLP instance.  */
   bool *matches = XALLOCAVEC (bool, group_size);
   unsigned npermutes = 0;
-  bst_fail = new scalar_stmts_set_t ();
+  scalar_stmts_to_slp_tree_map_t *bst_map
+    = new scalar_stmts_to_slp_tree_map_t ();
   poly_uint64 max_nunits = nunits;
   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
 			      &max_nunits, &loads, matches, &npermutes,
-			      NULL, max_tree_size);
-  delete bst_fail;
+			      NULL, max_tree_size, bst_map);
+  /* The map keeps a reference on SLP nodes built, release that.  */
+  for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
+       it != bst_map->end (); ++it)
+    if ((*it).second)
+      vect_free_slp_tree ((*it).second, false);
+  delete bst_map;
   if (node != NULL)
     {
       /* Calculate the unrolling factor based on the smallest type.  */
@@ -1924,109 +1952,109 @@ vect_analyze_slp_instance (vec_info *vinfo,
 	}
       else
 	{
-      /* Create a new SLP instance.  */
-      new_instance = XNEW (struct _slp_instance);
-      SLP_INSTANCE_TREE (new_instance) = node;
-      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
-      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
-      SLP_INSTANCE_LOADS (new_instance) = loads;
-
-      /* Compute the load permutation.  */
-      slp_tree load_node;
-      bool loads_permuted = false;
-      FOR_EACH_VEC_ELT (loads, i, load_node)
-	{
-	  vec<unsigned> load_permutation;
-	  int j;
-	  stmt_vec_info load_info;
-	  bool this_load_permuted = false;
-	  load_permutation.create (group_size);
-	  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
-	    (SLP_TREE_SCALAR_STMTS (load_node)[0]);
-	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+	  /* Create a new SLP instance.  */
+	  new_instance = XNEW (struct _slp_instance);
+	  SLP_INSTANCE_TREE (new_instance) = node;
+	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+	  SLP_INSTANCE_LOADS (new_instance) = loads;
+
+	  /* Compute the load permutation.  */
+	  slp_tree load_node;
+	  bool loads_permuted = false;
+	  FOR_EACH_VEC_ELT (loads, i, load_node)
 	    {
-	      int load_place = vect_get_place_in_interleaving_chain
-		(load_info, first_stmt_info);
-	      gcc_assert (load_place != -1);
-	      if (load_place != j)
-		this_load_permuted = true;
-	      load_permutation.safe_push (load_place);
+	      vec<unsigned> load_permutation;
+	      int j;
+	      stmt_vec_info load_info;
+	      bool this_load_permuted = false;
+	      load_permutation.create (group_size);
+	      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
+		  (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+		{
+		  int load_place = vect_get_place_in_interleaving_chain
+		      (load_info, first_stmt_info);
+		  gcc_assert (load_place != -1);
+		  if (load_place != j)
+		    this_load_permuted = true;
+		  load_permutation.safe_push (load_place);
+		}
+	      if (!this_load_permuted
+		  /* The load requires permutation when unrolling exposes
+		     a gap either because the group is larger than the SLP
+		     group-size or because there is a gap between the groups.  */
+		  && (known_eq (unrolling_factor, 1U)
+		      || (group_size == DR_GROUP_SIZE (first_stmt_info)
+			  && DR_GROUP_GAP (first_stmt_info) == 0)))
+		{
+		  load_permutation.release ();
+		  continue;
+		}
+	      SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
+	      loads_permuted = true;
 	    }
-	  if (!this_load_permuted
-	      /* The load requires permutation when unrolling exposes
-	         a gap either because the group is larger than the SLP
-		 group-size or because there is a gap between the groups.  */
-	      && (known_eq (unrolling_factor, 1U)
-		  || (group_size == DR_GROUP_SIZE (first_stmt_info)
-		      && DR_GROUP_GAP (first_stmt_info) == 0)))
+
+	  if (loads_permuted)
 	    {
-	      load_permutation.release ();
-	      continue;
+	      if (!vect_supported_load_permutation_p (new_instance))
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "Build SLP failed: unsupported load "
+				     "permutation %G", stmt_info->stmt);
+		  vect_free_slp_instance (new_instance, false);
+		  return false;
+		}
 	    }
-	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
-	  loads_permuted = true;
-	}
-
-      if (loads_permuted)
-        {
-          if (!vect_supported_load_permutation_p (new_instance))
-            {
-              if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "Build SLP failed: unsupported load "
-				 "permutation %G", stmt_info->stmt);
-	      vect_free_slp_instance (new_instance, false);
-              return false;
-            }
-        }
 
 	  /* If the loads and stores can be handled with load/store-lan
-	 instructions do not generate this SLP instance.  */
-      if (is_a <loop_vec_info> (vinfo)
-	  && loads_permuted
-	  && dr && vect_store_lanes_supported (vectype, group_size, false))
-	{
-	  slp_tree load_node;
-	  FOR_EACH_VEC_ELT (loads, i, load_node)
+	     instructions do not generate this SLP instance.  */
+	  if (is_a <loop_vec_info> (vinfo)
+	      && loads_permuted
+	      && dr && vect_store_lanes_supported (vectype, group_size, false))
 	    {
-	      stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
-		(SLP_TREE_SCALAR_STMTS (load_node)[0]);
-	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
-	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-		  || ! vect_load_lanes_supported
-			(STMT_VINFO_VECTYPE (stmt_vinfo),
-			 DR_GROUP_SIZE (stmt_vinfo), false))
-		break;
+	      slp_tree load_node;
+	      FOR_EACH_VEC_ELT (loads, i, load_node)
+		{
+		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+		  /* Use SLP for strided accesses (or if we can't load-lanes).  */
+		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+		      || ! vect_load_lanes_supported
+		      (STMT_VINFO_VECTYPE (stmt_vinfo),
+		       DR_GROUP_SIZE (stmt_vinfo), false))
+		    break;
+		}
+	      if (i == loads.length ())
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "Built SLP cancelled: can use "
+				     "load/store-lanes\n");
+		  vect_free_slp_instance (new_instance, false);
+		  return false;
+		}
 	    }
-	  if (i == loads.length ())
+
+	  vinfo->slp_instances.safe_push (new_instance);
+
+	  if (dump_enabled_p ())
 	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "Built SLP cancelled: can use "
-				 "load/store-lanes\n");
-	      vect_free_slp_instance (new_instance, false);
-	      return false;
+	      dump_printf_loc (MSG_NOTE, vect_location,
+			       "Final SLP tree for instance:\n");
+	      vect_print_slp_tree (MSG_NOTE, vect_location, node);
 	    }
-	}
 
-      vinfo->slp_instances.safe_push (new_instance);
-
-      if (dump_enabled_p ())
-	{
-	  dump_printf_loc (MSG_NOTE, vect_location,
-			   "Final SLP tree for instance:\n");
-	  vect_print_slp_tree (MSG_NOTE, vect_location, node);
+	  return true;
 	}
-
-      return true;
-    }
     }
   else
     {
-  /* Failed to SLP.  */
-  /* Free the allocated memory.  */
-  scalar_stmts.release ();
-  loads.release ();
+      /* Failed to SLP.  */
+      /* Free the allocated memory.  */
+      scalar_stmts.release ();
+      loads.release ();
     }
 
   /* For basic block SLP, try to break the group up into multiples of the
@@ -3749,8 +3777,13 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
     return;
 
+  /* See if we have already vectorized the node in the graph of the
+     SLP instance.  */
+  if (SLP_TREE_VEC_STMTS (node).exists ())
+    return;
+
   /* See if we have already vectorized the same set of stmts and reuse their
-     vectorized stmts.  */
+     vectorized stmts across instances.  */
   if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
     {
       SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
@@ -3778,8 +3811,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
 
   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
-  if (!SLP_TREE_VEC_STMTS (node).exists ())
-    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 05823d78a7b..548ffab4c20 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -130,6 +130,8 @@ struct _slp_tree {
      scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
      divided by vector size.  */
   unsigned int vec_stmts_size;
+  /* Reference count in the SLP graph.  */
+  unsigned int refcnt;
   /* Whether the scalar computations use two different operators.  */
   bool two_operators;
   /* The DEF type of this node.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]