This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH GCC][06/06]Record runtime alias info in struct dependence_info and pass it along


Hi,
This is the main patch recording runtime alias check information in struct
dependence_info and passing it along to later optimizers.  It models graph
of runtime alias checks with some approximation; then sets <clique, base>
to the original data references and records it in hash map; at last, it
sets <clique, base> info to vectorized memory reference.

Given simple data structure (dependence_info) isn't capable of modeling
arbitrary graph, we take approximation as described by comment:

/*
   ...

   Runtime alias checks can be modeled as an arbitrary graph with data
   references as vertices.  In theory we can record the graph and pass
   it to later optimizers in order to enable more transformations.  In
   practice, it's not feasible for high space/time cost in handling
   arbitrary graph.  GCC uses simple data structure in which two fields
   (clique/base) are set for each memory reference.  Two references are
   known to not alias each other if dependence_info.clique are equal and
   dependence_info.base are distinct.  This simple data structure can
   not accurately model graph with diameter bigger than 2.  For example,
   below graph can't be modeled.

      v0----v1----v2----v3

   The data structure can't model some graph with diameter equal to 2
   if the graph contains clique with more than 2 vertices, as below:

      v0----v1---v4
       \    /
        \  /
	 v2

   This function only handles (diameter <= 2) graphs.  Fortunately, we
   can reduce arbitrary graph by simply discarding edges to sub-graph
   which can be modeled.  For example, we can discard edge <v0, v2> in
   above graph.  This approximation results in less optimizations but
   correct code.  */

Bootstrap and test in series on x86_64 and AArch64.  Is it OK?

Thanks,
bin
2017-08-10  Bin Cheng  <bin.cheng@arm.com>

	* tree-vectorizer.h (struct rt_alias_clique): New.
	(free_rt_alias_clique): New declaration.
	(struct _loop_vec_info): New member rt_alias_clique_map.
	(LOOP_VINFO_RT_ALIAS_CLIQUE_MAP): New macro.
	* tree-vect-loop-manip.c (struct edge_callback_data): New.
	(free_rt_alias_clique, alias_graphd_edge_callback): New function.
	(data_ref_with_dep_info_p): New function.
	(record_runtime_alias_for_data_refs): New function.
	(vect_create_cond_for_alias_checks): Call above function.
	(_loop_vec_info::_loop_vec_info): Initialize rt_alias_clique_map.
	(_loop_vec_info::~_loop_vec_info): Release rt_alias_clique_map.
	* tree-vect-stmts.c (set_runtime_alias_dependence_info): New func.
	(vectorizable_store, vectorizable_load): Call above function.

gcc/testsuite
2017-08-10  Bin Cheng  <bin.cheng@arm.com>

	* gcc.dg/vect/vect-rt-alias-info.c: New test.
From 147dc1eb8a835a70411ac8e11b6c9bc63695c431 Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Wed, 9 Aug 2017 11:47:52 +0100
Subject: [PATCH 6/6] vect-rt-alias-dep-info-20170801.txt

---
 gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c |  11 ++
 gcc/tree-vect-loop-manip.c                     | 210 +++++++++++++++++++++++++
 gcc/tree-vect-loop.c                           |   8 +
 gcc/tree-vect-stmts.c                          |  32 ++++
 gcc/tree-vectorizer.h                          |  12 ++
 5 files changed, 273 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c b/gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c
new file mode 100644
index 0000000..bb20e786
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fdump-tree-vect-alias -fdump-tree-loopdone-alias" } */
+
+void foo (int *a, int *c, int *d)
+{
+  for (int i = 0; i < 1024; ++i)
+    a[i] = c[i] + d[i];
+}
+/* { dg-final { scan-tree-dump "clique . base . fixed" "vect" } } */
+/* { dg-final { scan-tree-dump-not "clique . base . fixed" "loopdone" } } */
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index f78e4b4..4518069 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -2091,6 +2091,215 @@ vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
     }
 }
 
+/* Hash map callback function freeing rt_alias_clique.  */
+
+bool
+free_rt_alias_clique (data_reference_p const &,
+		      rt_alias_clique_p const &clique, void *)
+{
+  free (clique);
+  return true;
+}
+
+/* Private data for callback function traversing graph edges.  */
+
+struct edge_callback_data
+{
+  vec<data_reference_p> *datarefs;
+  hash_map<data_reference_p, struct rt_alias_clique *> *clique_map;
+  bool valid_p;
+};
+
+/* Callback function for traversing graph edges.  */
+
+static void
+alias_graphd_edge_callback (struct graph *, struct graph_edge *e, void *data)
+{
+  struct edge_callback_data *edata = (struct edge_callback_data *) data;
+  struct rt_alias_clique **slot1, **slot2;
+
+  if (!edata->valid_p)
+    return;
+
+  slot1 = edata->clique_map->get ((*edata->datarefs)[e->src]);
+  gcc_assert (slot1 != NULL && (*slot1)->id == e->src);
+  if ((*slot1)->clique == 0)
+    {
+      edata->valid_p = false;
+      return;
+    }
+  slot2 = edata->clique_map->get ((*edata->datarefs)[e->dest]);
+  gcc_assert (slot2 != NULL && (*slot2)->id == e->dest);
+  /* Don't check if source and dest have the same dependence_info.base.
+     The overall effect is like we discard this edge.  */
+  if ((*slot1)->clique == 0)
+    edata->valid_p = false;
+}
+
+/* Return TRUE if memory reference of DREF has dependence info set.  */
+
+static inline bool
+data_ref_with_dep_info_p (data_reference_p dref)
+{
+  tree ref = DR_REF (dref);
+
+  return ((TREE_CODE (ref) == MEM_REF || TREE_CODE (ref) == TARGET_MEM_REF)
+	  && MR_DEPENDENCE_CLIQUE (ref) != 0);
+}
+
+/* This function analyzes data dependence given by runtime alias checks.
+   If analysis succeeds, it assigns dependence information to data refs
+   and records it in a hash map.
+
+   Runtime alias checks can be modeled as an arbitrary graph with data
+   references as vertices.  In theory we can record the graph and pass
+   it to later optimizers in order to enable more transformations.  In
+   practice, it's not feasible for high space/time cost in handling
+   arbitrary graph.  GCC uses simple data structure in which two fields
+   (clique/base) are set for each memory reference.  Two references are
+   known to not alias each other if dependence_info.clique are equal and
+   dependence_info.base are distinct.  This simple data structure can
+   not accurately model graph with diameter bigger than 2.  For example,
+   below graph can't be modeled.
+
+      v0----v1----v2----v3
+
+   The data structure can't model some graph with diameter equal to 2
+   if the graph contains clique with more than 2 vertices, as below:
+
+      v0----v1---v4
+       \    /
+        \  /
+	 v2
+
+   This function only handles (diameter <= 2) graphs.  Fortunately, we
+   can reduce arbitrary graph by simply discarding edges to sub-graph
+   which can be modeled.  For example, we can discard edge <v0, v2> in
+   above graph.  This approximation results in less optimizations but
+   correct code.  */
+
+static void
+record_runtime_alias_for_data_refs (loop_vec_info loop_vinfo)
+{
+  vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
+  unsigned short clique = cfun->last_clique;
+  unsigned j, k;
+  rt_alias_clique_p *slot_a, *slot_b;
+  int i, num_sccs, start;
+  auto_vec<data_reference_p> datarefs;
+  hash_map<data_reference_p, rt_alias_clique_p> *clique_map;
+  struct graph *alias_graph;
+  struct edge_callback_data edata;
+  auto_vec<int> vertices_a, vertices_b;
+
+  /* Initialize hash map from data reference to dependence info.  */
+  clique_map = new hash_map<data_reference_p, rt_alias_clique_p> ();
+  for (j = 0, i = 0; j < may_alias_ddrs.length (); j++)
+    {
+      data_reference_p dra = DDR_A (may_alias_ddrs[j]);
+      data_reference_p drb = DDR_B (may_alias_ddrs[j]);
+
+      if (data_ref_with_dep_info_p (dra)
+	  || data_ref_with_dep_info_p (drb))
+	continue;
+
+      if ((clique_map->get (dra)) == NULL)
+	{
+	  rt_alias_clique_p clique_p = XCNEW (struct rt_alias_clique);
+	  clique_p->id = i++;
+	  clique_map->put (dra, clique_p);
+	  datarefs.safe_push (dra);
+	}
+      if ((clique_map->get (drb)) == NULL)
+	{
+	  rt_alias_clique_p clique_p = XCNEW (struct rt_alias_clique);
+	  clique_p->id = i++;
+	  clique_map->put (drb, clique_p);
+	  datarefs.safe_push (drb);
+	}
+    }
+
+  /* Create graph and add dependence edges to it.  */
+  alias_graph = new_graph (datarefs.length ());
+  for (j = 0; j < may_alias_ddrs.length (); j++)
+    {
+      if (data_ref_with_dep_info_p (DDR_A (may_alias_ddrs[j]))
+	  || data_ref_with_dep_info_p (DDR_B (may_alias_ddrs[j])))
+	continue;
+
+      slot_a = clique_map->get (DDR_A (may_alias_ddrs[j]));
+      slot_b = clique_map->get (DDR_B (may_alias_ddrs[j]));
+      add_edge (alias_graph, (*slot_a)->id, (*slot_b)->id);
+      add_edge (alias_graph, (*slot_b)->id, (*slot_a)->id);
+    }
+
+  /* traverse each SCC of graph in two steps and assign dependence info.  */
+  num_sccs = graphds_scc (alias_graph, NULL);
+  for (i = 0; i < num_sccs; ++i)
+    {
+      clique++;
+      /* Find start vertex of SCC.  */
+      for (j = 0; j < datarefs.length (); ++j)
+	if (alias_graph->vertices[j].component == i)
+	  break;
+
+      gcc_assert (j < datarefs.length ());
+      start = (int) j;
+
+      /* Assign dependence info for start vertex of SCC.  */
+      slot_a = clique_map->get (datarefs[start]);
+      (*slot_a)->clique = clique;
+      (*slot_a)->base = 0;
+
+      /* Get all adjacent vertices of start vertex.  */
+      vertices_a.truncate (0);
+      adjacent_vertices (alias_graph, start, &vertices_a);
+      /* Assign dependence info for adjacent vertices of start vertex.  */
+      for (j = 0; j < vertices_a.length (); ++j)
+	{
+	  slot_a = clique_map->get (datarefs[vertices_a[j]]);
+	  (*slot_a)->clique = clique;
+	  (*slot_a)->base = 1;
+	}
+      /* Assign dependence info for adjacent vertices of ajacent vertices
+	 of start vertex.  */
+      for (j = 0; j < vertices_a.length (); ++j)
+	{
+	  vertices_b.truncate (0);
+	  adjacent_vertices (alias_graph, vertices_a[j], &vertices_b);
+	  for (k = 0; k < vertices_b.length (); ++k)
+	    {
+	      slot_b = clique_map->get (datarefs[vertices_b[k]]);
+	      if ((*slot_b)->clique == 0)
+		{
+		  (*slot_b)->clique = clique;
+		  (*slot_b)->base = 0;
+		}
+	    }
+	}
+    }
+
+  edata.datarefs = &datarefs;
+  edata.clique_map = clique_map;
+  edata.valid_p = true;
+  /* Traverse graph edges and check if all vertices have been assigned
+     dependence info.  */
+  for_each_edge (alias_graph, alias_graphd_edge_callback, &edata);
+  free_graph (alias_graph);
+
+  /* We only handle graph whose diameter is at most 2, otherwise the
+     dependence graph can't be represented by the clique model accurately.  */
+  if (edata.valid_p)
+    {
+      cfun->last_clique = clique;
+      LOOP_VINFO_RT_ALIAS_CLIQUE_MAP (loop_vinfo) = clique_map;
+      return;
+    }
+
+  clique_map->traverse<void *, free_rt_alias_clique> (NULL);
+  delete clique_map;
+}
+
 /* Function vect_create_cond_for_alias_checks.
 
    Create a conditional expression that represents the run-time checks for
@@ -2122,6 +2331,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
 
   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
 			       &comp_alias_ddrs, cond_expr);
+  record_runtime_alias_for_data_refs (loop_vinfo);
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
 		     "created %u versioning for alias checks.\n",
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 8b2a61e..99b0618 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1114,6 +1114,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in)
     unaligned_dr (NULL),
     peeling_for_alignment (0),
     ptr_mask (0),
+    rt_alias_clique_map (NULL),
     slp_unrolling_factor (1),
     single_scalar_iteration_cost (0),
     vectorizable (false),
@@ -1223,6 +1224,13 @@ _loop_vec_info::~_loop_vec_info ()
 
   free (bbs);
 
+  if (rt_alias_clique_map != NULL)
+    {
+      rt_alias_clique_map->traverse<void *, free_rt_alias_clique> (NULL);
+      delete rt_alias_clique_map;
+      rt_alias_clique_map = NULL;
+    }
+
   loop->aux = NULL;
 }
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ee32c56..9f170d9 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5601,6 +5601,30 @@ get_group_alias_ptr_type (gimple *first_stmt)
   return reference_alias_ptr_type (DR_REF (first_dr));
 }
 
+/* Given data reference DR and its vectorized form REF, this function
+   sets runtime alias/non-dependent info of DR to REF.  */
+
+static void
+set_runtime_alias_dependence_info (loop_vec_info loop_vinfo,
+				   data_reference_p dr, tree ref)
+{
+  rt_alias_clique_p *slot;
+  hash_map<data_reference_p, rt_alias_clique_p> *clique_map;
+
+  if ((clique_map = LOOP_VINFO_RT_ALIAS_CLIQUE_MAP (loop_vinfo)) == NULL)
+    return;
+
+  slot = clique_map->get (dr);
+  if (slot == NULL || (*slot)->clique == 0)
+    return;
+
+  MR_DEPENDENCE_CLIQUE (ref) = (*slot)->clique;
+  MR_DEPENDENCE_BASE (ref) = (*slot)->base;
+  /* Non-dependent info from runtime alias check is only valid for fixed
+     length access.  It should be reset whenever the memory reference is
+     copied.  */
+  MR_DEPENDENCE_FIXED_LENGTH_P (ref) = true;
+}
 
 /* Function vectorizable_store.
 
@@ -6423,6 +6447,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
 					misalign);
 
+	      if (loop_vinfo)
+		set_runtime_alias_dependence_info (loop_vinfo, dr, data_ref);
+
 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
 		{
 		  tree perm_mask = perm_mask_for_reverse (vectype);
@@ -7515,6 +7542,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
 					      align, misalign);
+
+		    if (loop_vinfo)
+		      set_runtime_alias_dependence_info (loop_vinfo,
+							 dr, data_ref);
+
 		    break;
 		  }
 		case dr_explicit_realign:
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4ee3c3f..2c90dd3 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -211,6 +211,15 @@ is_a_helper <_bb_vec_info *>::test (vec_info *i)
 }
 
 
+typedef struct rt_alias_clique
+{
+  int id;
+  unsigned short clique, base;
+}*rt_alias_clique_p;
+
+extern bool free_rt_alias_clique (data_reference_p const &,
+				  rt_alias_clique_p const &, void *);
+
 /*-----------------------------------------------------------------*/
 /* Info on vectorized loops.                                       */
 /*-----------------------------------------------------------------*/
@@ -285,6 +294,8 @@ typedef struct _loop_vec_info : public vec_info {
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
+  hash_map<data_reference_p, rt_alias_clique_p> *rt_alias_clique_map;
+
   /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
      applied to the loop, i.e., no unrolling is needed, this is 1.  */
   unsigned slp_unrolling_factor;
@@ -379,6 +390,7 @@ typedef struct _loop_vec_info : public vec_info {
 #define LOOP_VINFO_SCALAR_LOOP(L)	   (L)->scalar_loop
 #define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
+#define LOOP_VINFO_RT_ALIAS_CLIQUE_MAP(L)  (L)->rt_alias_clique_map
 #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
 
-- 
1.9.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]