[PATCH 26/49] analyzer: new files: digraph.{cc|h} and shortest-paths.h

Jeff Law law@redhat.com
Sat Dec 7 14:58:00 GMT 2019


On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote:
> This patch adds template classes for directed graphs, their nodes
> and edges, and for finding the shortest path through such a graph.
> 
> gcc/ChangeLog:
> 	* analyzer/digraph.cc: New file.
> 	* analyzer/digraph.h: New file.
> 	* analyzer/shortest-paths.h: New file.
Nothing too worrisome here.  I'm kindof surprised if haven't needed
some of these capabilities before now.  Thoughts on moving them outside
of analyzer into a more generic location?

jeff
> ---
>  gcc/analyzer/digraph.cc       | 189 ++++++++++++++++++++++++++++++++
>  gcc/analyzer/digraph.h        | 248
> ++++++++++++++++++++++++++++++++++++++++++
>  gcc/analyzer/shortest-paths.h | 147 +++++++++++++++++++++++++
>  3 files changed, 584 insertions(+)
>  create mode 100644 gcc/analyzer/digraph.cc
>  create mode 100644 gcc/analyzer/digraph.h
>  create mode 100644 gcc/analyzer/shortest-paths.h
> 
> diff --git a/gcc/analyzer/digraph.cc b/gcc/analyzer/digraph.cc
> new file mode 100644
> index 0000000..c1fa46e
> --- /dev/null
> +++ b/gcc/analyzer/digraph.cc
> @@ -0,0 +1,189 @@
> +/* Template classes for directed graphs.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +   Contributed by David Malcolm <dmalcolm@redhat.com>.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it
> +under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful, but
> +WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>;.  */
> +
> +#include "config.h"
> +#include "gcc-plugin.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "diagnostic.h"
> +#include "analyzer/graphviz.h"
> +#include "analyzer/digraph.h"
> +#include "analyzer/shortest-paths.h"
> +#include "selftest.h"
> +
> +#if CHECKING_P
> +
> +namespace selftest {
> +
> +/* A family of digraph classes for writing selftests.  */
> +
> +struct test_node;
> +struct test_edge;
> +struct test_graph;
> +struct test_dump_args_t {};
> +struct test_cluster;
> +
> +struct test_graph_traits
> +{
> +  typedef test_node node_t;
> +  typedef test_edge edge_t;
> +  typedef test_graph graph_t;
> +  typedef test_dump_args_t dump_args_t;
> +  typedef test_cluster cluster_t;
> +};
> +
> +struct test_node : public dnode<test_graph_traits>
> +{
> +  test_node (const char *name, int index) : m_name (name), m_index
> (index) {}
> +  void dump_dot (graphviz_out *, const dump_args_t &) const OVERRIDE
> +  {
> +  }
> +
> +  const char *m_name;
> +  int m_index;
> +};
> +
> +struct test_edge : public dedge<test_graph_traits>
> +{
> +  test_edge (node_t *src, node_t *dest)
> +  : dedge (src, dest)
> +  {}
> +
> +  void dump_dot (graphviz_out *gv, const dump_args_t &) const
> OVERRIDE
> +  {
> +    gv->println ("%s -> %s;", m_src->m_name, m_dest->m_name);
> +  }
> +};
> +
> +struct test_graph : public digraph<test_graph_traits>
> +{
> +  test_node *add_test_node (const char *name)
> +  {
> +    test_node *result = new test_node (name, m_nodes.length ());
> +    add_node (result);
> +    return result;
> +  }
> +
> +  test_edge *add_test_edge (test_node *src, test_node *dst)
> +  {
> +    test_edge *result = new test_edge (src, dst);
> +    add_edge (result);
> +    return result;
> +  }
> +};
> +
> +struct test_cluster : public cluster<test_graph_traits>
> +{
> +};
> +
> +struct test_path
> +{
> +  auto_vec<const test_edge *> m_edges;
> +};
> +
> +/* Smoketest of digraph dumping.  */
> +
> +static void
> +test_dump_to_dot ()
> +{
> +  test_graph g;
> +  test_node *a = g.add_test_node ("a");
> +  test_node *b = g.add_test_node ("b");
> +  g.add_test_edge (a, b);
> +
> +  pretty_printer pp;
> +  pp.buffer->stream = NULL;
> +  test_dump_args_t dump_args;
> +  g.dump_dot_to_pp (&pp, NULL, dump_args);
> +
> +  ASSERT_STR_CONTAINS (pp_formatted_text (&pp),
> +		       "a -> b;\n");
> +}
> +
> +/* Test shortest paths from A in this digraph,
> +   where edges run top-to-bottom if not otherwise labeled:
> +
> +      A
> +     / \
> +    B   C-->D
> +    |   |
> +    E   |
> +     \ /
> +      F.  */
> +
> +static void
> +test_shortest_paths ()
> +{
> +  test_graph g;
> +  test_node *a = g.add_test_node ("a");
> +  test_node *b = g.add_test_node ("b");
> +  test_node *c = g.add_test_node ("d");
> +  test_node *d = g.add_test_node ("d");
> +  test_node *e = g.add_test_node ("e");
> +  test_node *f = g.add_test_node ("f");
> +
> +  test_edge *ab = g.add_test_edge (a, b);
> +  test_edge *ac = g.add_test_edge (a, c);
> +  test_edge *cd = g.add_test_edge (c, d);
> +  test_edge *be = g.add_test_edge (b, e);
> +  g.add_test_edge (e, f);
> +  test_edge *cf = g.add_test_edge (c, f);
> +
> +  shortest_paths<test_graph_traits, test_path> sp (g, a);
> +
> +  test_path path_to_a = sp.get_shortest_path (a);
> +  ASSERT_EQ (path_to_a.m_edges.length (), 0);
> +
> +  test_path path_to_b = sp.get_shortest_path (b);
> +  ASSERT_EQ (path_to_b.m_edges.length (), 1);
> +  ASSERT_EQ (path_to_b.m_edges[0], ab);
> +
> +  test_path path_to_c = sp.get_shortest_path (c);
> +  ASSERT_EQ (path_to_c.m_edges.length (), 1);
> +  ASSERT_EQ (path_to_c.m_edges[0], ac);
> +
> +  test_path path_to_d = sp.get_shortest_path (d);
> +  ASSERT_EQ (path_to_d.m_edges.length (), 2);
> +  ASSERT_EQ (path_to_d.m_edges[0], ac);
> +  ASSERT_EQ (path_to_d.m_edges[1], cd);
> +
> +  test_path path_to_e = sp.get_shortest_path (e);
> +  ASSERT_EQ (path_to_e.m_edges.length (), 2);
> +  ASSERT_EQ (path_to_e.m_edges[0], ab);
> +  ASSERT_EQ (path_to_e.m_edges[1], be);
> +
> +  test_path path_to_f = sp.get_shortest_path (f);
> +  ASSERT_EQ (path_to_f.m_edges.length (), 2);
> +  ASSERT_EQ (path_to_f.m_edges[0], ac);
> +  ASSERT_EQ (path_to_f.m_edges[1], cf);
> +}
> +
> +/* Run all of the selftests within this file.  */
> +
> +void
> +analyzer_digraph_cc_tests ()
> +{
> +  test_dump_to_dot ();
> +  test_shortest_paths ();
> +}
> +
> +} // namespace selftest
> +
> +#endif /* #if CHECKING_P */
> diff --git a/gcc/analyzer/digraph.h b/gcc/analyzer/digraph.h
> new file mode 100644
> index 0000000..5c6a496
> --- /dev/null
> +++ b/gcc/analyzer/digraph.h
> @@ -0,0 +1,248 @@
> +/* Template classes for directed graphs.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +   Contributed by David Malcolm <dmalcolm@redhat.com>.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it
> +under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful, but
> +WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>;.  */
> +
> +#ifndef GCC_ANALYZER_DIGRAPH_H
> +#define GCC_ANALYZER_DIGRAPH_H
> +
> +#include "diagnostic.h"
> +#include "tree-diagnostic.h" /* for default_tree_printer.  */
> +#include "analyzer/graphviz.h"
> +
> +/* Templates for a family of classes: digraph, node, edge, and
> cluster.
> +   This assumes a traits type with the following typedefs:
> +   node_t: the node class
> +   edge_t: the edge class
> +   dump_args_t: additional args for dot-dumps
> +   cluster_t: the cluster class (for use when generating .dot
> files).
> +
> +   Using a template allows for typesafe nodes and edges: a node's
> +   predecessor and successor edges can be of a node-specific edge
> +   subclass, without needing casting.  */
> +
> +/* Abstract base class for a node in a directed graph.  */
> +
> +template <typename GraphTraits>
> +class dnode
> +{
> + public:
> +  typedef typename GraphTraits::edge_t edge_t;
> +  typedef typename GraphTraits::dump_args_t dump_args_t;
> +
> +  virtual ~dnode () {}
> +  virtual void dump_dot (graphviz_out *gv, const dump_args_t &args)
> const = 0;
> +
> +  auto_vec<edge_t *> m_preds;
> +  auto_vec<edge_t *> m_succs;
> +};
> +
> +/* Abstract base class for an edge in a directed graph.  */
> +
> +template <typename GraphTraits>
> +class dedge
> +{
> + public:
> +  typedef typename GraphTraits::node_t node_t;
> +  typedef typename GraphTraits::dump_args_t dump_args_t;
> +
> +  dedge (node_t *src, node_t *dest)
> +  : m_src (src), m_dest (dest) {}
> +
> +  virtual ~dedge () {}
> +
> +  virtual void dump_dot (graphviz_out *gv, const dump_args_t &args)
> const = 0;
> +
> +  node_t *const m_src;
> +  node_t *const m_dest;
> +};
> +
> +/* Abstract base class for a directed graph.
> +   This class maintains the vectors of nodes and edges,
> +   and owns the nodes and edges.  */
> +
> +template <typename GraphTraits>
> +class digraph
> +{
> + public:
> +  typedef typename GraphTraits::node_t node_t;
> +  typedef typename GraphTraits::edge_t edge_t;
> +  typedef typename GraphTraits::dump_args_t dump_args_t;
> +  typedef typename GraphTraits::cluster_t cluster_t;
> +
> +  digraph () {}
> +  virtual ~digraph () {}
> +
> +  void dump_dot_to_pp (pretty_printer *pp,
> +		       cluster_t *root_cluster,
> +		       const dump_args_t &args) const;
> +  void dump_dot_to_file (FILE *fp,
> +			 cluster_t *root_cluster,
> +			 const dump_args_t &args) const;
> +  void dump_dot (const char *path,
> +		 cluster_t *root_cluster,
> +		 const dump_args_t &args) const;
> +
> +  void add_node (node_t *node);
> +  void add_edge (edge_t *edge);
> +
> +  auto_delete_vec<node_t> m_nodes;
> +  auto_delete_vec<edge_t> m_edges;
> +};
> +
> +/* Abstract base class for splitting dnodes into hierarchical
> clusters
> +   in the generated .dot file.
> +
> +   See "Subgraphs and Clusters" within
> +     https://www.graphviz.org/doc/info/lang.html
> +   and e.g.
> +     https://graphviz.gitlab.io/_pages/Gallery/directed/cluster.html
> +
> +   If a root_cluster is passed to dump_dot*, then all nodes will be
> +   added to it at the start of dumping, via calls to add_node.
> +
> +   The root cluster can organize the nodes into a hierarchy of
> +   child clusters.
> +
> +   After all nodes are added to the root cluster, dump_dot will then
> +   be called on it (and not on the nodes themselves).  */
> +
> +template <typename GraphTraits>
> +class cluster
> +{
> + public:
> +  typedef typename GraphTraits::node_t node_t;
> +  typedef typename GraphTraits::dump_args_t dump_args_t;
> +
> +  virtual ~cluster () {}
> +
> +  virtual void add_node (node_t *node) = 0;
> +
> +  /* Recursively dump the cluster, all nodes, and child
> clusters.  */
> +  virtual void dump_dot (graphviz_out *gv, const dump_args_t &)
> const = 0;
> +};
> +
> +////////////////////////////////////////////////////////////////////
> ////////
> +
> +/* Write .dot information for this graph to PP, passing ARGS to the
> nodes
> +   and edges.
> +   If ROOT_CLUSTER is non-NULL, use it to organize the nodes into
> clusters.  */
> +
> +template <typename GraphTraits>
> +inline void
> +digraph<GraphTraits>::dump_dot_to_pp (pretty_printer *pp,
> +				      cluster_t *root_cluster,
> +				      const dump_args_t &args) const
> +{
> +  graphviz_out gv (pp);
> +
> +  pp_string (pp, "digraph \"");
> +  pp_string (pp, "base");
> +  pp_string (pp, "\" {\n");
> +
> +  gv.indent ();
> +
> +  pp_string (pp, "overlap=false;\n");
> +  pp_string (pp, "compound=true;\n");
> +
> +  /* If using clustering, emit all nodes via clusters.  */
> +  if (root_cluster)
> +    {
> +      int i;
> +      node_t *n;
> +      FOR_EACH_VEC_ELT (m_nodes, i, n)
> +	root_cluster->add_node (n);
> +      root_cluster->dump_dot (&gv, args);
> +    }
> +  else
> +    {
> +      /* Otherwise, display all nodes at top level.  */
> +      int i;
> +      node_t *n;
> +      FOR_EACH_VEC_ELT (m_nodes, i, n)
> +	n->dump_dot (&gv, args);
> +    }
> +
> +  /* Edges.  */
> +  int i;
> +  edge_t *e;
> +  FOR_EACH_VEC_ELT (m_edges, i, e)
> +    e->dump_dot (&gv, args);
> +
> +  /* Terminate "digraph" */
> +  gv.outdent ();
> +  pp_string (pp, "}");
> +  pp_newline (pp);
> +}
> +
> +/* Write .dot information for this graph to FP, passing ARGS to the
> nodes
> +   and edges.
> +   If ROOT_CLUSTER is non-NULL, use it to organize the nodes into
> clusters.  */
> +
> +template <typename GraphTraits>
> +inline void
> +digraph<GraphTraits>::dump_dot_to_file (FILE *fp,
> +					cluster_t *root_cluster,
> +					const dump_args_t &args) const
> +{
> +  pretty_printer pp;
> +  // TODO:
> +  pp_format_decoder (&pp) = default_tree_printer;
> +  pp.buffer->stream = fp;
> +  dump_dot_to_pp (&pp, root_cluster, args);
> +  pp_flush (&pp);
> +}
> +
> +/* Write .dot information for this graph to a file at PATH, passing
> ARGS
> +   to the nodes and edges.
> +   If ROOT_CLUSTER is non-NULL, use it to organize the nodes into
> clusters.  */
> +
> +template <typename GraphTraits>
> +inline void
> +digraph<GraphTraits>::dump_dot (const char *path,
> +				cluster_t *root_cluster,
> +				const dump_args_t &args) const
> +{
> +  FILE *fp = fopen (path, "w");
> +  dump_dot_to_file (fp, root_cluster, args);
> +  fclose (fp);
> +}
> +
> +/* Add NODE to this DIGRAPH, taking ownership.  */
> +
> +template <typename GraphTraits>
> +inline void
> +digraph<GraphTraits>::add_node (node_t *node)
> +{
> +  m_nodes.safe_push (node);
> +}
> +
> +/* Add EDGE to this digraph, and to the preds/succs of its
> endpoints.
> +   Take ownership of EDGE.  */
> +
> +template <typename GraphTraits>
> +inline void
> +digraph<GraphTraits>::add_edge (edge_t *edge)
> +{
> +  m_edges.safe_push (edge);
> +  edge->m_dest->m_preds.safe_push (edge);
> +  edge->m_src->m_succs.safe_push (edge);
> +
> +}
> +
> +#endif /* GCC_ANALYZER_DIGRAPH_H */
> diff --git a/gcc/analyzer/shortest-paths.h b/gcc/analyzer/shortest-
> paths.h
> new file mode 100644
> index 0000000..4738f18
> --- /dev/null
> +++ b/gcc/analyzer/shortest-paths.h
> @@ -0,0 +1,147 @@
> +/* Template class for Dijkstra's algorithm on directed graphs.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +   Contributed by David Malcolm <dmalcolm@redhat.com>.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it
> +under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful, but
> +WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>;.  */
> +
> +#ifndef GCC_ANALYZER_SHORTEST_PATHS_H
> +#define GCC_ANALYZER_SHORTEST_PATHS_H
> +
> +#include "timevar.h"
> +
> +/* A record of the shortest path to each node in an graph
> +   from the origin node.
> +   The constructor runs Dijkstra's algorithm, and the results are
> +   stored in this class.  */
> +
> +template <typename GraphTraits, typename Path_t>
> +class shortest_paths
> +{
> +public:
> +  typedef typename GraphTraits::graph_t graph_t;
> +  typedef typename GraphTraits::node_t node_t;
> +  typedef typename GraphTraits::edge_t edge_t;
> +  typedef Path_t path_t;
> +
> +  shortest_paths (const graph_t &graph, const node_t *origin);
> +
> +  path_t get_shortest_path (const node_t *to) const;
> +
> +private:
> +  const graph_t &m_graph;
> +
> +  /* For each node (by index), the minimal distance to that node
> from the
> +     origin.  */
> +  auto_vec<int> m_dist;
> +
> +  /* For each exploded_node (by index), the previous edge in the
> shortest
> +     path from the origin.  */
> +  auto_vec<const edge_t *> m_prev;
> +};
> +
> +////////////////////////////////////////////////////////////////////
> ////////
> +
> +/* shortest_paths's constructor.
> +
> +   Use Dijkstra's algorithm relative to ORIGIN to populate m_dist
> and
> +   m_prev with enough information to be able to generate Path_t
> instances
> +   to give the shortest path to any node in GRAPH from ORIGIN.  */
> +
> +template <typename GraphTraits, typename Path_t>
> +inline
> +shortest_paths<GraphTraits, Path_t>::shortest_paths (const graph_t
> &graph,
> +						     const node_t
> *origin)
> +: m_graph (graph),
> +  m_dist (graph.m_nodes.length ()),
> +  m_prev (graph.m_nodes.length ())
> +{
> +  auto_client_timevar tv ("shortest_paths");
> +
> +  auto_vec<int> queue (graph.m_nodes.length ());
> +
> +  for (unsigned i = 0; i < graph.m_nodes.length (); i++)
> +    {
> +      m_dist.quick_push (INT_MAX);
> +      m_prev.quick_push (NULL);
> +      queue.quick_push (i);
> +    }
> +  m_dist[origin->m_index] = 0;
> +
> +  while (queue.length () > 0)
> +    {
> +      /* Get minimal distance in queue.
> +	 FIXME: this is O(N^2); replace with a priority queue.  */
> +      int idx_with_min_dist = -1;
> +      int idx_in_queue_with_min_dist = -1;
> +      int min_dist = INT_MAX;
> +      for (unsigned i = 0; i < queue.length (); i++)
> +	{
> +	  int idx = queue[i];
> +	  if (m_dist[queue[i]] < min_dist)
> +	    {
> +	      min_dist = m_dist[idx];
> +	      idx_with_min_dist = idx;
> +	      idx_in_queue_with_min_dist = i;
> +	    }
> +	}
> +      gcc_assert (idx_with_min_dist != -1);
> +      gcc_assert (idx_in_queue_with_min_dist != -1);
> +
> +      // FIXME: this is confusing: there are two indices here
> +
> +      queue.unordered_remove (idx_in_queue_with_min_dist);
> +
> +      node_t *n
> +	= static_cast <node_t *> (m_graph.m_nodes[idx_with_min_dist]);
> +
> +      int i;
> +      edge_t *succ;
> +      FOR_EACH_VEC_ELT (n->m_succs, i, succ)
> +	{
> +	  // TODO: only for dest still in queue
> +	  node_t *dest = succ->m_dest;
> +	  int alt = m_dist[n->m_index] + 1;
> +	  if (alt < m_dist[dest->m_index])
> +	    {
> +	      m_dist[dest->m_index] = alt;
> +	      m_prev[dest->m_index] = succ;
> +	    }
> +	}
> +   }
> +}
> +
> +/* Generate an Path_t instance giving the shortest path to the node
> +   TO from the origin node.  */
> +
> +template <typename GraphTraits, typename Path_t>
> +inline Path_t
> +shortest_paths<GraphTraits, Path_t>::get_shortest_path (const node_t
> *to) const
> +{
> +  Path_t result;
> +
> +  while (m_prev[to->m_index])
> +    {
> +      result.m_edges.safe_push (m_prev[to->m_index]);
> +      to = m_prev[to->m_index]->m_src;
> +    }
> +
> +  result.m_edges.reverse ();
> +
> +  return result;
> +}
> +
> +#endif /* GCC_ANALYZER_SHORTEST_PATHS_H */



More information about the Gcc-patches mailing list