This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[tree-ssa, RFC] new, very experimental ssa-vn


Hi,

Here's a first shot at SCC-based SSA GVN.  It still needs a lot of work,
and cleanups to the "older" parts, but it's (too) slowly getting into
something usable :-).

Much of this code is already several months old, but I just never took the
time to finish it, because the existing GVN that is part of the dominator
optimizations just rules and this new pass doesn't find many opportunities
for further redundancy elimination.  The kind of redundancies we could find
with this pass are those involving back edges (IVs and such) and user code
hardly ever has these.  Some loop optimizations can introduce them, so it
may be useful later on the lno branch.

Over new-year I've been working on dominator based removal.  I started
with AVAIL-based removal, but dominator based is the better choice since
it should catch exactly the same redundancies as AVAIL-based removal,
while we're still in SSA form (so that values are never killed).  And
it's much easier to implement!

An example you can find it in any description of this algorithm, the
following code,

int
foo (void)
{
  int i, j, u;
  i = 1;   j = 1;   u = 0;
  while (i < 10000)
    {
      u = i - j;
      i = i + 1;
      j = j + 1;
    }
  return u;
}


is optimized to,


foo ()
{
  int i, j;
  j = 1;
  goto <L1>;
<L0>:
  i = j + 1;
  j = i;
<L1>:
  if (j <= 9999) goto <L0>; else goto <L2>;
<L2>:
  return 0;
}



I still have a bug that I don't quite understand yet.  Consider:

foo (void)
{
  int i, j;

  for (i = 0; i < 10; i++)
    for (j = 0; j < 10; j++)
      { }
}

We would find that i and j are congruent, but clearly they are independent.
I am not sure if this is a bug in my implementation, or just a mess-up in
the removal algorithm.  Morgan gets it all wrong, and Simpson's thesis
doesn't help much either.  Looking at my dumps, I seem to be doing the
Right Thing, but it gives a useless result.  Worked around by requiring
that PHIs can only be congruent if they are in the same BB (by hashing
bb_for_stmt), I think it does not pessimize the algorithm (at least, not
too much ;-), all that happens is that we don't find congruences that would
not give opportunities for redundancy elimination anyway.


Still one question: What would be an appropriate interface for the value
numbers?  I would prefer to split the redundancy removal out, and I suppose
other passes (PRE?) might benefit a bit from having value numbers available
as well.  I suppose I should export both the valid hash table and the value
representatives.  Ideas, anyone?


Sébastian, is it OK if I put this on the lno branch as an experimental pass
(i.e. disabled by default) once I have it bootstrapping, hopefully soon?

Gr.
Steven
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.903.2.159
diff -c -3 -p -r1.903.2.159 Makefile.in
*** Makefile.in	28 Dec 2003 00:56:51 -0000	1.903.2.159
--- Makefile.in	28 Dec 2003 21:20:37 -0000
*************** OBJS-common = \
*** 877,883 ****
   tree-alias-common.o tree-ssa-ccp.o tree-browser.o @ANDER@ tree-ssa-dce.o  \
   tree-ssa-pre.o tree-ssa-live.o tree-must-alias.o tree-ssa-operands.o	   \
   tree-ssa-dom.o domwalk.o tree-tailcall.o gimple-low.o tree-iterator.o	   \
!  tree-phinodes.o tree-ssanames.o tree-sra.o tree-ssa-loop.o		   \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
   cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
--- 877,883 ----
   tree-alias-common.o tree-ssa-ccp.o tree-browser.o @ANDER@ tree-ssa-dce.o  \
   tree-ssa-pre.o tree-ssa-live.o tree-must-alias.o tree-ssa-operands.o	   \
   tree-ssa-dom.o domwalk.o tree-tailcall.o gimple-low.o tree-iterator.o	   \
!  tree-phinodes.o tree-ssanames.o tree-sra.o tree-ssa-loop.o tree-ssa-gvn.o \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
   cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
*************** tree-ssa-pre.o : tree-ssa-pre.c $(TREE_F
*** 1572,1577 ****
--- 1572,1581 ----
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) \
     $(GGC_H) output.h diagnostic.h errors.h toplev.h $(TIMEVAR_H) \
     $(TM_H) coretypes.h $(TREE_DUMP_H)
+ tree-ssa-gvn.o : tree-ssa-gvn.c $(TREE_FLOW_H) $(CONFIG_H) \
+    $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) \
+    $(GGC_H) output.h diagnostic.h errors.h toplev.h $(TIMEVAR_H) \
+    $(TM_H) coretypes.h $(TREE_DUMP_H)
  tree-cfg.o : tree-cfg.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
     $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) flags.h output.h \
     diagnostic.h errors.h function.h $(TIMEVAR_H) $(TM_H) coretypes.h \
*************** GTFILES = $(srcdir)/input.h $(srcdir)/co
*** 2216,2222 ****
    $(srcdir)/tree-alias-type.h $(srcdir)/tree-alias-common.h \
    $(srcdir)/tree-alias-type.c $(srcdir)/tree-alias-common.c \
    $(srcdir)/tree-ssa-operands.h $(srcdir)/tree-ssa-operands.c \
!   $(srcdir)/tree-simple.c \
    @all_gtfiles@
  
  GTFILES_FILES_LANGS = @all_gtfiles_files_langs@
--- 2220,2226 ----
    $(srcdir)/tree-alias-type.h $(srcdir)/tree-alias-common.h \
    $(srcdir)/tree-alias-type.c $(srcdir)/tree-alias-common.c \
    $(srcdir)/tree-ssa-operands.h $(srcdir)/tree-ssa-operands.c \
!   $(srcdir)/tree-ssa-gvn.c $(srcdir)/tree-simple.c \
    @all_gtfiles@
  
  GTFILES_FILES_LANGS = @all_gtfiles_files_langs@
*************** gt-stringpool.h gt-targhooks.h gt-langho
*** 2236,2242 ****
  gt-tree-alias-common.h gt-tree-mudflap.h gt-dependence.h \
  gt-tree-ssa.h gt-tree-dfa.h gt-tree-ssa-ccp.h gt-tree-eh.h \
  gt-tree-ssa-pre.h gt-tree-ssanames.h gt-tree-iterator.h \
! gt-tree-phinodes.h gt-tree-cfg.h : s-gtype ; @true
  
  gtyp-gen.h: Makefile
  	echo "/* This file is machine generated.  Do not edit.  */" > tmp-gtyp.h
--- 2240,2246 ----
  gt-tree-alias-common.h gt-tree-mudflap.h gt-dependence.h \
  gt-tree-ssa.h gt-tree-dfa.h gt-tree-ssa-ccp.h gt-tree-eh.h \
  gt-tree-ssa-pre.h gt-tree-ssanames.h gt-tree-iterator.h \
! gt-tree-ssa-gvn.h gt-tree-phinodes.h gt-tree-cfg.h : s-gtype ; @true
  
  gtyp-gen.h: Makefile
  	echo "/* This file is machine generated.  Do not edit.  */" > tmp-gtyp.h
Index: common.opt
===================================================================
RCS file: /cvs/gcc/gcc/gcc/common.opt,v
retrieving revision 1.14.2.13
diff -c -3 -p -r1.14.2.13 common.opt
*** common.opt	6 Dec 2003 12:31:27 -0000	1.14.2.13
--- common.opt	28 Dec 2003 21:20:37 -0000
*************** ftree-dominator-opts
*** 715,720 ****
--- 715,724 ----
  Common
  Enable dominator optimizations
  
+ ftree-gvn
+ Common
+ Enable SSA global value numbering on trees
+ 
  ftree-loop-optimize
  Common
  Enable loop optimizations on trees
Index: dominance.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/dominance.c,v
retrieving revision 1.10.2.11
diff -c -3 -p -r1.10.2.11 dominance.c
*** dominance.c	19 Dec 2003 07:01:35 -0000	1.10.2.11
--- dominance.c	28 Dec 2003 21:20:38 -0000
*************** first_dom_son (enum cdi_direction dir, b
*** 857,863 ****
  /* Returns the next dominance son after BB in the dominator or postdominator
     tree as determined by DIR, or NULL if it was the last one.  */
  
! extern basic_block
  next_dom_son (enum cdi_direction dir, basic_block bb)
  {
    struct et_node *next = bb->dom[dir]->right;
--- 857,863 ----
  /* Returns the next dominance son after BB in the dominator or postdominator
     tree as determined by DIR, or NULL if it was the last one.  */
  
! basic_block
  next_dom_son (enum cdi_direction dir, basic_block bb)
  {
    struct et_node *next = bb->dom[dir]->right;
Index: flags.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/flags.h,v
retrieving revision 1.86.2.41
diff -c -3 -p -r1.86.2.41 flags.h
*** flags.h	6 Dec 2003 12:31:27 -0000	1.86.2.41
--- flags.h	28 Dec 2003 21:20:38 -0000
*************** extern int flag_mudflap;
*** 716,721 ****
--- 716,724 ----
  /* Disable SSA optimizations on trees.  */
  extern int flag_disable_tree_ssa;
  
+ /* Enable SSA-GVN on trees.  */
+ extern int flag_tree_gvn;
+ 
  /* Enable SSA-PRE on trees.  */
  extern int flag_tree_pre;
  
Index: opts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/opts.c,v
retrieving revision 1.31.2.22
diff -c -3 -p -r1.31.2.22 opts.c
*** opts.c	11 Dec 2003 19:12:23 -0000	1.31.2.22
--- opts.c	28 Dec 2003 21:20:39 -0000
*************** decode_options (unsigned int argc, const
*** 537,542 ****
--- 537,543 ----
        flag_tree_dom = 1;
        flag_tree_loop = 0;
        flag_tree_must_alias = 1;
+       flag_tree_gvn = 0;
        flag_tree_pre = 1;
        flag_tree_ter = 1;
        flag_tree_sra = 1;
*************** common_handle_option (size_t scode, cons
*** 1412,1417 ****
--- 1413,1422 ----
  
      case OPT_ftree_dominator_opts:
        flag_tree_dom = value;
+       break;
+ 
+     case OPT_ftree_gvn:
+       flag_tree_gvn = value;
        break;
  
      case OPT_ftree_must_alias:
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/timevar.def,v
retrieving revision 1.14.2.28
diff -c -3 -p -r1.14.2.28 timevar.def
*** timevar.def	13 Dec 2003 19:39:47 -0000	1.14.2.28
--- timevar.def	28 Dec 2003 21:20:39 -0000
*************** DEFTIMEVAR (TV_TREE_DFA	             , "
*** 73,78 ****
--- 73,79 ----
  DEFTIMEVAR (TV_TREE_SSA_DOMINATOR_OPTS   , "dominator optimization")
  DEFTIMEVAR (TV_TREE_SRA              , "tree SRA")
  DEFTIMEVAR (TV_TREE_CCP		     , "tree CCP")
+ DEFTIMEVAR (TV_TREE_GVN 	     , "tree GVN")
  DEFTIMEVAR (TV_TREE_PRE		     , "tree PRE")
  DEFTIMEVAR (TV_TREE_DCE		     , "tree DCE")
  DEFTIMEVAR (TV_TREE_LOOP	     , "tree loop optimization")
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.654.2.84
diff -c -3 -p -r1.654.2.84 toplev.c
*** toplev.c	19 Dec 2003 01:01:48 -0000	1.654.2.84
--- toplev.c	28 Dec 2003 21:20:41 -0000
*************** int flag_new_regalloc = 0;
*** 964,969 ****
--- 964,972 ----
  /* Disable SSA on trees optimizations.  */
  int flag_disable_tree_ssa = 0;
  
+ /* Enable SSA-GVN on trees.  */
+ int flag_tree_gvn = 0;
+ 
  /* Enable the SSA-PRE tree optimization.  */
  int flag_tree_pre = 0;
  
*************** static const lang_independent_options f_
*** 1185,1190 ****
--- 1188,1194 ----
    { "mudflap", &flag_mudflap, 1 },
    { "mudflapth", &flag_mudflap, 2 },
    { "disable-tree-ssa", &flag_disable_tree_ssa, 1 },
+   { "tree-gvn", &flag_tree_gvn, 1 },
    { "tree-pre", &flag_tree_pre, 1 },
    { "tree-ccp", &flag_tree_ccp, 1 },
    { "tree-dce", &flag_tree_dce, 1 },
Index: tree-dump.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-dump.c,v
retrieving revision 1.6.2.61
diff -c -3 -p -r1.6.2.61 tree-dump.c
*** tree-dump.c	17 Dec 2003 19:53:01 -0000	1.6.2.61
--- tree-dump.c	28 Dec 2003 21:20:42 -0000
*************** static struct dump_file_info dump_files[
*** 672,677 ****
--- 672,678 ----
    {".ssa4", "tree-ssa4", 0, 0},
    {".ccp", "tree-ccp", 0, 0},
    {".ssa5", "tree-ssa5", 0, 0},
+   {".gvn", "tree-gvn", 0, 0},
    {".pre", "tree-pre", 0, 0},
    {".dom2", "tree-dom2", 0, 0},
    {".ssa6", "tree-ssa6", 0, 0},
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-flow.h,v
retrieving revision 1.1.4.177
diff -c -3 -p -r1.1.4.177 tree-flow.h
*** tree-flow.h	26 Dec 2003 22:38:28 -0000	1.1.4.177
--- tree-flow.h	28 Dec 2003 21:20:42 -0000
*************** extern void verify_ssa (void);
*** 501,506 ****
--- 501,509 ----
  extern void delete_tree_ssa (void);
  extern unsigned int highest_ssa_version;
  
+ /* In tree-ssa-gvn.c  */
+ extern void tree_ssa_gvn (tree, enum tree_dump_index);
+ 
  /* In tree-ssa-pre.c  */
  extern void tree_perform_ssapre (tree, enum tree_dump_index);
  
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 1.1.4.98
diff -c -3 -p -r1.1.4.98 tree-optimize.c
*** tree-optimize.c	19 Dec 2003 07:01:36 -0000	1.1.4.98
--- tree-optimize.c	28 Dec 2003 21:20:43 -0000
*************** optimize_function_tree (tree fndecl, tre
*** 201,206 ****
--- 201,217 ----
  #endif
  	}
  
+       /* Run SSA-GVN (Global Value Numbering).  */
+       if (flag_tree_gvn)
+ 	{
+ 	  tree_ssa_gvn (fndecl, TDI_gvn);
+ 	  ggc_collect ();
+ 
+ #ifdef ENABLE_CHECKING
+ 	  verify_ssa ();
+ #endif
+ 	}
+ 	
        /* Run SSA-PRE (Partial Redundancy Elimination).  */
        if (flag_tree_pre)
  	{
Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.342.2.154
diff -c -3 -p -r1.342.2.154 tree.h
*** tree.h	19 Dec 2003 07:01:36 -0000	1.342.2.154
--- tree.h	28 Dec 2003 21:20:45 -0000
*************** enum tree_dump_index
*** 3596,3601 ****
--- 3596,3602 ----
    TDI_ssa_4,
    TDI_ccp,
    TDI_ssa_5,
+   TDI_gvn,
    TDI_pre,
    TDI_dom_2,
    TDI_ssa_6,
/* SSA Global Value Numbering for trees.
   Copyright (C) 2003 Free Software Foundation, Inc.
   Contributed by Steven Bosscher, SUSE Labs.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "flags.h"
#include "rtl.h"
#include "tm_p.h"
#include "ggc.h"
#include "basic-block.h"
#include "output.h"
#include "errors.h"
#include "expr.h"
#include "function.h"
#include "diagnostic.h"
#include "timevar.h"
#include "bitmap.h"
#include "tree-dump.h"
#include "tree-flow.h"
#include "domwalk.h"

/* When DEBUG_SCC_VN is defined to 1, you get very detailed dumps of
   what this pass is trying to do.  */
#define DEBUG_SCC_VN 1
#define DEBUG_REMOVAL 1

extern void tree_eliminate_redundant_expressions (tree, enum tree_dump_index);

/* SCC based global value numbering for trees.

   Value numbering is a method for determining if two expressions
   compute the same value.  The basic idea is that two expressions
   are congruent if they compute the same value.  All congruent
   expressions are assigned the same symbolic value representative.
   Once all expressions have a value representative, redundant
   expressions can be removed and code motion can be performed.

   The algorithm implemented here is based on Simpson's RPO ("Reverse
   Post Order") algorithm applied to the strongly connected components
   in the SSA graph.

   We use Tarjan's SCC-finding algorithm since it conveniently finds
   the SCCs in topological order.  This is exactly how we need them
   for our value numbering algorithm, because it implies that, when
   we try to assign a value representative to an expression, we can
   be sure that all operands of the expression already have one.

   References:

     "Depth first search and linear graph algorithms", R..E. Tarjan,
     SIAM Journal on Computing, 1(2):146-160, June 1972.

     "SCC-Based Value Numbering", Keith Cooper & Taylor Simpson, 1995.
     http://citeseer.nj.nec.com/cooper95sccbased.html

     "Value-Driven Redundancy Elimination", L. Taylor Simpson, 1996.

     "Building an Optimizing Compiler",
     Robert Morgan, Butterworth-Heinemann, 1998, Section 8.10.
     (note that the implementation presented in the book is buggy.)  */

/* Dump file and flags.  */
static FILE *dump_file;
static int dump_flags;

/* Per function statistics for SCC value numbering optimization.  */
struct opt_stats_d
{
  long num_defs;
  long num_ssa_names;
  long num_unique_valnums;
  long num_non_singleton_sccs;
  long max_iterations;
  long total_iterations;
  long num_removed_exprs;
};

static struct opt_stats_d opt_stats;

/* Supporting data structures for the SCC based Value Numbering algorithm.

   The SSA graph does not have to be constructed explicitly, because
   it is available via the operands of each statement, but to search
   the SSA graph we need an additional data structure:

   - we need to hold all SSA names that appear as DEF operands (and
     perhaps at some point, VDEF operands) of a statement.  These
     would be the vertices in an explicit SSA graph.

   - we need the reverse postorder index for the basic block
     containing the statement that defines the SSA name so that we can
     sort SCCs with respect to the CFG.  We also need to number all
     statements in each block.  This is not required for Tarjan's
     algorithm, but it is necessary for the SCC-VN algorithm.

   Additionally, we need two bitmaps.  In one bitmap, we set a bit for
   visited SSA names.  In the second bitmap, we set a bit for SSA
   names if they are in the DFS stack.

   We allocate a vector of highest_ssa_version elements.  This allows
   us to look up nodes by simply using the SSA_NAME_VERSION of each SSA
   name as an index into the vector.  */

/* A node in the SSA graph.  */
typedef struct
{
  /* SSA_NAME tree node this SSA graph node.  */
  tree node;

  /* SSA graph DFS number.  */
  int dfs_num;

  /* Tarjan's LOWLINK in his SCC finding algorithm.  */
  int low_num;

  /* Index in a basic block for the statement defining this node.  */
  int stmt_num;

  /* Reverse postorder number of the basic block containing the
     statement defining this node.  */
  int cfg_rpo_num;
} ssagraph_dfs_info_d;

/* A vector of all SSA names to support the depth first search in the
   SSA graph.  */
static ssagraph_dfs_info_d *ssagraph_dfs_info;

#define NODE_NAME(NODE)		(ssagraph_dfs_info[(NODE)].node)
#define DFS_NUM(NODE)		(ssagraph_dfs_info[(NODE)].dfs_num)
#define LOW(NODE)		(ssagraph_dfs_info[(NODE)].low_num)
#define STMT_NUM(NODE)		(ssagraph_dfs_info[(NODE)].stmt_num)
#define CFG_RPO_NUM(NODE)	(ssagraph_dfs_info[(NODE)].cfg_rpo_num)

/* The depth first search number for the next SSA graph node we'll
   visit.  */
int nextDFSnum;

/* Bits are set for nodes that have already been visited.  */
static bitmap visited;

#define ALREADY_VISITED(NODE)	(bitmap_bit_p (visited, (NODE)))
#define MARK_VISITED(NODE)	(bitmap_set_bit (visited, (NODE)))

/* A stack of SSA graph nodes used by Tarjan's SCC finder.  Once a
   stronly connected component in the graph is found, all nodes that
   are part of this SCC are popped off the stack together.  */
static varray_type dfs_stack;

/* Bits are set for all nodes that are in the current DFS stack.  */
static bitmap in_stack;

#define IN_STACK(NODE)		(bitmap_bit_p (in_stack, (NODE)))

/* Each SCC we find is stored in this varray, and then processed.  For
   singleton SCCs we don't really need a varray, but for multi-node
   SCCs, a varray is more convenient than a linked list or something
   like that.  */
static varray_type scc;

/* Prototypes for the SCC finder.  */
static void push_node (tree);
static tree pop_node (void);
static void scc_finder_init (void);
static void scc_finder_done (void);
static void find_all_ssa_graph_nodes (void);
static void visit_operand (tree, tree);
static void dfs_ssa_graph (tree);
static void tarjan_scc_finder (void);
static void process_scc (varray_type scc);

/* Allocate and set up all data stuctures for Tarjan's SCC finding
   algorithm.  */

static void
scc_finder_init (void)
{
  ssagraph_dfs_info =
    xcalloc (highest_ssa_version, sizeof (ssagraph_dfs_info_d));

  visited = BITMAP_XMALLOC ();
  in_stack = BITMAP_XMALLOC ();

  VARRAY_TREE_INIT (dfs_stack, 20, "stack for DFS on the SSA graph");
  nextDFSnum = 0;

  opt_stats.num_defs = 0;
  opt_stats.num_ssa_names = 0;
  opt_stats.num_non_singleton_sccs = 0;
  find_all_ssa_graph_nodes ();
}

/* Free anything allocated for Tarjan's algorithm.  */

static void
scc_finder_done (void)
{
  free (ssagraph_dfs_info);
  BITMAP_XFREE (visited);
  BITMAP_XFREE (in_stack);
}

/* Find all DEF operands and put them in the vector of SSA graph
   nodes.  While we're at it, assign a reverse postorder number with
   respect to the CFG to each node in the SSA graph.

   We also ensure here that use operands are up-to-date, which is of
   course necessary when we do our DFS on the SSA graph later on.  */

static void
find_all_ssa_graph_nodes (void)
{
  int *cfg_rpo;
  int cfg_rpo_idx;

  /* Compute the reverse postorder numbers of the CFG.  */
  cfg_rpo = xmalloc (last_basic_block * sizeof (int));
  flow_depth_first_order_compute (NULL, cfg_rpo);

  /* Walk all statements and PHI nodes in all basic blocks, and
     find all SSA names.  */
  for (cfg_rpo_idx = 0; cfg_rpo_idx < last_basic_block; cfg_rpo_idx++)
    {
      basic_block bb = BASIC_BLOCK (cfg_rpo[cfg_rpo_idx]);
      block_stmt_iterator bsi;
      tree phi;
      int stmt_num = 0;

      for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
	{
	  tree ssa_name = PHI_RESULT (phi);
	  STMT_NUM (SSA_NAME_VERSION (ssa_name)) = ++stmt_num;

	  /* If this is a virtual PHI node, don't put it in the subgraph
	     we will be assigning value representatives to, because we do
	     not handle virtual operands in this pass.  */
	  if (!is_gimple_reg (SSA_NAME_VAR (ssa_name)))
	    continue;

	  NODE_NAME (SSA_NAME_VERSION (ssa_name)) = ssa_name;
	  CFG_RPO_NUM (SSA_NAME_VERSION (ssa_name)) = cfg_rpo_idx;
	  ++opt_stats.num_defs;
	}

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  size_t i;
	  def_optype defs;
	  tree stmt = bsi_stmt (bsi);
	  get_stmt_operands (stmt);

	  defs = DEF_OPS (stmt_ann (stmt));
	  for (i = 0; i < NUM_DEFS (defs); i++)
	    {
	      tree def = DEF_OP (defs,i );
	      NODE_NAME (SSA_NAME_VERSION (def)) = def;
	      STMT_NUM (SSA_NAME_VERSION (def)) = ++stmt_num;
	      CFG_RPO_NUM (SSA_NAME_VERSION (def)) = cfg_rpo_idx;
	      ++opt_stats.num_defs;
	    }
	}
    }

  free (cfg_rpo);
}

/* Put a node on the SSA graph DFS stack.  */

static void
push_node (tree node)
{
  VARRAY_PUSH_TREE (dfs_stack, node);
  bitmap_set_bit (in_stack, SSA_NAME_VERSION (node));
}

/* Pop a node from the SSA graph DFS stack.  */

static tree
pop_node (void)
{
  tree t = VARRAY_TOP_TREE (dfs_stack);
  VARRAY_POP (dfs_stack);
  bitmap_clear_bit (in_stack, SSA_NAME_VERSION (t));
  return t;
}

/* Visit O_TREE which is an operand of the statement defining NODE_TREE.

   if not o.visited
	DFS(o)
	node.low <-- MIN(node.low, o.low)
   if (o.DFSnum < node.DFSnum and o in dfs_stack
	node.low <-- MIN(o.DFSnum, node.low)

  We really want this to be inlined.  */

static inline void
visit_operand (tree node_tree, tree o_tree)
{
  unsigned int node = SSA_NAME_VERSION (node_tree);
  unsigned int o = SSA_NAME_VERSION (o_tree);

  if (! ALREADY_VISITED (o))
    {
      dfs_ssa_graph (o_tree);
      LOW (node) = MIN (LOW (node), LOW (o));
    }

  if (DFS_NUM (o) < DFS_NUM (node) && IN_STACK (o))
    LOW (node) = MIN (DFS_NUM (o), LOW (node));
}

/* Do a depth first search in the nodes of the SSA graph starting from
   SSA_NAME, and process any strongly connected components we find during
   the search.  */

static void
dfs_ssa_graph (tree ssa_name)
{
  unsigned int node = SSA_NAME_VERSION (ssa_name);

  opt_stats.num_ssa_names++;

  /* node.DFSnum <-- nextDFSnum++
     node.visited <-- TRUE
     node.low <-- node.DFSnum  */
  DFS_NUM (node) = ++nextDFSnum;
  MARK_VISITED (node);
  LOW (node) = DFS_NUM (node);

  /* PUSH(node)  */
  push_node (ssa_name);

  /* For each o in {operands of node} visit_operand (o)  */
  if (TREE_CODE (SSA_NAME_DEF_STMT (ssa_name)) == PHI_NODE)
    {
      tree phi = SSA_NAME_DEF_STMT (ssa_name);
      int i;

      for (i = 0; i < PHI_NUM_ARGS (phi); i++)
	{
	  tree o = PHI_ARG_DEF (phi, i);
	  if (TREE_CODE (o) == SSA_NAME)
	    visit_operand (ssa_name, o);
	}
    }
  else
    {
      size_t i;
      use_optype uses;
      stmt_ann_t ann = stmt_ann (SSA_NAME_DEF_STMT (ssa_name));

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
	{
	  tree var = USE_OP (uses, i);
	  if (TREE_CODE (var) == SSA_NAME)
	    visit_operand (ssa_name, var);
	}
    }

  /* if node.low = node.DFSnum  */
  if (LOW (node) == DFS_NUM (node))
    {
      tree x;

#ifdef ENABLE_CHECKING
      /* SCC <-- {}  */
      if (VARRAY_ACTIVE_SIZE (scc) > 0)
	abort ();
#endif
      do
	{
	  /* x <-- POP()  */
	  x = pop_node ();

	  /* SCC <-- SCC U {x}  */
	  VARRAY_PUSH_TREE (scc, x);
	}
      while (x != ssa_name);

      process_scc (scc);
    }
}

/* Find SCCs in the SSA graph.

   Applying Tarjan's algorithm to the SSA graph differs somewhat from
   applying it to the CFG, since the SSA graph usually doesn't have a
   single entry node.

   Following the MSCP implementation, we choose to start from the
   highest SSA version since it is expected that this will result
   in a "deep" graph, hence more nodes will be reached per depth
   first search.

   Note that the choice for the starting node does not matter for the
   algorithm.  */

static void
tarjan_scc_finder (void)
{
  unsigned int i;

  for (i = highest_ssa_version - 1; i > 0; --i)
    {
      if (NODE_NAME (i) != NULL_TREE && ! ALREADY_VISITED (i))
	dfs_ssa_graph (NODE_NAME (i));
    }
}


/* We maintain two hash tables: a table with "valid" value numbers,
   ie. known to be correct, and a second table with "optimistic" value
   numbers which is used to make optimistic assumptions about
   congruences that may later be disproved.

   Each entry in the hash tables is a pair of trees.  One is the
   expression and the other is its value representative.  */
typedef struct valnum_expr GTY(())
{
  tree valnum;
  tree expr;
} valnum_expr;

static GTY ((param_is (struct valnum_expr))) htab_t valid_table = NULL;
static GTY ((param_is (struct valnum_expr))) htab_t optimistic_table = NULL;

/* An array of trees to store the value representative for each SSA
   node.  Think of the value representative as the partition leader.

   If the value represented by the partition leader is an expression
   that is marked TREE_INVARIANT, this expression is used as the value
   representative for the leader itself.

   Otherwise, the value representative for the leader is either the
   node that represents the leader itself during value numbering, or
   the earliest available node during AVAIL based redundancy
   elimination.  */
static tree *value_representatives;

/* Local functions for the value numbering algorithm.  */
static void valnum_init (void);
static void valnum_done (void);

static tree get_value_representative_partition_leader (tree);
static void set_value_representative (tree, tree, const char *);
static tree get_value_representative (tree);

static valnum_expr *new_valnum_expr (tree, tree);

static int sort_scc_compare_func (const void *, const void *);
static void sort_scc_members (varray_type);

static void valnum (tree, htab_t);
static void valnum_phi_node (tree, htab_t);
static void valnum_single_node (tree, htab_t);

static tree simplify_phi_argument (tree);
static tree simplify_phi_node (tree);
static tree simplify_operand (tree);
static tree simplify_expr (tree);

static hashval_t valnum_hash (const void *);
static int valnum_equal (const void *, const void *);
static void **lookup (tree, htab_t);
static void insert (void **slot, valnum_expr *);

void debug_valnum_scc_stats (void);
static void dump_valnum_scc_stats (FILE *);

/* Allocate and set up data structures for the SCC-VN algorithm.  */

static void
valnum_init (void)
{
  value_representatives = ggc_alloc_cleared (highest_ssa_version * sizeof (tree));

  VARRAY_TREE_INIT (scc, 10, "list of SCC members");

  if (valid_table == NULL)
    valid_table = htab_create_ggc (1, valnum_hash, valnum_equal, NULL);

  if (optimistic_table == NULL)
    optimistic_table = htab_create_ggc (1, valnum_hash, valnum_equal, NULL);

  opt_stats.num_unique_valnums = 0;
  opt_stats.max_iterations = 0;
  opt_stats.total_iterations = 0;
}

/* Cleanup after finishing value numbering.  */

static void
valnum_done (void)
{
  value_representatives = NULL;
  htab_empty (valid_table);
  htab_empty (optimistic_table);
}

/* Get the leader for the partition that NODE is in.  */
static tree
get_value_representative_partition_leader (tree node)
{
  tree vr = value_representatives[SSA_NAME_VERSION (node)];
  return (TREE_CODE (vr) == SSA_NAME ? vr : node);
}

/* Set the value representative for NODE to VR in mode MODE.  */
static void
set_value_representative (tree node, tree vr, const char * mode)
{
  value_representatives[SSA_NAME_VERSION (node)] = vr;
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t%s value representative for `", mode);
      print_generic_expr (dump_file, node, 0);
      fprintf (dump_file, "' is `");
      print_generic_expr (dump_file, vr, 0);
      fprintf (dump_file, "'\n");
    }
}

/* Get the value representative for NODE.  */
static tree
get_value_representative (tree node)
{
  tree result = value_representatives[SSA_NAME_VERSION (node)];
  if (result != NULL_TREE && TREE_CODE (result) == SSA_NAME)
    result = value_representatives[SSA_NAME_VERSION (result)];
  return result;
}

/* Create a new valnum_expr object.  */

static valnum_expr *
new_valnum_expr (tree valnum, tree expr)
{
  valnum_expr *v = ggc_alloc (sizeof (valnum_expr));
  v->valnum = valnum;
  v->expr = expr;
  return v;
}

/* "Driver function" for SCC-VN.  This function is called to process
   every SCC that we find with Tarjan's algorithm.  */

static void
process_scc (varray_type scc)
{
  tree n;
  size_t num_scc_members = VARRAY_ACTIVE_SIZE (scc);

  /* if SCC has a single member n  */
  if (num_scc_members == 1)
    {
      /* Valnum (n,valid)  */
      n = VARRAY_TOP_TREE (scc);
      VARRAY_POP (scc);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\nProcessing singleton SCC: `");
	  print_generic_expr (dump_file, n, 0);
	  fprintf (dump_file, "'\n");
	}
      valnum (n, valid_table);
    }
  else
    {
      size_t i;
      int iterations = 0;
      bool changed;

      opt_stats.num_non_singleton_sccs++;

      /* Sort the members of this SCC in reverse postorder with respect
         to the CFG.  The sorting is in-place.  */
      sort_scc_members (scc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  /* Dump all the nodes in this SCC.  I've put this after the sort
	     because the sort is quite safe and having the members in this
	     order helps when analyzing how we value number them.  */
	  fprintf (dump_file, "\nProcessing multi-node SCC:\n");
          for (i = 0; i < num_scc_members; i++)
	    {
	      tree n = VARRAY_TREE (scc, i);
	      fprintf (dump_file, "  `");
	      print_generic_expr (dump_file, n, 0);
	      fprintf (dump_file,"' (CFG RPO num: %d)\n",
		       CFG_RPO_NUM (SSA_NAME_VERSION (n)));
	    }
	}

      do
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n  Starting a new iteration\n");

	  changed = false;

	  /* Walk the nodes of the SCC.  */
	  for (i = 0; i < num_scc_members; i++)
	    {
	      /* For each n in SCC in reverse postorder (wrt. the CFG)
			Valnum (n,optimistic)  */
	      tree n = VARRAY_TREE (scc, i);
	      tree old_valnum = get_value_representative (n);
	      valnum (n, optimistic_table);
	      if (get_value_representative (n) != old_valnum)
		changed = true;
	    }

	  iterations++;
	}
      while (changed);

      opt_stats.max_iterations = MAX (opt_stats.max_iterations, iterations);
      opt_stats.total_iterations += iterations;

      /* For each n in SCC in reverse postorder (still wrt. the CFG)
		Valnum (n,valid)  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\n  Stabilized after %d iterations.\n", iterations);
      for (i = 0; i < num_scc_members; i++)
	{
	  tree n = VARRAY_TREE (scc, i);
	  valnum (n, valid_table);
	}

      /* Purge the elements of scc varray so it can be re-used for the
         next SCC we discover.  */
      VARRAY_POP_ALL (scc);
    }
}

/* Compare function for qsort.  A statement T1 is "less than"/"greater then"
   another statement T2 if its CFG RPO number is smaller/larger.  If the
   CFG RPO numbers are equal, the one that appears first in the basic block
   is "less than" the other.  */
static int
sort_scc_compare_func (const void *p1, const void *p2)
{
  const tree *t1 = (const tree *)p1;
  const tree *t2 = (const tree *)p2;
  int node1 = SSA_NAME_VERSION (*t1);
  int node2 = SSA_NAME_VERSION (*t2);

  if (CFG_RPO_NUM (node1) < CFG_RPO_NUM (node2))
    return -1;
  else if (CFG_RPO_NUM (node1) > CFG_RPO_NUM (node2))
    return 1;
  else
    return (STMT_NUM (node1) > STMT_NUM (node2));
}

/* Sort the nodes of a strongly connected component SCC in the SSA
   graph in reverse post order.

   Originally I used a bucket sort to first sort statements per basic
   block, and then use qsort to sort statements within a basic block.
   But just qsort-ing all statements turns out to be faster on avarage.  */
static void
sort_scc_members (varray_type scc)
{
  int n = VARRAY_ACTIVE_SIZE (scc);
  /* Sort in-place sort the statements in each block.  */
  qsort (&VARRAY_TREE (scc, 0), n, sizeof (tree), sort_scc_compare_func);
}

/* Assign a value number to NODE and store it in TABLE.  */

static void
valnum (tree node, htab_t table)
{
  tree stmt = SSA_NAME_DEF_STMT (node);

  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
    {
      tree expr = (IS_EMPTY_STMT (stmt) ? node : stmt);
      fprintf (dump_file, "\n    Value numbering: `");
      print_generic_expr (dump_file, expr, 0);
      fprintf (dump_file, "'\n");
    }

  if (TREE_CODE (stmt) == PHI_NODE)
    valnum_phi_node (node, table);
  else
    valnum_single_node (node, table);
}

/* Value number a PHI node.

   If the PHI defining NODE already is in TABLE, do nothing.
   Otherwise, look at the value numbers of the PHI arguments.  If at
   least two PHI args already have value numbers, and all value numbered
   PHI args have the same value number, then use that number for NODE
   as well.  In all other cases, NODE gets a new value number.  */

static void
valnum_phi_node (tree node, htab_t table)
{
  void **slot;
  tree simplified;
  tree phi = SSA_NAME_DEF_STMT (node);

  /* See if the PHI node can be simplified.  */
  simplified = simplify_phi_node (phi);
  if (simplified != phi)
    {
      if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\tSimplified PHI: `");
	  print_generic_expr (dump_file, phi, 0);
	  fprintf (dump_file, "' to `");
	  print_generic_expr (dump_file, simplified, 0);
	  fprintf (dump_file, "'\n");
	}
    }

  slot = lookup (simplified, table);
  if (*slot != NULL)
    {
      valnum_expr *v = ((valnum_expr *) *slot);
      set_value_representative (node, v->valnum, (table == valid_table) ?
						  "valid" : "optimistic");
    }
  else
    {
      /* Insert this PHI into the table.  */
      insert (slot, new_valnum_expr (node, simplified));
      set_value_representative (node, node, (table == valid_table) ?
					     "valid" : "optimistic");
      if (table == valid_table)
	++opt_stats.num_unique_valnums;
    }
}

/* Value number a single NODE of an SCC, i.e. a single expression.

   We can only assign value numbers to the RHS of a MODIFY_EXPR,
   and only if the statement for NODE does not have virtual operands
   or side effects.

   We assume that all SSA names without a defining statement are
   uninitialized.  Following Simpson, we give uninitialized values
   a value number that is not congruent to anything.

   FIXME: We could handle virtual operands if we just do not simplify
	  expressions that have them, and just use the operands instead
	  of the operand value representatives to value number them.
	  I only realized that later on, after implementing this, but
	  it should be fairly easy to change things here to do so.  */

static void
valnum_single_node (tree node, htab_t table)
{
  void **slot;
  stmt_ann_t ann;
  tree stmt = SSA_NAME_DEF_STMT (node);

  if (stmt)
    ann = stmt_ann (stmt);
  else
    ann = NULL;

  if (ann
      && !VUSE_OPS (ann)
      && !VDEF_OPS (ann)
      && TREE_CODE (stmt) == MODIFY_EXPR
      && !TREE_SIDE_EFFECTS (TREE_OPERAND (stmt, 1)))
    {
      tree temp = TREE_OPERAND (stmt, 1);
      tree expr = simplify_expr (temp);
      if (temp != expr)
	{
	  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\tSimplified `");
	      print_generic_expr (dump_file, node, 0);
	      fprintf (dump_file, " = ");
	      print_generic_expr (dump_file, temp, 0);
	      fprintf (dump_file, "' to `");
	      print_generic_expr (dump_file, expr, 0);
	      fprintf (dump_file, "'\n");
	    }
	}

      slot = lookup (expr, table);
      if (*slot != NULL)
	{
	  valnum_expr *v = (valnum_expr *) *slot;
	  set_value_representative (node, v->valnum, (table == valid_table) ?
						      "valid" : "optimistic");
	}
      else
	{
	  tree vr = (TREE_INVARIANT (expr)) ? expr : node;
	  insert (slot, new_valnum_expr (node, expr));
	  set_value_representative (node, vr, (table == valid_table) ?
				               "valid" : "optimistic");
	  if (table == valid_table)
	    ++opt_stats.num_unique_valnums;
	}
    }
  else
    /* Not an expression we can value number, so make the statement
       defining NODE the value number for NODE.  */
    {
      slot = lookup (node, table);

#ifdef ENABLE_CHECKING
      if (*slot != NULL)
	{
	  valnum_expr *v = (valnum_expr *) *slot;
	  if (v->expr != node || v->valnum != node)
	    abort ();
	}
#endif

      insert (slot, new_valnum_expr (node, node));
      set_value_representative (node, node, (table == valid_table) ?
					     "valid" : "optimistic");
      if (table == valid_table)
	++opt_stats.num_unique_valnums;
    }
}

/* Try to simplify a PHI argument.  */
static tree
simplify_phi_argument (tree arg)
{
  tree arg_value_representative = NULL_TREE;

  if (TREE_CODE (arg) == SSA_NAME)
    arg_value_representative = get_value_representative (arg);

  if (arg_value_representative != NULL_TREE)
    arg = arg_value_representative;
  else if (TREE_INVARIANT (arg))
    ;
  else
    arg = NULL_TREE;

  return arg;
}

/* Optimistically try to simplify a PHI node.

   If we find two arguments with different value numbers or no value
   numbered arguments at all, the value number for the PHI is the PHI
   itself.

   Otherwise, we've only seen PHI args with equal value numbers, so
   we optimistically use that as the value number for the whole PHI.  */

static tree
simplify_phi_node (tree phi)
{
  int i;
  tree result = NULL_TREE;

  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\tTrying to simplify PHI: `");
      print_generic_expr (dump_file, phi, 0);
      fprintf (dump_file, "'\n\tSimplified PHI arguments: <");
    }

  for (i = 0; i < PHI_NUM_ARGS (phi); i++)
    {
      tree arg = simplify_phi_argument (PHI_ARG_DEF (phi, i));

      if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (arg == NULL_TREE)
	    fprintf (dump_file, "T");
	  else
	    {
	      fputc ('`', dump_file);
	      print_generic_expr (dump_file, arg, 0);
	      fputc ('\'', dump_file);
	    }

	  if (i == PHI_NUM_ARGS (phi) - 1)
	    fprintf (dump_file, ">\n");
	  else
	    fprintf (dump_file, ", ");
	}

      if (arg != NULL_TREE)
	{
	  if (result == NULL_TREE)
	    result = arg;
          else if (! operand_equal_p (result, arg, 0))
	    {
	      if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS)
		  && i < PHI_NUM_ARGS (phi) - 1)
		fprintf (dump_file, "...>\n");
	      return phi;
	    }
        }
    }

  if (result == NULL_TREE)
    result = phi;

  return result;
}

/* Try to simplify an expression operand.  For SSA names, we try to use
   the value number for it if it already has one.  */

static tree
simplify_operand (tree op)
{
  tree result = op;
  tree op_value_representative = NULL_TREE;

  if (TREE_CODE (op) == SSA_NAME)
    op_value_representative = get_value_representative (op);

  result = (op_value_representative != NULL_TREE) ?
    op_value_representative : op;

  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tOperand `");
      print_generic_expr (dump_file, op, 0);
      fprintf (dump_file, "' simplifies to `");
      print_generic_expr (dump_file, result, 0);
      fprintf (dump_file, "'\n");
    }

  return result;
}

/* Try to apply algebraic simplifications to EXPR based on the
   value numbers of its operands.  */

static tree
simplify_expr (tree expr)
{
  tree result = expr;
  enum tree_code code = TREE_CODE (expr);

  /* PHI nodes have a separate simplification function.  */
  if (code == PHI_NODE)
    abort ();

  /* If EXPR is just a variable reference, just return the value number
     for it if it is an SSA name and it already has a value number.  */
  else if (! IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    result = simplify_operand (expr);

  /* If EXPR is a unary expression, try to simplify the operand, and
     fold the resulting new expression if the operand has changed.  */
  else if (TREE_CODE_CLASS (code) == '1')
    {
      tree op = simplify_operand (TREE_OPERAND (expr, 0));
      if (op != TREE_OPERAND (expr, 0))
	result = fold (build1 (code, TREE_TYPE (expr), op));
    }

  /* Likewise if EXPR is a binary expression.  */
  else if (TREE_CODE_CLASS (code) == '2'
	   || TREE_CODE_CLASS (code) == '<')
    {
      tree op1 = simplify_operand (TREE_OPERAND (expr, 0));
      tree op2 = simplify_operand (TREE_OPERAND (expr, 1));
      if (op1 != TREE_OPERAND (expr, 0) || op2 != TREE_OPERAND (expr, 1))
        result = fold (build (code, TREE_TYPE (expr), op1, op2));
    }

  return result;
}

/* Callback for hashtable.  Hash a valnum_expr.  */

static hashval_t
valnum_hash (const void *p)
{
  valnum_expr *ve = (valnum_expr *) p;

  if (TREE_CODE (ve->expr) == PHI_NODE)
    {
      /* Hash a PHI node.  Just take the sum of the iterative hash of
	 the value representatives of all PHI node arguments.  */
      int i;
      hashval_t val = 0;
      tree phi = ve->expr;
      basic_block bb;

      for (i = 0; i < PHI_NUM_ARGS (phi); i++)
	{
	  tree arg = simplify_phi_argument (PHI_ARG_DEF (phi, i));
	  val = iterative_hash_expr (arg, val);
	}

      /* ??? We can over-optimistically find that two PHIs are congruent
	 even when they clearly are not.  For example,

	  goto <L5>;
	<L1>:
	  j_8 = j_2 + 1;
	<L2>:
	  # j_2 = PHI <0(4), j_8(1)>;
	  if (j_2 <= 9) goto <L1>; else goto <L3>;
	<L3>:
	  i_7 = i_1 + 1;
	<L5>:
	  # i_1 = PHI <0(0), i_7(3)>;
	  if (i_1 <= 19) goto <L2>; else goto <L6>;
	<L6>:


	 After value numbering the SCC {j_2, j_8}, we find:

	Processing multi-node SCC:
	  `i_1' (CFG RPO num: 1)
	  `i_7' (CFG RPO num: 4)

	Starting a new iteration

		Value numbering: `i_1 = PHI <0(0), i_7(3)>;'
		Found expr `0' with valnum `j_2'
		optimistic value representative for `i_1' is `j_2'

		Value numbering: `i_7 = i_1 + 1'
		Found expr `j_2 + 1' with valnum `j_8'
		optimistic value representative for `i_7' is `j_8'

	Starting a new iteration

		Value numbering: `i_1 = PHI <0(0), i_7(3)>;'
		Found expr `j_2 = PHI <0(4), j_8(1)>;' with valnum `j_2'
		optimistic value representative for `i_1' is `j_2'

		Value numbering: `i_7 = i_1 + 1'
		Found expr `j_2 + 1' with valnum `j_8'
		optimistic value representative for `i_7' is `j_8'

	Stabilized after 2 iterations.

		Value numbering: `i_1 = PHI <0(0), i_7(3)>;'
		Found expr `j_2 = PHI <0(4), j_8(1)>;' with valnum `j_2'
		valid value representative for `i_1' is `j_2'

		Value numbering: `i_7 = i_1 + 1'
		Found expr `j_2 + 1' with valnum `j_8'
		valid value representative for `i_7' is `j_8'


	 While this is clearly incorrect, that's what the algorithm gives.
	 To un-break this, we require that two PHIs can only be congruent
	 if they are in the same basic block.  I'm not sure if this is a
	 hack, or if it is required within our framework for some reason.  */

      bb = bb_for_stmt (phi);
      val = iterative_hash (&bb, sizeof (basic_block), val);

      return val;
    }
  else
    /* iterative_hash_expr knows how to deal with any expression and
       deals with commutative operators as well, so just use it instead
       of duplicating such complexities here (hi Jeff ;-).  */
    return iterative_hash_expr (ve->expr, 0);
}

/* Callback for hashtable.  Compare two valnum_exprs and return true
   if they represent the same expresion.  */

static int
valnum_equal (const void *p1, const void *p2)
{
  valnum_expr *ve1 = (valnum_expr *) p1;
  valnum_expr *ve2 = (valnum_expr *) p2;
  tree expr1, expr2;

  /* If they are the same expression, return true.  */
  if (ve1->expr == ve2->expr)
    {
#ifdef ENABLE_CHECKING
      if (ve1->valnum != NULL_TREE
	  && ve2->valnum != NULL_TREE
	  && ve1->valnum != ve2->valnum)
	abort ();
#endif
      return true;
    }

  expr1 = ve1->expr;
  expr2 = ve2->expr;

  /* If the tree codes are not the same, the expressions can never
     be the same.  */
  if (TREE_CODE (expr1) != TREE_CODE (expr2))
    return false;

  /* Two PHI nodes are the same if they have the same number of
     arguments and the value representatives for all PHI arguments
     are the same for both PHIs.  */
  if (TREE_CODE (expr1) == PHI_NODE)
    {
      tree phi1 = expr1;
      tree phi2 = expr2;
      int i;

      if (PHI_NUM_ARGS (phi1) != PHI_NUM_ARGS (phi2))
	return false;

      for (i = 0; i < PHI_NUM_ARGS (expr1); i++)
	{
	  tree arg1 = simplify_phi_argument (PHI_ARG_DEF (phi1, i));
	  tree arg2 = simplify_phi_argument (PHI_ARG_DEF (phi2, i));
	  if (arg1 == NULL_TREE && arg2 == NULL_TREE)
	    continue;
	  else if ((arg1 != NULL_TREE && arg2 != NULL_TREE)
		   && ! operand_equal_p (arg1, arg2, 0))
	    return false;
	}

#ifdef ENABLE_CHECKING
        {
	  if (ve1->valnum != NULL_TREE
	      && ve2->valnum != NULL_TREE
	      && ve1->valnum != ve2->valnum)
	    abort ();
	}
#endif
      return true;
    }

  /* For expressoins, the expressions have to be identical.  */
  if ((TREE_TYPE (expr1) == TREE_TYPE (expr2)
       || (TYPE_MAIN_VARIANT (TREE_TYPE (expr1))
	   == TYPE_MAIN_VARIANT (TREE_TYPE (expr2))))
      && operand_equal_p (expr1, expr2, 0))
    {
#ifdef ENABLE_CHECKING
      if (ve1->valnum != NULL_TREE
	  && ve2->valnum != NULL_TREE
	  && ve1->valnum != ve2->valnum)
	abort ();
#endif
      return true;
    }

  return false;
}

/* Search in TABLE for an existing instance of EXPR that is the rhs
   of an assignment to NODE.
   If found, return the slot, otherwise return NULL.  */

static void **
lookup (tree expr, htab_t table)
{
  valnum_expr vr;
  void **slot;

  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\tLooking up expr `");
      print_generic_expr (dump_file, expr, 0);
      fprintf (dump_file, "'\n");
    }

  vr.valnum = NULL_TREE;
  vr.expr = expr;
  slot = htab_find_slot (table, &vr, INSERT);

  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS)
      && *slot != NULL)
    {
      valnum_expr *v = (valnum_expr*) *slot;
      fprintf (dump_file, "\tFound expr `");
      print_generic_expr (dump_file, v->expr, 0);
      fprintf (dump_file, "' with valnum `");
      print_generic_expr (dump_file, v->valnum, 0);
      fprintf (dump_file, "'\n");
    }

  return slot;
}

/* Insert EXPR into TABLE.  */

static void
insert (void **slot, valnum_expr *v)
{
  if (DEBUG_SCC_VN && dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\tInserting expr `");
      print_generic_expr (dump_file, v->expr, 0);
      fprintf (dump_file, "' with valnum `");
      print_generic_expr (dump_file, v->valnum, 0);
      fprintf (dump_file, "'\n");
    }
  *slot = (void *) v;
}

/* Dump statistics on FILE.  */

static void
dump_valnum_scc_stats (FILE * file)
{
  fprintf (file, "\n\n");

  fprintf (file, "Total number of DEF ops found in stmts:  %6ld\n",
	   opt_stats.num_defs);

  fprintf (file, "Total number SSA names found in DFS:     %6ld\n",
	   opt_stats.num_ssa_names);

  fprintf (file, "Total number of unique value numbers:    %6ld\n",
	   opt_stats.num_unique_valnums);

  fprintf (file, "Equivalent expressions found:            %6ld\n",
	   opt_stats.num_ssa_names - opt_stats.num_unique_valnums);

  fprintf (file, "Number of non-singleton SCCs found       %6ld\n",
	   opt_stats.num_non_singleton_sccs);

  fprintf (file, "Maximum number of iters over an SCC:     %6ld\n",
	   opt_stats.max_iterations);

  fprintf (file, "Total number of iters over all SCCs:     %6ld\n",
	   opt_stats.total_iterations);

  fprintf (file, "Total number of removed expressions:     %6ld\n",
	   opt_stats.num_removed_exprs);
}

/* Dump SCC VN statistics on stderr.
   PHASE indicates which dump file from the DUMP_FILES array to use when
   dumping debugging information.  */

void
debug_valnum_scc_stats (void)
{
  dump_valnum_scc_stats (stderr);
}

/* Main entry point to the SCC-VN pass.  */

void
tree_ssa_gvn (tree fndecl, enum tree_dump_index phase)
{
  timevar_push (TV_TREE_GVN);

  dump_file = dump_begin (phase, &dump_flags);

  if (dump_file)
    dump_function_to_file (fndecl, dump_file, dump_flags);

  scc_finder_init ();
  valnum_init ();
  tarjan_scc_finder (); 
  tree_eliminate_redundant_expressions (fndecl, phase);
  valnum_done ();
  scc_finder_done ();

  if (dump_file)
    {
      if (dump_file)
        dump_function_to_file (fndecl, dump_file, dump_flags);

      if (dump_flags & TDF_DETAILS)
	dump_valnum_scc_stats (dump_file);
      dump_end (phase, dump_file);
    }

  timevar_pop (TV_TREE_GVN);  
}


/* Dominator based redundancy elimination.

   Once we have value numbered SSA form, we are ready to eliminate
   redundant expressions.  We use a dominator based algorithm similar
   to the one described in Taylor Simpson's PhD. thesis, Chapter 5.

   In Simpson's description of the algorithm, he assumes that the flow
   graph is no longer in SSA form.  In this case, the AVAIL-based
   removal technique, described in the same chapter, can be more
   efficient than dominator based removal.  In our case, we are in SSA
   form and we wish to maintain it.  This means that the AVAIL-based
   removal does not remove any extra redundancies, i.e. the dominator
   based removal can remove the same redundancies.  And since it is
   much easier and cheaper to implement, it's an easy winner over the
   AVAIL-based approach.  */

static tree *available_exprs;


static void dom_removal_walk_stmts (struct dom_walk_data *, basic_block, tree);
static void dom_removal_finalize_block (struct dom_walk_data *,
					basic_block,
					tree);
static void replace_if_redundant (tree);
static void replace_one_immediate_use (tree *, tree, tree);

/* Remove fully redundant expressions.  */

void
tree_eliminate_redundant_expressions (tree fndecl ATTRIBUTE_UNUSED,
				      enum tree_dump_index phase ATTRIBUTE_UNUSED)
{
  struct dom_walk_data walk_data;

  opt_stats.num_removed_exprs = 0;
  available_exprs = xcalloc (highest_ssa_version, sizeof (tree));
  compute_immediate_uses (TDFA_USE_OPS, NULL);

  if (DEBUG_REMOVAL && dump_file && dump_flags & TDF_DETAILS)
    dump_immediate_uses (dump_file);

  /* Setup callbacks for the generic dominator tree walker.  */
  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_walk_stmts = dom_removal_walk_stmts;
  walk_data.after_dom_children_after_stmts = dom_removal_finalize_block;

  /* Now initialize the dominator walker.  */
  init_walk_dominator_tree (&walk_data);
  calculate_dominance_info (CDI_DOMINATORS);

  /* Recursively walk the dominator tree optimizing statements.  */
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR, NULL);

  /* Cleanup.  */
  fini_walk_dominator_tree (&walk_data);
/*  bitmap_release_memory (); */
  free (available_exprs);
}

/* Find the value representatives defined in BB upon entry, and remove any
   local redundancies.  */
static void
dom_removal_walk_stmts (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
			basic_block bb,
			tree parent_block_last_stmt ATTRIBUTE_UNUSED)
{
  block_stmt_iterator bsi;
  tree phi;

  if (bb == ENTRY_BLOCK_PTR || bb == EXIT_BLOCK_PTR)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nOptimizing block #%d\n", bb->index);

  /* See if the value computed for an SSA name is already computed in
     a dominating definition.  If so, remove the redundant expression.
     Otherwise record the expression.  */
  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
    {
      tree ssa_name = PHI_RESULT (phi);

      /* Skip virtual PHI nodes.  */
      if (!is_gimple_reg (SSA_NAME_VAR (ssa_name)))
	continue;

      replace_if_redundant (ssa_name);
    }

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      size_t i;
      def_optype defs;
      tree stmt = bsi_stmt (bsi);

      get_stmt_operands (stmt);

      defs = DEF_OPS (stmt_ann (stmt));
      for (i = 0; i < NUM_DEFS (defs); i++)
	{
 	  tree ssa_name = DEF_OP (defs, i);
	  replace_if_redundant (ssa_name);
	}
    }
}

/* Clear out any value representatives defined in this block before leaving
   this node in the dominator tree.

   FIXME: This code is so stupid that it hurts even my own eyes.  We should
	  track which values came in with each block, and not walk over the
	  whole available_exprs array.  */
static void
dom_removal_finalize_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
			    basic_block bb,
			    tree parent_block_last_stmt ATTRIBUTE_UNUSED)
{
  size_t i;

  if (bb == ENTRY_BLOCK_PTR || bb == EXIT_BLOCK_PTR)
    return;

  if (DEBUG_REMOVAL && dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Finalizing block #%d\n", bb->index);

  for (i = 0; i < highest_ssa_version; i++)
    {
      if (available_exprs[i] == NULL_TREE)
	continue;

      if (bb_for_stmt (SSA_NAME_DEF_STMT (available_exprs[i]))->index == bb->index)
	{
	  if (DEBUG_REMOVAL && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    Removing `");
	      print_generic_expr (dump_file, available_exprs[i], 0);
	      fprintf (dump_file, "' from the available_exprs array.\n");
	    }
	  available_exprs[i] = NULL_TREE;
	}
    }
}

/* If the expression defined by SSA_NAME is available in this branch of
   the dominator tree, remove this redundancy.  Otherwise make the
   expression available in the rest of the domtree branch.  */

static void
replace_if_redundant (tree ssa_name)
{
  tree vr = get_value_representative_partition_leader (ssa_name);

  if (available_exprs[SSA_NAME_VERSION (vr)] != NULL_TREE)
    {
      tree stmt = SSA_NAME_DEF_STMT (ssa_name);
      dataflow_t df = get_immediate_uses (stmt);
      int num_uses = num_immediate_uses (df);
      tree replacement = available_exprs[SSA_NAME_VERSION (vr)];
      int i;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "    Replacing occurences of `");
	  print_generic_expr (dump_file, ssa_name, 0);
	  fprintf (dump_file, "' with `");
	  print_generic_expr (dump_file, replacement, 0);
	  fprintf (dump_file, "'.\n");
	}

      for (i = 0; i < num_uses; i++)
	{
	  tree imm_use = immediate_use (df, i);
	  replace_one_immediate_use (&imm_use, ssa_name,  replacement);
	}
    }
  else if (SSA_NAME_DEF_STMT (vr))
    {
      available_exprs[SSA_NAME_VERSION (vr)] = ssa_name;
      if (DEBUG_REMOVAL && dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "    Making value for `");
	  print_generic_expr (dump_file, ssa_name, 0);
	  fprintf (dump_file, "' represented by `");
	  print_generic_expr (dump_file, vr, 0);
	  fprintf (dump_file, "' available in this domtree branch.\n");
	}
    }
}

/* Replace one immediate use op OLD_OP in stmt IMM_USE with REPLACEMENT.  */

static void
replace_one_immediate_use (tree *imm_use, tree old_op, tree replacement)
{
  if (TREE_CODE (*imm_use) == PHI_NODE)
    {
      tree phi = *imm_use;
      int i;

      for (i = 0; i < PHI_NUM_ARGS (phi); i++)
	{
	  tree arg = PHI_ARG_DEF (phi, i);
	  if (arg == old_op)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "\tReplacing PHI arg in `");
		  print_generic_expr (dump_file, *imm_use, 0);
		  fprintf (dump_file, "'\n");
		}
	      PHI_ARG_DEF (phi, i) = replacement;
	      opt_stats.num_removed_exprs++;
	      break;
	    }
	}
    }
  else
    {
      stmt_ann_t ann = stmt_ann (*imm_use);
      use_optype uses = USE_OPS (ann);
      size_t i;

      /* Find the USE operand and replace it.  */
      for (i = 0; i < NUM_USES (uses); i++)
	{
	  tree *use_p = USE_OP_PTR (uses, i);
	  if (*use_p == old_op)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "\tReplacing stmt operand in `");
		  print_generic_expr (dump_file, *imm_use, 0);
		  fprintf (dump_file, "'\n");
		}
	      *use_p = replacement;
	      opt_stats.num_removed_exprs++;
	      break;
	    }
	}

      fold_stmt (imm_use);
      modify_stmt (*imm_use);
    }
}
  
#include "gt-tree-ssa-gvn.h"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]