[patch] conditional store elimination


Hi,

this patch fixes PR middle-end/27313 and thereby a performance problem on 
456.hmmer (which now runs in 798 instead of 1080 seconds, i.e. a 26% 
improvement).  We talked about integrating this better with if-conversion 
and making it also do load hoisting, but I fear we won't get to that in 
the time left for stage 2, and meanwhile I'd really like to have this 
improvement in 4.3.

I tweaked the patch a bit to do the transformation by default only if the 
target has conditional moves.  Additionally, it is now applied only when 
the store in question is non-trapping because of another shadowing memory 
access, which also means that the value is already in cache (i.e. I don't 
do it when only TREE_THIS_NOTRAP is set).
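To make that condition concrete, here is a sketch of my own (not from 
the patch) of the case the pass must reject:

   if (cond)
     *p = X;    /* no dominating access to *p */

With no earlier access to *p dominating the store, making the store 
unconditional could introduce a trap (and a cold-cache write) that the 
original program never executed.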

For reference again, here is what the transformation does:

   ... some access to *p ...
   if (cond)
     *p = X;

--->

   cstore = *p;
   if (cond)
     cstore = X;
   *p = cstore;

Later passes then have the opportunity to transform the if into a 
conditional move (and to turn the *p read into a copy).
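As a concrete illustration, the hot loop of 456.hmmer has the shape of a 
guarded maximum update (a sketch of my own, not the actual benchmark 
source):

   for (k = 1; k <= n; k++)
     if (dc[k] > mc[k])    /* the guard reads mc[k] ...               */
       mc[k] = dc[k];      /* ... so this store provably cannot trap. */

After the transformation the store executes on every iteration and the 
branch merely selects a value, which RTL if-conversion can then turn 
into a conditional move, removing a hard-to-predict branch from the 
loop body.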

Bootstrapped and tested (all langs except ada) on x86_64-linux.


Ciao,
Michael.
-- 
	PR middle-end/27313

	* tree-pass.h (pass_cselim): Declare new pass.
	* passes.c (init_optimization_passes): Link in pass_cselim.
	* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Renamed from
	tree_ssa_phiopt; add do_store_elim parameter, handle it by calling
	cond_store_replacement.
	(condstoretemp): New static variable.
	(cond_store_replacement): New function.
	(tree_ssa_phiopt, tree_ssa_cs_elim): New wrappers around
	tree_ssa_phiopt_worker.
	(struct name_to_bb): New.
	(get_non_trapping, name_to_bb_hash, name_to_bb_eq, add_or_mark_expr,
	nt_init_block, nt_fini_block): New static functions.
	(seen_ssa_names, nontrap_set): New static variables.
	(gate_cselim, pass_cselim): Define new pass.
	* common.opt (ftree-cselim): New flag.
	* toplev.c (process_options): Set flag_tree_cselim if required.

Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 123256)
+++ tree-pass.h	(working copy)
@@ -283,6 +283,7 @@ extern struct tree_opt_pass pass_cse_rec
 extern struct tree_opt_pass pass_cse_sincos;
 extern struct tree_opt_pass pass_warn_function_return;
 extern struct tree_opt_pass pass_warn_function_noreturn;
+extern struct tree_opt_pass pass_cselim;
 extern struct tree_opt_pass pass_phiopt;
 extern struct tree_opt_pass pass_forwprop;
 extern struct tree_opt_pass pass_dse;
Index: tree-ssa-phiopt.c
===================================================================
--- tree-ssa-phiopt.c	(revision 123256)
+++ tree-ssa-phiopt.c	(working copy)
@@ -34,8 +34,10 @@ Software Foundation, 51 Franklin Street,
 #include "tree-pass.h"
 #include "tree-dump.h"
 #include "langhooks.h"
+#include "pointer-set.h"
+#include "domwalk.h"
 
-static unsigned int tree_ssa_phiopt (void);
+static unsigned int tree_ssa_phiopt_worker (bool);
 static bool conditional_replacement (basic_block, basic_block,
 				     edge, edge, tree, tree, tree);
 static bool value_replacement (basic_block, basic_block,
@@ -44,8 +46,11 @@ static bool minmax_replacement (basic_bl
 				edge, edge, tree, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 			     edge, edge, tree, tree, tree);
+static bool cond_store_replacement (basic_block, basic_block, edge, edge,
+				    struct pointer_set_t *);
 static void replace_phi_edge_with_variable (basic_block, edge, tree, tree);
 static basic_block *blocks_in_phiopt_order (void);
+static struct pointer_set_t * get_non_trapping (void);
 
 /* This pass tries to replaces an if-then-else block with an
    assignment.  We have four kinds of transformations.  Some of these
@@ -136,10 +141,61 @@ static basic_block *blocks_in_phiopt_ord
 static unsigned int
 tree_ssa_phiopt (void)
 {
+  return tree_ssa_phiopt_worker (false);
+}
+
+/* This pass tries to transform conditional stores into unconditional
+   ones, enabling further simplifications with the simpler then and else
+   blocks.  In particular it replaces this:
+
+     bb0:
+       if (cond) goto bb2; else goto bb1;
+     bb1:
+       *p = RHS
+     bb2:
+
+   with
+
+     bb0:
+       if (cond) goto bb1; else goto bb2;
+     bb1:
+       condtmp' = *p;
+     bb2:
+       condtmp = PHI <RHS, condtmp'>
+       *p = condtmp
+
+   This transformation can only be done under several constraints,
+   documented below.  */
+
+static unsigned int
+tree_ssa_cs_elim (void)
+{
+  return tree_ssa_phiopt_worker (true);
+}
+
+/* For conditional store replacement we need a temporary to
+   put the old contents of the memory in.  */
+static tree condstoretemp;
+
+/* The core routine of conditional store replacement and normal
+   phi optimizations.  Both share much of the infrastructure in how
+   to match applicable basic block patterns.  DO_STORE_ELIM is true
+   when we want to do conditional store replacement, false otherwise.  */
+static unsigned int
+tree_ssa_phiopt_worker (bool do_store_elim)
+{
   basic_block bb;
   basic_block *bb_order;
   unsigned n, i;
   bool cfgchanged = false;
+  struct pointer_set_t *nontrap = 0;
+
+  if (do_store_elim)
+    {
+      condstoretemp = NULL_TREE;
+      /* Calculate the set of non-trapping memory accesses.  */
+      nontrap = get_non_trapping ();
+    }
 
   /* Search every basic block for COND_EXPR we may be able to optimize.
 
@@ -211,36 +267,60 @@ tree_ssa_phiopt (void)
           || single_pred (bb1) != bb)
 	continue;
 
-      phi = phi_nodes (bb2);
-
-      /* Check to make sure that there is only one PHI node.
-         TODO: we could do it with more than one iff the other PHI nodes
-	 have the same elements for these two edges.  */
-      if (!phi || PHI_CHAIN (phi) != NULL)
-	continue;
-
-      arg0 = PHI_ARG_DEF_TREE (phi, e1->dest_idx);
-      arg1 = PHI_ARG_DEF_TREE (phi, e2->dest_idx);
+      if (do_store_elim)
+	{
+	  /* bb1 is the middle block, bb2 the join block, bb the split block,
+	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
+	     optimization if the join block has more than two predecessors.  */
+	  if (EDGE_COUNT (bb2->preds) > 2)
+	    continue;
+	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
+	    cfgchanged = true;
+	}
+      else
+	{
+	  phi = phi_nodes (bb2);
 
-      /* Something is wrong if we cannot find the arguments in the PHI
-	 node.  */
-      gcc_assert (arg0 != NULL && arg1 != NULL);
-
-      /* Do the replacement of conditional if it can be done.  */
-      if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	cfgchanged = true;
-      else if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	cfgchanged = true;
-      else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	cfgchanged = true;
-      else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	cfgchanged = true;
+	  /* Check to make sure that there is only one PHI node.
+	     TODO: we could do it with more than one iff the other PHI nodes
+	     have the same elements for these two edges.  */
+	  if (!phi || PHI_CHAIN (phi) != NULL)
+	    continue;
+
+	  arg0 = PHI_ARG_DEF_TREE (phi, e1->dest_idx);
+	  arg1 = PHI_ARG_DEF_TREE (phi, e2->dest_idx);
+
+	  /* Something is wrong if we cannot find the arguments in the PHI
+	     node.  */
+	  gcc_assert (arg0 != NULL && arg1 != NULL);
+
+	  /* Do the replacement of conditional if it can be done.  */
+	  if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+	    cfgchanged = true;
+	  else if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+	    cfgchanged = true;
+	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+	    cfgchanged = true;
+	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+	    cfgchanged = true;
+	}
     }
 
   free (bb_order);
   
-  /* If the CFG has changed, we should cleanup the CFG. */
-  return cfgchanged ? TODO_cleanup_cfg : 0;
+  if (do_store_elim)
+    pointer_set_destroy (nontrap);
+  /* If the CFG has changed, we should cleanup the CFG.  */
+  if (cfgchanged && do_store_elim)
+    {
+      /* In cond-store replacement we have added some loads on edges
+         and new VOPS (as we moved the store, and created a load).  */
+      bsi_commit_edge_inserts ();
+      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
+    }
+  else if (cfgchanged)
+    return TODO_cleanup_cfg;
+  return 0;
 }
 
 /* Returns the list of basic blocks in the function in an order that guarantees
@@ -991,6 +1071,256 @@ abs_replacement (basic_block cond_bb, ba
   return true;
 }
 
+/* Auxiliary functions to determine the set of memory accesses which
+   can't trap because they are preceded by accesses to the same memory
+   portion.  We do that for INDIRECT_REFs, so we only need to track
+   the SSA_NAME of the pointer indirectly referenced.  The algorithm
+   is simply a walk over all instructions in dominator order.  When
+   we see an INDIRECT_REF we determine if we've already seen the same
+   ref anywhere up to the root of the dominator tree.  If we have,
+   the current access can't trap.  If we don't see any dominating
+   access the current access might trap, but it might also make later
+   accesses non-trapping, so we remember it.  */
+
+/* A hash-table of SSA_NAMEs, and in which basic block an INDIRECT_REF
+   through it was seen, which would constitute a no-trap region for
+   same accesses.  */
+struct name_to_bb
+{
+  tree ssa_name;
+  basic_block bb;
+};
+
+/* The hash table for remembering what we've seen.  */
+static htab_t seen_ssa_names;
+/* The set of INDIRECT_REFs which can't trap.  */
+static struct pointer_set_t *nontrap_set;
+
+/* The hash function, based on the pointer to the pointer SSA_NAME.  */
+static hashval_t
+name_to_bb_hash (const void *p)
+{
+  tree n = ((struct name_to_bb *)p)->ssa_name;
+  return htab_hash_pointer (n);
+}
+
+/* The equality function of *P1 and *P2.  SSA_NAMEs are shared, so
+   it's enough to simply compare them for equality.  */
+static int
+name_to_bb_eq (const void *p1, const void *p2)
+{
+  tree n1 = ((struct name_to_bb *)p1)->ssa_name;
+  tree n2 = ((struct name_to_bb *)p2)->ssa_name;
+
+  return n1 == n2;
+}
+
+/* We see the expression EXP in basic block BB.  If it's an interesting
+   expression (an INDIRECT_REF through an SSA_NAME) possibly insert the
+   expression into the set NONTRAP or the hash table of seen expressions.  */
+static void
+add_or_mark_expr (basic_block bb, tree exp, struct pointer_set_t *nontrap)
+{
+  if (INDIRECT_REF_P (exp)
+      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME)
+    {
+      tree name = TREE_OPERAND (exp, 0);
+      struct name_to_bb map;
+      void **slot;
+      basic_block found_bb = 0;
+
+      /* Try to find the last seen INDIRECT_REF through the same
+         SSA_NAME, which can trap.  */
+      map.ssa_name = name;
+      map.bb = 0;
+      slot = htab_find_slot (seen_ssa_names, &map, INSERT);
+      if (*slot)
+        found_bb = ((struct name_to_bb *)*slot)->bb;
+
+      /* If we've found a trapping INDIRECT_REF, _and_ it dominates us
+         (is in a basic block on the path from us to the dominator root)
+	 then we can't trap.  */
+      if (found_bb && found_bb->aux == (void *)1)
+	{
+	  pointer_set_insert (nontrap, exp);
+	}
+      else
+        {
+	  /* We might trap, so insert us into the hash table.  */
+	  if (*slot)
+	    {
+              ((struct name_to_bb *)*slot)->bb = bb;
+	    }
+	  else
+	    {
+	      struct name_to_bb *nmap = XNEW (struct name_to_bb);
+	      nmap->ssa_name = name;
+	      nmap->bb = bb;
+	      *slot = nmap;
+	    }
+	}
+    }
+}
+
+/* Called by walk_dominator_tree, when entering the block BB.  */
+static void
+nt_init_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
+{
+  block_stmt_iterator bsi;
+  /* Mark this BB as being on the path to dominator root.  */
+  bb->aux = (void*)1;
+
+  /* And walk the statements in order.  */
+  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+    {
+      tree stmt = bsi_stmt (bsi);
+
+      if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
+	{
+	  tree lhs = GIMPLE_STMT_OPERAND (stmt, 0);
+	  tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+	  add_or_mark_expr (bb, rhs, nontrap_set);
+	  add_or_mark_expr (bb, lhs, nontrap_set);
+	}
+    }
+}
+
+/* Called by walk_dominator_tree, when basic block BB is exited.  */
+static void
+nt_fini_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
+{
+  /* This BB isn't on the path to dominator root anymore.  */
+  bb->aux = NULL;
+}
+
+/* This is the entry point for gathering non-trapping memory accesses.
+   It will do a dominator walk over the whole function, and it will
+   make use of the bb->aux pointers.  It returns a set of trees
+   (the INDIRECT_REFs themselves) which can't trap.  */
+static struct pointer_set_t *
+get_non_trapping (void)
+{
+  struct pointer_set_t *nontrap;
+  struct dom_walk_data walk_data;
+
+  nontrap = pointer_set_create ();
+  seen_ssa_names = htab_create (128, name_to_bb_hash, name_to_bb_eq,
+				free);
+  /* We're going to do a dominator walk, so ensure that we have
+     dominance information.  */
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  /* Setup callbacks for the generic dominator tree walker.  */
+  nontrap_set = nontrap;
+  walk_data.walk_stmts_backward = false;
+  walk_data.dom_direction = CDI_DOMINATORS;
+  walk_data.initialize_block_local_data = NULL;
+  walk_data.before_dom_children_before_stmts = nt_init_block;
+  walk_data.before_dom_children_walk_stmts = NULL;
+  walk_data.before_dom_children_after_stmts = NULL;
+  walk_data.after_dom_children_before_stmts = NULL;
+  walk_data.after_dom_children_walk_stmts = NULL;
+  walk_data.after_dom_children_after_stmts = nt_fini_block;
+  walk_data.global_data = NULL;
+  walk_data.block_local_data_size = 0;
+  walk_data.interesting_blocks = NULL;
+
+  init_walk_dominator_tree (&walk_data);
+  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
+  fini_walk_dominator_tree (&walk_data);
+  htab_delete (seen_ssa_names);
+
+  return nontrap;
+}
+
+/* Do the main work of conditional store replacement.  We already know
+   that the recognized pattern looks like so:
+
+   split:
+     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
+   MIDDLE_BB:
+     something
+     fallthrough (edge E0)
+   JOIN_BB:
+     some more
+
+   We check that MIDDLE_BB contains only one store, that that store
+   doesn't trap (via NONTRAP) and that the store has a "simple" RHS.  */
+
+static bool
+cond_store_replacement (basic_block middle_bb, basic_block join_bb,
+			edge e0, edge e1, struct pointer_set_t *nontrap)
+{
+  tree assign = last_and_only_stmt (middle_bb);
+  tree lhs, rhs, newexpr, name;
+  tree newphi;
+  block_stmt_iterator bsi;
+
+  /* Check that middle_bb contains only one store.  */
+  if (!assign
+      || TREE_CODE (assign) != GIMPLE_MODIFY_STMT)
+    return false;
+
+  lhs = GIMPLE_STMT_OPERAND (assign, 0);
+  if (!INDIRECT_REF_P (lhs))
+    return false;
+  rhs = GIMPLE_STMT_OPERAND (assign, 1);
+  if (TREE_CODE (rhs) != SSA_NAME && !is_gimple_min_invariant (rhs))
+    return false;
+  /* Prove that we can move the store down.  */
+  if (!TREE_THIS_NOTRAP (lhs)
+      && !pointer_set_contains (nontrap, lhs))
+    return false;
+
+  /* Now we've checked the constraints, so do the transformation:
+     1) Remove the single store.  */
+  mark_symbols_for_renaming (assign);
+  bsi = bsi_for_stmt (assign);
+  bsi_remove (&bsi, true);
+
+  /* 2) Create a temporary where we can store the old content
+        of the memory touched by the store, if we need to.  */
+  if (!condstoretemp || TREE_TYPE (lhs) != TREE_TYPE (condstoretemp))
+    {
+      condstoretemp = create_tmp_var (TREE_TYPE (lhs), "cstore");
+      get_var_ann (condstoretemp);
+    }
+  add_referenced_var (condstoretemp);
+
+  /* 3) Insert a load from the memory of the store to the temporary
+        on the edge which did not contain the store.  */
+  lhs = unshare_expr (lhs);
+  newexpr = build_gimple_modify_stmt (condstoretemp, lhs);
+  name = make_ssa_name (condstoretemp, newexpr);
+  GIMPLE_STMT_OPERAND (newexpr, 0) = name;
+  mark_symbols_for_renaming (newexpr);
+  bsi_insert_on_edge (e1, newexpr);
+
+  /* 4) Create a PHI node at the join block, with one argument
+        holding the old RHS, and the other holding the temporary
+        where we stored the old memory contents.  */
+  newphi = create_phi_node (condstoretemp, join_bb);
+  add_phi_arg (newphi, rhs, e0);
+  add_phi_arg (newphi, name, e1);
+
+  lhs = unshare_expr (lhs);
+  newexpr = build_gimple_modify_stmt (lhs, PHI_RESULT (newphi));
+  mark_symbols_for_renaming (newexpr);
+
+  /* 5) Insert that PHI node.  */
+  bsi = bsi_start (join_bb);
+  while (!bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR)
+    bsi_next (&bsi);
+  if (bsi_end_p (bsi))
+    {
+      bsi = bsi_last (join_bb);
+      bsi_insert_after (&bsi, newexpr, BSI_NEW_STMT);
+    }
+  else
+    bsi_insert_before (&bsi, newexpr, BSI_NEW_STMT);
+
+  return true;
+}
 
 /* Always do these optimizations if we have SSA
    trees to work on.  */
@@ -1020,3 +1350,30 @@ struct tree_opt_pass pass_phiopt =
     | TODO_verify_stmts,		/* todo_flags_finish */
   0					/* letter */
 };
+
+static bool
+gate_cselim (void)
+{
+  return flag_tree_cselim;
+}
+
+struct tree_opt_pass pass_cselim =
+{
+  "cselim",				/* name */
+  gate_cselim,				/* gate */
+  tree_ssa_cs_elim,			/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_TREE_PHIOPT,			/* tv_id */
+  PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func
+    | TODO_ggc_collect
+    | TODO_verify_ssa
+    | TODO_verify_flow
+    | TODO_verify_stmts,		/* todo_flags_finish */
+  0					/* letter */
+};
Index: common.opt
===================================================================
--- common.opt	(revision 123256)
+++ common.opt	(working copy)
@@ -998,6 +998,10 @@ ftree-store-copy-prop
 Common Report Var(flag_tree_store_copy_prop) Optimization
 Enable copy propagation for stores and loads
 
+ftree-cselim
+Common Report Var(flag_tree_cselim) Optimization
+Transform conditional stores into unconditional ones
+
 ftree-dce
 Common Report Var(flag_tree_dce) Optimization
 Enable SSA dead code elimination optimization on trees
Index: passes.c
===================================================================
--- passes.c	(revision 123256)
+++ passes.c	(working copy)
@@ -531,6 +531,7 @@ init_optimization_passes (void)
       NEXT_PASS (pass_merge_phi);
       NEXT_PASS (pass_vrp);
       NEXT_PASS (pass_dce);
+      NEXT_PASS (pass_cselim);
       NEXT_PASS (pass_dominator);
 
       /* The only const/copy propagation opportunities left after
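
For reference, once the patch is in, the pass can be exercised in 
isolation with something like this (a hypothetical session; the number 
in the dump file name depends on the final pass ordering):

   gcc -O2 -ftree-cselim -fdump-tree-cselim -S test.c
   cat test.c.*t.cselim

The cselim dump should show the conditional store rewritten into the 
load/PHI/store sequence from the example above.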

