This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch] conditional store elimination
- From: Michael Matz <matz at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 28 Aug 2007 19:01:28 +0200 (CEST)
- Subject: [patch] conditional store elimination
Hi,
this patch fixes PR middle-end/27313 and thereby a performance problem on
456.hmmer (which now runs in 798 instead of 1080 seconds, i.e. 26%
improvement). We talked about how to integrate this better with if
conversion and make it also do load hoisting, but I fear we won't get to
that during the time left for stage 2, and meanwhile I'd really like to
have that improvement in 4.3.
I tweaked the patch a bit to only do the transformation by default if the
target has conditional moves. Additionally it now is applied only when
the store in question is non-trapping because of another shadowing mem
access, which then also means that the value is in cache already (i.e. I
don't do it when only TREE_NOTRAP is set).
For reference again, here is what the transformation does:
... some access to *p ...
if (cond)
*p = X;
--->
cstore = *p;
if (cond)
cstore = X;
*p = cstore;
Later passes then have the possibility to transform the if into a
conditional store (and transforming the *p read to a copy).
Bootstrapped and tested (all langs except ada) on x86_64-linux.
Ciao,
Michael.
--
PR middle-end/27313
* tree-pass.h (pass_cselim): Declare new pass.
* passes.c (init_optimization_passes): Link in pass_cselim.
* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Renamed from
tree_ssa_phiopt; add do_store_elim parameter, handle it by calling
cond_store_replacement.
(condstoretemp): New static variable.
(cond_store_replacement): New function.
(tree_ssa_phiopt, tree_ssa_cs_elim): New wrappers around
tree_ssa_phiopt_worker.
(struct name_to_bb): New.
(get_non_trapping, name_to_bb_hash, name_to_bb_eq, add_or_mark_expr,
nt_init_block, nt_fini_block): New static functions.
(seen_ssa_names, nontrap_set): New static variables.
(gate_cselim, pass_cselim): Define new pass.
* common.opt (ftree-cselim): New flag.
* toplev.c (process_options): Set flag_tree_cselim if required.
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 123256)
+++ tree-pass.h (working copy)
@@ -283,6 +283,7 @@ extern struct tree_opt_pass pass_cse_rec
extern struct tree_opt_pass pass_cse_sincos;
extern struct tree_opt_pass pass_warn_function_return;
extern struct tree_opt_pass pass_warn_function_noreturn;
+extern struct tree_opt_pass pass_cselim;
extern struct tree_opt_pass pass_phiopt;
extern struct tree_opt_pass pass_forwprop;
extern struct tree_opt_pass pass_dse;
Index: tree-ssa-phiopt.c
===================================================================
--- tree-ssa-phiopt.c (revision 123256)
+++ tree-ssa-phiopt.c (working copy)
@@ -34,8 +34,10 @@ Software Foundation, 51 Franklin Street,
#include "tree-pass.h"
#include "tree-dump.h"
#include "langhooks.h"
+#include "pointer-set.h"
+#include "domwalk.h"
-static unsigned int tree_ssa_phiopt (void);
+static unsigned int tree_ssa_phiopt_worker (bool);
static bool conditional_replacement (basic_block, basic_block,
edge, edge, tree, tree, tree);
static bool value_replacement (basic_block, basic_block,
@@ -44,8 +46,11 @@ static bool minmax_replacement (basic_bl
edge, edge, tree, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, tree, tree, tree);
+static bool cond_store_replacement (basic_block, basic_block, edge, edge,
+ struct pointer_set_t *);
static void replace_phi_edge_with_variable (basic_block, edge, tree, tree);
static basic_block *blocks_in_phiopt_order (void);
+static struct pointer_set_t * get_non_trapping (void);
/* This pass tries to replaces an if-then-else block with an
assignment. We have four kinds of transformations. Some of these
@@ -136,10 +141,61 @@ static basic_block *blocks_in_phiopt_ord
static unsigned int
tree_ssa_phiopt (void)
{
+ return tree_ssa_phiopt_worker (false);
+}
+
+/* This pass tries to transform conditional stores into unconditional
+ ones, enabling further simplifications with the simpler then and else
+ blocks. In particular it replaces this:
+
+ bb0:
+ if (cond) goto bb2; else goto bb1;
+ bb1:
+ *p = RHS
+ bb2:
+
+ with
+
+ bb0:
+ if (cond) goto bb1; else goto bb2;
+ bb1:
+ condtmp' = *p;
+ bb2:
+ condtmp = PHI <RHS, condtmp'>
+ *p = condtmp
+
+ This transformation can only be done under several constraints,
+ documented below. */
+
+static unsigned int
+tree_ssa_cs_elim (void)
+{
+ return tree_ssa_phiopt_worker (true);
+}
+
+/* For conditional store replacement we need a temporary to
+ put the old contents of the memory in. */
+static tree condstoretemp;
+
+/* The core routine of conditional store replacement and normal
+ phi optimizations. Both share much of the infrastructure in how
+ to match applicable basic block patterns. DO_STORE_ELIM is true
+ when we want to do conditional store replacement, false otherwise. */
+static unsigned int
+tree_ssa_phiopt_worker (bool do_store_elim)
+{
basic_block bb;
basic_block *bb_order;
unsigned n, i;
bool cfgchanged = false;
+ struct pointer_set_t *nontrap = 0;
+
+ if (do_store_elim)
+ {
+ condstoretemp = NULL_TREE;
+ /* Calculate the set of non-trapping memory accesses. */
+ nontrap = get_non_trapping ();
+ }
/* Search every basic block for COND_EXPR we may be able to optimize.
@@ -211,36 +267,60 @@ tree_ssa_phiopt (void)
|| single_pred (bb1) != bb)
continue;
- phi = phi_nodes (bb2);
-
- /* Check to make sure that there is only one PHI node.
- TODO: we could do it with more than one iff the other PHI nodes
- have the same elements for these two edges. */
- if (!phi || PHI_CHAIN (phi) != NULL)
- continue;
-
- arg0 = PHI_ARG_DEF_TREE (phi, e1->dest_idx);
- arg1 = PHI_ARG_DEF_TREE (phi, e2->dest_idx);
+ if (do_store_elim)
+ {
+ /* bb1 is the middle block, bb2 the join block, bb the split block,
+ e1 the fallthrough edge from bb1 to bb2. We can't do the
+ optimization if the join block has more than two predecessors. */
+ if (EDGE_COUNT (bb2->preds) > 2)
+ continue;
+ if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
+ cfgchanged = true;
+ }
+ else
+ {
+ phi = phi_nodes (bb2);
- /* Something is wrong if we cannot find the arguments in the PHI
- node. */
- gcc_assert (arg0 != NULL && arg1 != NULL);
-
- /* Do the replacement of conditional if it can be done. */
- if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
- else if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
- else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
- else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
+ /* Check to make sure that there is only one PHI node.
+ TODO: we could do it with more than one iff the other PHI nodes
+ have the same elements for these two edges. */
+ if (!phi || PHI_CHAIN (phi) != NULL)
+ continue;
+
+ arg0 = PHI_ARG_DEF_TREE (phi, e1->dest_idx);
+ arg1 = PHI_ARG_DEF_TREE (phi, e2->dest_idx);
+
+ /* Something is wrong if we cannot find the arguments in the PHI
+ node. */
+ gcc_assert (arg0 != NULL && arg1 != NULL);
+
+ /* Do the replacement of conditional if it can be done. */
+ if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
+ else if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
+ else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
+ else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
+ }
}
free (bb_order);
- /* If the CFG has changed, we should cleanup the CFG. */
- return cfgchanged ? TODO_cleanup_cfg : 0;
+ if (do_store_elim)
+ pointer_set_destroy (nontrap);
+ /* If the CFG has changed, we should cleanup the CFG. */
+ if (cfgchanged && do_store_elim)
+ {
+ /* In cond-store replacement we have added some loads on edges
+ and new VOPS (as we moved the store, and created a load). */
+ bsi_commit_edge_inserts ();
+ return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
+ }
+ else if (cfgchanged)
+ return TODO_cleanup_cfg;
+ return 0;
}
/* Returns the list of basic blocks in the function in an order that guarantees
@@ -991,6 +1071,256 @@ abs_replacement (basic_block cond_bb, ba
return true;
}
+/* Auxiliary functions to determine the set of memory accesses which
+ can't trap because they are preceded by accesses to the same memory
+ portion. We do that for INDIRECT_REFs, so we only need to track
+ the SSA_NAME of the pointer indirectly referenced. The algorithm
+ simply is a walk over all instructions in dominator order. When
+ we see an INDIRECT_REF we determine if we've already seen a same
+ ref anywhere up to the root of the dominator tree. If we do the
+ current access can't trap. If we don't see any dominator access
+ the current access might trap, but might also make later accesses
+ non-trapping, so we remember it. */
+
+/* A hash-table of SSA_NAMEs, and in which basic block an INDIRECT_REF
+ through it was seen, which would constitute a no-trap region for
+ same accesses. */
+struct name_to_bb
+{
+ tree ssa_name;
+ basic_block bb;
+};
+
+/* The hash table for remembering what we've seen. */
+static htab_t seen_ssa_names;
+/* The set of INDIRECT_REFs which can't trap. */
+static struct pointer_set_t *nontrap_set;
+
+/* The hash function, based on the pointer to the pointer SSA_NAME. */
+static hashval_t
+name_to_bb_hash (const void *p)
+{
+ tree n = ((struct name_to_bb *)p)->ssa_name;
+ return htab_hash_pointer (n);
+}
+
+/* The equality function of *P1 and *P2. SSA_NAMEs are shared, so
+ it's enough to simply compare them for equality. */
+static int
+name_to_bb_eq (const void *p1, const void *p2)
+{
+ tree n1 = ((struct name_to_bb *)p1)->ssa_name;
+ tree n2 = ((struct name_to_bb *)p2)->ssa_name;
+
+ return n1 == n2;
+}
+
+/* We see a the expression EXP in basic block BB. If it's an interesting
+ expression (an INDIRECT_REF through an SSA_NAME) possibly insert the
+ expression into the set NONTRAP or the hash table of seen expressions. */
+static void
+add_or_mark_expr (basic_block bb, tree exp, struct pointer_set_t *nontrap)
+{
+ if (INDIRECT_REF_P (exp)
+ && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME)
+ {
+ tree name = TREE_OPERAND (exp, 0);
+ struct name_to_bb map;
+ void **slot;
+ basic_block found_bb = 0;
+
+ /* Try to find the last seen INDIRECT_REF through the same
+ SSA_NAME, which can trap. */
+ map.ssa_name = name;
+ map.bb = 0;
+ slot = htab_find_slot (seen_ssa_names, &map, INSERT);
+ if (*slot)
+ found_bb = ((struct name_to_bb *)*slot)->bb;
+
+ /* If we've found a trapping INDIRECT_REF, _and_ it dominates us
+ (is in a basic block on the path from us to the dominator root)
+ then we can't trap. */
+ if (found_bb && found_bb->aux == (void *)1)
+ {
+ pointer_set_insert (nontrap, exp);
+ }
+ else
+ {
+ /* We might trap, so insert us into the hash table. */
+ if (*slot)
+ {
+ ((struct name_to_bb *)*slot)->bb = bb;
+ }
+ else
+ {
+ struct name_to_bb *nmap = XNEW (struct name_to_bb);
+ nmap->ssa_name = name;
+ nmap->bb = bb;
+ *slot = nmap;
+ }
+ }
+ }
+}
+
+/* Called by walk_dominator_tree, when entering the block BB. */
+static void
+nt_init_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
+{
+ block_stmt_iterator bsi;
+ /* Mark this BB as being on the path to dominator root. */
+ bb->aux = (void*)1;
+
+ /* And walk the statements in order. */
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+
+ if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
+ {
+ tree lhs = GIMPLE_STMT_OPERAND (stmt, 0);
+ tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+ add_or_mark_expr (bb, rhs, nontrap_set);
+ add_or_mark_expr (bb, lhs, nontrap_set);
+ }
+ }
+}
+
+/* Called by walk_dominator_tree, when basic block BB is exited. */
+static void
+nt_fini_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
+{
+ /* This BB isn't on the path to dominator root anymore. */
+ bb->aux = NULL;
+}
+
+/* This is the entry point of gathering non trapping memory accesses.
+ It will do a dominator walk over the whole function, and it will
+ make use of the bb->aux pointers. It returns a set of trees
+ (the INDIRECT_REFs itself) which can't trap. */
+static struct pointer_set_t *
+get_non_trapping (void)
+{
+ struct pointer_set_t *nontrap;
+ struct dom_walk_data walk_data;
+
+ nontrap = pointer_set_create ();
+ seen_ssa_names = htab_create (128, name_to_bb_hash, name_to_bb_eq,
+ free);
+ /* We're going to do a dominator walk, so ensure that we have
+ dominance information. */
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ /* Setup callbacks for the generic dominator tree walker. */
+ nontrap_set = nontrap;
+ walk_data.walk_stmts_backward = false;
+ walk_data.dom_direction = CDI_DOMINATORS;
+ walk_data.initialize_block_local_data = NULL;
+ walk_data.before_dom_children_before_stmts = nt_init_block;
+ walk_data.before_dom_children_walk_stmts = NULL;
+ walk_data.before_dom_children_after_stmts = NULL;
+ walk_data.after_dom_children_before_stmts = NULL;
+ walk_data.after_dom_children_walk_stmts = NULL;
+ walk_data.after_dom_children_after_stmts = nt_fini_block;
+ walk_data.global_data = NULL;
+ walk_data.block_local_data_size = 0;
+ walk_data.interesting_blocks = NULL;
+
+ init_walk_dominator_tree (&walk_data);
+ walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
+ fini_walk_dominator_tree (&walk_data);
+ htab_delete (seen_ssa_names);
+
+ return nontrap;
+}
+
+/* Do the main work of conditional store replacement. We already know
+ that the recognized pattern looks like so:
+
+ split:
+ if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
+ MIDDLE_BB:
+ something
+ fallthrough (edge E0)
+ JOIN_BB:
+ some more
+
+ We check that MIDDLE_BB contains only one store, that that store
+ doesn't trap (via NONTRAP) and that the store has a "simple" RHS. */
+
+static bool
+cond_store_replacement (basic_block middle_bb, basic_block join_bb,
+ edge e0, edge e1, struct pointer_set_t *nontrap)
+{
+ tree assign = last_and_only_stmt (middle_bb);
+ tree lhs, rhs, newexpr, name;
+ tree newphi;
+ block_stmt_iterator bsi;
+
+ /* Check if middle_bb contains of only one store. */
+ if (!assign
+ || TREE_CODE (assign) != GIMPLE_MODIFY_STMT)
+ return false;
+
+ lhs = GIMPLE_STMT_OPERAND (assign, 0);
+ if (!INDIRECT_REF_P (lhs))
+ return false;
+ rhs = GIMPLE_STMT_OPERAND (assign, 1);
+ if (TREE_CODE (rhs) != SSA_NAME && !is_gimple_min_invariant (rhs))
+ return false;
+ /* Prove that we can move the store down. */
+ if (!TREE_THIS_NOTRAP (lhs)
+ && !pointer_set_contains (nontrap, lhs))
+ return false;
+
+ /* Now we've checked the constraints, so do the transformation:
+ 1) Remove the single store. */
+ mark_symbols_for_renaming (assign);
+ bsi = bsi_for_stmt (assign);
+ bsi_remove (&bsi, true);
+
+ /* 2) Create a temporary where we can store the old content
+ of the memory touched by the store, if we need to. */
+ if (!condstoretemp || TREE_TYPE (lhs) != TREE_TYPE (condstoretemp))
+ {
+ condstoretemp = create_tmp_var (TREE_TYPE (lhs), "cstore");
+ get_var_ann (condstoretemp);
+ }
+ add_referenced_var (condstoretemp);
+
+ /* 3) Insert a load from the memory of the store to the temporary
+ on the edge which did not contain the store. */
+ lhs = unshare_expr (lhs);
+ newexpr = build_gimple_modify_stmt (condstoretemp, lhs);
+ name = make_ssa_name (condstoretemp, newexpr);
+ GIMPLE_STMT_OPERAND (newexpr, 0) = name;
+ mark_symbols_for_renaming (newexpr);
+ bsi_insert_on_edge (e1, newexpr);
+
+ /* 4) Create a PHI node at the join block, with one argument
+ holding the old RHS, and the other holding the temporary
+ where we stored the old memory contents. */
+ newphi = create_phi_node (condstoretemp, join_bb);
+ add_phi_arg (newphi, rhs, e0);
+ add_phi_arg (newphi, name, e1);
+
+ lhs = unshare_expr (lhs);
+ newexpr = build_gimple_modify_stmt (lhs, PHI_RESULT (newphi));
+ mark_symbols_for_renaming (newexpr);
+
+ /* 5) Insert that PHI node. */
+ bsi = bsi_start (join_bb);
+ while (!bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR)
+ bsi_next (&bsi);
+ if (bsi_end_p (bsi))
+ {
+ bsi = bsi_last (join_bb);
+ bsi_insert_after (&bsi, newexpr, BSI_NEW_STMT);
+ }
+ else
+ bsi_insert_before (&bsi, newexpr, BSI_NEW_STMT);
+
+ return true;
+}
/* Always do these optimizations if we have SSA
trees to work on. */
@@ -1020,3 +1350,30 @@ struct tree_opt_pass pass_phiopt =
| TODO_verify_stmts, /* todo_flags_finish */
0 /* letter */
};
+
+static bool
+gate_cselim (void)
+{
+ return flag_tree_cselim;
+}
+
+struct tree_opt_pass pass_cselim =
+{
+ "cselim", /* name */
+ gate_cselim, /* gate */
+ tree_ssa_cs_elim, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_PHIOPT, /* tv_id */
+ PROP_cfg | PROP_ssa | PROP_alias, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func
+ | TODO_ggc_collect
+ | TODO_verify_ssa
+ | TODO_verify_flow
+ | TODO_verify_stmts, /* todo_flags_finish */
+ 0 /* letter */
+};
Index: common.opt
===================================================================
--- common.opt (revision 123256)
+++ common.opt (working copy)
@@ -998,6 +998,10 @@ ftree-store-copy-prop
Common Report Var(flag_tree_store_copy_prop) Optimization
Enable copy propagation for stores and loads
+ftree-cselim
+Common Report Var(flag_tree_cselim) Optimization
+Transform condition stores into unconditional ones
+
ftree-dce
Common Report Var(flag_tree_dce) Optimization
Enable SSA dead code elimination optimization on trees
Index: passes.c
===================================================================
--- passes.c (revision 123256)
+++ passes.c (working copy)
@@ -531,6 +531,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_merge_phi);
NEXT_PASS (pass_vrp);
NEXT_PASS (pass_dce);
+ NEXT_PASS (pass_cselim);
NEXT_PASS (pass_dominator);
/* The only const/copy propagation opportunities left after