This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[tree-ssa] Reducing PHI insertions
- From: law at redhat dot com
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 03 Feb 2003 16:58:59 -0700
- Subject: [tree-ssa] Reducing PHI insertions
- Reply-to: law at redhat dot com
I've started looking at the insane costs of PHI node insertion for
20001226-1.c. There's a couple issues that need to be addressed:
1. We're not using the pruned or semi-pruned SSA PHI node insertion
techniques. This can cause us to insert far more PHI nodes than
are actually needed. To put it in perspective, 20001226-1.c should
PHI nodes for 3-4 objects. We actually generate PHI nodes for more
than 40000 objects.
2. Our CFG has far more blocks than are necessary. For 20001226-1.c
the tree SSA code generates roughly 24000 basic blocks. 33% of
those blocks are totally unnecessary.
Both must be addressed for PHI node insertion to become a non-issue for
20001226-1. Once both are fixed PHI node insertion for 20001226-1.c will
complete in less than a second -- which is infinitely better than the
current situation where PHI insertion runs my machine out of memory, it
starts swapping insanely and eventually locks up :(
This patch addresses #1 by converting our PHI node insertion to use
semi-pruned rather than "minimal" PHI node insertion algorithms.
Note that this patch by itself will not help 20001226-1.c in any way
shape or form. This is because of the issue in #2 -- if the CFG is
not improved, then every object in 20001226-1.c is effectively live across
a basic block boundary and thus the semi-pruned form is equivalent to the
"minimal" form which we currently implement.
While it doesn't help 20001226-1.c at the moment it does clearly help other
code. Let's take combine.i as an example. PHI node insertion time is cut
by 50%, rewriting is cut by 30% and CCP is cut by 22%. Other future SSA
optimizers will also benefit as they'll have far fewer PHI nodes to consider.
Bootstrapped and regression tested whee.
* tree-dfa.c (add_referenced_var): Annotate each item in the
REFERENCED_VARS varray with a unique id.
* tree-flow.h (struct var_ann_d): Add new uid field.
* tree-ssa.c (mark_def_sites): Compute the set of variables
live across basic blocks and return them in an sbitmap.
(insert_phi_nodes): Use the set of nonlocal variables computed
by mark_def_sites to reduce the number of PHI nodes inserted.
(rewrite_into_ssa): Updated to deal with changes in
insert_phi_nodes and mark_def_sites. Free the sbitmap returned
by mark_def_sites.
Index: tree-dfa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-dfa.c,v
retrieving revision 1.1.4.69
diff -c -3 -p -r1.1.4.69 tree-dfa.c
*** tree-dfa.c 3 Feb 2003 01:26:53 -0000 1.1.4.69
--- tree-dfa.c 3 Feb 2003 23:52:16 -0000
*************** add_referenced_var (var, data)
*** 2095,2105 ****
slot = htab_find_slot (vars_found, (void *) var, INSERT);
if (*slot == NULL)
{
/* This is the first time we find this variable, add it to the
! REFERENCED_VARS array. */
*slot = (void *) var;
VARRAY_PUSH_TREE (referenced_vars, var);
! num_referenced_vars++;
}
}
--- 2095,2110 ----
slot = htab_find_slot (vars_found, (void *) var, INSERT);
if (*slot == NULL)
{
+ var_ann_t ann;
+
/* This is the first time we find this variable, add it to the
! REFERENCED_VARS array, also annotate it with its unique id. */
*slot = (void *) var;
VARRAY_PUSH_TREE (referenced_vars, var);
! ann = var_ann (var);
! if (! ann)
! ann = create_var_ann (var);
! ann->uid = num_referenced_vars++;
}
}
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-flow.h,v
retrieving revision 1.1.4.48
diff -c -3 -p -r1.1.4.48 tree-flow.h
*** tree-flow.h 3 Feb 2003 01:26:53 -0000 1.1.4.48
--- tree-flow.h 3 Feb 2003 23:52:22 -0000
*************** struct var_ann_d GTY(())
*** 62,67 ****
--- 62,70 ----
/* Variables that may alias this variable. */
varray_type may_aliases;
+
+ /* Unique ID of this variable. */
+ int uid;
};
Index: tree-ssa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-ssa.c,v
retrieving revision 1.1.4.45
diff -c -3 -p -r1.1.4.45 tree-ssa.c
*** tree-ssa.c 3 Feb 2003 01:26:53 -0000 1.1.4.45
--- tree-ssa.c 3 Feb 2003 23:52:23 -0000
*************** struct currdef_d
*** 99,107 ****
/* Local functions. */
static void init_tree_ssa PARAMS ((void));
static void delete_tree_ssa PARAMS ((tree));
! static void mark_def_sites PARAMS ((dominance_info));
static void set_def_block PARAMS ((tree, basic_block));
! static void insert_phi_nodes PARAMS ((sbitmap *));
static void rewrite_block PARAMS ((basic_block));
static void rewrite_stmts PARAMS ((basic_block, varray_type *));
static void rewrite_stmt PARAMS ((tree, varray_type *));
--- 99,107 ----
/* Local functions. */
static void init_tree_ssa PARAMS ((void));
static void delete_tree_ssa PARAMS ((tree));
! static sbitmap mark_def_sites PARAMS ((dominance_info));
static void set_def_block PARAMS ((tree, basic_block));
! static void insert_phi_nodes PARAMS ((sbitmap *, sbitmap));
static void rewrite_block PARAMS ((basic_block));
static void rewrite_stmts PARAMS ((basic_block, varray_type *));
static void rewrite_stmt PARAMS ((tree, varray_type *));
*************** rewrite_into_ssa (fndecl)
*** 209,214 ****
--- 209,215 ----
tree fndecl;
{
sbitmap *dfs;
+ sbitmap nonlocal_vars;
dominance_info idom;
timevar_push (TV_TREE_SSA_OTHER);
*************** rewrite_into_ssa (fndecl)
*** 228,237 ****
compute_may_aliases ();
/* Find variable references and mark definition sites. */
! mark_def_sites (idom);
/* Insert PHI nodes at dominance frontiers of definition blocks. */
! insert_phi_nodes (dfs);
/* Rewrite all the basic blocks in the program. */
timevar_push (TV_TREE_SSA_REWRITE_BLOCKS);
--- 229,238 ----
compute_may_aliases ();
/* Find variable references and mark definition sites. */
! nonlocal_vars = mark_def_sites (idom);
/* Insert PHI nodes at dominance frontiers of definition blocks. */
! insert_phi_nodes (dfs, nonlocal_vars);
/* Rewrite all the basic blocks in the program. */
timevar_push (TV_TREE_SSA_REWRITE_BLOCKS);
*************** rewrite_into_ssa (fndecl)
*** 239,244 ****
--- 240,246 ----
timevar_pop (TV_TREE_SSA_REWRITE_BLOCKS);
/* Free allocated memory. */
+ free (nonlocal_vars);
sbitmap_vector_free (dfs);
free_dominance_info (idom);
htab_delete (def_blocks);
*************** rewrite_into_ssa (fndecl)
*** 267,280 ****
Also, compute the set of dominator children for each block in the
flowgraph. This will be used by rewrite_block when traversing the
! flowgraph. */
! static void
mark_def_sites (idom)
dominance_info idom;
{
basic_block bb;
gimple_stmt_iterator si;
/* Mark all the blocks that have definitions for each variable referenced
in the function. */
--- 269,293 ----
Also, compute the set of dominator children for each block in the
flowgraph. This will be used by rewrite_block when traversing the
! flowgraph.
! Return a bitmap for the set of referenced variables which are
! "nonlocal", ie those which are live across block boundaries.
! This information is used to reduce the number of PHI nodes
! we create. */
!
! static sbitmap
mark_def_sites (idom)
dominance_info idom;
{
basic_block bb;
gimple_stmt_iterator si;
+ sbitmap nonlocal_vars;
+ sbitmap killed_vars;
+
+ nonlocal_vars = sbitmap_alloc (num_referenced_vars);
+ sbitmap_zero (nonlocal_vars);
+ killed_vars = sbitmap_alloc (num_referenced_vars);
/* Mark all the blocks that have definitions for each variable referenced
in the function. */
*************** mark_def_sites (idom)
*** 286,310 ****
if (idom_bb)
add_dom_child (idom_bb, bb);
for (si = gsi_start_bb (bb); !gsi_end_bb_p (si); gsi_step_bb (&si))
{
varray_type ops;
size_t i;
tree stmt;
!
stmt = gsi_stmt (si);
STRIP_NOPS (stmt);
get_stmt_operands (stmt);
! if (def_op (stmt))
! set_def_block (*(def_op (stmt)), bb);
ops = vdef_ops (stmt);
for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
set_def_block (VDEF_RESULT (VARRAY_TREE (ops, i)), bb);
}
}
}
--- 299,363 ----
if (idom_bb)
add_dom_child (idom_bb, bb);
+ /* We're interested in finding the variables which are used before
+ they are defined in this block. So at the start of each block
+ zero out KILLED_VARS. */
+ sbitmap_zero (killed_vars);
+
for (si = gsi_start_bb (bb); !gsi_end_bb_p (si); gsi_step_bb (&si))
{
varray_type ops;
size_t i;
tree stmt;
! tree *dest;
!
stmt = gsi_stmt (si);
STRIP_NOPS (stmt);
get_stmt_operands (stmt);
! dest = def_op (stmt);
! if (dest)
! set_def_block (*dest, bb);
!
! /* If a variable is used before being set, then the variable
! is live across a block boundary, so add it to NONLOCAL_VARS. */
! ops = use_ops (stmt);
! for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
! {
! tree *use = VARRAY_GENERIC_PTR (ops, i);
! int uid = var_ann (*use)->uid;
!
! if (! TEST_BIT (killed_vars, uid))
! SET_BIT (nonlocal_vars, uid);
! }
!
! /* Similarly for virtual uses. */
! ops = vuse_ops (stmt);
! for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
! {
! tree use = VARRAY_GENERIC_PTR (ops, i);
! int uid = var_ann (use)->uid;
!
! if (! TEST_BIT (killed_vars, uid))
! SET_BIT (nonlocal_vars, uid);
! }
!
! /* Now process the single destination of this statement.
!
! Note that virtual definitions are irrelavent for
! computing NONLOCAL_VARs and KILLED_VARS, so they are
! ignored here. */
! if (dest)
! SET_BIT (killed_vars, var_ann (*dest)->uid);
ops = vdef_ops (stmt);
for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
set_def_block (VDEF_RESULT (VARRAY_TREE (ops, i)), bb);
}
}
+ free (killed_vars);
+ return nonlocal_vars;
}
*************** set_def_block (var, bb)
*** 338,348 ****
/* Insert PHI nodes at the dominance frontier of blocks with variable
definitions. DFS contains the dominance frontier information for the
! flowgraph. */
static void
! insert_phi_nodes (dfs)
sbitmap *dfs;
{
size_t i;
--- 391,406 ----
/* Insert PHI nodes at the dominance frontier of blocks with variable
definitions. DFS contains the dominance frontier information for the
! flowgraph.
!
! NONLOCAL_VARs is a bitmap representing the set of variables which are
! live across basic block boundaries. Only variables in NONLOCAL_VARs
! need PHI nodes. */
static void
! insert_phi_nodes (dfs, nonlocal_vars)
sbitmap *dfs;
+ sbitmap nonlocal_vars;
{
size_t i;
*************** insert_phi_nodes (dfs)
*** 367,373 ****
for the variable. PHI nodes will be added to the dominance frontier
blocks of each definition block. */
for (i = 0; i < num_referenced_vars; i++)
! insert_phi_nodes_for (referenced_var (i), dfs);
added = NULL;
in_work = NULL;
--- 425,432 ----
for the variable. PHI nodes will be added to the dominance frontier
blocks of each definition block. */
for (i = 0; i < num_referenced_vars; i++)
! if (TEST_BIT (nonlocal_vars, i))
! insert_phi_nodes_for (referenced_var (i), dfs);
added = NULL;
in_work = NULL;