This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[tree-ssa] Improve computed goto handling
- From: law at redhat dot com
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 16 Oct 2003 22:05:13 -0600
- Subject: [tree-ssa] Improve computed goto handling
- Reply-to: law at redhat dot com
So after slogging through numerous hunks of lameness which was slowing
the tree-ssa code down considerably when compiling interpret.cc, I
finally got smart and decided to look at things differently
[ Though I believe all the work recently done to improve interpret.cc
is ultimately good and useful to keep around. ]
interpret.cc is basically a switch statement and a bunch of computed
gotos. As we all know that means we get a large CFG. With a large
CFG comes lots of large PHI nodes, which impacts all the SSA passes.
So I decided to finally look at what could be done to reduce the size
of the CFG. The classic way to accomplish that is via factoring the
computed gotos. ie
goto *x;
[ ... ]
goto *x;
[ ... ]
goto *x;
[ ... ]
We'd turn that into
goto y;
[ ... ]
goto y;
[ ... ]
goto y;
[ ... ]
y:
goto *x;
Which builds a much nicer CFG which results in fewer PHI nodes and makes
us compiler junkies a lot happier.
However, the classic problem with this transformation is that it has a
runtime cost -- namely an extra jump. And in many cases this is the
tight loop for an interpreter. So end-users don't typically like compiler
junkies to make this kind of transformation :-)
Of course, it seems quite natural that we might be able to factor the code
for the benefit of the optimizers, then un-factor it to avoid the runtime
penalty of the factored code. And that's precisely what I did this
afternoon ;-) The compile-time benefits are staggering. Enough that I
won't be looking at interpret.cc again! :-) And yes, we manage to put
things back the way the were as we leave SSA form, so we shouldn't incur
a runtime penalty. Sweet.
Bootstrapped regression tested, etc.
* tree-cfg.c (found_computed_goto): New global for computed goto
factoring/unfactoring.
(factored_computed_goto_label, factored_computed_goto): Likewise.
(factor_computed_gotos): New function.
(build_tree_cfg): Use it.
(make_blocks): Record whether or not we find a computed goto.
(remove_useless_stmts_and_vars): Un-factor computed gotos.
(remove_useless_stmts_and_vars): Reset factored_computed_goto_label
and factored_computed_goto.
Index: tree-cfg.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-cfg.c,v
retrieving revision 1.1.4.176
diff -c -3 -p -r1.1.4.176 tree-cfg.c
*** tree-cfg.c 14 Oct 2003 17:31:33 -0000 1.1.4.176
--- tree-cfg.c 17 Oct 2003 03:58:14 -0000
*************** static struct cfg_stats_d cfg_stats;
*** 74,79 ****
--- 74,91 ----
static struct obstack block_tree_ann_obstack;
static void *first_block_tree_ann_obj = 0;
+ /* Nonzero if we found a computed goto while building basic blocks. */
+ static bool found_computed_goto;
+
+ /* If we found computed gotos, then they are all revectored to this
+ location. We try to unfactor them after we have translated out
+ of SSA form. */
+ static tree factored_computed_goto_label;
+
+ /* The factored computed goto. We cache this so we can easily recover
+ the destation of computed gotos when unfactoring them. */
+ static tree factored_computed_goto;
+
/* Basic blocks and flowgraphs. */
static void create_blocks_annotations (void);
static void create_block_annotation (basic_block);
*************** static basic_block make_bind_expr_blocks
*** 87,92 ****
--- 99,105 ----
static inline void add_stmt_to_bb (tree *, basic_block, tree);
static inline void append_stmt_to_bb (tree *, basic_block, tree);
static inline void set_parent_stmt (tree *, tree);
+ static void factor_computed_gotos (void);
/* Edges. */
static void make_edges (void);
*************** build_tree_cfg (tree fnbody)
*** 251,258 ****
--- 264,280 ----
first_p = first_exec_stmt (&BIND_EXPR_BODY (fnbody));
if (first_p)
{
+ found_computed_goto = 0;
make_blocks (first_p, NULL_TREE, NULL, NULL, fnbody);
+ /* Computed gotos are hell to deal with, especially if there are
+ lots of them with a large number of destinations. So we factor
+ them to a common computed goto location before we build the
+ edge list. After we convert back to normal form, we will un-factor
+ the computed gotos since factoring introduces an unwanted jump. */
+ if (found_computed_goto)
+ factor_computed_gotos ();
+
if (n_basic_blocks > 0)
{
/* Adjust the size of the array. */
*************** build_tree_cfg (tree fnbody)
*** 305,310 ****
--- 327,427 ----
}
}
+ /* Search the CFG for any computed gotos. If found, factor them to a
+ common computed goto site. Also record the location of that site so
+ that we can un-factor the gotos after we have converted back to
+ normal form. */
+
+ static void
+ factor_computed_gotos ()
+ {
+ basic_block bb;
+ tree factored_label_decl = NULL;
+ tree var = NULL;
+
+ /* We know there are one or more computed gotos in this function.
+ Examine the last statement in each basic block to see if the block
+ ends with a computed goto. */
+
+ FOR_EACH_BB (bb)
+ {
+ tree *last_p = last_stmt_ptr (bb);
+ tree last = *last_p;
+
+ /* Ignore the computed goto we create when we factor the original
+ computed gotos. */
+ if (last == factored_computed_goto)
+ continue;
+
+ /* If the last statement is a compted goto, factor it. */
+ if (is_computed_goto (last))
+ {
+ tree assignment;
+ block_stmt_iterator bsi = bsi_last (bb);
+
+ /* The first time we find a computed goto we need to create
+ the factored goto block and the variable each original
+ computed goto will use for their goto destination. */
+ if (! factored_computed_goto)
+ {
+ tree compound;
+ tree_stmt_iterator tsi = tsi_from_bsi (bsi);
+
+ /* Create the destination of the factored goto. Each original
+ computed goto will put its desired destination into this
+ variable and jump to the label we create immediately
+ below. */
+ var = create_tmp_var (ptr_type_node, "gotovar");
+
+ /* Build a label for the new block which will contain the
+ factored computed goto. */
+ factored_label_decl
+ = build_decl (LABEL_DECL, NULL_TREE, NULL_TREE);
+ DECL_CONTEXT (factored_label_decl) = current_function_decl;
+ factored_computed_goto_label
+ = build1 (LABEL_EXPR, void_type_node, factored_label_decl);
+ modify_stmt (factored_computed_goto_label);
+
+ /* Build our new computed goto. */
+ factored_computed_goto = build1 (GOTO_EXPR, void_type_node, var);
+ modify_stmt (factored_computed_goto);
+
+ /* Cram the new label and the computed goto into a container. */
+ compound = build (COMPOUND_EXPR, void_type_node,
+ factored_computed_goto_label,
+ factored_computed_goto);
+
+ /* Ugh. We want to pass the address of the container to
+ make_blocks call below. But we certainly don't want to
+ to pass along the address of a global. There's got to be
+ a better way to do this than to create a dummy container. */
+ compound = build (COMPOUND_EXPR, void_type_node, compound, NULL);
+
+ /* Put the new statements into a new basic block. This must
+ be done before we link them into the statement chain! */
+ make_blocks (&TREE_OPERAND (compound, 0), NULL, NULL, NULL, NULL);
+
+ /* Now it is safe to link in the new statements. */
+ tsi_link_chain_after (&tsi,
+ TREE_OPERAND (compound, 0),
+ TSI_CHAIN_START);
+ }
+
+ /* Copy the original computed goto's destination into VAR. */
+ assignment = build (MODIFY_EXPR, ptr_type_node,
+ var, GOTO_DESTINATION (last));
+ modify_stmt (assignment);
+
+ /* Insert that assignment just before the original computed
+ goto. */
+ set_bb_for_stmt (assignment, bb);
+ bsi_insert_before (&bsi, assignment, BSI_NEW_STMT);
+
+ /* And revector the computed goto to the new destination. */
+ GOTO_DESTINATION (last) = factored_label_decl;
+ }
+ }
+ }
/* Create annotations for all the basic blocks. */
static void create_blocks_annotations (void)
*************** make_blocks (tree *first_p, tree next_bl
*** 417,422 ****
--- 534,542 ----
append_stmt_to_bb (stmt_p, bb, parent_stmt);
get_stmt_ann (stmt)->scope = scope;
+ if (is_computed_goto (*stmt_p))
+ found_computed_goto = true;
+
if (code == COND_EXPR)
make_cond_expr_blocks (stmt_p, next_block_link, bb, scope);
else if (code == SWITCH_EXPR)
*************** remove_useless_stmts_and_vars_goto (tree
*** 1436,1441 ****
--- 1556,1569 ----
{
tree_stmt_iterator tsi = i;
+ if (factored_computed_goto_label
+ && (GOTO_DESTINATION (*stmt_p)
+ == LABEL_EXPR_LABEL (factored_computed_goto_label)))
+ {
+ GOTO_DESTINATION (*stmt_p) = GOTO_DESTINATION (factored_computed_goto);
+ return;
+ }
+
/* Step past the GOTO_EXPR statement. */
tsi_next (&tsi);
if (! tsi_end_p (tsi))
*************** remove_useless_stmts_and_vars (tree *fir
*** 1562,1567 ****
--- 1690,1698 ----
remove_useless_stmts_and_vars_1 (first_p, &data);
}
while (data.repeat);
+
+ factored_computed_goto = NULL;
+ factored_computed_goto_label = NULL;
}
/* Delete all unreachable basic blocks. Return true if any unreachable