Fix PHI handling in ipa-split
Jan Hubicka
hubicka@ucw.cz
Fri Jun 25 18:20:00 GMT 2010
Hi,
While fixing the PHIs, it turned out I got PHI handling completely wrong in visit_bb.
Using FOR_EACH_SSA_TREE_OPERAND on a PHI is a bad idea (that code got copied from
normal statement handling).
So this patch fixes this problem and allows splitting blocks with a PHI in entry_bb
when either the PHI is virtual, or all incoming edges from the header have the same values
(this is to allow split blocks starting with a loop).
tree-inline needs updating to handle PHIs in entry_bb. This is quite easy,
the edge needs to be created first and then regular PHI copying code sees
an edge that has no direct equivalent in original function body. Instead
it needs to look for edge from basic block that was not copied.
An alternative would be to split the BB in ipa-split and avoid PHIs in the entry BB,
but I think it is easier this way also for future other users of partial
cloning (that would be, for example, autopar).
Finally, to make splitting effective on regions starting with a loop, one needs
to be careful about the entry frequency. It is not the entry_bb frequency, but rather the
sum of frequencies of edges incoming to entry_bb from the header.
Doing similar analysis on reasons for not splitting we now get:
110 split part has non-ssa uses
833 need to pass non-param values
982 entry BB has PHI with multiple variants
5042 split size is smaller than call overhead
6813 incomming frequency is too large.
25300 header size is too large for inline candidate
There are about 800 splits, so teaching the code to pass non-param values still
has the potential to triple the count of splits, but we definitely got a lot better:
most of the time we invalidate a split because we do not want to split there.
Bootstrapped/regtested x86_64-linux, OK?
Honza
* gcc.dg/tree-ssa/ipa-split-2.c: New testcase.
* ipa-split.c (consider_split): PHI in entry block is OK as long as all
edges coming from header are equivalent.
(visit_bb): Handle PHIs correctly.
* tree-inline.c (copy_phis_for_bb): Be able to copy
PHI from entry edge.
(copy_cfg_body): Produce edge from entry BB before copying
PHIs.
Index: testsuite/gcc.dg/tree-ssa/ipa-split-2.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/ipa-split-2.c (revision 0)
--- testsuite/gcc.dg/tree-ssa/ipa-split-2.c (revision 0)
***************
*** 0 ****
--- 1,41 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O3 -fdump-tree-fnsplit" } */
+ int b;
+ int c;
+ int d;
+ split_me(int a)
+ {
+ int t = 0;
+ if (d>4)
+ return;
+ do
+ {
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ make_me_irregular:
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ long_function (t);
+ t=b;
+ }
+ while (t);
+ if (c)
+ goto make_me_irregular;
+ }
+
+ main()
+ {
+ split_me (1);
+ split_me (2);
+ split_me (3);
+ split_me (4);
+ split_me (5);
+ }
+ /* { dg-final { scan-tree-dump-times "Splitting function" 1 "fnsplit"} } */
+ /* { dg-final { cleanup-tree-dump "fnsplit" } } */
Index: ipa-split.c
===================================================================
*** ipa-split.c (revision 161382)
--- ipa-split.c (working copy)
*************** consider_split (struct split_point *curr
*** 171,187 ****
unsigned int call_overhead;
edge e;
edge_iterator ei;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_split_point (dump_file, current);
/* Do not split when we would end up calling function anyway. */
! if (current->entry_bb->frequency
>= (ENTRY_BLOCK_PTR->frequency
* PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
! " Refused: split BB frequency is too large.\n");
return;
}
--- 171,195 ----
unsigned int call_overhead;
edge e;
edge_iterator ei;
+ gimple_stmt_iterator bsi;
+ unsigned int i;
+ int incomming_freq = 0;
+
if (dump_file && (dump_flags & TDF_DETAILS))
dump_split_point (dump_file, current);
+ FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
+ if (!bitmap_bit_p (current->split_bbs, e->src->index))
+ incomming_freq += EDGE_FREQUENCY (e);
+
/* Do not split when we would end up calling function anyway. */
! if (incomming_freq
>= (ENTRY_BLOCK_PTR->frequency
* PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
! " Refused: incomming frequency is too large.\n");
return;
}
*************** consider_split (struct split_point *curr
*** 193,206 ****
return;
}
! /* FIXME: We can do better: if the split region start with a loop and there
! is only one entry point from outer wrold, we can update PHI. */
! if (!gsi_end_p (gsi_start_phis (current->entry_bb)))
{
! if (dump_file && (dump_flags & TDF_DETAILS))
! fprintf (dump_file,
! " Refused: entry BB has PHI\n");
! return;
}
--- 201,231 ----
return;
}
! /* Verify that PHI args on entry are either virutal or all their operands
! incomming from header are the same. */
! for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
! gimple stmt = gsi_stmt (bsi);
! tree val = NULL;
!
! if (!is_gimple_reg (gimple_phi_result (stmt)))
! continue;
! for (i = 0; i < gimple_phi_num_args (stmt); i++)
! {
! edge e = gimple_phi_arg_edge (stmt, i);
! if (!bitmap_bit_p (current->split_bbs, e->src->index))
! {
! tree edge_val = gimple_phi_arg_def (stmt, i);
! if (val && edge_val != val)
! {
! if (dump_file && (dump_flags & TDF_DETAILS))
! fprintf (dump_file,
! " Refused: entry BB has PHI with multiple variants\n");
! return;
! }
! val = edge_val;
! }
! }
}
*************** consider_split (struct split_point *curr
*** 289,296 ****
}
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
- if (is_gimple_debug (gsi_stmt (bsi)))
- continue;
if (walk_stmt_load_store_addr_ops
(gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
test_nonssa_use, test_nonssa_use))
--- 315,320 ----
*************** visit_bb (basic_block bb, basic_block re
*** 510,526 ****
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
gimple stmt = gsi_stmt (bsi);
! tree op;
! ssa_op_iter iter;
if (is_gimple_debug (stmt))
continue;
if (!is_gimple_reg (gimple_phi_result (stmt)))
continue;
! FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
! bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
! FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
! bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
mark_nonssa_use,
mark_nonssa_use,
--- 534,553 ----
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
gimple stmt = gsi_stmt (bsi);
! unsigned int i;
if (is_gimple_debug (stmt))
continue;
if (!is_gimple_reg (gimple_phi_result (stmt)))
continue;
! bitmap_set_bit (set_ssa_names,
! SSA_NAME_VERSION (gimple_phi_result (stmt)));
! for (i = 0; i < gimple_phi_num_args (stmt); i++)
! {
! tree op = gimple_phi_arg_def (stmt, i);
! if (TREE_CODE (op) == SSA_NAME)
! bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
! }
can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
mark_nonssa_use,
mark_nonssa_use,
Index: tree-inline.c
===================================================================
*** tree-inline.c (revision 161381)
--- tree-inline.c (working copy)
*************** copy_phis_for_bb (basic_block bb, copy_b
*** 1969,1979 ****
= new_phi = create_phi_node (new_res, new_bb);
FOR_EACH_EDGE (new_edge, ei, new_bb->preds)
{
! edge const old_edge
! = find_edge ((basic_block) new_edge->src->aux, bb);
! tree arg = PHI_ARG_DEF_FROM_EDGE (phi, old_edge);
! tree new_arg = arg;
tree block = id->block;
id->block = NULL_TREE;
walk_tree (&new_arg, copy_tree_body_r, id, NULL);
id->block = block;
--- 1969,1990 ----
= new_phi = create_phi_node (new_res, new_bb);
FOR_EACH_EDGE (new_edge, ei, new_bb->preds)
{
! edge old_edge = find_edge ((basic_block) new_edge->src->aux, bb);
! tree arg;
! tree new_arg;
tree block = id->block;
+ edge_iterator ei2;
+
+ /* When doing partial clonning, we allow PHIs on the entry block
+ as long as all the arguments are the same. Find any input
+ edge to see argument to copy. */
+ if (!old_edge)
+ FOR_EACH_EDGE (old_edge, ei2, bb->preds)
+ if (!old_edge->src->aux)
+ break;
+
+ arg = PHI_ARG_DEF_FROM_EDGE (phi, old_edge);
+ new_arg = arg;
id->block = NULL_TREE;
walk_tree (&new_arg, copy_tree_body_r, id, NULL);
id->block = block;
*************** copy_cfg_body (copy_body_data * id, gcov
*** 2191,2202 ****
|| (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
need_debug_cleanup |= copy_edges_for_bb (bb, count_scale, exit_block_map);
- if (gimple_in_ssa_p (cfun))
- FOR_ALL_BB_FN (bb, cfun_to_copy)
- if (!blocks_to_copy
- || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
- copy_phis_for_bb (bb, id);
-
if (new_entry)
{
edge e;
--- 2202,2207 ----
*************** copy_cfg_body (copy_body_data * id, gcov
*** 2205,2210 ****
--- 2210,2221 ----
e->count = entry_block_map->count;
}
+ if (gimple_in_ssa_p (cfun))
+ FOR_ALL_BB_FN (bb, cfun_to_copy)
+ if (!blocks_to_copy
+ || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
+ copy_phis_for_bb (bb, id);
+
FOR_ALL_BB_FN (bb, cfun_to_copy)
if (bb->aux)
{
More information about the Gcc-patches
mailing list