This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH 3/4] Add flag -ftree-loop-if-convert-memory-writes.
- From: Sebastian Pop <sebpop at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: rguenther at suse dot de, Sebastian Pop <sebpop at gmail dot com>
- Date: Wed, 7 Jul 2010 17:54:00 -0500
- Subject: [PATCH 3/4] Add flag -ftree-loop-if-convert-memory-writes.
- References: <1278534137-22733-4-git-send-email-sebpop@gmail.com>
This patch adds a flag that controls the replacement of the memory
writes that are in predicated basic blocks with a full write:
for (...)
if (cond)
A[i] = foo
is replaced with:
for (...)
A[i] = cond ? foo : A[i]
In order to do this, we have to call gimple_could_trap_p instead of
gimple_assign_rhs_could_trap_p, as we have to also check that the LHS
of assign stmts does not trap.
* common.opt (ftree-loop-if-convert-memory-writes): New flag.
* doc/invoke.texi (ftree-loop-if-convert-memory-writes): Documented.
* tree-if-conv.c (if_convertible_phi_p): Allow virtual phi nodes when
flag_loop_if_convert_memory_writes is set.
(if_convertible_gimple_assign_stmt_p): Allow memory reads and writes
Do not handle types that do not match is_gimple_reg_type.
Remove loop and bb parameters. Call gimple_could_trap_p instead of
when flag_loop_if_convert_memory_writes is set, as LHS can contain
memory refs.
(if_convertible_stmt_p): Remove loop and bb parameters. Update calls
to if_convertible_gimple_assign_stmt_p.
(if_convertible_loop_p): Update call to if_convertible_stmt_p.
(gimplify_condition): New.
(replace_phi_with_cond_gimple_assign_stmt): Renamed
predicate_scalar_phi. Do not handle virtual phi nodes.
(ifconvert_phi_nodes): Renamed predicate_all_scalar_phis.
Call predicate_scalar_phi.
(insert_gimplified_predicates): Insert the gimplified predicate of a BB
just after the labels for flag_loop_if_convert_memory_writes, otherwise
insert the predicate in the end of the BB.
(predicate_mem_writes): New.
(combine_blocks): Call predicate_all_scalar_phis. When
flag_loop_if_convert_memory_writes is set, call predicate_mem_writes.
(tree_if_conversion): Call mark_sym_for_renaming when
flag_loop_if_convert_memory_writes is set.
(main_tree_if_conversion): Call update_ssa when
flag_loop_if_convert_memory_writes is set.
* gcc.dg/tree-ssa/ifc-4.c: New.
* gcc.dg/tree-ssa/ifc-7.c: New.
---
gcc/common.opt | 4 +
gcc/doc/invoke.texi | 20 ++++-
gcc/testsuite/gcc.dg/tree-ssa/ifc-4.c | 53 ++++++++++
gcc/testsuite/gcc.dg/tree-ssa/ifc-7.c | 29 +++++
gcc/tree-if-conv.c | 183 +++++++++++++++++++++++++-------
5 files changed, 248 insertions(+), 41 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-4.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-7.c
diff --git a/gcc/common.opt b/gcc/common.opt
index 111d7b7..ceb94f8 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -657,6 +657,10 @@ ftree-loop-if-convert
Common Report Var(flag_tree_loop_if_convert) Init(-1) Optimization
Convert conditional jumps in innermost loops to branchless equivalents
+ftree-loop-if-convert-memory-writes
+Common Report Var(flag_tree_loop_if_convert_memory_writes) Optimization
+Also if-convert conditional jumps containing memory writes
+
; -finhibit-size-directive inhibits output of .size for ELF.
; This is used only for compiling crtstuff.c,
; and it may be extended to other effects
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0847e01..eb04fd5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -383,7 +383,8 @@ Objective-C and Objective-C++ Dialects}.
-fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer @gol
-ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol
-ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
--ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im @gol
+-ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol
+-ftree-loop-if-convert-memory-writes -ftree-loop-im @gol
-ftree-phiprop -ftree-loop-distribution @gol
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
@@ -6889,6 +6890,23 @@ branch-less equivalents. The intent is to remove control-flow from
the innermost loops in order to improve the ability of the
auto-vectorization pass to handle these loops.
+@item -ftree-loop-if-convert-memory-writes
+Attempt to also if-convert conditional jumps containing memory writes.
+This transformation can be unsafe for multi-threaded programs as it
+transforms conditional memory writes into unconditional memory writes.
+For example,
+@smallexample
+for (i = 0; i < N; i++)
+ if (cond)
+ A[i] = expr;
+@end smallexample
+would be transformed to
+@smallexample
+for (i = 0; i < N; i++)
+ A[i] = cond ? expr : A[i];
+@end smallexample
+potentially producing data races.
+
@item -ftree-loop-distribution
Perform loop distribution. This flag can improve cache performance on
big loop bodies and allow further loop optimizations, like
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-4.c
new file mode 100644
index 0000000..beb1a0e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-4.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+struct ht
+{
+ void * (*alloc_subobject) (int);
+};
+typedef struct cpp_reader cpp_reader;
+typedef struct cpp_token cpp_token;
+typedef struct cpp_macro cpp_macro;
+enum cpp_ttype
+{
+ CPP_PASTE,
+};
+struct cpp_token {
+ __extension__ enum cpp_ttype type : 8;
+} cpp_comment_table;
+struct cpp_macro {
+ union cpp_macro_u
+ {
+ cpp_token * tokens;
+ } exp;
+ unsigned int count;
+};
+struct cpp_reader
+{
+ struct ht *hash_table;
+};
+create_iso_definition (cpp_reader *pfile, cpp_macro *macro)
+{
+ unsigned int num_extra_tokens = 0;
+ {
+ cpp_token *tokns =
+ (cpp_token *) pfile->hash_table->alloc_subobject (sizeof (cpp_token)
+ * macro->count);
+ {
+ cpp_token *normal_dest = tokns;
+ cpp_token *extra_dest = tokns + macro->count - num_extra_tokens;
+ unsigned int i;
+ for (i = 0; i < macro->count; i++)
+ {
+ if (macro->exp.tokens[i].type == CPP_PASTE)
+ *extra_dest++ = macro->exp.tokens[i];
+ else
+ *normal_dest++ = macro->exp.tokens[i];
+ }
+ }
+ }
+}
+
+/* This cannot be if-converted because the stores are to aggregate types. */
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 0 "ifcvt" } } */
+/* { dg-final { cleanup-tree-dump "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-7.c
new file mode 100644
index 0000000..4d26dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-7.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
+
+typedef struct eqn_d
+{
+ int *coef;
+} *eqn;
+typedef struct omega_pb_d
+{
+ eqn subs;
+} *omega_pb;
+
+omega_pb omega_solve_problem (omega_pb);
+
+omega_pb
+omega_solve_geq (omega_pb pb, int n)
+{
+ int i, e;
+ int j = 0;
+
+ for (e = n - 1; e >= 0; e--)
+ if (pb->subs[e].coef[i] != pb->subs[e].coef[j])
+ {
+ pb->subs[e].coef[i] = j;
+ pb->subs[e].coef[j] = i;
+ }
+
+ return omega_solve_problem (pb);
+}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index d5b0f7e..673b129 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -387,9 +387,12 @@ bb_with_exit_edge_p (struct loop *loop, basic_block bb)
and it belongs to basic block BB.
PHI is not if-convertible if:
- - it has more than 2 arguments,
- - virtual PHI is immediately used in another PHI node,
- - virtual PHI on BB other than header. */
+ - it has more than 2 arguments.
+
+ When the flag_tree_loop_if_convert_memory_writes is not set, PHI is not
+ if-convertible if:
+ - a virtual PHI is immediately used in another PHI node,
+ - there is a virtual PHI in a BB other than the loop->header. */
static bool
if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
@@ -407,6 +410,12 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
return false;
}
+ if (flag_tree_loop_if_convert_memory_writes)
+ return true;
+
+ /* When the flag_tree_loop_if_convert_memory_writes is not set, check
+ that there are no memory writes in the branches of the loop to be
+ if-converted. */
if (!is_gimple_reg (SSA_NAME_VAR (gimple_phi_result (phi))))
{
imm_use_iterator imm_iter;
@@ -415,9 +424,10 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
if (bb != loop->header)
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Virtual phi not on loop header.\n");
+ fprintf (dump_file, "Virtual phi not on loop->header.\n");
return false;
}
+
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
{
if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
@@ -437,15 +447,13 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
GIMPLE_ASSIGN statement is not if-convertible if,
- it is not movable,
- it could trap,
- - LHS is not var decl.
-
- GIMPLE_ASSIGN is part of block BB, which is inside loop LOOP. */
+ - LHS is not var decl. */
static bool
-if_convertible_gimple_assign_stmt_p (struct loop *loop, basic_block bb,
- gimple stmt)
+if_convertible_gimple_assign_stmt_p (gimple stmt)
{
tree lhs = gimple_assign_lhs (stmt);
+ basic_block bb;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -453,6 +461,9 @@ if_convertible_gimple_assign_stmt_p (struct loop *loop, basic_block bb,
print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
}
+ if (!is_gimple_reg_type (TREE_TYPE (lhs)))
+ return false;
+
/* Some of these constrains might be too conservative. */
if (stmt_ends_bb_p (stmt)
|| gimple_has_volatile_ops (stmt)
@@ -465,6 +476,17 @@ if_convertible_gimple_assign_stmt_p (struct loop *loop, basic_block bb,
return false;
}
+ if (flag_tree_loop_if_convert_memory_writes)
+ {
+ if (gimple_could_trap_p (stmt))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "tree could trap...\n");
+ return false;
+ }
+ return true;
+ }
+
if (gimple_assign_rhs_could_trap_p (stmt))
{
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -472,9 +494,11 @@ if_convertible_gimple_assign_stmt_p (struct loop *loop, basic_block bb,
return false;
}
+ bb = gimple_bb (stmt);
+
if (TREE_CODE (lhs) != SSA_NAME
- && bb != loop->header
- && !bb_with_exit_edge_p (loop, bb))
+ && bb != bb->loop_father->header
+ && !bb_with_exit_edge_p (bb->loop_father, bb))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -491,12 +515,10 @@ if_convertible_gimple_assign_stmt_p (struct loop *loop, basic_block bb,
A statement is if-convertible if:
- it is an if-convertible GIMPLE_ASSGIN,
- - it is a GIMPLE_LABEL or a GIMPLE_COND.
-
- STMT is inside BB, which is inside loop LOOP. */
+ - it is a GIMPLE_LABEL or a GIMPLE_COND. */
static bool
-if_convertible_stmt_p (struct loop *loop, basic_block bb, gimple stmt)
+if_convertible_stmt_p (gimple stmt)
{
switch (gimple_code (stmt))
{
@@ -506,7 +528,7 @@ if_convertible_stmt_p (struct loop *loop, basic_block bb, gimple stmt)
return true;
case GIMPLE_ASSIGN:
- return if_convertible_gimple_assign_stmt_p (loop, bb, stmt);
+ return if_convertible_gimple_assign_stmt_p (stmt);
default:
/* Don't know what to do with 'em so don't do anything. */
@@ -877,7 +899,7 @@ if_convertible_loop_p (struct loop *loop)
continue;
for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
- if (!if_convertible_stmt_p (loop, bb, gsi_stmt (itr)))
+ if (!if_convertible_stmt_p (gsi_stmt (itr)))
return false;
}
@@ -897,7 +919,7 @@ if_convertible_loop_p (struct loop *loop)
static basic_block
find_phi_replacement_condition (struct loop *loop,
basic_block bb, tree *cond,
- gimple_stmt_iterator *gsi)
+ gimple_stmt_iterator *gsi)
{
edge first_edge, second_edge;
tree tmp_cond;
@@ -982,8 +1004,9 @@ find_phi_replacement_condition (struct loop *loop,
return first_edge->src;
}
-/* Replace PHI node with conditional modify expr using COND. This
- routine does not handle PHI nodes with more than two arguments.
+/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
+ This routine does not handle PHI nodes with more than two
+ arguments.
For example,
S1: A = PHI <x1(1), x2(5)
@@ -995,18 +1018,22 @@ find_phi_replacement_condition (struct loop *loop,
TRUE_BB is selected. */
static void
-replace_phi_with_cond_gimple_assign_stmt (gimple phi, tree cond,
- basic_block true_bb,
- gimple_stmt_iterator *gsi)
+predicate_scalar_phi (gimple phi, tree cond,
+ basic_block true_bb,
+ gimple_stmt_iterator *gsi)
{
gimple new_stmt;
basic_block bb;
- tree rhs;
- tree arg;
+ tree rhs, res, arg;
gcc_assert (gimple_code (phi) == GIMPLE_PHI
&& gimple_phi_num_args (phi) == 2);
+ res = gimple_phi_result (phi);
+ /* Do not handle virtual phi nodes. */
+ if (!is_gimple_reg (SSA_NAME_VAR (res)))
+ return;
+
bb = gimple_bb (phi);
arg = degenerate_phi_result (phi);
@@ -1028,11 +1055,11 @@ replace_phi_with_cond_gimple_assign_stmt (gimple phi, tree cond,
}
/* Build new RHS using selected condition and arguments. */
- rhs = build3 (COND_EXPR, TREE_TYPE (PHI_RESULT (phi)),
+ rhs = build3 (COND_EXPR, TREE_TYPE (res),
unshare_expr (cond), arg_0, arg_1);
}
- new_stmt = gimple_build_assign (PHI_RESULT (phi), rhs);
+ new_stmt = gimple_build_assign (res, rhs);
SSA_NAME_DEF_STMT (gimple_phi_result (phi)) = new_stmt;
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
update_stmt (new_stmt);
@@ -1044,11 +1071,11 @@ replace_phi_with_cond_gimple_assign_stmt (gimple phi, tree cond,
}
}
-/* Replaces in LOOP all the phi nodes other than those in the
+/* Replaces in LOOP all the scalar phi nodes other than those in the
LOOP->header block with conditional modify expressions. */
static void
-ifconvert_phi_nodes (struct loop *loop)
+predicate_all_scalar_phis (struct loop *loop)
{
basic_block bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -1077,7 +1104,7 @@ ifconvert_phi_nodes (struct loop *loop)
while (!gsi_end_p (phi_gsi))
{
phi = gsi_stmt (phi_gsi);
- replace_phi_with_cond_gimple_assign_stmt (phi, cond, true_bb, &gsi);
+ predicate_scalar_phi (phi, cond, true_bb, &gsi);
release_phi_node (phi);
gsi_next (&phi_gsi);
}
@@ -1097,7 +1124,7 @@ insert_gimplified_predicates (loop_p loop)
for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = ifc_bbs[i];
- gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
+ gimple_seq stmts;
if (!is_predicated (bb))
{
@@ -1108,15 +1135,30 @@ insert_gimplified_predicates (loop_p loop)
continue;
}
+ stmts = bb_predicate_gimplified_stmts (bb);
if (stmts)
{
- gimple_stmt_iterator gsi = gsi_last_bb (bb);
-
- if (gsi_end_p (gsi)
- || gimple_code (gsi_stmt (gsi)) == GIMPLE_COND)
- gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+ if (flag_tree_loop_if_convert_memory_writes)
+ {
+ /* Insert the predicate of the BB just after the label,
+ as the if-conversion of memory writes will use this
+ predicate. */
+ gimple_stmt_iterator gsi = gsi_after_labels (bb);
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+ }
else
- gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
+ {
+ /* Insert the predicate of the BB at the end of the BB
+ as this would reduce the register pressure: the only
+ use of this predicate will be in successor BBs. */
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
+
+ if (gsi_end_p (gsi)
+ || gimple_code (gsi_stmt (gsi)) == GIMPLE_COND)
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+ else
+ gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
+ }
/* Once the sequence is code generated, set it to NULL. */
set_bb_predicate_gimplified_stmts (bb, NULL);
@@ -1124,6 +1166,56 @@ insert_gimplified_predicates (loop_p loop)
}
}
+/* Predicate each write to memory in LOOP.
+
+ Replace a statement "A[i] = foo" with "A[i] = cond ? foo : A[i]"
+ with the condition COND determined from the predicate of the basic
+ block containing the statement. */
+
+static void
+predicate_mem_writes (loop_p loop)
+{
+ unsigned int i, orig_loop_num_nodes = loop->num_nodes;
+
+ for (i = 1; i < orig_loop_num_nodes; i++)
+ {
+ gimple_stmt_iterator gsi;
+ basic_block bb = ifc_bbs[i];
+ tree cond = bb_predicate (bb);
+
+ if (is_true_predicate (cond))
+ continue;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ if (is_gimple_assign (gsi_stmt (gsi))
+ && gimple_vdef (gsi_stmt (gsi)))
+ {
+ gimple new_stmt, lhs_stmt, rhs_stmt;
+ gimple stmt = gsi_stmt (gsi);
+ tree lhs = gimple_get_lhs (stmt);
+ tree rhs = gimple_op (stmt, 1);
+
+ gcc_assert (gimple_num_ops (stmt) == 2);
+
+ rhs_stmt = ifc_temp_var (TREE_TYPE (rhs), unshare_expr (rhs));
+ gsi_insert_before (&gsi, rhs_stmt, GSI_SAME_STMT);
+ rhs = gimple_get_lhs (rhs_stmt);
+
+ lhs_stmt = ifc_temp_var (TREE_TYPE (lhs), unshare_expr (lhs));
+ gsi_insert_before (&gsi, lhs_stmt, GSI_SAME_STMT);
+ lhs = gimple_get_lhs (lhs_stmt);
+
+ cond = unshare_expr (cond);
+ rhs = build3 (COND_EXPR, TREE_TYPE (lhs), cond, rhs, lhs);
+
+ new_stmt = ifc_temp_var (TREE_TYPE (lhs), rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ gimple_set_op (stmt, 1, gimple_get_lhs (new_stmt));
+ update_stmt (stmt);
+ }
+ }
+}
+
/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
other than the exit and latch of the LOOP. Also resets the
GIMPLE_DEBUG information. */
@@ -1180,7 +1272,10 @@ combine_blocks (struct loop *loop)
remove_conditions_and_labels (loop);
insert_gimplified_predicates (loop);
- ifconvert_phi_nodes (loop);
+ predicate_all_scalar_phis (loop);
+
+ if (flag_tree_loop_if_convert_memory_writes)
+ predicate_mem_writes (loop);
/* Merge basic blocks: first remove all the edges in the loop,
except for those from the exit block. */
@@ -1282,6 +1377,10 @@ tree_if_conversion (struct loop *loop)
blocks into one huge basic block doing the if-conversion
on-the-fly. */
combine_blocks (loop);
+
+ if (flag_tree_loop_if_convert_memory_writes)
+ mark_sym_for_renaming (gimple_vop (cfun));
+
changed = true;
cleanup:
@@ -1314,14 +1413,18 @@ main_tree_if_conversion (void)
FOR_EACH_LOOP (li, loop, 0)
changed |= tree_if_conversion (loop);
+ if (changed && flag_tree_loop_if_convert_memory_writes)
+ update_ssa (TODO_update_ssa);
+
return changed ? TODO_cleanup_cfg : 0;
}
static bool
gate_tree_if_conversion (void)
{
- if (flag_tree_vectorize
- && flag_tree_loop_if_convert < 0)
+ if ((flag_tree_vectorize
+ && flag_tree_loop_if_convert < 0)
+ || flag_tree_loop_if_convert_memory_writes > 0)
flag_tree_loop_if_convert = 1;
return flag_tree_loop_if_convert > 0;
--
1.7.0.4