This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch] Fix PR26435 loop distribution in lambda-code
- From: Sebastian Pop <sebastian dot pop at cri dot ensmp dot fr>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Mon, 15 May 2006 17:55:24 +0200
- Subject: [patch] Fix PR26435 loop distribution in lambda-code
Hi,
Here is a patch that makes the loop distribution code more careful
about the scalar dependences: for a loop nest with two loops, it now
avoids to move scalars that are updated in both loops. This is the
easy solution. A more complete solution is to include a real pass for
loop distribution: Georges-Andre Silber has worked on that pass and he
will propose a patch to be included in 4.3. Until then, here is a
patch that can also be ported to the 4.1 and 4.0 branches.
Bootstrapped and tested on amd64-linux. Okay for trunk?
Sebastian
2006-05-15 Sebastian Pop <pop@cri.ensmp.fr>
* tree-loop-linear.c (linear_transform_loops): Don't test perfect_nest_p.
Call rewrite_into_loop_closed_ssa only when something changed.
* lambda.h (gcc_loopnest_to_lambda_loopnest): Update declaration.
* lambda-code.c (can_convert_to_perfect_nest): Declared.
(gcc_loopnest_to_lambda_loopnest): Removed need_perfect_nest parameter.
Test for perfect_nest_p here. Fix formating.
(replace_uses_equiv_to_x_with_y): Fix formating.
(stmt_uses_op): Removed.
(can_convert_to_perfect_nest): Removed loopivs parameter.
Complete the test by checking the scalar dependences.
(perfect_nestify): Remove the test for can_convert_to_perfect_nest.
Fix formating.
Index: tree-loop-linear.c
===================================================================
*** tree-loop-linear.c (revision 113724)
--- tree-loop-linear.c (working copy)
*************** try_interchange_loops (lambda_trans_matr
*** 238,243 ****
--- 238,244 ----
void
linear_transform_loops (struct loops *loops)
{
+ bool modified = false;
unsigned int i;
VEC(tree,heap) *oldivs = NULL;
VEC(tree,heap) *invariants = NULL;
*************** linear_transform_loops (struct loops *lo
*** 252,258 ****
lambda_loopnest before, after;
lambda_trans_matrix trans;
bool problem = false;
- bool need_perfect_nest = false;
/* If it's not a loop nest, we don't want it.
We also don't handle sibling loops properly,
which are loops of the following form:
--- 253,258 ----
*************** linear_transform_loops (struct loops *lo
*** 316,328 ****
continue;
}
! if (!perfect_nest_p (loop_nest))
! need_perfect_nest = true;
- before = gcc_loopnest_to_lambda_loopnest (loops,
- loop_nest, &oldivs,
- &invariants,
- need_perfect_nest);
if (!before)
continue;
--- 316,324 ----
continue;
}
! before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs,
! &invariants);
if (!before)
continue;
*************** linear_transform_loops (struct loops *lo
*** 342,347 ****
--- 338,344 ----
lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
after, trans);
+ modified = true;
if (dump_file)
fprintf (dump_file, "Successfully transformed loop.\n");
*************** linear_transform_loops (struct loops *lo
*** 353,357 ****
VEC_free (tree, heap, oldivs);
VEC_free (tree, heap, invariants);
scev_reset ();
! rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
}
--- 350,356 ----
VEC_free (tree, heap, oldivs);
VEC_free (tree, heap, invariants);
scev_reset ();
!
! if (modified)
! rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
}
Index: testsuite/gcc.dg/tree-ssa/ltrans-1.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/ltrans-1.c (revision 113724)
--- testsuite/gcc.dg/tree-ssa/ltrans-1.c (working copy)
*************** int foo(int N, int *res)
*** 18,24 ****
}
*res = sum + N;
}
! /* { dg-final { scan-tree-dump-times "converted loop nest to perfect
! loop nest" 1 "ltrans"} } */
/* { dg-final { scan-tree-dump-times "transformed loop" 1 "ltrans"} } */
/* { dg-final { cleanup-tree-dump "ltrans" } } */
--- 18,23 ----
}
*res = sum + N;
}
! /* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 1 "ltrans"} } */
/* { dg-final { scan-tree-dump-times "transformed loop" 1 "ltrans"} } */
/* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: testsuite/gcc.dg/tree-ssa/pr26435.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/pr26435.c (revision 0)
--- testsuite/gcc.dg/tree-ssa/pr26435.c (revision 0)
***************
*** 0 ****
--- 1,20 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ftree-loop-linear -fdump-tree-ltrans-all" } */
+ /* { dg-require-effective-target size32plus } */
+
+ int foo(int *p, int n)
+ {
+ int i, j, k = 0;
+
+ /* This is a reduction: there is a scalar dependence that cannot be
+ removed by rewriting IVs. This code cannot and should not be
+ transformed into a perfect loop. */
+ for (i = 0; i < 2; ++i, p += n)
+ for (j = 0; j < 2; ++j)
+ k += p[j];
+
+ return k;
+ }
+
+ /* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 0 "ltrans"} } */
+ /* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: lambda.h
===================================================================
*** lambda.h (revision 113724)
--- lambda.h (working copy)
*************** void print_lambda_body_vector (FILE *, l
*** 199,206 ****
lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
struct loop *,
VEC(tree,heap) **,
! VEC(tree,heap) **,
! bool);
void lambda_loopnest_to_gcc_loopnest (struct loop *,
VEC(tree,heap) *, VEC(tree,heap) *,
lambda_loopnest, lambda_trans_matrix);
--- 199,205 ----
lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
struct loop *,
VEC(tree,heap) **,
! VEC(tree,heap) **);
void lambda_loopnest_to_gcc_loopnest (struct loop *,
VEC(tree,heap) *, VEC(tree,heap) *,
lambda_loopnest, lambda_trans_matrix);
Index: lambda-code.c
===================================================================
*** lambda-code.c (revision 113724)
--- lambda-code.c (working copy)
*************** static lambda_lattice lambda_lattice_new
*** 147,152 ****
--- 147,153 ----
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
static tree find_induction_var_from_exit_cond (struct loop *);
+ static bool can_convert_to_perfect_nest (struct loop *);
/* Create a new lambda body vector. */
*************** DEF_VEC_ALLOC_P(lambda_loop,heap);
*** 1457,1470 ****
lambda_loopnest
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
! struct loop * loop_nest,
VEC(tree,heap) **inductionvars,
! VEC(tree,heap) **invariants,
! bool need_perfect_nest)
{
lambda_loopnest ret = NULL;
! struct loop *temp;
! int depth = 0;
size_t i;
VEC(lambda_loop,heap) *loops = NULL;
VEC(tree,heap) *uboundvars = NULL;
--- 1458,1470 ----
lambda_loopnest
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
! struct loop *loop_nest,
VEC(tree,heap) **inductionvars,
! VEC(tree,heap) **invariants)
{
lambda_loopnest ret = NULL;
! struct loop *temp = loop_nest;
! int depth = depth_of_nest (loop_nest);
size_t i;
VEC(lambda_loop,heap) *loops = NULL;
VEC(tree,heap) *uboundvars = NULL;
*************** gcc_loopnest_to_lambda_loopnest (struct
*** 1472,1480 ****
VEC(int,heap) *steps = NULL;
lambda_loop newloop;
tree inductionvar = NULL;
!
! depth = depth_of_nest (loop_nest);
! temp = loop_nest;
while (temp)
{
newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
--- 1472,1482 ----
VEC(int,heap) *steps = NULL;
lambda_loop newloop;
tree inductionvar = NULL;
! bool perfect_nest = perfect_nest_p (loop_nest);
!
! if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
! goto fail;
!
while (temp)
{
newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
*************** gcc_loopnest_to_lambda_loopnest (struct
*** 1482,1493 ****
&lboundvars, &uboundvars,
&steps);
if (!newloop)
! return NULL;
VEC_safe_push (tree, heap, *inductionvars, inductionvar);
VEC_safe_push (lambda_loop, heap, loops, newloop);
temp = temp->inner;
}
! if (need_perfect_nest)
{
if (!perfect_nestify (currloops, loop_nest,
lboundvars, uboundvars, steps, *inductionvars))
--- 1484,1497 ----
&lboundvars, &uboundvars,
&steps);
if (!newloop)
! goto fail;
!
VEC_safe_push (tree, heap, *inductionvars, inductionvar);
VEC_safe_push (lambda_loop, heap, loops, newloop);
temp = temp->inner;
}
!
! if (!perfect_nest)
{
if (!perfect_nestify (currloops, loop_nest,
lboundvars, uboundvars, steps, *inductionvars))
*************** gcc_loopnest_to_lambda_loopnest (struct
*** 1501,1509 ****
--- 1505,1516 ----
fprintf (dump_file,
"Successfully converted loop nest to perfect loop nest.\n");
}
+
ret = lambda_loopnest_new (depth, 2 * depth);
+
for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
LN_LOOPS (ret)[i] = newloop;
+
fail:
VEC_free (lambda_loop, heap, loops);
VEC_free (tree, heap, uboundvars);
*************** replace_uses_equiv_to_x_with_y (struct l
*** 2110,2122 ****
{
tree use = USE_FROM_PTR (use_p);
tree step = NULL_TREE;
! tree access_fn = NULL_TREE;
!
!
! access_fn = instantiate_parameters
! (loop, analyze_scalar_evolution (loop, use));
! if (access_fn != NULL_TREE && access_fn != chrec_dont_know)
! step = evolution_part_in_loop_num (access_fn, loop->num);
if ((step && step != chrec_dont_know
&& TREE_CODE (step) == INTEGER_CST
&& int_cst_value (step) == xstep)
--- 2117,2128 ----
{
tree use = USE_FROM_PTR (use_p);
tree step = NULL_TREE;
! tree scev = instantiate_parameters (loop,
! analyze_scalar_evolution (loop, use));
!
! if (scev != NULL_TREE && scev != chrec_dont_know)
! step = evolution_part_in_loop_num (scev, loop->num);
!
if ((step && step != chrec_dont_know
&& TREE_CODE (step) == INTEGER_CST
&& int_cst_value (step) == xstep)
*************** replace_uses_equiv_to_x_with_y (struct l
*** 2125,2146 ****
}
}
- /* Return TRUE if STMT uses tree OP in it's uses. */
-
- static bool
- stmt_uses_op (tree stmt, tree op)
- {
- ssa_op_iter iter;
- tree use;
-
- FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
- {
- if (use == op)
- return true;
- }
- return false;
- }
-
/* Return true if STMT is an exit PHI for LOOP */
static bool
--- 2131,2136 ----
*************** can_put_after_inner_loop (struct loop *l
*** 2210,2223 ****
! /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
! one. LOOPIVS is a vector of induction variables, one per loop.
! ATM, we only handle imperfect nests of depth 2, where all of the statements
! occur after the inner loop. */
static bool
! can_convert_to_perfect_nest (struct loop *loop,
! VEC(tree,heap) *loopivs)
{
basic_block *bbs;
tree exit_condition, phi;
--- 2200,2211 ----
! /* Return TRUE if LOOP is an imperfect nest that we can convert to a
! perfect one. At the moment, we only handle imperfect nests of
! depth 2, where all of the statements occur after the inner loop. */
static bool
! can_convert_to_perfect_nest (struct loop *loop)
{
basic_block *bbs;
tree exit_condition, phi;
*************** can_convert_to_perfect_nest (struct loop
*** 2237,2255 ****
{
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
{
- size_t j;
tree stmt = bsi_stmt (bsi);
! tree iv;
!
if (stmt == exit_condition
|| not_interesting_stmt (stmt)
|| stmt_is_bumper_for_loop (loop, stmt))
continue;
! /* If the statement uses inner loop ivs, we == screwed. */
! for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
! if (stmt_uses_op (stmt, iv))
! goto fail;
!
/* If this is a scalar operation that can be put back
into the inner loop, or after the inner loop, through
copying, then do so. This works on the theory that
--- 2225,2237 ----
{
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
{
tree stmt = bsi_stmt (bsi);
!
if (stmt == exit_condition
|| not_interesting_stmt (stmt)
|| stmt_is_bumper_for_loop (loop, stmt))
continue;
!
/* If this is a scalar operation that can be put back
into the inner loop, or after the inner loop, through
copying, then do so. This works on the theory that
*************** can_convert_to_perfect_nest (struct loop
*** 2258,2267 ****
win we get from rearranging the memory walk
the loop is doing so that it has better
cache behavior. */
! if (TREE_CODE (stmt) == MODIFY_EXPR
! && (can_put_in_inner_loop (loop->inner, stmt)
! || can_put_after_inner_loop (loop, stmt)))
! continue;
/* Otherwise, if the bb of a statement we care about isn't
dominated by the header of the inner loop, then we can't
--- 2240,2282 ----
win we get from rearranging the memory walk
the loop is doing so that it has better
cache behavior. */
! if (TREE_CODE (stmt) == MODIFY_EXPR)
! {
! use_operand_p use_a, use_b;
! imm_use_iterator imm_iter;
! ssa_op_iter op_iter;
! tree op0 = TREE_OPERAND (stmt, 0);
! tree scev = instantiate_parameters
! (loop, analyze_scalar_evolution (loop, op0));
!
! /* If the IV is simple, it can be duplicated. */
! if (!automatically_generated_chrec_p (scev))
! {
! tree step = evolution_part_in_loop_num (scev, loop->num);
! if (step && step != chrec_dont_know
! && TREE_CODE (step) == INTEGER_CST)
! continue;
! }
!
! /* The statement should not define a variable used
! in the inner loop. */
! if (TREE_CODE (op0) == SSA_NAME)
! FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
! if (bb_for_stmt (USE_STMT (use_a))->loop_father
! == loop->inner)
! goto fail;
!
! /* The variables should not be used in both loops. */
! FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
! FOR_EACH_IMM_USE_FAST (use_b, imm_iter, USE_FROM_PTR (use_a))
! if (bb_for_stmt (USE_STMT (use_b))->loop_father
! == loop->inner)
! goto fail;
!
! if (can_put_in_inner_loop (loop->inner, stmt)
! || can_put_after_inner_loop (loop, stmt))
! continue;
! }
/* Otherwise, if the bb of a statement we care about isn't
dominated by the header of the inner loop, then we can't
*************** perfect_nestify (struct loops *loops,
*** 2351,2364 ****
tree stmt;
tree oldivvar, ivvar, ivvarinced;
VEC(tree,heap) *phis = NULL;
!
! if (!can_convert_to_perfect_nest (loop, loopivs))
! return false;
!
! /* Create the new loop */
!
olddest = loop->single_exit->dest;
! preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
/* Push the exit phi nodes that we are moving. */
--- 2366,2375 ----
tree stmt;
tree oldivvar, ivvar, ivvarinced;
VEC(tree,heap) *phis = NULL;
!
! /* Create the new loop. */
olddest = loop->single_exit->dest;
! preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
/* Push the exit phi nodes that we are moving. */
*************** perfect_nestify (struct loops *loops,
*** 2490,2496 ****
}
/* Make copies of this statement to put it back next
! to its uses. */
FOR_EACH_IMM_USE_STMT (imm_stmt, imm_iter,
TREE_OPERAND (stmt, 0))
{
--- 2501,2507 ----
}
/* Make copies of this statement to put it back next
! to its uses. */
FOR_EACH_IMM_USE_STMT (imm_stmt, imm_iter,
TREE_OPERAND (stmt, 0))
{
*************** perfect_nestify (struct loops *loops,
*** 2506,2513 ****
--- 2517,2526 ----
newname = SSA_NAME_VAR (newname);
newname = make_ssa_name (newname, newstmt);
TREE_OPERAND (newstmt, 0) = newname;
+
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, newname);
+
bsi_insert_before (&tobsi, newstmt, BSI_SAME_STMT);
update_stmt (newstmt);
update_stmt (imm_stmt);
*************** perfect_nestify (struct loops *loops,
*** 2535,2544 ****
continue;
}
! replace_uses_equiv_to_x_with_y (loop, stmt,
! oldivvar,
! VEC_index (int, steps, 0),
! ivvar);
bsi_move_before (&bsi, &tobsi);
/* If the statement has any virtual operands, they may
--- 2548,2556 ----
continue;
}
! replace_uses_equiv_to_x_with_y
! (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar);
!
bsi_move_before (&bsi, &tobsi);
/* If the statement has any virtual operands, they may