This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 3/3] Loop flattening on loop-SSA.


Hi Richi,

Combined patch is 0001-Loop-flattening-on-loop-SSA.patch
the other patches are the separate fixes as asked in this review.

I am currently testing this on amd64-linux.

Sebastian

On Fri, Nov 5, 2010 at 07:51, Richard Guenther <rguenther@suse.de> wrote:
> On Wed, 3 Nov 2010, Sebastian Pop wrote:
>
>> 2010-10-20 ?Sebastian Pop ?<sebastian.pop@amd.com>
>>
>> ? ? ? * Makefile.in (OBJS-common): Add tree-loop-flattening.o.
>> ? ? ? (tree-loop-flattening.o): New.
>> ? ? ? * common.opt (ftree-loop-flatten): New.
>> ? ? ? * dbgcnt.def (lflat): New.
>> ? ? ? * params.def (PARAM_LFLAT_MAX_NB_BBS): New.
>> ? ? ? * passes.c (init_optimization_passes): Add new passes
>> ? ? ? pass_flatten_loops and pass_if_conversion after loop vectorization
>> ? ? ? and before pass_slp_vectorize.
>> ? ? ? * timevar.def (TV_TREE_LOOP_FLATTENING): New.
>> ? ? ? * tree-loop-flattening.c: New.
>> ? ? ? * tree-pass.h (pass_flatten_loops): Declared.
>> ? ? ? * tree-flow.h (gate_tree_if_conversion): Declared.
>> ? ? ? (tree_if_conversion): Declared.
>> ? ? ? * tree-if-conv.c (tree_if_conversion): Not static anymore.
>> ? ? ? (gate_tree_if_conversion): Same.
>
> Comments inline.
>
> What extra testing apart from the 4 testcases did this new pass get?
> Do we pass bootstrap with it enabled? ?Did you check if we regress
> in SPEC 2k6 when it is enabled?
>
>> ? ? ? * gcc.dg/tree-ssa/flat-loop-1.c: New.
>> ? ? ? * gcc.dg/tree-ssa/flat-loop-2.c: New.
>> ? ? ? * gcc.dg/tree-ssa/flat-loop-3.c: New.
>> ? ? ? * gcc.dg/tree-ssa/flat-loop-4.c: New.
>> ---
>> ?gcc/ChangeLog ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? | ? 18 +
>> ?gcc/Makefile.in ? ? ? ? ? ? ? ? ? ? ? ? ? ? | ? ?4 +
>> ?gcc/common.opt ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?| ? ?4 +
>> ?gcc/dbgcnt.def ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?| ? ?1 +
>> ?gcc/params.def ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?| ? ?7 +
>> ?gcc/passes.c ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?| ? ?1 +
>> ?gcc/testsuite/ChangeLog ? ? ? ? ? ? ? ? ? ? | ? ?7 +
>> ?gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c | ? 28 ++
>> ?gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c | ? 39 ++
>> ?gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c | ? 19 +
>> ?gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c | ? 23 +
>> ?gcc/timevar.def ? ? ? ? ? ? ? ? ? ? ? ? ? ? | ? ?1 +
>> ?gcc/tree-flow.h ? ? ? ? ? ? ? ? ? ? ? ? ? ? | ? ?4 +
>> ?gcc/tree-if-conv.c ? ? ? ? ? ? ? ? ? ? ? ? ?| ? ?4 +-
>> ?gcc/tree-loop-flattening.c ? ? ? ? ? ? ? ? ?| ?630 +++++++++++++++++++++++++++
>> ?gcc/tree-pass.h ? ? ? ? ? ? ? ? ? ? ? ? ? ? | ? ?1 +
>> ?16 files changed, 789 insertions(+), 2 deletions(-)
>> ?create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
>> ?create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
>> ?create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
>> ?create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
>> ?create mode 100644 gcc/tree-loop-flattening.c
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index 3ceb7b6..f312b27 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,5 +1,23 @@
>> ?2010-10-20 ?Sebastian Pop ?<sebastian.pop@amd.com>
>>
>> + ? ? * Makefile.in (OBJS-common): Add tree-loop-flattening.o.
>> + ? ? (tree-loop-flattening.o): New.
>> + ? ? * common.opt (ftree-loop-flatten): New.
>> + ? ? * dbgcnt.def (lflat): New.
>> + ? ? * params.def (PARAM_LFLAT_MAX_NB_BBS): New.
>> + ? ? * passes.c (init_optimization_passes): Add new passes
>> + ? ? pass_flatten_loops and pass_if_conversion after loop vectorization
>> + ? ? and before pass_slp_vectorize.
>> + ? ? * timevar.def (TV_TREE_LOOP_FLATTENING): New.
>> + ? ? * tree-loop-flattening.c: New.
>> + ? ? * tree-pass.h (pass_flatten_loops): Declared.
>> + ? ? * tree-flow.h (gate_tree_if_conversion): Declared.
>> + ? ? (tree_if_conversion): Declared.
>> + ? ? * tree-if-conv.c (tree_if_conversion): Not static anymore.
>> + ? ? (gate_tree_if_conversion): Same.
>> +
>> +2010-10-20 ?Sebastian Pop ?<sebastian.pop@amd.com>
>> +
>> ? ? ? * tree-if-conv.c (if_convertible_loop_p_1): Do not call
>> ? ? ? compute_data_dependences_for_loop.
>> ? ? ? (if_convertible_loop_p): Do not free refs and ddrs.
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 898e962..55b67f4 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -1368,6 +1368,7 @@ OBJS-common = \
>> ? ? ? tree-into-ssa.o \
>> ? ? ? tree-iterator.o \
>> ? ? ? tree-loop-distribution.o \
>> + ? ? tree-loop-flattening.o \
>> ? ? ? tree-loop-linear.o \
>> ? ? ? tree-nested.o \
>> ? ? ? tree-nrv.o \
>> @@ -2773,6 +2774,9 @@ tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coret
>> ? ? $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
>> ? ? $(TREE_PASS_H) $(TREE_DATA_REF_H) $(EXPR_H) \
>> ? ? langhooks.h $(TREE_VECTORIZER_H)
>> +tree-loop-flattening.o: tree-loop-flattening.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>> + ? $(TM_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \
>> + ? $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(TREE_PASS_H) $(DBGCNT_H)
>> ?tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
>> ? ? $(TREE_FLOW_H) $(TREE_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
>> ? ? $(DIAGNOSTIC_H) $(TREE_PASS_H) langhooks.h gt-tree-parloops.h \
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index 8fe796f..c969979 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -1632,6 +1632,10 @@ ftree-loop-distribute-patterns
>> ?Common Report Var(flag_tree_loop_distribute_patterns) Optimization
>> ?Enable loop distribution for patterns transformed into a library call
>>
>> +ftree-loop-flatten
>> +Common Report Var(flag_tree_loop_flattening) Optimization
>> +Enable loop flattening on trees
>> +
>> ?ftree-loop-im
>> ?Common Report Var(flag_tree_loop_im) Init(1) Optimization
>> ?Enable loop invariant motion on trees
>> diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
>> index 0492d66..0ef9a72 100644
>> --- a/gcc/dbgcnt.def
>> +++ b/gcc/dbgcnt.def
>> @@ -166,6 +166,7 @@ DEBUG_COUNTER (if_conversion_tree)
>> ?DEBUG_COUNTER (if_after_combine)
>> ?DEBUG_COUNTER (if_after_reload)
>> ?DEBUG_COUNTER (local_alloc_for_sched)
>> +DEBUG_COUNTER (lflat)
>> ?DEBUG_COUNTER (postreload_cse)
>> ?DEBUG_COUNTER (pre)
>> ?DEBUG_COUNTER (pre_insn)
>> diff --git a/gcc/params.def b/gcc/params.def
>> index 49a6185..3fffc35 100644
>> --- a/gcc/params.def
>> +++ b/gcc/params.def
>> @@ -788,6 +788,13 @@ DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION,
>> ? ? ? ? "maximum number of basic blocks per function to be analyzed by Graphite",
>> ? ? ? ? 100, 0, 0)
>>
>> +/* Maximal number of basic blocks in a loop to be flattened. ?*/
>> +
>> +DEFPARAM (PARAM_LFLAT_MAX_NB_BBS,
>> + ? ? ? "lflat-max-nb-bbs",
>> + ? ? ? "maximum number of basic blocks in a loop to be flattened",
>> + ? ? ? 100, 0, 0)
>> +
>> ?/* Avoid doing loop invariant motion on very large loops. ?*/
>>
>> ?DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP,
>> diff --git a/gcc/passes.c b/gcc/passes.c
>> index 1308ce9..22110a4 100644
>> --- a/gcc/passes.c
>> +++ b/gcc/passes.c
>> @@ -917,6 +917,7 @@ init_optimization_passes (void)
>> ? ? ? ? NEXT_PASS (pass_parallelize_loops);
>> ? ? ? ? NEXT_PASS (pass_loop_prefetch);
>> ? ? ? ? NEXT_PASS (pass_iv_optimize);
>> + ? ? ? NEXT_PASS (pass_flatten_loops);
>> ? ? ? ? NEXT_PASS (pass_tree_loop_done);
>> ? ? ? }
>> ? ? ? ?NEXT_PASS (pass_cse_reciprocals);
>> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
>> index 4233f86..2b3b93e 100644
>> --- a/gcc/testsuite/ChangeLog
>> +++ b/gcc/testsuite/ChangeLog
>> @@ -1,5 +1,12 @@
>> ?2010-10-20 ?Sebastian Pop ?<sebastian.pop@amd.com>
>>
>> + ? ? * gcc.dg/tree-ssa/flat-loop-1.c: New.
>> + ? ? * gcc.dg/tree-ssa/flat-loop-2.c: New.
>> + ? ? * gcc.dg/tree-ssa/flat-loop-3.c: New.
>> + ? ? * gcc.dg/tree-ssa/flat-loop-4.c: New.
>> +
>> +2010-10-20 ?Sebastian Pop ?<sebastian.pop@amd.com>
>> +
>> ? ? ? PR tree-optimization/46029
>> ? ? ? * g++.dg/tree-ssa/ifc-pr46029.C: New.
>> ? ? ? * gcc.dg/tree-ssa/ifc-8.c: New.
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
>> new file mode 100644
>> index 0000000..bee8a2b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +struct stack_segment
>> +{
>> + ?struct dynamic_allocation_blocks *dynamic_allocation;
>> +};
>> +struct dynamic_allocation_blocks
>> +{
>> + ?struct dynamic_allocation_blocks *next;
>> +};
>> +static struct dynamic_allocation_blocks *
>> +merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
>> + ? ? ? ? ? ? ? ? ? struct dynamic_allocation_blocks *b)
>> +{
>> + ?struct dynamic_allocation_blocks **pp;
>> + ?for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
>> + ? ?*pp = b;
>> + ?return a;
>> +}
>> +__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
>> +{
>> + ?struct dynamic_allocation_blocks *ret;
>> + ?struct stack_segment *pss;
>> + ?pss = *pp;
>> + ?while (pss != ((void *)0))
>> + ? ?ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
>> new file mode 100644
>> index 0000000..a7287fb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
>> @@ -0,0 +1,39 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +struct stack_segment
>> +{
>> + ?struct stack_segment *next;
>> + ?struct dynamic_allocation_blocks *dynamic_allocation;
>> +};
>> +struct dynamic_allocation_blocks
>> +{
>> + ?struct dynamic_allocation_blocks *next;
>> +};
>> +static struct dynamic_allocation_blocks *
>> +merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
>> + ? ? ? ?struct dynamic_allocation_blocks *b)
>> +{
>> + ?struct dynamic_allocation_blocks **pp;
>> + ?if (b == ((void *)0))
>> + ?for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
>> + ? ?;
>> + ?return a;
>> +}
>> +__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
>> +{
>> + ?struct dynamic_allocation_blocks *ret;
>> + ?struct stack_segment *pss;
>> + ?while (pss != ((void *)0))
>> + ? ?{
>> + ? ? ?struct stack_segment *next;
>> + ? ? ?next = pss->next;
>> + {
>> + ? if (free_dynamic)
>> + ? ? {
>> + ? ? ? ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
>> + ? ? }
>> + }
>> + ? ? ?pss = next;
>> + ? ?}
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
>> new file mode 100644
>> index 0000000..d3d66ab
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +
>> +int
>> +split_directories (const char *name, int *ptr_num_dirs)
>> +{
>> + ?int num_dirs = 0;
>> + ?char **dirs;
>> + ?const char *p, *q;
>> + ?int ch;
>> + ?while ((ch = *p++) != '\0')
>> + ? ?{
>> + ? num_dirs++;
>> + ? while (((*p) == '/'))
>> + ? ? p++;
>> + ? ?}
>> + ?return (dirs[num_dirs - 1] == ((void *)0));
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
>> new file mode 100644
>> index 0000000..8e551ac
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +void
>> +formatted_backspace (int common, char *s)
>> +{
>> + ?int base;
>> + ?int n;
>> + ?do
>> + ? ?{
>> + ? ? ?if (sseek (s, base, 0) < 0)
>> + ? ? goto io_error;
>> +
>> + ? ? ?while (n > 0)
>> + ? ? {
>> + ? ? ? ? ?n--;
>> + ? ? ? base += n + 1;
>> + ? ? }
>> + ? ?}
>> + ?while (base != 0);
>> + io_error:
>> + ?generate_error (common, 0, ((void *)0));
>> +}
>
> The testcases seem to origin from ICEs found during development. ?There
> is a lack of functional tests, please consider coming up with some,
> eventually testing for enabled extra optimizations.
>
>
>> diff --git a/gcc/timevar.def b/gcc/timevar.def
>> index 86e2999..89ff8e8 100644
>> --- a/gcc/timevar.def
>> +++ b/gcc/timevar.def
>> @@ -152,6 +152,7 @@ DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS ? ?, "Graphite data dep analysis")
>> ?DEFTIMEVAR (TV_GRAPHITE_CODE_GEN ? ? , "Graphite code generation")
>> ?DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
>> ?DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
>> +DEFTIMEVAR (TV_TREE_LOOP_FLATTENING ?, "tree loop flattening")
>> ?DEFTIMEVAR (TV_CHECK_DATA_DEPS ? ? ? , "tree check data dependences")
>> ?DEFTIMEVAR (TV_TREE_PREFETCH ? ? ?, "tree prefetching")
>> ?DEFTIMEVAR (TV_TREE_LOOP_IVOPTS ? ? ? ? ? , "tree iv optimization")
>> diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
>> index c2702dc..e1ee69f 100644
>> --- a/gcc/tree-flow.h
>> +++ b/gcc/tree-flow.h
>> @@ -730,6 +730,10 @@ bool contains_abnormal_ssa_name_p (tree);
>> ?bool stmt_dominates_stmt_p (gimple, gimple);
>> ?void mark_virtual_ops_for_renaming (gimple);
>>
>> +/* In tree-if-conv.c */
>> +bool gate_tree_if_conversion (void);
>> +bool tree_if_conversion (struct loop *, tree *);
>> +
>
> Why'd you need to export the gate? ?I guess if-conversion should
> happen unconditionally for loops that are flattened as I see it is
> really part of the flattening transformation?
>
>> ?/* In tree-ssa-dce.c */
>> ?void mark_virtual_phi_result_for_renaming (gimple);
>>
>> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
>> index 5b941af..3c30abb 100644
>> --- a/gcc/tree-if-conv.c
>> +++ b/gcc/tree-if-conv.c
>> @@ -1599,7 +1599,7 @@ combine_blocks (struct loop *loop, tree *scratch_pad)
>> ?/* If-convert LOOP when it is legal. ?For the moment this pass has no
>> ? ? profitability analysis. ?Returns true when something changed. ?*/
>>
>> -static bool
>> +bool
>> ?tree_if_conversion (struct loop *loop, tree *scratch_pad)
>> ?{
>> ? ?bool changed = false;
>> @@ -1662,7 +1662,7 @@ main_tree_if_conversion (void)
>>
>> ?/* Returns true when the if-conversion pass is enabled. ?*/
>>
>> -static bool
>> +bool
>> ?gate_tree_if_conversion (void)
>> ?{
>> ? ?return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0)
>> diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
>> new file mode 100644
>> index 0000000..4bc8768
>> --- /dev/null
>> +++ b/gcc/tree-loop-flattening.c
>> @@ -0,0 +1,630 @@
>> +/* Loop flattening.
>> + ? Copyright (C) 2010 Free Software Foundation, Inc.
>> + ? Contributed by Sebastian Pop <sebastian.pop@amd.com>.
>> +
>> +This file is part of GCC.
>> +
>> +GCC is free software; you can redistribute it and/or modify
>> +it under the terms of the GNU General Public License as published by
>> +the Free Software Foundation; either version 3, or (at your option)
>> +any later version.
>> +
>> +GCC is distributed in the hope that it will be useful,
>> +but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ?See the
>> +GNU General Public License for more details.
>> +
>> +You should have received a copy of the GNU General Public License
>> +along with GCC; see the file COPYING3. ?If not see
>> +<http://www.gnu.org/licenses/>. ?*/
>> +
>> +#include "config.h"
>> +#include "system.h"
>> +#include "coretypes.h"
>> +#include "tm.h"
>> +#include "ggc.h"
>> +#include "tree.h"
>> +#include "rtl.h"
>> +#include "output.h"
>> +#include "basic-block.h"
>> +#include "diagnostic.h"
>> +#include "tree-flow.h"
>> +#include "toplev.h"
>> +#include "tree-dump.h"
>> +#include "timevar.h"
>> +#include "cfgloop.h"
>> +#include "tree-pass.h"
>> +#include "gimple.h"
>> +#include "params.h"
>> +#include "dbgcnt.h"
>> +
>> +/* This loop flattening pass transforms backward pointing edges into
>> + ? forward pointing edges.
>> +
>> + ? The back-edge removal transformation was described in the 1983
>> + ? paper by Allen J. R., Ken Kennedy, Carrie Porterfield, and Joe
>> + ? Warren: "Conversion of control dependence to data dependence"
>> + ? available from http://doi.acm.org/10.1145/567067.567085
>> +
>> + ? The back-edge removal algorithm was presented in that paper as part
>> + ? of the if-conversion algorithm for backward pointing edges. ?In
>> + ? this section we will first provide a description of this technique
>> + ? adapted for the Gimple-SSA form, followed by an example, and a
>> + ? discussion of the differences with the higher level loop flattening
>> + ? transformation.
>> +
>> + ? The back-edge removal algorithm transforms control dependences into
>> + ? data dependences by using a boolean variable. ?The values taken by
>> + ? the boolean variable control the execution path of the forward
>> + ? edges created in order to use the back-edge of an outer loop.
>> +
>> + ? The first step of the algorithm detects a surrounding loop and all
>> + ? the back-edges of the loop body: these back-edges can be inner
>> + ? loops or strongly connected components of the CFG that cannot be
>> + ? reduced to natural loops.
>> +
>> + ? Each back-edge is removed by redirecting the target of the
>> + ? back-edge to the latch basic block of the surrounding loop. ?A
>> + ? boolean variable is created in the latch. ?It is cleared when the
>> + ? redirected back-edge is taken and it is set to true for any other
>> + ? paths leading to the latch.
>> +
>> + ? The header basic block of the surrounding loop is split before its
>> + ? statements and a new condition is added based on the control
>> + ? variable: when the control variable is set to true, the execution
>> + ? proceeds as normal to the basic block that contains the statements
>> + ? of the header; when the control variable is cleared, meaning that
>> + ? the back-edge has been taken, the execution proceeds to the point
>> + ? where the redirected back-edge was pointing.
>> +
>> + ? The last step updates the SSA form after all the back-edges have
>> + ? been redirected to the latch, and the new edges from the header to
>> + ? the destination of back-edges have been created.
>> +
>> + ? Another description of loop flattening in a very Fortran specific
>> + ? way is in the 1992 paper by Reinhard von Hanxleden and Ken Kennedy:
>> + ? "Relaxing SIMD Control Flow Constraints using Loop Transformations"
>> + ? available from
>> + ? http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 */
>> +
>> +/* Keep the loop structure for LOOP and remove all the loop structures
>> + ? under LOOP. ?*/
>> +
>> +static void
>> +cancel_subloops (loop_p loop)
>> +{
>> + ?int i;
>> + ?loop_p li;
>> + ?VEC (loop_p, heap) *lv = VEC_alloc (loop_p, heap, 3);
>> +
>> + ?for (li = loop->inner; li; li = li->next)
>> + ? ?VEC_safe_push (loop_p, heap, lv, li);
>> +
>> + ?FOR_EACH_VEC_ELT (loop_p, lv, i, li)
>> + ? ?cancel_loop_tree (li);
>> +
>> + ?VEC_free (loop_p, heap, lv);
>> +}
>
> This function should be in cfgloop.c and implemented in simpler
> form, like
>
> void
> cancel_subloops (struct loop *loop)
> {
> ?while (loop->inner)
> ? ?cancel_loop_tree (loop->inner);
> }
>
> simply following the cancel_loop_tree example.
>
>> +/* Before creating other phi nodes in LOOP->header for the control
>> + ? flags, update the phi nodes of LOOP->header and add the necessary
>> + ? phi nodes in the LOOP->latch that now contains several paths on
>> + ? which the values are not updated. ?PRED_E is the single edge that
>> + ? was pointing to the LOOP->latch basic block before inner back-edges
>> + ? were redirected to the LOOP->latch. ?*/
>> +
>> +static void
>> +update_loop_phi_nodes (loop_p loop, edge pred_e)
>> +{
>> + ?gimple_stmt_iterator gsi;
>> +
>> + ?for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
>> + ? ?{
>> + ? ? ?edge e;
>> + ? ? ?edge_iterator ei;
>> + ? ? ?gimple phi = gsi_stmt (gsi);
>> + ? ? ?tree back_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
>> + ? ? ?tree res = gimple_phi_result (phi);
>> + ? ? ?tree var = SSA_NAME_VAR (res);
>> +
>> + ? ? ?phi = create_phi_node (var, loop->latch);
>> + ? ? ?create_new_def_for (gimple_phi_result (phi), phi,
>> + ? ? ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (phi));
>
> Using create_new_def_for looks very suspicios. ?create_phi_node
> will already create a new SSA name for you for the result, so
> it doesn't make any sense to fiddle with the SSA updaters machinery, does
> it?
>
>> + ? ? ?FOR_EACH_EDGE (e, ei, loop->latch->preds)
>> + ? ? add_phi_arg (phi, (e == pred_e ? back_arg : res),
>> + ? ? ? ? ? ? ? ? ?e, UNKNOWN_LOCATION);
>> +
>> + ? ? ?res = gimple_phi_result (phi);
>> + ? ? ?add_phi_arg (gsi_stmt (gsi), res, loop_latch_edge (loop),
>> + ? ? ? ? ? ? ? ?UNKNOWN_LOCATION);
>> + ? ?}
>> +}
>> +
>> +/* Creates a control flag for the FORWARDED_EDGE that represents the
>> + ? back-edge that has been forwarded to the latch basic block of LOOP.
>> + ? INNER_BODY is the basic block to which the back-edge was pointing
>> + ? before redirection. ?This function creates a boolean control flag
>> + ? that is cleared when the FORWARDED_EDGE is taken and set for all
>> + ? the other paths. ?This function adds the corresponding phi nodes in
>> + ? LOOP->latch and LOOP->header, and finally adds an edge from
>> + ? LOOP->header to the INNER_BODY guarded by the control flag. ?*/
>> +
>> +static void
>> +create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
>> +{
>> + ?edge e, preheader;
>> + ?edge outer_latch_e = loop_latch_edge (loop);
>> + ?const char *name = "_flat_";
>> + ?tree var = create_tmp_var (boolean_type_node, name);
>
> create_tmp_reg
>
>> + ?tree res;
>> + ?gimple phi, cond_stmt;
>> + ?gimple_stmt_iterator gsi;
>> + ?edge_iterator ei;
>> +
>> + ?/* Adds a control variable for the redirected FORWARDED_EDGE. ?*/
>> + ?add_referenced_var (var);
>> + ?phi = create_phi_node (var, forwarded_edge->dest);
>> + ?create_new_def_for (gimple_phi_result (phi), phi,
>> + ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (phi));
>
> Likewise.
>
>> + ?FOR_EACH_EDGE (e, ei, outer_latch_e->src->preds)
>> + ? ?add_phi_arg (phi, (e == forwarded_edge
>> + ? ? ? ? ? ? ? ? ? ?? boolean_false_node
>> + ? ? ? ? ? ? ? ? ? ?: boolean_true_node),
>> + ? ? ? ? ? ? ?e, UNKNOWN_LOCATION);
>> + ?res = gimple_phi_result (phi);
>> +
>> + ?/* Add a phi node in LOOP->header for the control variable. ?*/
>> + ?phi = create_phi_node (var, loop->header);
>> + ?create_new_def_for (gimple_phi_result (phi), phi,
>> + ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (phi));
>
> Again.
>
>> + ?preheader = loop_preheader_edge (loop);
>> + ?FOR_EACH_EDGE (e, ei, loop->header->preds)
>> + ? ?add_phi_arg (phi, (e == preheader
>> + ? ? ? ? ? ? ? ? ? ?? boolean_true_node
>> + ? ? ? ? ? ? ? ? ? ?: res),
>> + ? ? ? ? ? ? ?e, UNKNOWN_LOCATION);
>> + ?res = gimple_phi_result (phi);
>> +
>> + ?/* Split LOOP->header to insert the control variable condition. ?*/
>> + ?e = split_block_after_labels (loop->header);
>> + ?e->flags = EDGE_TRUE_VALUE;
>> + ?e = make_edge (loop->header, inner_body, EDGE_FALSE_VALUE);
>> + ?cond_stmt = gimple_build_cond (EQ_EXPR, res, boolean_true_node,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NULL_TREE, NULL_TREE);
>> + ?gsi = gsi_last_bb (loop->header);
>> + ?gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
>> +}
>> +
>> +/* Adds phi nodes to the LOOP->header and LOOP->latch for the ssa_name
>> + ? NAME. ?ARG is the argument of the latch phi node set for the
>> + ? FORWARDED_EDGE, and all the other edges merged by the latch phi
>> + ? node are set to the result of the LOOP->header phi node. ?The latch
>> + ? edge of the LOOP->header phi node is set to the result of the
>> + ? LOOP->latch phi node, and the other argument is set to an arbitrary
>> + ? valid value defined before the loop (note that this initial value
>> + ? is never used in the loop). ?Returns the LOOP->header phi result. ?*/
>> +
>> +static tree
>> +add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
>> + ? ? ? ? ? ? ? ? ? ? ? ?tree arg)
>> +{
>> + ?edge e;
>> + ?edge_iterator ei;
>> + ?tree res, zero, var = SSA_NAME_VAR (name);
>> + ?gimple loop_phi = create_phi_node (var, loop->header);
>> + ?gimple latch_phi = create_phi_node (var, loop->latch);
>> +
>> + ?create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
>> + ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (loop_phi));
>> + ?create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
>> + ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (latch_phi));
>
> Likewise.
>
>> + ?/* The value set to ZERO will never be used in the loop, however we
>> + ? ? have to construct something meaningful for virtual SSA_NAMEs. ?*/
>> + ?if (TREE_CODE (arg) != SSA_NAME)
>> + ? ?zero = arg;
>> + ?else if (is_gimple_reg (arg))
>> + ? ?zero = fold_convert (TREE_TYPE (arg), integer_zero_node);
>> + ?else
>> + ? ?zero = gimple_default_def (cfun, SSA_NAME_VAR (arg));
>
> That looks bogus. ?It will create overlapping life-ranges
> for virtual operands - just make sure you'll rename the VOPs
> and use gimple_vop (cfun) for the fallback. ?You shoudl also
> use build_zero_cst instead of fold_convert.
>
> Thus,
>
> ?mark_sym_for_renaming (gimple_vop (cfun));
>
>> + ?res = gimple_phi_result (latch_phi);
>> + ?FOR_EACH_EDGE (e, ei, loop->header->preds)
>> + ? ?add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
>> + ? ? ? ? ? ? ?e, UNKNOWN_LOCATION);
>> +
>> + ?res = gimple_phi_result (loop_phi);
>> + ?FOR_EACH_EDGE (e, ei, loop->latch->preds)
>> + ? ?add_phi_arg (latch_phi, (e == forwarded_edge ? arg : res),
>> + ? ? ? ? ? ? ?e, UNKNOWN_LOCATION);
>> +
>> + ?return res;
>> +}
>> +
>> +/* Creates phi nodes for each inductive definition, i.e., loop phi
>> + ? nodes. ?For each induction phi node in the old loop header, i.e.,
>> + ? in the single_succ (INNER_BODY), insert a phi node in the
>> + ? LOOP->latch that takes the updated value of the induction on the
>> + ? FORWARDED_EDGE, and maintains the same value as in the phi node of
>> + ? the LOOP->header for all the other possible paths reaching
>> + ? LOOP->latch. ?This function has to be called after all the
>> + ? back-edges have been redirected. ?*/
>> +
>> +static void
>> +update_inner_induction_phi_nodes (edge forwarded_edge, loop_p loop,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? basic_block inner_body)
>> +{
>> + ?gimple_stmt_iterator gsi;
>> +
>> + ?for (gsi = gsi_start_phis (single_succ (inner_body));
>> + ? ? ? !gsi_end_p (gsi); gsi_next (&gsi))
>> + ? ?{
>> + ? ? ?gimple old_loop_phi = gsi_stmt (gsi);
>> + ? ? ?tree back_arg = PHI_ARG_DEF_FROM_EDGE (old_loop_phi,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?single_succ_edge (inner_body));
>> + ? ? ?tree res = gimple_phi_result (old_loop_phi);
>> +
>> + ? ? ?res = add_header_and_latch_phis (loop, res, forwarded_edge, back_arg);
>> + ? ? ?add_phi_arg (old_loop_phi, res, single_succ_edge (inner_body),
>> + ? ? ? ? ? ? ? ?UNKNOWN_LOCATION);
>> + ? ?}
>> +}
>> +
>> +/* Renames all the uses of OLD_NAME with NEW_NAME (except the phi
>> + ? nodes of DEF_BB) in all the basic blocks dominated by DEF_BB and in
>> + ? the arguments of all the phi nodes originating in a basic block
>> + ? that is dominated by DEF_BB. ?*/
>> +
>> +static void
>> +rename_dominated_uses (loop_p loop, tree old_name, tree new_name,
>> + ? ? ? ? ? ? ? ? ? ?basic_block def_bb)
>> +{
>> + ?imm_use_iterator uit;
>> + ?gimple stmt;
>> + ?use_operand_p use_p;
>> + ?ssa_op_iter op_iter;
>> +
>> + ?FOR_EACH_IMM_USE_STMT (stmt, uit, old_name)
>> + ? ?{
>> + ? ? ?enum gimple_code code = gimple_code (stmt);
>> + ? ? ?basic_block use_bb = gimple_bb (stmt);
>> + ? ? ?edge_iterator ei;
>> + ? ? ?edge e;
>> +
>> + ? ? ?if (code == GIMPLE_PHI)
>> + ? ? {
>> + ? ? ? FOR_EACH_EDGE (e, ei, use_bb->preds)
>> + ? ? ? ? if (PHI_ARG_DEF_FROM_EDGE (stmt, e) == old_name
>> + ? ? ? ? ? ? && dominated_by_p (CDI_DOMINATORS, e->src, def_bb)
>> + ? ? ? ? ? ? && use_bb != def_bb)
>> + ? ? ? ? ? replace_exp (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx),
>> + ? ? ? ? ? ? ? ? ? ? ? ?new_name);
>
> ?SET_USE (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx), new_name);
>
>> + ? ? }
>> + ? ? ?else
>> + ? ? {
>> + ? ? ? if (!dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
>> + ? ? ? ? continue;
>> +
>> + ? ? ? if (use_bb->loop_father == loop)
>> + ? ? ? ? {
>> + ? ? ? ? ? FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
>> + ? ? ? ? ? ? if (USE_FROM_PTR (use_p) == old_name)
>> + ? ? ? ? ? ? ? replace_exp (use_p, new_name);
>> + ? ? ? ? }
>> + ? ? ? else
>> + ? ? ? ? /* Virtual operands are not translated into loop closed
>> + ? ? ? ? ? ?SSA form, and thus they may occur in the rest of
>> + ? ? ? ? ? ?the program without a loop close vphi node. ?*/
>
> But you are updating all uses again.
>
> ?You should simply use
>
> ? ? ? ?FOR_EACH_IMM_USE_ON_STMT (use_p, uit)
> ? ? ? ? ?SET_USE (use_p, new_name);
>
>> + ? ? ? ? FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
>> + ? ? ? ? ? if (USE_FROM_PTR (use_p) == old_name)
>> + ? ? ? ? ? ? replace_exp (use_p, new_name);
>> + ? ? }
>> + ? ?}
>> +}
>> +
>> +/* Helper function for add_missing_phi_nodes_1. ?Adds to LOOP all the
>> + ? missing phi nodes for NAME and updates the arguments of the
>> + ? LATCH_PHI node. ?LOOP_PHI node is the inductive definition of NAME
>> + ? in LOOP->header. ?*/
>> +
>> +static void
>> +add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
>> + ? ? ? ? ? ? ? ? ? ? ?VEC (gimple, heap) *phis)
>> +{
>> + ?unsigned i;
>> + ?basic_block bb, dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
>> + ?VEC (basic_block, heap) *dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dom_bb);
>> +
>> + ?FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb)
>> + ? ?{
>> + ? ? ?edge e;
>> + ? ? ?edge_iterator ei;
>> +
>> + ? ? ?if (bb == loop->latch
>> + ? ? ? || bb->loop_father != loop)
>> + ? ? continue;
>
> dom_bbs may be very large, it's much better to iterate over the
> loop bbs and do a dominator check. ?Or iterate over dominator sons
> with first_dom_son (), next_dom_son () and recurse, bailing out when
> you're running out of the loop.
>
>> + ? ? ?FOR_EACH_EDGE (e, ei, bb->succs)
>> + ? ? {
>> + ? ? ? gimple phi = VEC_index (gimple, phis, e->dest->index);
>> +
>> + ? ? ? if (phi)
>> + ? ? ? ? add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
>> +
>> + ? ? ? else if (!single_pred_p (e->dest)
>> + ? ? ? ? ? ? ? ?&& !dominated_by_p (CDI_DOMINATORS, e->dest, dom_bb)
>> + ? ? ? ? ? ? ? ?&& e->dest->loop_father == loop)
>> + ? ? ? {
>> + ? ? ? ? tree var = SSA_NAME_VAR (name);
>> +
>> + ? ? ? ? phi = create_phi_node (var, e->dest);
>> + ? ? ? ? create_new_def_for (gimple_phi_result (phi), phi,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (phi));
>
> Again.
>
>> + ? ? ? ? VEC_replace (gimple, phis, e->dest->index, phi);
>> + ? ? ? ? add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
>> + ? ? ? ? rename_dominated_uses (loop, old_name, gimple_phi_result (phi),
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?e->dest);
>> + ? ? ? ? add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?phis);
>> + ? ? ? }
>> + ? ? }
>> + ? ?}
>
> You leak dom_bbs.
>
>> +}
>> +
>> +/* Helper function for add_missing_phi_nodes. ?For all the definitions
>> + ? of DEF_STMT add the missing phi nodes in LOOP. ?*/
>> +
>> +static void
>> +add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
>> +{
>> + ?def_operand_p def_p;
>> + ?ssa_op_iter op_iter;
>> + ?basic_block bb = gimple_bb (def_stmt);
>> +
>> + ?FOR_EACH_PHI_OR_STMT_DEF (def_p, def_stmt, op_iter, SSA_OP_DEF|SSA_OP_VDEF)
>> + ? ?{
>> + ? ? ?edge e;
>> + ? ? ?edge_iterator ei;
>> + ? ? ?tree res, zero, var;
>> + ? ? ?gimple loop_phi, latch_phi, use_stmt;
>> + ? ? ?imm_use_iterator uit;
>> + ? ? ?tree name = DEF_FROM_PTR (def_p);
>> + ? ? ?bool needs_update = false;
>> + ? ? ?VEC (gimple, heap) *phis;
>> + ? ? ?int i;
>> +
>> + ? ? ?FOR_EACH_IMM_USE_STMT (use_stmt, uit, name)
>> + ? ? {
>> + ? ? ? basic_block use_bb = gimple_bb (use_stmt);
>> +
>> + ? ? ? if (!dominated_by_p (CDI_DOMINATORS, bb, use_bb))
>> + ? ? ? ? {
>> + ? ? ? ? ? needs_update = true;
>> + ? ? ? ? ? BREAK_FROM_IMM_USE_STMT (uit);
>> + ? ? ? ? }
>> + ? ? }
>> +
>> + ? ? ?if (!needs_update)
>> + ? ? continue;
>> +
>> + ? ? ?var = SSA_NAME_VAR (name);
>> + ? ? ?loop_phi = create_phi_node (var, loop->header);
>> + ? ? ?latch_phi = create_phi_node (var, loop->latch);
>> +
>> + ? ? ?create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
>> + ? ? ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (loop_phi));
>> + ? ? ?create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
>> + ? ? ? ? ? ? ? ? ? ? ? gimple_phi_result_ptr (latch_phi));
>
> Again.
>
>> + ? ? ?/* The value set to ZERO will never be used in the loop, however we
>> + ? ? ?have to construct something meaningful for virtual SSA_NAMEs. ?*/
>> + ? ? ?if (is_gimple_reg (name))
>> + ? ? zero = fold_convert (TREE_TYPE (name), integer_zero_node);
>> + ? ? ?else
>> + ? ? zero = gimple_default_def (cfun, SSA_NAME_VAR (name));
>> +
>> + ? ? ?res = gimple_phi_result (latch_phi);
>> + ? ? ?FOR_EACH_EDGE (e, ei, loop->header->preds)
>> + ? ? add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
>> + ? ? ? ? ? ? ? ? ?e, UNKNOWN_LOCATION);
>> +
>> + ? ? ?res = gimple_phi_result (loop_phi);
>> + ? ? ?FOR_EACH_EDGE (e, ei, loop->latch->preds)
>> + ? ? add_phi_arg (latch_phi, res, e, UNKNOWN_LOCATION);
>> +
>> + ? ? ?phis = VEC_alloc (gimple, heap, n_basic_blocks);
>> + ? ? ?for (i = 0; i < n_basic_blocks; i++)
>> + ? ? VEC_quick_push (gimple, phis, NULL);
>> +
>> + ? ? ?VEC_replace (gimple, phis, loop->latch->index, latch_phi);
>> + ? ? ?VEC_replace (gimple, phis, loop->header->index, loop_phi);
>> + ? ? ?add_missing_phi_nodes_2 (loop, name, name, phis);
>> +
>> + ? ? ?for (i = 0; i < n_basic_blocks; i++)
>> + ? ? {
>> + ? ? ? gimple phi = VEC_index (gimple, phis, i);
>> +
>> + ? ? ? if (!phi)
>> + ? ? ? ? continue;
>> +
>> + ? ? ? FOR_EACH_EDGE (e, ei, BASIC_BLOCK (i)->preds)
>> + ? ? ? ? if (!PHI_ARG_DEF_FROM_EDGE (phi, e))
>> + ? ? ? ? ? add_phi_arg (phi, res, e, UNKNOWN_LOCATION);
>> + ? ? }
>> +
>> + ? ? ?VEC_free (gimple, heap, phis);
>> + ? ?}
>> +}
>> +
>> +/* Walks over the code of LOOP and adds the missing phi nodes at
>> + ? control flow junctions. ?When a variable is defined in an outer
>> + ? loop and used in an inner loop, the definition dominates the use.
>> + ? After the loop flattening, the inner loop body is directly
>> + ? reachable from the LOOP->header by using the added edge guarded by
>> + ? the boolean flag that controls the execution of the back-edge that
>> + ? was eliminated. ?In this case, the use is not dominated by the
>> + ? definition, and this function adds the missing phi nodes. ?*/
>> +
>> +static void
>> +add_missing_phi_nodes (loop_p loop)
>> +{
>> + ?gimple_stmt_iterator gsi;
>> + ?int i, n = loop->num_nodes;
>> + ?basic_block *bbs = get_loop_body (loop);
>
> So you can even pass this down to add_missing_phi_nodes_2. ?Or
> even use get_loop_body_in_dom_order and thus only need to walk
> adjacent blocks in that array.
>
>> + ?for (i = 0; i < n; i++)
>> + ? ?{
>> + ? ? ?basic_block bb = bbs[i];
>> +
>> + ? ? ?/* LOOP->header dominates all the blocks of the loop body, and
>> + ? ? ?so we don't have to look at the missing phi nodes for the
>> + ? ? ?definitions of LOOP->header. ?*/
>> + ? ? ?if (bb == loop->header)
>> + ? ? continue;
>> +
>> + ? ? ?for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + ? ? if (!gimple_nop_p (gsi_stmt (gsi)))
>> + ? ? ? add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
>> +
>> + ? ? ?for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + ? ? add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
>> + ? ?}
>> +
>> + ?free (bbs);
>> +}
>> +
>> +/* Removes all the back-edges of LOOP except its own back-edge.
>> + ? SCRATCH_PAD is used in if-conversion. ?*/
>> +
>> +static unsigned
>> +flatten_loop (loop_p loop, tree *scratch_pad)
>> +{
>> + ?int i, n = loop->num_nodes;
>> + ?basic_block *bbs;
>> + ?VEC (edge, heap) *back_edges;
>> + ?VEC (basic_block, heap) *loop_body;
>> + ?edge_iterator ei;
>> + ?edge e, pred_e;
>> + ?unsigned max_nb_basic_blocks = PARAM_VALUE (PARAM_LFLAT_MAX_NB_BBS);;
>> +
>> + ?if (loop->num_nodes > max_nb_basic_blocks
>> + ? ? ?|| !single_exit (loop)
>> + ? ? ?|| !dbg_cnt (lflat))
>> + ? ?return 0;
>> +
>> + ?mark_dfs_back_edges ();
>> + ?bbs = get_loop_body (loop);
>> +
>> + ?back_edges = VEC_alloc (edge, heap, 3);
>> + ?loop_body = VEC_alloc (basic_block, heap, 3);
>> +
>> + ?for (i = 0; i < n; i++)
>> + ? ?FOR_EACH_EDGE (e, ei, bbs[i]->succs)
>> + ? ? ?if (e->flags & EDGE_DFS_BACK
>> + ? ? ? && e->src != loop->latch)
>> + ? ? VEC_safe_push (edge, heap, back_edges, e);
>> +
>> + ?free (bbs);
>> +
>> + ?/* Early return and do not modify the code when there are no back
>> + ? ? edges. ?*/
>> + ?if (VEC_empty (edge, back_edges))
>> + ? ?return 0;
>> +
>> + ?cancel_subloops (loop);
>> +
>> + ?/* Split the latch edge to make sure that the latch basic block does
>> + ? ? not contain code. ?*/
>> + ?loop->latch = split_edge (loop_latch_edge (loop));
>> + ?pred_e = single_pred_edge (loop->latch);
>> +
>> + ?FOR_EACH_VEC_ELT (edge, back_edges, i, e)
>> + ? ?{
>> + ? ? ?basic_block dest = split_edge (e);
>> +
>> + ? ? ?/* Redirect BACK_EDGE to LOOP->latch. ?*/
>> + ? ? ?redirect_edge_and_branch_force (e, loop->latch);
>> +
>> + ? ? ?/* Save the basic block where it was pointing. ?*/
>> + ? ? ?VEC_safe_push (basic_block, heap, loop_body, dest);
>> + ? ?}
>> +
>> + ?update_loop_phi_nodes (loop, pred_e);
>> +
>> + ?FOR_EACH_VEC_ELT (edge, back_edges, i, e)
>> + ? ?create_control_flag (e, loop, VEC_index (basic_block, loop_body, i));
>> +
>> + ?FOR_EACH_VEC_ELT (edge, back_edges, i, e)
>> + ? ?update_inner_induction_phi_nodes (e, loop, VEC_index (basic_block,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? loop_body, i));
>> +
>> + ?free_dominance_info (CDI_DOMINATORS);
>> + ?calculate_dominance_info (CDI_DOMINATORS);
>> + ?add_missing_phi_nodes (loop);
>> +
>> + ?/* If we redirected some back-edges, split the latch edge to create
>> + ? ? an empty LOOP->latch. ?*/
>> + ?if (!single_pred_p (loop->latch))
>> + ? ?loop->latch = split_edge (loop_latch_edge (loop));
>> +
>> + ?if (gate_tree_if_conversion ())
>> + ? ?tree_if_conversion (loop, scratch_pad);
>
> You are leaking VECs. ?As mentioned above testing the gate isn't
> necessary here.
>
>> + ?return TODO_update_ssa | TODO_verify_ssa;
>
> These TODOs belong in the pass structure.
>
>> +}
>> +
>> +/* Flattens all the loops of the current function. ?*/
>> +
>> +static unsigned int
>> +tree_loop_flattening (void)
>> +{
>> + ?unsigned todo = 0;
>> + ?loop_p loop;
>> + ?loop_iterator li;
>> + ?tree scratch_pad = NULL_TREE;
>> +
>> + ?if (number_of_loops () <= 1)
>> + ? ?return 0;
>> +
>> + ?FOR_EACH_LOOP (li, loop, 0)
>> + ? ?todo |= flatten_loop (loop, &scratch_pad);
>
> So we might end up recursively flattening loops (or not, as this
> walk is in undefined order). ?I'd say you want LI_ONLY_INNERMOST here,
> or do you really want to flatten all loop trees up to the number
> of basic blocks specified in the parm? ?I guess not.
>
> I think the pass misses a cost model and I'm still not sure when
> or if it will be profitable to do this at all (as said, no
> functional testcases). ?What's the immediate benefit for GCC 4.6?
>
>> +#ifdef ENABLE_CHECKING
>> + ?verify_dominators (CDI_DOMINATORS);
>> + ?verify_flow_info ();
>> +#endif
>> +
>> + ?cleanup_tree_cfg ();
>> + ?return todo;
>
> return TODO_cleanup_cfg, but only if you flattened a loop. ?So
> return TODO_cleanup_cfg from flatten_loop instead.
>
> Richard.
>
>> +}
>> +
>> +static bool
>> +gate_tree_loop_flattening (void)
>> +{
>> + ?return flag_tree_loop_flattening != 0;
>> +}
>> +
>> +struct gimple_opt_pass pass_flatten_loops =
>> +{
>> + {
>> + ?GIMPLE_PASS,
>> + ?"lflat", ? ? ? ? ? ? ? ? ? ? ? ? ? /* name */
>> + ?gate_tree_loop_flattening, ? ? ? ? /* gate */
>> + ?tree_loop_flattening, ? ? ? ? ? ? ? ? ? ? ?/* execute */
>> + ?NULL, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?/* sub */
>> + ?NULL, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?/* next */
>> + ?0, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /* static_pass_number */
>> + ?TV_TREE_LOOP_FLATTENING, ? ? ? ? ? /* tv_id */
>> + ?PROP_cfg | PROP_ssa, ? ? ? ? ? ? ? ? ? ? ? /* properties_required */
>> + ?0, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /* properties_provided */
>> + ?0, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /* properties_destroyed */
>> + ?0, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /* todo_flags_start */
>> + ?TODO_dump_func
>> + ? ?| TODO_update_ssa
>> + ? ?| TODO_ggc_collect ? ? ? ? ? ? ? ? ? ? ? /* todo_flags_finish */
>> + }
>> +};
>> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
>> index a87a770..e2f257f 100644
>> --- a/gcc/tree-pass.h
>> +++ b/gcc/tree-pass.h
>> @@ -374,6 +374,7 @@ extern struct gimple_opt_pass pass_graphite;
>> ?extern struct gimple_opt_pass pass_graphite_transforms;
>> ?extern struct gimple_opt_pass pass_if_conversion;
>> ?extern struct gimple_opt_pass pass_loop_distribution;
>> +extern struct gimple_opt_pass pass_flatten_loops;
>> ?extern struct gimple_opt_pass pass_vectorize;
>> ?extern struct gimple_opt_pass pass_slp_vectorize;
>> ?extern struct gimple_opt_pass pass_complete_unroll;
>>
>
> --
> Richard Guenther <rguenther@suse.de>
> Novell / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex
>
From 37ae67187f681c9e5635c6c825997dd2a56cd179 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Fri, 17 Sep 2010 14:04:49 -0500
Subject: [PATCH] Loop flattening on loop-SSA.

2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>

	* Makefile.in (OBJS-common): Add tree-loop-flattening.o.
	(tree-loop-flattening.o): New.
	* common.opt (ftree-loop-flatten): New.
	* dbgcnt.def (lflat): New.
	* params.def (PARAM_LFLAT_MAX_NB_BBS): New.
	* passes.c (init_optimization_passes): Add new passes
	pass_flatten_loops and pass_if_conversion after loop vectorization
	and before pass_slp_vectorize.
	* timevar.def (TV_TREE_LOOP_FLATTENING): New.
	* tree-loop-flattening.c: New.
	* tree-pass.h (pass_flatten_loops): Declared.
	* tree-flow.h (gate_tree_if_conversion): Declared.
	(tree_if_conversion): Declared.
	* tree-if-conv.c (tree_if_conversion): Not static anymore.
	(gate_tree_if_conversion): Same.

	* gcc.dg/tree-ssa/flat-loop-1.c: New.
	* gcc.dg/tree-ssa/flat-loop-2.c: New.
	* gcc.dg/tree-ssa/flat-loop-3.c: New.
	* gcc.dg/tree-ssa/flat-loop-4.c: New.
---
 gcc/ChangeLog                                |   18 +
 gcc/Makefile.in                              |    4 +
 gcc/cfgloop.c                                |   10 +
 gcc/cfgloop.h                                |    1 +
 gcc/common.opt                               |    4 +
 gcc/dbgcnt.def                               |    1 +
 gcc/params.def                               |    7 +
 gcc/passes.c                                 |    1 +
 gcc/testsuite/ChangeLog                      |    7 +
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c  |   28 ++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c |   61 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c |   34 ++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c |   35 ++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c |   55 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c |   55 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c  |   35 ++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c  |   19 +
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c  |   23 +
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c  |   50 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c  |   50 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c  |   66 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c  |   61 +++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c  |   56 +++
 gcc/timevar.def                              |    1 +
 gcc/tree-flow.h                              |    3 +
 gcc/tree-if-conv.c                           |    2 +-
 gcc/tree-loop-flattening.c                   |  586 ++++++++++++++++++++++++++
 gcc/tree-pass.h                              |    1 +
 28 files changed, 1273 insertions(+), 1 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c
 create mode 100644 gcc/tree-loop-flattening.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d360463..f912f93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,23 @@
 2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* Makefile.in (OBJS-common): Add tree-loop-flattening.o.
+	(tree-loop-flattening.o): New.
+	* common.opt (ftree-loop-flatten): New.
+	* dbgcnt.def (lflat): New.
+	* params.def (PARAM_LFLAT_MAX_NB_BBS): New.
+	* passes.c (init_optimization_passes): Add new passes
+	pass_flatten_loops and pass_if_conversion after loop vectorization
+	and before pass_slp_vectorize.
+	* timevar.def (TV_TREE_LOOP_FLATTENING): New.
+	* tree-loop-flattening.c: New.
+	* tree-pass.h (pass_flatten_loops): Declared.
+	* tree-flow.h (gate_tree_if_conversion): Declared.
+	(tree_if_conversion): Declared.
+	* tree-if-conv.c (tree_if_conversion): Not static anymore.
+	(gate_tree_if_conversion): Same.
+
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* tree-if-conv.c (if_convertible_loop_p_1): Do not call
 	compute_data_dependences_for_loop.
 	(if_convertible_loop_p): Do not free refs and ddrs.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index cc58d7f..01fa1e8 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1362,6 +1362,7 @@ OBJS-common = \
 	tree-into-ssa.o \
 	tree-iterator.o \
 	tree-loop-distribution.o \
+	tree-loop-flattening.o \
 	tree-loop-linear.o \
 	tree-nested.o \
 	tree-nrv.o \
@@ -2767,6 +2768,9 @@ tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coret
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
    $(TREE_PASS_H) $(TREE_DATA_REF_H) $(EXPR_H) \
    langhooks.h $(TREE_VECTORIZER_H)
+tree-loop-flattening.o: tree-loop-flattening.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(TM_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \
+   $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(TREE_PASS_H) $(DBGCNT_H)
 tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(TREE_FLOW_H) $(TREE_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
    $(DIAGNOSTIC_H) $(TREE_PASS_H) langhooks.h gt-tree-parloops.h \
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 08d689d..bfab67b 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1297,6 +1297,16 @@ cancel_loop_tree (struct loop *loop)
   cancel_loop (loop);
 }
 
+/* Keep the loop structure for LOOP and remove all the loop structures
+   under LOOP.  */
+
+void
+cancel_subloops (loop_p loop)
+{
+  while (loop->inner)
+    cancel_loop_tree (loop->inner);
+}
+
 /* Checks that information about loops is correct
      -- sizes of loops are all right
      -- results of get_loop_body really belong to the loop
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index bf2614e..1679019 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -256,6 +256,7 @@ extern void add_bb_to_loop (basic_block, struct loop *);
 extern void remove_bb_from_loops (basic_block);
 
 extern void cancel_loop_tree (struct loop *);
+extern void cancel_subloops (struct loop *);
 extern void delete_loop (struct loop *);
 
 enum
diff --git a/gcc/common.opt b/gcc/common.opt
index 71f4578..49afc96 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1698,6 +1698,10 @@ ftree-loop-distribute-patterns
 Common Report Var(flag_tree_loop_distribute_patterns) Optimization
 Enable loop distribution for patterns transformed into a library call
 
+ftree-loop-flatten
+Common Report Var(flag_tree_loop_flattening) Optimization
+Enable loop flattening on trees
+
 ftree-loop-im
 Common Report Var(flag_tree_loop_im) Init(1) Optimization
 Enable loop invariant motion on trees
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index 0492d66..0ef9a72 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -166,6 +166,7 @@ DEBUG_COUNTER (if_conversion_tree)
 DEBUG_COUNTER (if_after_combine)
 DEBUG_COUNTER (if_after_reload)
 DEBUG_COUNTER (local_alloc_for_sched)
+DEBUG_COUNTER (lflat)
 DEBUG_COUNTER (postreload_cse)
 DEBUG_COUNTER (pre)
 DEBUG_COUNTER (pre_insn)
diff --git a/gcc/params.def b/gcc/params.def
index 6e55db6..d7b5d16 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -794,6 +794,13 @@ DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION,
 	  "maximum number of basic blocks per function to be analyzed by Graphite",
 	  100, 0, 0)
 
+/* Maximal number of basic blocks in a loop to be flattened.  */
+
+DEFPARAM (PARAM_LFLAT_MAX_NB_BBS,
+	  "lflat-max-nb-bbs",
+	  "maximum number of basic blocks in a loop to be flattened",
+	  100, 0, 0)
+
 /* Avoid doing loop invariant motion on very large loops.  */
 
 DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP,
diff --git a/gcc/passes.c b/gcc/passes.c
index da9bb15..d276723 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -917,6 +917,7 @@ init_optimization_passes (void)
 	  NEXT_PASS (pass_parallelize_loops);
 	  NEXT_PASS (pass_loop_prefetch);
 	  NEXT_PASS (pass_iv_optimize);
+	  NEXT_PASS (pass_flatten_loops);
 	  NEXT_PASS (pass_tree_loop_done);
 	}
       NEXT_PASS (pass_cse_reciprocals);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c5c2473..b213477 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,12 @@
 2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* gcc.dg/tree-ssa/flat-loop-1.c: New.
+	* gcc.dg/tree-ssa/flat-loop-2.c: New.
+	* gcc.dg/tree-ssa/flat-loop-3.c: New.
+	* gcc.dg/tree-ssa/flat-loop-4.c: New.
+
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
 	PR tree-optimization/46029
 	* g++.dg/tree-ssa/ifc-pr46029.C: New.
 	* gcc.dg/tree-ssa/ifc-8.c: New.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
new file mode 100644
index 0000000..bee8a2b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+struct stack_segment
+{
+  struct dynamic_allocation_blocks *dynamic_allocation;
+};
+struct dynamic_allocation_blocks
+{
+  struct dynamic_allocation_blocks *next;
+};
+static struct dynamic_allocation_blocks *
+merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
+		      struct dynamic_allocation_blocks *b)
+{
+  struct dynamic_allocation_blocks **pp;
+  for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
+    *pp = b;
+  return a;
+}
+__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
+{
+  struct dynamic_allocation_blocks *ret;
+  struct stack_segment *pss;
+  pss = *pp;
+  while (pss != ((void *)0))
+    ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c
new file mode 100644
index 0000000..56d14f9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c
@@ -0,0 +1,61 @@
+/* From graphite/block-7.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+
+int A[N][N], B[N][N], C[N][N];
+
+static void __attribute__((noinline))
+matmult (void)
+{
+  int i, j, k;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+        A[i][j] = 0;
+        for (k = 0; k < N; k++)
+          A[i][j] += B[i][k] * C[k][j];
+      }
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res = 0;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	B[i][j] = j;
+	C[i][j] = i;
+      }
+
+  matmult ();
+
+  for (i = 0; i < N; i++)
+    res += A[i][i];
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 529340000)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 2 loops" 1 "lflat" } } */
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c
new file mode 100644
index 0000000..68c5d49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c
@@ -0,0 +1,34 @@
+/* From graphite/run-id-1.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+void abort (void);
+
+void foo (int N)
+{
+  int i, j;
+  int x[1000][1000];
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      x[i][j] = i + j + 3;
+
+  /* This loop will not be flattened as the outermost loop has two
+     exit edges.  */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      if (x[i][j] != i + j + 3)
+	abort ();
+}
+
+int main(void)
+{
+  foo (1000);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c
new file mode 100644
index 0000000..eb0fb3d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c
@@ -0,0 +1,35 @@
+/* From graphite/run-id-4.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+extern void abort (void);
+
+__attribute__ ((noinline)) int
+foo (int x, int y, int *z)
+{
+  int a, b, c, d;
+
+  a = b = 0;
+  for (d = 0; d < y; d++)
+    {
+      if (z)
+	b = d * *z;
+      for (c = 0; c < x; c++)
+	a += b;
+    }
+
+  return a;
+}
+
+int
+main (void)
+{
+  if (foo (3, 2, 0) != 0)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c
new file mode 100644
index 0000000..1d04dbe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c
@@ -0,0 +1,55 @@
+/* From graphite/run-id-5.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#include <stdarg.h>
+
+extern void abort ();
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (int n){
+  int i,j;
+  int sum;
+
+  if (n<=0)
+    return 0;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < n; j+=2) {
+      sum += j;
+    }
+    a[i] = sum + j;
+  }
+}
+
+int main (void)
+{
+  int i,j;
+  int sum;
+
+  for (i=0; i<N; i++)
+    a[i] = i;
+
+  foo (N);
+
+  /* This won't be flattened.  */
+  for (i=0; i<N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j+=2)
+        sum += j;
+      if (a[i] != sum + j)
+        abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c
new file mode 100644
index 0000000..c87b893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c
@@ -0,0 +1,55 @@
+/* From graphite/run-id-6.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#include <stdarg.h>
+
+extern void abort ();
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (int n){
+  int i,j,k=0;
+  int sum;
+
+  if (n<=0)
+    return 0;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < n; j+=2) {
+      sum += k++;
+    }
+    a[i] = sum + j;
+  }
+}
+
+int main (void)
+{
+  int i,j,k=0;
+  int sum;
+
+  for (i=0; i<N; i++)
+    a[i] = i;
+
+  foo (N);
+
+  /* This is not flattened.  */
+  for (i=0; i<N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j+=2)
+        sum += k++;
+      if (a[i] != sum + j)
+	abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
new file mode 100644
index 0000000..4573801
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+struct stack_segment
+{
+  struct stack_segment *next;
+  struct dynamic_allocation_blocks *dynamic_allocation;
+};
+struct dynamic_allocation_blocks
+{
+  struct dynamic_allocation_blocks *next;
+};
+static struct dynamic_allocation_blocks *
+merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
+        struct dynamic_allocation_blocks *b)
+{
+  struct dynamic_allocation_blocks **pp;
+  if (b == ((void *)0))
+  for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
+    ;
+  return a;
+}
+__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
+{
+  struct dynamic_allocation_blocks *ret;
+  struct stack_segment *pss;
+  while (pss != ((void *)0))
+    {
+      struct stack_segment *next;
+      next = pss->next;
+      if (free_dynamic)
+	ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
+      pss = next;
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
new file mode 100644
index 0000000..cf01273
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+
+int
+split_directories (const char *name, int *ptr_num_dirs)
+{
+  int num_dirs = 0;
+  char **dirs;
+  const char *p, *q;
+  int ch;
+  while ((ch = *p++) != '\0')
+    {
+      num_dirs++;
+      while (((*p) == '/'))
+	p++;
+    }
+  return (dirs[num_dirs - 1] == ((void *)0));
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
new file mode 100644
index 0000000..8e551ac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+void
+formatted_backspace (int common, char *s)
+{
+  int base;
+  int n;
+  do
+    {
+      if (sseek (s, base, 0) < 0)
+	goto io_error;
+
+      while (n > 0)
+	{
+          n--;
+	  base += n + 1;
+	}
+    }
+  while (base != 0);
+ io_error:
+  generate_error (common, 0, ((void *)0));
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c
new file mode 100644
index 0000000..24704fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c
@@ -0,0 +1,50 @@
+/* From graphite/block-0.c.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 1000
+int a[N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int j;
+  int i;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[j] = a[i] + 1;
+
+  return a[0];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, res;
+
+  for (i = 0; i < N; i++)
+    a[i] = i;
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 1999)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c
new file mode 100644
index 0000000..8a5382f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c
@@ -0,0 +1,50 @@
+/* From graphite/block-1.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define MAX 100
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  int sum = 0;
+  int A[MAX * MAX];
+  int B[MAX * MAX];
+
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      {
+	A[i*MAX + j] = j;
+	B[i*MAX + j] = j;
+      }
+
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      A[i*MAX + j] += B[j*MAX + i];
+
+  for(i = 0; i < MAX; i++)
+    for(j = 0; j < MAX; j++)
+      sum += A[i*MAX + j];
+
+#if DEBUG
+  fprintf (stderr, "sum = %d \n", sum);
+#endif
+
+  if (sum != 990000)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 3 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c
new file mode 100644
index 0000000..3252545
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c
@@ -0,0 +1,66 @@
+/* From graphite/block-3.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-timeout-factor 4.0 } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 24
+#define M 100
+
+int A[M][M][M], B[M][M], C[M][M];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int i, j, k;
+
+  /* These loops contain too few iterations to be blocked by 64.  */
+  for (i = 0; i < 24; i++)
+    for (j = 0; j < 24; j++)
+      for (k = 0; k < 24; k++)
+        A[i][j][k] = B[i][k] * C[k][j];
+
+  /* These loops should still be loop blocked.  */
+  for (i = 0; i < M; i++)
+    for (j = 0; j < M; j++)
+      for (k = 0; k < M; k++)
+        A[i][j][k] = B[i][k] * C[k][j];
+
+  return A[0][0][0] + A[M-1][M-1][M-1];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < M; i++)
+    for (j = 0; j < M; j++)
+      {
+	B[i][j] = i;
+	C[i][j] = j;
+      }
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 9801)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 2 loops" 2 "lflat" } } */
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c
new file mode 100644
index 0000000..388e721
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c
@@ -0,0 +1,61 @@
+/* From graphite/block-5.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+
+int a[N][N];
+int b[N][N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int i, j;
+  int res = 0;
+
+  /* This loop nest should be blocked.  */
+  for (j = 1; j < N; j++)
+    for (i = 0; i < N; i++)
+      a[i][j] = a[i][j-1] + b[i][j];
+
+  for (i = 0; i < N; i++)
+    res += a[i][i];
+
+  return res;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	a[i][j] = i + j;
+	b[i][j] = i - j;
+      }
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 1333300)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 2 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c
new file mode 100644
index 0000000..06a2667
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c
@@ -0,0 +1,56 @@
+/* From graphite/block-6.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+int a[N][N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int i, j;
+  int res = 0;
+
+  /* Interchange is not legal for loops 0 and 1.  */
+  for (i = 1; i < N; i++)
+    for (j = 1; j < N - 1; j++)
+      a[i][j] = a[i-1][j+1] * a[i-1][j+1] / 2;
+
+  for (i = 0; i < N; i++)
+    res += a[i][i];
+
+  return res;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[i][j] = i + j;
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 204007516)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 2 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 86e2999..89ff8e8 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -152,6 +152,7 @@ DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS    , "Graphite data dep analysis")
 DEFTIMEVAR (TV_GRAPHITE_CODE_GEN     , "Graphite code generation")
 DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
 DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
+DEFTIMEVAR (TV_TREE_LOOP_FLATTENING  , "tree loop flattening")
 DEFTIMEVAR (TV_CHECK_DATA_DEPS       , "tree check data dependences")
 DEFTIMEVAR (TV_TREE_PREFETCH	     , "tree prefetching")
 DEFTIMEVAR (TV_TREE_LOOP_IVOPTS	     , "tree iv optimization")
diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
index c2702dc..a4bf6f8 100644
--- a/gcc/tree-flow.h
+++ b/gcc/tree-flow.h
@@ -730,6 +730,9 @@ bool contains_abnormal_ssa_name_p (tree);
 bool stmt_dominates_stmt_p (gimple, gimple);
 void mark_virtual_ops_for_renaming (gimple);
 
+/* In tree-if-conv.c */
+bool tree_if_conversion (struct loop *, tree *);
+
 /* In tree-ssa-dce.c */
 void mark_virtual_phi_result_for_renaming (gimple);
 
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index e3f5941..794be57 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -1598,7 +1598,7 @@ combine_blocks (struct loop *loop, tree *scratch_pad)
 /* If-convert LOOP when it is legal.  For the moment this pass has no
    profitability analysis.  Returns true when something changed.  */
 
-static bool
+bool
 tree_if_conversion (struct loop *loop, tree *scratch_pad)
 {
   bool changed = false;
diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
new file mode 100644
index 0000000..cd49be1
--- /dev/null
+++ b/gcc/tree-loop-flattening.c
@@ -0,0 +1,586 @@
+/* Loop flattening.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Sebastian Pop <sebastian.pop@amd.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "output.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-pass.h"
+#include "gimple.h"
+#include "params.h"
+#include "dbgcnt.h"
+
+/* This loop flattening pass transforms backward pointing edges into
+   forward pointing edges.
+
+   The back-edge removal transformation was described in the 1983
+   paper by Allen J. R., Ken Kennedy, Carrie Porterfield, and Joe
+   Warren: "Conversion of control dependence to data dependence"
+   available from http://doi.acm.org/10.1145/567067.567085
+
+   The back-edge removal algorithm was presented in that paper as part
+   of the if-conversion algorithm for backward pointing edges.  In
+   this section we will first provide a description of this technique
+   adapted for the Gimple-SSA form, followed by an example, and a
+   discussion of the differences with the higher level loop flattening
+   transformation.
+
+   The back-edge removal algorithm transforms control dependences into
+   data dependences by using a boolean variable.  The values taken by
+   the boolean variable control the execution path of the forward
+   edges created in order to use the back-edge of an outer loop.
+
+   The first step of the algorithm detects a surrounding loop and all
+   the back-edges of the loop body: these back-edges can be inner
+   loops or strongly connected components of the CFG that cannot be
+   reduced to natural loops.
+
+   Each back-edge is removed by redirecting the target of the
+   back-edge to the latch basic block of the surrounding loop.  A
+   boolean variable is created in the latch.  It is cleared when the
+   redirected back-edge is taken and it is set to true for any other
+   paths leading to the latch.
+
+   The header basic block of the surrounding loop is split before its
+   statements and a new condition is added based on the control
+   variable: when the control variable is set to true, the execution
+   proceeds as normal to the basic block that contains the statements
+   of the header; when the control variable is cleared, meaning that
+   the back-edge has been taken, the execution proceeds to the point
+   where the redirected back-edge was pointing.
+
+   The last step updates the SSA form after all the back-edges have
+   been redirected to the latch, and the new edges from the header to
+   the destination of back-edges have been created.
+
+   Another description of loop flattening in a very Fortran specific
+   way is in the 1992 paper by Reinhard von Hanxleden and Ken Kennedy:
+   "Relaxing SIMD Control Flow Constraints using Loop Transformations"
+   available from
+   http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 */
+
+/* Before creating other phi nodes in LOOP->header for the control
+   flags, update the phi nodes of LOOP->header and add the necessary
+   phi nodes in the LOOP->latch that now contains several paths on
+   which the values are not updated.  PRED_E is the single edge that
+   was pointing to the LOOP->latch basic block before inner back-edges
+   were redirected to the LOOP->latch.  */
+
+static void
+update_loop_phi_nodes (loop_p loop, edge pred_e)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      edge e;
+      edge_iterator ei;
+      gimple phi = gsi_stmt (gsi);
+      tree back_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+      tree res = gimple_phi_result (phi);
+      tree var = SSA_NAME_VAR (res);
+
+      phi = create_phi_node (var, loop->latch);
+
+      FOR_EACH_EDGE (e, ei, loop->latch->preds)
+	add_phi_arg (phi, (e == pred_e ? back_arg : res),
+		     e, UNKNOWN_LOCATION);
+
+      res = gimple_phi_result (phi);
+      add_phi_arg (gsi_stmt (gsi), res, loop_latch_edge (loop),
+		   UNKNOWN_LOCATION);
+    }
+}
+
+/* Creates a control flag for the FORWARDED_EDGE that represents the
+   back-edge that has been forwarded to the latch basic block of LOOP.
+   INNER_BODY is the basic block to which the back-edge was pointing
+   before redirection.  This function creates a boolean control flag
+   that is cleared when the FORWARDED_EDGE is taken and set for all
+   the other paths.  This function adds the corresponding phi nodes in
+   LOOP->latch and LOOP->header, and finally adds an edge from
+   LOOP->header to the INNER_BODY guarded by the control flag.  */
+
+static void
+create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
+{
+  edge e, preheader;
+  edge outer_latch_e = loop_latch_edge (loop);
+  const char *name = "_flat_";
+  tree var = create_tmp_reg (boolean_type_node, name);
+  tree res;
+  gimple phi, cond_stmt;
+  gimple_stmt_iterator gsi;
+  edge_iterator ei;
+
+  /* Adds a control variable for the redirected FORWARDED_EDGE.  */
+  add_referenced_var (var);
+  phi = create_phi_node (var, forwarded_edge->dest);
+
+  FOR_EACH_EDGE (e, ei, outer_latch_e->src->preds)
+    add_phi_arg (phi, (e == forwarded_edge
+		       ? boolean_false_node
+		       : boolean_true_node),
+		 e, UNKNOWN_LOCATION);
+  res = gimple_phi_result (phi);
+
+  /* Add a phi node in LOOP->header for the control variable.  */
+  phi = create_phi_node (var, loop->header);
+
+  preheader = loop_preheader_edge (loop);
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    add_phi_arg (phi, (e == preheader
+		       ? boolean_true_node
+		       : res),
+		 e, UNKNOWN_LOCATION);
+  res = gimple_phi_result (phi);
+
+  /* Split LOOP->header to insert the control variable condition.  */
+  e = split_block_after_labels (loop->header);
+  e->flags = EDGE_TRUE_VALUE;
+  e = make_edge (loop->header, inner_body, EDGE_FALSE_VALUE);
+  cond_stmt = gimple_build_cond (EQ_EXPR, res, boolean_true_node,
+				 NULL_TREE, NULL_TREE);
+  gsi = gsi_last_bb (loop->header);
+  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
+}
+
+/* Adds phi nodes to the LOOP->header and LOOP->latch for the ssa_name
+   NAME.  ARG is the argument of the latch phi node set for the
+   FORWARDED_EDGE, and all the other edges merged by the latch phi
+   node are set to the result of the LOOP->header phi node.  The latch
+   edge of the LOOP->header phi node is set to the result of the
+   LOOP->latch phi node, and the other argument is set to an arbitrary
+   valid value defined before the loop (note that this initial value
+   is never used in the loop).  Returns the LOOP->header phi result.  */
+
+static tree
+add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
+			   tree arg)
+{
+  edge e;
+  edge_iterator ei;
+  tree res, zero, var = SSA_NAME_VAR (name);
+  gimple loop_phi = create_phi_node (var, loop->header);
+  gimple latch_phi = create_phi_node (var, loop->latch);
+
+  /* The value set to ZERO will never be used in the loop, however we
+     have to construct something meaningful for virtual SSA_NAMEs.  */
+  if (TREE_CODE (arg) != SSA_NAME)
+    zero = arg;
+  else if (is_gimple_reg (arg))
+    zero = build_zero_cst (TREE_TYPE (arg));
+  else
+    {
+      zero = gimple_vop (cfun);
+      mark_sym_for_renaming (gimple_vop (cfun));
+    }
+
+  res = gimple_phi_result (latch_phi);
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
+		 e, UNKNOWN_LOCATION);
+
+  res = gimple_phi_result (loop_phi);
+  FOR_EACH_EDGE (e, ei, loop->latch->preds)
+    add_phi_arg (latch_phi, (e == forwarded_edge ? arg : res),
+		 e, UNKNOWN_LOCATION);
+
+  return res;
+}
+
+/* Creates phi nodes for each inductive definition, i.e., loop phi
+   nodes.  For each induction phi node in the old loop header, i.e.,
+   in the single_succ (INNER_BODY), insert a phi node in the
+   LOOP->latch that takes the updated value of the induction on the
+   FORWARDED_EDGE, and maintains the same value as in the phi node of
+   the LOOP->header for all the other possible paths reaching
+   LOOP->latch.  This function has to be called after all the
+   back-edges have been redirected.  */
+
+static void
+update_inner_induction_phi_nodes (edge forwarded_edge, loop_p loop,
+				  basic_block inner_body)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_phis (single_succ (inner_body));
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple old_loop_phi = gsi_stmt (gsi);
+      tree back_arg = PHI_ARG_DEF_FROM_EDGE (old_loop_phi,
+					     single_succ_edge (inner_body));
+      tree res = gimple_phi_result (old_loop_phi);
+
+      res = add_header_and_latch_phis (loop, res, forwarded_edge, back_arg);
+      add_phi_arg (old_loop_phi, res, single_succ_edge (inner_body),
+		   UNKNOWN_LOCATION);
+    }
+}
+
+/* Renames all the uses of OLD_NAME with NEW_NAME (except the phi
+   nodes of DEF_BB) in all the basic blocks dominated by DEF_BB and in
+   the arguments of all the phi nodes originating in a basic block
+   that is dominated by DEF_BB.  */
+
+static void
+rename_dominated_uses (tree old_name, tree new_name, basic_block def_bb)
+{
+  imm_use_iterator uit;
+  gimple stmt;
+  use_operand_p use_p;
+
+  FOR_EACH_IMM_USE_STMT (stmt, uit, old_name)
+    {
+      enum gimple_code code = gimple_code (stmt);
+      basic_block use_bb = gimple_bb (stmt);
+      edge_iterator ei;
+      edge e;
+
+      if (code == GIMPLE_PHI)
+	{
+	  FOR_EACH_EDGE (e, ei, use_bb->preds)
+	    if (PHI_ARG_DEF_FROM_EDGE (stmt, e) == old_name
+		&& dominated_by_p (CDI_DOMINATORS, e->src, def_bb)
+		&& use_bb != def_bb)
+	      SET_USE (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx), new_name);
+	}
+      else
+	{
+	  if (!dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
+	    continue;
+
+	  FOR_EACH_IMM_USE_ON_STMT (use_p, uit)
+	    SET_USE (use_p, new_name);
+	}
+    }
+}
+
+/* Helper function for add_missing_phi_nodes_1.  Adds to LOOP all the
+   missing phi nodes for NAME and updates the arguments of the
+   LATCH_PHI node.  LOOP_PHI node is the inductive definition of NAME
+   in LOOP->header.  */
+
+static void
+add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
+			 VEC (gimple, heap) *phis, basic_block *bbs)
+{
+  basic_block dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
+  int i, n = loop->num_nodes;
+
+  for (i = 0; i < n; i++)
+    {
+      edge e;
+      edge_iterator ei;
+      basic_block bb = bbs[i];
+
+      if (bb == loop->latch
+	  || !dominated_by_p (CDI_DOMINATORS, bb, dom_bb))
+	continue;
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  gimple phi = VEC_index (gimple, phis, e->dest->index);
+
+	  if (phi)
+	    add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
+
+	  else if (!single_pred_p (e->dest)
+		   && !dominated_by_p (CDI_DOMINATORS, e->dest, dom_bb)
+		   && e->dest->loop_father == loop)
+	  {
+	    tree var = SSA_NAME_VAR (name);
+
+	    phi = create_phi_node (var, e->dest);
+	    VEC_replace (gimple, phis, e->dest->index, phi);
+	    add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
+	    rename_dominated_uses (old_name, gimple_phi_result (phi),
+				   e->dest);
+	    add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name,
+				     phis, bbs);
+	  }
+	}
+    }
+}
+
+/* Helper function for add_missing_phi_nodes.  For all the definitions
+   of DEF_STMT add the missing phi nodes in LOOP.  */
+
+static void
+add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt, basic_block *bbs)
+{
+  def_operand_p def_p;
+  ssa_op_iter op_iter;
+  basic_block bb = gimple_bb (def_stmt);
+
+  FOR_EACH_PHI_OR_STMT_DEF (def_p, def_stmt, op_iter, SSA_OP_DEF|SSA_OP_VDEF)
+    {
+      edge e;
+      edge_iterator ei;
+      tree res, zero, var;
+      gimple loop_phi, latch_phi, use_stmt, phi;
+      imm_use_iterator uit;
+      tree name = DEF_FROM_PTR (def_p);
+      bool needs_update = false;
+      VEC (gimple, heap) *phis;
+      int i;
+
+      FOR_EACH_IMM_USE_STMT (use_stmt, uit, name)
+	{
+	  basic_block use_bb = gimple_bb (use_stmt);
+
+	  if (!dominated_by_p (CDI_DOMINATORS, bb, use_bb))
+	    {
+	      needs_update = true;
+	      BREAK_FROM_IMM_USE_STMT (uit);
+	    }
+	}
+
+      if (!needs_update)
+	continue;
+
+      var = SSA_NAME_VAR (name);
+      loop_phi = create_phi_node (var, loop->header);
+      latch_phi = create_phi_node (var, loop->latch);
+
+      /* The value set to ZERO will never be used in the loop, however we
+	 have to construct something meaningful for virtual SSA_NAMEs.  */
+      if (is_gimple_reg (name))
+	zero = build_zero_cst (TREE_TYPE (name));
+      else
+	{
+	  zero = gimple_vop (cfun);
+	  mark_sym_for_renaming (gimple_vop (cfun));
+	}
+
+      res = gimple_phi_result (latch_phi);
+      FOR_EACH_EDGE (e, ei, loop->header->preds)
+	add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
+		     e, UNKNOWN_LOCATION);
+
+      res = gimple_phi_result (loop_phi);
+      FOR_EACH_EDGE (e, ei, loop->latch->preds)
+	add_phi_arg (latch_phi, res, e, UNKNOWN_LOCATION);
+
+      phis = VEC_alloc (gimple, heap, n_basic_blocks);
+      VEC_safe_grow_cleared (gimple, heap, phis, n_basic_blocks);
+
+      VEC_replace (gimple, phis, loop->latch->index, latch_phi);
+      VEC_replace (gimple, phis, loop->header->index, loop_phi);
+      add_missing_phi_nodes_2 (loop, name, name, phis, bbs);
+
+      FOR_EACH_VEC_ELT (gimple, phis, i, phi)
+	{
+	  if (!phi)
+	    continue;
+
+	  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (i)->preds)
+	    if (!PHI_ARG_DEF_FROM_EDGE (phi, e))
+	      add_phi_arg (phi, res, e, UNKNOWN_LOCATION);
+	}
+
+      VEC_free (gimple, heap, phis);
+    }
+}
+
+/* Walks over the code of LOOP and adds the missing phi nodes at
+   control flow junctions.  When a variable is defined in an outer
+   loop and used in an inner loop, the definition dominates the use.
+   After the loop flattening, the inner loop body is directly
+   reachable from the LOOP->header by using the added edge guarded by
+   the boolean flag that controls the execution of the back-edge that
+   was eliminated.  In this case, the use is not dominated by the
+   definition, and this function adds the missing phi nodes.  */
+
+static void
+add_missing_phi_nodes (loop_p loop)
+{
+  gimple_stmt_iterator gsi;
+  int i, n = loop->num_nodes;
+  basic_block *bbs = get_loop_body (loop);
+
+  for (i = 0; i < n; i++)
+    {
+      basic_block bb = bbs[i];
+
+      /* LOOP->header dominates all the blocks of the loop body, and
+	 so we don't have to look at the missing phi nodes for the
+	 definitions of LOOP->header.  */
+      if (bb == loop->header)
+	continue;
+
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	if (!gimple_nop_p (gsi_stmt (gsi)))
+	  add_missing_phi_nodes_1 (loop, gsi_stmt (gsi), bbs);
+
+      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	add_missing_phi_nodes_1 (loop, gsi_stmt (gsi), bbs);
+    }
+
+  free (bbs);
+}
+
+/* Removes all the back-edges of LOOP except its own back-edge.
+   SCRATCH_PAD is used in if-conversion.  */
+
+static unsigned
+flatten_loop (loop_p loop, tree *scratch_pad)
+{
+  int i, n = loop->num_nodes;
+  basic_block *bbs;
+  VEC (edge, heap) *back_edges;
+  VEC (basic_block, heap) *loop_body;
+  edge_iterator ei;
+  edge e, pred_e;
+  unsigned max_nb_basic_blocks = PARAM_VALUE (PARAM_LFLAT_MAX_NB_BBS);;
+
+  if (loop->num_nodes > max_nb_basic_blocks
+      || !single_exit (loop)
+      || !dbg_cnt (lflat))
+    return 0;
+
+  mark_dfs_back_edges ();
+  bbs = get_loop_body (loop);
+
+  back_edges = VEC_alloc (edge, heap, 3);
+  loop_body = VEC_alloc (basic_block, heap, 3);
+
+  for (i = 0; i < n; i++)
+    FOR_EACH_EDGE (e, ei, bbs[i]->succs)
+      if (e->flags & EDGE_DFS_BACK
+	  && e->src != loop->latch)
+	VEC_safe_push (edge, heap, back_edges, e);
+
+  free (bbs);
+
+  /* Early return and do not modify the code when there are no back
+     edges.  */
+  if (VEC_empty (edge, back_edges))
+    return 0;
+
+  cancel_subloops (loop);
+
+  /* Split the latch edge to make sure that the latch basic block does
+     not contain code.  */
+  loop->latch = split_edge (loop_latch_edge (loop));
+  pred_e = single_pred_edge (loop->latch);
+
+  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
+    {
+      basic_block dest = split_edge (e);
+
+      /* Redirect BACK_EDGE to LOOP->latch.  */
+      redirect_edge_and_branch_force (e, loop->latch);
+
+      /* Save the basic block where it was pointing.  */
+      VEC_safe_push (basic_block, heap, loop_body, dest);
+    }
+
+  update_loop_phi_nodes (loop, pred_e);
+
+  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
+    create_control_flag (e, loop, VEC_index (basic_block, loop_body, i));
+
+  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
+    update_inner_induction_phi_nodes (e, loop, VEC_index (basic_block,
+							  loop_body, i));
+
+  free_dominance_info (CDI_DOMINATORS);
+  calculate_dominance_info (CDI_DOMINATORS);
+  add_missing_phi_nodes (loop);
+
+  /* If we redirected some back-edges, split the latch edge to create
+     an empty LOOP->latch.  */
+  if (!single_pred_p (loop->latch))
+    loop->latch = split_edge (loop_latch_edge (loop));
+
+  tree_if_conversion (loop, scratch_pad);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Flattened %d loops.\n", VEC_length (edge, back_edges));
+
+  VEC_free (edge, heap, back_edges);
+  VEC_free (basic_block, heap, loop_body);
+
+  return TODO_cleanup_cfg;
+}
+
+/* Flattens all the loops of the current function.  */
+
+static unsigned int
+tree_loop_flattening (void)
+{
+  unsigned todo = 0;
+  loop_p loop;
+  loop_iterator li;
+  tree scratch_pad = NULL_TREE;
+
+  if (number_of_loops () <= 1)
+    return 0;
+
+  FOR_EACH_LOOP (li, loop, 0)
+    todo |= flatten_loop (loop, &scratch_pad);
+
+#ifdef ENABLE_CHECKING
+  verify_dominators (CDI_DOMINATORS);
+  verify_flow_info ();
+#endif
+
+  return todo;
+}
+
+static bool
+gate_tree_loop_flattening (void)
+{
+  return flag_tree_loop_flattening != 0;
+}
+
+struct gimple_opt_pass pass_flatten_loops =
+{
+ {
+  GIMPLE_PASS,
+  "lflat",				/* name */
+  gate_tree_loop_flattening,		/* gate */
+  tree_loop_flattening,       		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_TREE_LOOP_FLATTENING,  		/* tv_id */
+  PROP_cfg | PROP_ssa,			/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func
+  | TODO_verify_ssa
+  | TODO_update_ssa
+  | TODO_ggc_collect			/* todo_flags_finish */
+ }
+};
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a87a770..e2f257f 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -374,6 +374,7 @@ extern struct gimple_opt_pass pass_graphite;
 extern struct gimple_opt_pass pass_graphite_transforms;
 extern struct gimple_opt_pass pass_if_conversion;
 extern struct gimple_opt_pass pass_loop_distribution;
+extern struct gimple_opt_pass pass_flatten_loops;
 extern struct gimple_opt_pass pass_vectorize;
 extern struct gimple_opt_pass pass_slp_vectorize;
 extern struct gimple_opt_pass pass_complete_unroll;
-- 
1.7.0.4

From f7bd38c2245b8adf7b704208af990fea52ff66be Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 14:28:36 -0600
Subject: [PATCH 02/12] Add functional tests for loop flattening.

---
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c |   61 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c |   34 +++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c |   35 ++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c |   55 +++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c |   55 +++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c  |    8 +--
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c  |    6 +-
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c  |   50 +++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c  |   50 +++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c  |   66 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c  |   61 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c  |   56 ++++++++++++++++++++++
 gcc/tree-loop-flattening.c                   |    3 +
 13 files changed, 531 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c
new file mode 100644
index 0000000..56d14f9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-10.c
@@ -0,0 +1,61 @@
+/* From graphite/block-7.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+
+int A[N][N], B[N][N], C[N][N];
+
+static void __attribute__((noinline))
+matmult (void)
+{
+  int i, j, k;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+        A[i][j] = 0;
+        for (k = 0; k < N; k++)
+          A[i][j] += B[i][k] * C[k][j];
+      }
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res = 0;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	B[i][j] = j;
+	C[i][j] = i;
+      }
+
+  matmult ();
+
+  for (i = 0; i < N; i++)
+    res += A[i][i];
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 529340000)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 2 loops" 1 "lflat" } } */
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c
new file mode 100644
index 0000000..68c5d49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-11.c
@@ -0,0 +1,34 @@
+/* From graphite/run-id-1.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+void abort (void);
+
+void foo (int N)
+{
+  int i, j;
+  int x[1000][1000];
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      x[i][j] = i + j + 3;
+
+  /* This loop will not be flattened as the outermost loop has two
+     exit edges.  */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      if (x[i][j] != i + j + 3)
+	abort ();
+}
+
+int main(void)
+{
+  foo (1000);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c
new file mode 100644
index 0000000..eb0fb3d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-12.c
@@ -0,0 +1,35 @@
+/* From graphite/run-id-4.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+extern void abort (void);
+
+__attribute__ ((noinline)) int
+foo (int x, int y, int *z)
+{
+  int a, b, c, d;
+
+  a = b = 0;
+  for (d = 0; d < y; d++)
+    {
+      if (z)
+	b = d * *z;
+      for (c = 0; c < x; c++)
+	a += b;
+    }
+
+  return a;
+}
+
+int
+main (void)
+{
+  if (foo (3, 2, 0) != 0)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c
new file mode 100644
index 0000000..1d04dbe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-13.c
@@ -0,0 +1,55 @@
+/* From graphite/run-id-5.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#include <stdarg.h>
+
+extern void abort ();
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (int n){
+  int i,j;
+  int sum;
+
+  if (n<=0)
+    return 0;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < n; j+=2) {
+      sum += j;
+    }
+    a[i] = sum + j;
+  }
+}
+
+int main (void)
+{
+  int i,j;
+  int sum;
+
+  for (i=0; i<N; i++)
+    a[i] = i;
+
+  foo (N);
+
+  /* This won't be flattened.  */
+  for (i=0; i<N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j+=2)
+        sum += j;
+      if (a[i] != sum + j)
+        abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c
new file mode 100644
index 0000000..c87b893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-14.c
@@ -0,0 +1,55 @@
+/* From graphite/run-id-6.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#include <stdarg.h>
+
+extern void abort ();
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (int n){
+  int i,j,k=0;
+  int sum;
+
+  if (n<=0)
+    return 0;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < n; j+=2) {
+      sum += k++;
+    }
+    a[i] = sum + j;
+  }
+}
+
+int main (void)
+{
+  int i,j,k=0;
+  int sum;
+
+  for (i=0; i<N; i++)
+    a[i] = i;
+
+  foo (N);
+
+  /* This is not flattened.  */
+  for (i=0; i<N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j+=2)
+        sum += k++;
+      if (a[i] != sum + j)
+	abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
index a7287fb..4573801 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
@@ -28,12 +28,8 @@ __morestack_release_segments (struct stack_segment **pp, int free_dynamic)
     {
       struct stack_segment *next;
       next = pss->next;
- {
-   if (free_dynamic)
-     {
-       ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
-     }
- }
+      if (free_dynamic)
+	ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
       pss = next;
     }
 }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
index d3d66ab..cf01273 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
@@ -11,9 +11,9 @@ split_directories (const char *name, int *ptr_num_dirs)
   int ch;
   while ((ch = *p++) != '\0')
     {
-   num_dirs++;
-   while (((*p) == '/'))
-     p++;
+      num_dirs++;
+      while (((*p) == '/'))
+	p++;
     }
   return (dirs[num_dirs - 1] == ((void *)0));
 }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c
new file mode 100644
index 0000000..24704fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-5.c
@@ -0,0 +1,50 @@
+/* From graphite/block-0.c.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 1000
+int a[N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int j;
+  int i;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[j] = a[i] + 1;
+
+  return a[0];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, res;
+
+  for (i = 0; i < N; i++)
+    a[i] = i;
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 1999)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c
new file mode 100644
index 0000000..8a5382f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-6.c
@@ -0,0 +1,50 @@
+/* From graphite/block-1.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define MAX 100
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  int sum = 0;
+  int A[MAX * MAX];
+  int B[MAX * MAX];
+
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      {
+	A[i*MAX + j] = j;
+	B[i*MAX + j] = j;
+      }
+
+  for (i = 0; i < MAX; i++)
+    for (j = 0; j < MAX; j++)
+      A[i*MAX + j] += B[j*MAX + i];
+
+  for(i = 0; i < MAX; i++)
+    for(j = 0; j < MAX; j++)
+      sum += A[i*MAX + j];
+
+#if DEBUG
+  fprintf (stderr, "sum = %d \n", sum);
+#endif
+
+  if (sum != 990000)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 3 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c
new file mode 100644
index 0000000..3252545
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-7.c
@@ -0,0 +1,66 @@
+/* From graphite/block-3.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-timeout-factor 4.0 } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 24
+#define M 100
+
+int A[M][M][M], B[M][M], C[M][M];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int i, j, k;
+
+  /* These loops contain too few iterations to be blocked by 64.  */
+  for (i = 0; i < 24; i++)
+    for (j = 0; j < 24; j++)
+      for (k = 0; k < 24; k++)
+        A[i][j][k] = B[i][k] * C[k][j];
+
+  /* These loops should still be loop blocked.  */
+  for (i = 0; i < M; i++)
+    for (j = 0; j < M; j++)
+      for (k = 0; k < M; k++)
+        A[i][j][k] = B[i][k] * C[k][j];
+
+  return A[0][0][0] + A[M-1][M-1][M-1];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < M; i++)
+    for (j = 0; j < M; j++)
+      {
+	B[i][j] = i;
+	C[i][j] = j;
+      }
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 9801)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 2 loops" 2 "lflat" } } */
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 1 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c
new file mode 100644
index 0000000..388e721
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-8.c
@@ -0,0 +1,61 @@
+/* From graphite/block-5.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+
+int a[N][N];
+int b[N][N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int i, j;
+  int res = 0;
+
+  /* This loop nest should be blocked.  */
+  for (j = 1; j < N; j++)
+    for (i = 0; i < N; i++)
+      a[i][j] = a[i][j-1] + b[i][j];
+
+  for (i = 0; i < N; i++)
+    res += a[i][i];
+
+  return res;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+	a[i][j] = i + j;
+	b[i][j] = i - j;
+      }
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 1333300)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 2 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c
new file mode 100644
index 0000000..06a2667
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-9.c
@@ -0,0 +1,56 @@
+/* From graphite/block-6.c.  */
+
+/* { dg-require-effective-target size32plus } */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-flatten -fdump-tree-lflat-all" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 200
+int a[N][N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+  int i, j;
+  int res = 0;
+
+  /* Interchange is not legal for loops 0 and 1.  */
+  for (i = 1; i < N; i++)
+    for (j = 1; j < N - 1; j++)
+      a[i][j] = a[i-1][j+1] * a[i-1][j+1] / 2;
+
+  for (i = 0; i < N; i++)
+    res += a[i][i];
+
+  return res;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j, res;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[i][j] = i + j;
+
+  res = foo ();
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 204007516)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Flattened 1 loops" 2 "lflat" } } */
+/* { dg-final { cleanup-tree-dump "lflat" } } */
diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 56211b4..1f887a2 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -571,6 +571,9 @@ flatten_loop (loop_p loop, tree *scratch_pad)
   if (gate_tree_if_conversion ())
     tree_if_conversion (loop, scratch_pad);
 
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Flattened %d loops.\n", VEC_length (edge, back_edges));
+
   return TODO_update_ssa | TODO_verify_ssa;
 }
 
-- 
1.7.0.4

From af6e13e63236bf7a1c3cb4b8cee7839dae758298 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Mon, 15 Nov 2010 16:59:44 -0600
Subject: [PATCH 03/12] Unconditionally call tree_if_conversion.

---
 gcc/tree-flow.h            |    1 -
 gcc/tree-if-conv.c         |    2 +-
 gcc/tree-loop-flattening.c |    3 +--
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
index e1ee69f..a4bf6f8 100644
--- a/gcc/tree-flow.h
+++ b/gcc/tree-flow.h
@@ -731,7 +731,6 @@ bool stmt_dominates_stmt_p (gimple, gimple);
 void mark_virtual_ops_for_renaming (gimple);
 
 /* In tree-if-conv.c */
-bool gate_tree_if_conversion (void);
 bool tree_if_conversion (struct loop *, tree *);
 
 /* In tree-ssa-dce.c */
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index eaef273..794be57 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -1661,7 +1661,7 @@ main_tree_if_conversion (void)
 
 /* Returns true when the if-conversion pass is enabled.  */
 
-bool
+static bool
 gate_tree_if_conversion (void)
 {
   return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0)
diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 1f887a2..1d22dc6 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -568,8 +568,7 @@ flatten_loop (loop_p loop, tree *scratch_pad)
   if (!single_pred_p (loop->latch))
     loop->latch = split_edge (loop_latch_edge (loop));
 
-  if (gate_tree_if_conversion ())
-    tree_if_conversion (loop, scratch_pad);
+  tree_if_conversion (loop, scratch_pad);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Flattened %d loops.\n", VEC_length (edge, back_edges));
-- 
1.7.0.4

From be8156967aaee764448fe528b64604df39c4c2de Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 13:24:07 -0600
Subject: [PATCH 04/12] Use simplified version of cancel_subloops.

---
 gcc/cfgloop.c              |   10 ++++++++++
 gcc/cfgloop.h              |    1 +
 gcc/tree-loop-flattening.c |   19 -------------------
 3 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 08d689d..bfab67b 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1297,6 +1297,16 @@ cancel_loop_tree (struct loop *loop)
   cancel_loop (loop);
 }
 
+/* Keep the loop structure for LOOP and remove all the loop structures
+   under LOOP.  */
+
+void
+cancel_subloops (loop_p loop)
+{
+  while (loop->inner)
+    cancel_loop_tree (loop->inner);
+}
+
 /* Checks that information about loops is correct
      -- sizes of loops are all right
      -- results of get_loop_body really belong to the loop
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index bf2614e..1679019 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -256,6 +256,7 @@ extern void add_bb_to_loop (basic_block, struct loop *);
 extern void remove_bb_from_loops (basic_block);
 
 extern void cancel_loop_tree (struct loop *);
+extern void cancel_subloops (struct loop *);
 extern void delete_loop (struct loop *);
 
 enum
diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 1d22dc6..b36c563 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -87,25 +87,6 @@ along with GCC; see the file COPYING3.  If not see
    available from
    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 */
 
-/* Keep the loop structure for LOOP and remove all the loop structures
-   under LOOP.  */
-
-static void
-cancel_subloops (loop_p loop)
-{
-  int i;
-  loop_p li;
-  VEC (loop_p, heap) *lv = VEC_alloc (loop_p, heap, 3);
-
-  for (li = loop->inner; li; li = li->next)
-    VEC_safe_push (loop_p, heap, lv, li);
-
-  FOR_EACH_VEC_ELT (loop_p, lv, i, li)
-    cancel_loop_tree (li);
-
-  VEC_free (loop_p, heap, lv);
-}
-
 /* Before creating other phi nodes in LOOP->header for the control
    flags, update the phi nodes of LOOP->header and add the necessary
    phi nodes in the LOOP->latch that now contains several paths on
-- 
1.7.0.4

From 831866b109d358441069aa8fef34ff2a43f83259 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 12:28:04 -0600
Subject: [PATCH 05/12] Do not call create_new_def_for

---
 gcc/tree-loop-flattening.c |   18 ------------------
 1 files changed, 0 insertions(+), 18 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index b36c563..d28117e 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -109,8 +109,6 @@ update_loop_phi_nodes (loop_p loop, edge pred_e)
       tree var = SSA_NAME_VAR (res);
 
       phi = create_phi_node (var, loop->latch);
-      create_new_def_for (gimple_phi_result (phi), phi,
-			  gimple_phi_result_ptr (phi));
 
       FOR_EACH_EDGE (e, ei, loop->latch->preds)
 	add_phi_arg (phi, (e == pred_e ? back_arg : res),
@@ -146,8 +144,6 @@ create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
   /* Adds a control variable for the redirected FORWARDED_EDGE.  */
   add_referenced_var (var);
   phi = create_phi_node (var, forwarded_edge->dest);
-  create_new_def_for (gimple_phi_result (phi), phi,
-		      gimple_phi_result_ptr (phi));
 
   FOR_EACH_EDGE (e, ei, outer_latch_e->src->preds)
     add_phi_arg (phi, (e == forwarded_edge
@@ -158,8 +154,6 @@ create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
 
   /* Add a phi node in LOOP->header for the control variable.  */
   phi = create_phi_node (var, loop->header);
-  create_new_def_for (gimple_phi_result (phi), phi,
-		      gimple_phi_result_ptr (phi));
 
   preheader = loop_preheader_edge (loop);
   FOR_EACH_EDGE (e, ei, loop->header->preds)
@@ -198,11 +192,6 @@ add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
   gimple loop_phi = create_phi_node (var, loop->header);
   gimple latch_phi = create_phi_node (var, loop->latch);
 
-  create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
-		      gimple_phi_result_ptr (loop_phi));
-  create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
-		      gimple_phi_result_ptr (latch_phi));
-
   /* The value set to ZERO will never be used in the loop, however we
      have to construct something meaningful for virtual SSA_NAMEs.  */
   if (TREE_CODE (arg) != SSA_NAME)
@@ -343,8 +332,6 @@ add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
 	    tree var = SSA_NAME_VAR (name);
 
 	    phi = create_phi_node (var, e->dest);
-	    create_new_def_for (gimple_phi_result (phi), phi,
-				gimple_phi_result_ptr (phi));
 	    VEC_replace (gimple, phis, e->dest->index, phi);
 	    add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
 	    rename_dominated_uses (loop, old_name, gimple_phi_result (phi),
@@ -396,11 +383,6 @@ add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
       loop_phi = create_phi_node (var, loop->header);
       latch_phi = create_phi_node (var, loop->latch);
 
-      create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
-			  gimple_phi_result_ptr (loop_phi));
-      create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
-			  gimple_phi_result_ptr (latch_phi));
-
       /* The value set to ZERO will never be used in the loop, however we
 	 have to construct something meaningful for virtual SSA_NAMEs.  */
       if (is_gimple_reg (name))
-- 
1.7.0.4

From 7fa8ed733959fb40fe4105800e752b7929b116cf Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 14:36:08 -0600
Subject: [PATCH 06/12] Use create_tmp_reg.

---
 gcc/tree-loop-flattening.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index d28117e..8750d80 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -135,7 +135,7 @@ create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
   edge e, preheader;
   edge outer_latch_e = loop_latch_edge (loop);
   const char *name = "_flat_";
-  tree var = create_tmp_var (boolean_type_node, name);
+  tree var = create_tmp_reg (boolean_type_node, name);
   tree res;
   gimple phi, cond_stmt;
   gimple_stmt_iterator gsi;
-- 
1.7.0.4

From d1dff67ed0880e457df4df1a30711e59f820880d Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 14:40:12 -0600
Subject: [PATCH 07/12] Use build_zero_cst.

---
 gcc/tree-loop-flattening.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 8750d80..7d61e79 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -197,7 +197,7 @@ add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
   if (TREE_CODE (arg) != SSA_NAME)
     zero = arg;
   else if (is_gimple_reg (arg))
-    zero = fold_convert (TREE_TYPE (arg), integer_zero_node);
+    zero = build_zero_cst (TREE_TYPE (arg));
   else
     zero = gimple_default_def (cfun, SSA_NAME_VAR (arg));
 
@@ -386,7 +386,7 @@ add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
       /* The value set to ZERO will never be used in the loop, however we
 	 have to construct something meaningful for virtual SSA_NAMEs.  */
       if (is_gimple_reg (name))
-	zero = fold_convert (TREE_TYPE (name), integer_zero_node);
+	zero = build_zero_cst (TREE_TYPE (name));
       else
 	zero = gimple_default_def (cfun, SSA_NAME_VAR (name));
 
-- 
1.7.0.4

From 9ad088cb779576462c83b5dbed0449b138edc15d Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 14:43:32 -0600
Subject: [PATCH 08/12] Correct the use of the default virtual operands.

---
 gcc/tree-loop-flattening.c |   10 ++++++++--
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 7d61e79..5708a87 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -199,7 +199,10 @@ add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
   else if (is_gimple_reg (arg))
     zero = build_zero_cst (TREE_TYPE (arg));
   else
-    zero = gimple_default_def (cfun, SSA_NAME_VAR (arg));
+    {
+      zero = gimple_vop (cfun);
+      mark_sym_for_renaming (gimple_vop (cfun));
+    }
 
   res = gimple_phi_result (latch_phi);
   FOR_EACH_EDGE (e, ei, loop->header->preds)
@@ -388,7 +391,10 @@ add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
       if (is_gimple_reg (name))
 	zero = build_zero_cst (TREE_TYPE (name));
       else
-	zero = gimple_default_def (cfun, SSA_NAME_VAR (name));
+	{
+	  zero = gimple_vop (cfun);
+	  mark_sym_for_renaming (gimple_vop (cfun));
+	}
 
       res = gimple_phi_result (latch_phi);
       FOR_EACH_EDGE (e, ei, loop->header->preds)
-- 
1.7.0.4

From 4f9defed6dab1367dd3acdf29699e8fe0801bdf3 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 15:14:41 -0600
Subject: [PATCH 09/12] Use SET_USE instead of replace_exp.

---
 gcc/tree-loop-flattening.c |   24 +++++-------------------
 1 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 5708a87..c3cf9a2 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -252,13 +252,11 @@ update_inner_induction_phi_nodes (edge forwarded_edge, loop_p loop,
    that is dominated by DEF_BB.  */
 
 static void
-rename_dominated_uses (loop_p loop, tree old_name, tree new_name,
-		       basic_block def_bb)
+rename_dominated_uses (tree old_name, tree new_name, basic_block def_bb)
 {
   imm_use_iterator uit;
   gimple stmt;
   use_operand_p use_p;
-  ssa_op_iter op_iter;
 
   FOR_EACH_IMM_USE_STMT (stmt, uit, old_name)
     {
@@ -273,27 +271,15 @@ rename_dominated_uses (loop_p loop, tree old_name, tree new_name,
 	    if (PHI_ARG_DEF_FROM_EDGE (stmt, e) == old_name
 		&& dominated_by_p (CDI_DOMINATORS, e->src, def_bb)
 		&& use_bb != def_bb)
-	      replace_exp (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx),
-			   new_name);
+	      SET_USE (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx), new_name);
 	}
       else
 	{
 	  if (!dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
 	    continue;
 
-	  if (use_bb->loop_father == loop)
-	    {
-	      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
-		if (USE_FROM_PTR (use_p) == old_name)
-		  replace_exp (use_p, new_name);
-	    }
-	  else
-	    /* Virtual operands are not translated into loop closed
-	       SSA form, and thus they may occur in the rest of
-	       the program without a loop close vphi node.  */
-	    FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
-	      if (USE_FROM_PTR (use_p) == old_name)
-		replace_exp (use_p, new_name);
+	  FOR_EACH_IMM_USE_ON_STMT (use_p, uit)
+	    SET_USE (use_p, new_name);
 	}
     }
 }
@@ -337,7 +323,7 @@ add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
 	    phi = create_phi_node (var, e->dest);
 	    VEC_replace (gimple, phis, e->dest->index, phi);
 	    add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
-	    rename_dominated_uses (loop, old_name, gimple_phi_result (phi),
+	    rename_dominated_uses (old_name, gimple_phi_result (phi),
 				   e->dest);
 	    add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name,
 				     phis);
-- 
1.7.0.4

From 25475ef2e20eb242ea1f523d6276add4447a5aa4 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 15:32:30 -0600
Subject: [PATCH 10/12] Iterate over the loop body.

---
 gcc/tree-loop-flattening.c |   23 +++++++++++------------
 1 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index c3cf9a2..9ddc52a 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -291,20 +291,19 @@ rename_dominated_uses (tree old_name, tree new_name, basic_block def_bb)
 
 static void
 add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
-			 VEC (gimple, heap) *phis)
+			 VEC (gimple, heap) *phis, basic_block *bbs)
 {
-  unsigned i;
-  basic_block bb, dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
-  VEC (basic_block, heap) *dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS,
-							       dom_bb);
+  basic_block dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
+  int i, n = loop->num_nodes;
 
-  FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb)
+  for (i = 0; i < n; i++)
     {
       edge e;
       edge_iterator ei;
+      basic_block bb = bbs[i];
 
       if (bb == loop->latch
-	  || bb->loop_father != loop)
+	  || !dominated_by_p (CDI_DOMINATORS, bb, dom_bb))
 	continue;
 
       FOR_EACH_EDGE (e, ei, bb->succs)
@@ -326,7 +325,7 @@ add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
 	    rename_dominated_uses (old_name, gimple_phi_result (phi),
 				   e->dest);
 	    add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name,
-				     phis);
+				     phis, bbs);
 	  }
 	}
     }
@@ -336,7 +335,7 @@ add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
    of DEF_STMT add the missing phi nodes in LOOP.  */
 
 static void
-add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
+add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt, basic_block *bbs)
 {
   def_operand_p def_p;
   ssa_op_iter op_iter;
@@ -396,7 +395,7 @@ add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
 
       VEC_replace (gimple, phis, loop->latch->index, latch_phi);
       VEC_replace (gimple, phis, loop->header->index, loop_phi);
-      add_missing_phi_nodes_2 (loop, name, name, phis);
+      add_missing_phi_nodes_2 (loop, name, name, phis, bbs);
 
       FOR_EACH_VEC_ELT (gimple, phis, i, phi)
 	{
@@ -440,10 +439,10 @@ add_missing_phi_nodes (loop_p loop)
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 	if (!gimple_nop_p (gsi_stmt (gsi)))
-	  add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
+	  add_missing_phi_nodes_1 (loop, gsi_stmt (gsi), bbs);
 
       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
+	add_missing_phi_nodes_1 (loop, gsi_stmt (gsi), bbs);
     }
 
   free (bbs);
-- 
1.7.0.4

From 64c0173d8cf8b433d46941e95fe98979afeba773 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 15:37:46 -0600
Subject: [PATCH 11/12] Fix memory leak.

---
 gcc/tree-loop-flattening.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 9ddc52a..3613dc9 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -527,6 +527,9 @@ flatten_loop (loop_p loop, tree *scratch_pad)
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Flattened %d loops.\n", VEC_length (edge, back_edges));
 
+  VEC_free (edge, heap, back_edges);
+  VEC_free (basic_block, heap, loop_body);
+
   return TODO_update_ssa | TODO_verify_ssa;
 }
 
-- 
1.7.0.4

From 0e4f1d41f599fbbb5d338945f1b30c6ad8c37618 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 15:40:28 -0600
Subject: [PATCH 12/12] Fix pass TODOs.

---
 gcc/tree-loop-flattening.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 3613dc9..cd49be1 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -530,7 +530,7 @@ flatten_loop (loop_p loop, tree *scratch_pad)
   VEC_free (edge, heap, back_edges);
   VEC_free (basic_block, heap, loop_body);
 
-  return TODO_update_ssa | TODO_verify_ssa;
+  return TODO_cleanup_cfg;
 }
 
 /* Flattens all the loops of the current function.  */
@@ -554,7 +554,6 @@ tree_loop_flattening (void)
   verify_flow_info ();
 #endif
 
-  cleanup_tree_cfg ();
   return todo;
 }
 
@@ -580,7 +579,8 @@ struct gimple_opt_pass pass_flatten_loops =
   0,					/* properties_destroyed */
   0,					/* todo_flags_start */
   TODO_dump_func
-    | TODO_update_ssa
-    | TODO_ggc_collect			/* todo_flags_finish */
+  | TODO_verify_ssa
+  | TODO_update_ssa
+  | TODO_ggc_collect			/* todo_flags_finish */
  }
 };
-- 
1.7.0.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]