Attachment 'loop-distribution-patch-against-gcc-4.1.0-release.patch'

Download

   1 2006-05-19  Georges-Andre Silber  <silber@cri.ensmp.fr>
   2 
   3         * doc/invoke.texi (Optimization Options): Document -ftree-loop-distribution.
   4         * tree-loop-distribution.c: New file.
   5         * tree-pass.h (pass_loop_distribution): New.
   6         * timevar.def (TV_TREE_LOOP_DISTRIBUTION): New.
   7         * tree-data-ref.c (initialize_data_dependence_relation):
   8         Initialize and set the new structure element reverse_p.
   9         * tree-data-ref.h (data_dependence_relation): New element.
  10         * common.opt (-ftree-loop-distribution): New.
  11         * tree-flow.h (distribute_loops): Declare.
  12         * Makefile.in: Add tree-loop-distribution.o.
  13         * passes.c (init_optimization_passes): Schedule loop distribution.
  14 
  15 Index: doc/invoke.texi
  16 ===================================================================
  17 --- doc/invoke.texi	(revision 113325)
  18 +++ doc/invoke.texi	(working copy)
  19 @@ -341,7 +341,8 @@ Objective-C and Objective-C++ Dialects}.
  20  -fsplit-ivs-in-unroller -funswitch-loops @gol
  21  -fvariable-expansion-in-unroller @gol
  22  -ftree-pre  -ftree-ccp  -ftree-dce -ftree-loop-optimize @gol
  23 --ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
  24 +-ftree-loop-linear -ftree-loop-distribution -ftree-loop-im @gol
  25 +-ftree-loop-ivcanon -fivopts @gol
  26  -ftree-dominator-opts -ftree-dse -ftree-copyrename -ftree-sink @gol
  27  -ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
  28  -ftree-vect-loop-version -ftree-salias -fipa-pta -fweb @gol
  29 @@ -5090,6 +5091,11 @@ at @option{-O} and higher.
  30  Perform linear loop transformations on tree.  This flag can improve cache
  31  performance and allow further loop optimizations to take place.
  32  
  33 +@item -ftree-loop-distribution
  34 +Perform loop distribution on tree.  This flag can improve cache
  35 +performance (on big loop bodies) and allow further loop optimizations 
  36 +(like parallelization) to take place.
  37 +
  38  @item -ftree-loop-im
  39  Perform loop invariant motion on trees.  This pass moves only invariants that
  40  would be hard to handle at RTL level (function calls, operations that expand to
  41 Index: tree-loop-distribution.c
  42 ===================================================================
  43 --- tree-loop-distribution.c	(revision 0)
  44 +++ tree-loop-distribution.c	(revision 0)
  45 @@ -0,0 +1,1860 @@
  46 +/* Loop Distribution
  47 +   Copyright (C) 2006 Free Software Foundation, Inc.
  48 +   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>.
  49 +
  50 +This file is part of GCC.
  51 +
  52 +GCC is free software; you can redistribute it and/or modify it under
  53 +the terms of the GNU General Public License as published by the Free
  54 +Software Foundation; either version 2, or (at your option) any later
  55 +version.
  56 +
  57 +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  58 +WARRANTY; without even the implied warranty of MERCHANTABILITY or
  59 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  60 +for more details.
  61 +
  62 +You should have received a copy of the GNU General Public License
  63 +along with GCC; see the file COPYING.  If not, write to the Free
  64 +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  65 +02110-1301, USA.  */
  66 +
  67 +/* This pass performs loop distribution using a reduced dependence graph
  68 +   (RDG) built using the data dependence analyzer.  This RDG is then used
  69 +   to build a "partition" RDG graph without taking into account data
  70 +   dependences.  Data dependences are then used to build a graph of
  71 +   Strongly Connected Components.  This graph is then topologically sorted
  72 +   to obtain a code generation plan for the distributed loops.  
  73 +
  74 +   When using the -fdump-tree-ldist option, it outputs each graph in
  75 +   "dotty" (used by graphviz), surrounded by some XML structure.
  76 +
  77 +   TODO: Replace arrays by vectors for edges and vertices of RDG.
  78 +   TODO: Generate new distributed loops.  */
  79 +
  80 +#include "config.h"
  81 +#include "system.h"
  82 +#include "coretypes.h"
  83 +#include "tm.h"
  84 +#include "ggc.h"
  85 +#include "tree.h"
  86 +#include "target.h"
  87 +
  88 +#include "rtl.h"
  89 +#include "basic-block.h"
  90 +#include "diagnostic.h"
  91 +#include "tree-flow.h"
  92 +#include "tree-dump.h"
  93 +#include "timevar.h"
  94 +#include "cfgloop.h"
  95 +#include "expr.h"
  96 +#include "optabs.h"
  97 +#include "tree-chrec.h"
  98 +#include "tree-data-ref.h"
  99 +#include "tree-scalar-evolution.h"
 100 +#include "tree-pass.h"
 101 +#include "lambda.h"
 102 +#include "langhooks.h"
 103 +
 104 +#ifdef USE_MAPPED_LOCATION
 105 +typedef source_location LOC;
 106 +#define UNKNOWN_LOC UNKNOWN_LOCATION
 107 +#define EXPR_LOC(e) EXPR_LOCATION(e)
 108 +#define LOC_FILE(l) LOCATION_FILE (l)
 109 +#define LOC_LINE(l) LOCATION_LINE (l)
 110 +#else
 111 +typedef source_locus LOC;
 112 +#define UNKNOWN_LOC NULL
 113 +#define EXPR_LOC(e) EXPR_LOCUS(e)
 114 +#define LOC_FILE(l) (l)->file
 115 +#define LOC_LINE(l) (l)->line
 116 +#endif
 117 +
 118 +/* Initial capacity of the VEC data structures used in RDG.  */
 119 +# define RDG_VS 10
 120 +
 121 +/* Values used in DFS algorithm.  */
 122 +# define VERTEX_WHITE 0
 123 +# define VERTEX_GRAY 1
 124 +# define VERTEX_BLACK 2
 125 +
 126 +typedef struct rdg *rdg_p;
 127 +typedef struct rdg_vertex *rdg_vertex_p;
 128 +typedef struct rdg_edge *rdg_edge_p;
 129 +typedef struct prdg *prdg_p;
 130 +typedef struct prdg_vertex *prdg_vertex_p;
 131 +typedef struct prdg_edge *prdg_edge_p;
 132 +
 133 +DEF_VEC_P(rdg_vertex_p);
 134 +DEF_VEC_ALLOC_P(rdg_vertex_p, heap);
 135 +DEF_VEC_P(rdg_edge_p);
 136 +DEF_VEC_ALLOC_P(rdg_edge_p, heap);
 137 +DEF_VEC_P(prdg_vertex_p);
 138 +DEF_VEC_ALLOC_P(prdg_vertex_p, heap);
 139 +DEF_VEC_P(prdg_edge_p);
 140 +DEF_VEC_ALLOC_P(prdg_edge_p, heap);
 141 +DEF_VEC_I(int);
 142 +DEF_VEC_ALLOC_I(int, heap);
 143 +
 144 +/* A RDG (Reduced Dependence Graph) represents all data dependence
 145 +   constraints between the statements of a loop nest. */
 146 +struct rdg 
 147 +{
 148 +  /* The loop nest represented by this RDG.  */
 149 +  struct loop *loop_nest;
 150 +  
 151 +  /* The SSA_NAME used for loop index.  */
 152 +  tree loop_index;
 153 +  
 154 +  /* The MODIFY_EXPR used to update the loop index.  */
 155 +  tree loop_index_update;
 156 +  
 157 +  /* The COND_EXPR that is the exit condition of the loop.  */
 158 +  tree loop_exit_condition;
 159 +  
 160 +  /* The PHI_NODE of the loop index.  */
 161 +  tree loop_index_phi_node;
 162 +  
 163 +  /* The vertices of the graph.  There is one vertex per
 164 +     statement of the basic block of the loop.  */
 165 +  unsigned int nb_vertices;
 166 +  rdg_vertex_p vertices;
 167 +
 168 +  /* The edges of the graph.  There is one edge per data dependence (between
 169 +     memory references) and one edge per scalar dependence.  */
 170 +  unsigned int nb_edges;  
 171 +  rdg_edge_p edges;
 172 +  
 173 +  /* Vertices that contain a statement containing an ARRAY_REF.  */
 174 +  VEC (rdg_vertex_p, heap) *dd_vertices;
 175 +  
 176 +  /* Data references and array data dependence relations.  */
 177 +  VEC (ddr_p, heap) *dependence_relations;
 178 +  VEC (data_reference_p, heap) *datarefs;
 179 +};
 180 +
 181 +#define RDG_LOOP(G)	  (G)->loop_nest
 182 +#define RDG_IDX(G)        (G)->loop_index
 183 +#define RDG_IDX_UPDATE(G) (G)->loop_index_update
 184 +#define RDG_EXIT_COND(G)  (G)->loop_exit_condition
 185 +#define RDG_IDX_PHI(G)    (G)->loop_index_phi_node
 186 +#define RDG_NBV(G)        (G)->nb_vertices
 187 +#define RDG_NBE(G)        (G)->nb_edges
 188 +#define RDG_V(G)          (G)->vertices
 189 +#define RDG_VERTEX(G,i)   &((G)->vertices[i])
 190 +#define RDG_E(G)          (G)->edges
 191 +#define RDG_EDGE(G,i)     &((G)->edges[i])
 192 +#define RDG_DDV(G)        (G)->dd_vertices
 193 +#define RDG_DR(G)         (G)->datarefs
 194 +#define RDG_DDR(G)        (G)->dependence_relations
 195 +
 196 +/* A RDG vertex representing a statement.  */
 197 +struct rdg_vertex 
 198 +{
 199 +  /* This color is used for graph algorithms.  */
 200 +  int color;
 201 +
 202 +  /* The number of the basic block in the loop body.  */
 203 +  unsigned int bb_number;
 204 +
 205 +  /* The number of the vertex.  It represents the number of
 206 +     the statement in the basic block.  */
 207 +  unsigned int number;
 208 +  
 209 +  /* The statement represented by this vertex.  */
 210 +  tree stmt;
 211 +  
 212 +  /* True when this vertex contains a data reference 
 213 +     that is an ARRAY_REF.  */
 214 +  bool has_dd_p; 
 215 +  
 216 +  /* Vertex is the sink of those edges.  */
 217 +  VEC (rdg_edge_p, heap) *in_edges;
 218 +  
 219 +  /* Vertex is the source of those edges. */
 220 +  VEC (rdg_edge_p, heap) *out_edges;
 221 +
 222 +  /* Partitions the vertex is in.  
 223 +     If 'has_dd_p' is true, the vertex can only be in one partition.
 224 +     If not, the vertex can be duplicated in several partitions.  */
 225 +  VEC (int, heap) *partition_numbers;
 226 +  
 227 +  /* Strongly connected components the vertex is in.
 228 +     If 'has_dd_p' is true, the vertex can only be in one SCC.
 229 +     If not, the vertex can be in several SCCs.  */
 230 +  VEC (int, heap) *scc_numbers;
 231 +};
 232 +
 233 +#define RDGV_COLOR(V)      (V)->color
 234 +#define RDGV_BB(V)         (V)->bb_number
 235 +#define RDGV_N(V)          (V)->number
 236 +#define RDGV_STMT(V)       (V)->stmt
 237 +#define RDGV_DD_P(V)       (V)->has_dd_p
 238 +#define RDGV_IN(V)         (V)->in_edges
 239 +#define RDGV_OUT(V)        (V)->out_edges
 240 +#define RDGV_PARTITIONS(V) (V)->partition_numbers
 241 +#define RDGV_SCCS(V)       (V)->scc_numbers
 242 +
 243 +/* Data dependence type.  */
 244 +enum rdg_dep_type 
 245 +{
 246 +  /* Read After Write (RAW) (source is W, sink is R).  */
 247 +  flow_dd = 'f',
 248 +  
 249 +  /* Write After Read (WAR) (source is R, sink is W).  */
 250 +  anti_dd = 'a',
 251 +  
 252 +  /* Write After Write (WAW) (source is W, sink is W).  */
 253 +  output_dd = 'o', 
 254 +  
 255 +  /* Read After Read (RAR) (source is R, sink is R).  */
 256 +  input_dd = 'i' 
 257 +};
 258 +
 259 +/* An edge of the RDG with dependence information.  */
 260 +struct rdg_edge 
 261 +{
 262 +  /* Color used for graph algorithms.  */  
 263 +  int color;
 264 +  
 265 +  /* The vertex source of the dependence.  */
 266 +  rdg_vertex_p source;
 267 +  
 268 +  /* The vertex sink of the dependence.  */
 269 +  rdg_vertex_p sink;
 270 +  
 271 +  /* The reference source of the dependence.  */
 272 +  tree source_ref;
 273 +  
 274 +  /* The reference sink of the dependence.  */
 275 +  tree sink_ref;
 276 +  
 277 +  /* Type of the dependence.  */
 278 +  enum rdg_dep_type type;
 279 +  
 280 +  /* Level of the dependence: the depth of the loop that
 281 +    carries the dependence.  */
 282 +  int level;
 283 +  
 284 +  /* True if the dependence is between two scalars.  Such a
 285 +     dependence is usually described as a dependence between
 286 +     two memory elements of dimension 0.  */
 287 +  bool scalar_p;  
 288 +};
 289 +
 290 +#define RDGE_COLOR(E)       (E)->color
 291 +#define RDGE_SOURCE(E)      (E)->source
 292 +#define RDGE_SINK(E)        (E)->sink
 293 +#define RDGE_SOURCE_REF(E)  (E)->source_ref
 294 +#define RDGE_SINK_REF(E)    (E)->sink_ref
 295 +#define RDGE_TYPE(E)        (E)->type
 296 +#define RDGE_LEVEL(E)       (E)->level
 297 +#define RDGE_SCALAR_P(E)    (E)->scalar_p
 298 +
 299 +/* This graph represents a partition: each vertex is a group of
 300 +   existing RDG vertices, each edge is a dependence between two
 301 +   partitions.  */
 302 +struct prdg 
 303 +{
 304 +  /* The RDG used for partitioning.  */
 305 +  rdg_p rdg;
 306 +  
 307 +  /* The vertices of the graph.  */
 308 +  VEC (prdg_vertex_p, heap) *vertices;
 309 +  
 310 +  /* The edges of the graph.  */
 311 +  VEC (prdg_edge_p, heap) *edges;
 312 +};
 313 +
 314 +#define PRDG_RDG(G)       (G)->rdg
 315 +#define PRDG_NBV(G)       VEC_length (prdg_vertex_p,(G)->vertices)
 316 +#define PRDG_V(G)         (G)->vertices
 317 +#define PRDG_VERTEX(G,i)  VEC_index (prdg_vertex_p,(G)->vertices,i) 
 318 +#define PRDG_NBE(G)       VEC_length (prdg_edge_p,(G)->edges)
 319 +#define PRDG_E(G)         (G)->edges
 320 +#define PRDG_EDGE(G,i)    VEC_index (prdg_edge_p,(G)->edges,i)
 321 +
 322 +/* A vertex representing a group of RDG vertices.  */
 323 +struct prdg_vertex 
 324 +{
 325 +  /* The partition number.  */
 326 +  int num;
 327 +  
 328 +  /* Used for graph algorithms.  */
 329 +  int color; 
 330 +  
 331 +  /* Discovery time.  Used by DFS.  */
 332 +  int d;
 333 +  
 334 +  /* Finishing time.  Used by DFS and SCC.  */
 335 +  int f;
 336 +  
 337 +  /* SCC number.  */
 338 +  int scc; 
 339 +  
 340 +  /* Predecessor after DFS computation.  */
 341 +  prdg_vertex_p pred; 
 342 +   
 343 +  /* Vertices of the RDG that are in this partition.  */
 344 +  VEC (rdg_vertex_p, heap) *pvertices;
 345 +};
 346 +
 347 +#define PRDGV_N(V)           (V)->num
 348 +#define PRDGV_COLOR(V)       (V)->color
 349 +#define PRDGV_D(V)           (V)->d
 350 +#define PRDGV_F(V)           (V)->f
 351 +#define PRDGV_SCC(V)         (V)->scc
 352 +#define PRDGV_PRED(V)        (V)->pred
 353 +#define PRDGV_NPV(V)         VEC_length (rdg_vertex_p,(V)->pvertices)
 354 +#define PRDGV_PV(V)          (V)->pvertices
 355 +#define PRDGV_PVERTEX(V,i)   VEC_index (rdg_vertex_p,(V)->pvertices,i)
 356 +
 357 +/* Dependence edge of the partition graph.  */
 358 +struct prdg_edge 
 359 +{
 360 +  /* Vertex source of the dependence.  */
 361 +  prdg_vertex_p source;
 362 +  
 363 +  /* Vertex sink of the dependence.  */
 364 +  prdg_vertex_p sink;
 365 +  
 366 +  /* Original edge of the RDG.  */
 367 +  rdg_edge_p rdg_edge;
 368 +};
 369 +
 370 +#define PRDGE_SOURCE(V)    (V)->source
 371 +#define PRDGE_SINK(V)      (V)->sink
 372 +#define PRDGE_RDG_EDGE(V)  (V)->rdg_edge
 373 +
 374 +
 375 +/* Array to check if a loop has already been distributed.  */
 376 +bool *treated_loops;
 377 +
 378 +/* Loop location.  */
 379 +static LOC dist_loop_location;
 380 +
 381 +/* Function prototype from tree-vectorizer.  */
 382 +LOC find_loop_location (struct loop*);
 383 +
 384 +/* Helper function for Depth First Search.  */
 385 +
 386 +static void
 387 +dfs_rdgp_1 (prdg_p g, prdg_vertex_p v, unsigned int *t, unsigned int scc)
 388 +{
 389 +  unsigned int i;
 390 +  prdg_edge_p e;
 391 +  rdg_vertex_p rdg_v;
 392 +  
 393 +  PRDGV_COLOR (v) = VERTEX_GRAY;
 394 +  (*t)++;
 395 +  PRDGV_D (v) = *t;
 396 +  PRDGV_SCC (v) = scc;
 397 +
 398 +  /* If scc!=0, add this SCC to each vertex of the partition. */
 399 +  if (scc)
 400 +    for (i = 0; VEC_iterate (rdg_vertex_p, PRDGV_PV (v), i, rdg_v); i++)
 401 +      VEC_safe_push (int, heap, RDGV_SCCS (rdg_v), scc);
 402 +  
 403 +  for (i = 0; VEC_iterate (prdg_edge_p, PRDG_E (g), i, e); i++)
 404 +    if (PRDGE_SOURCE (e) == v)
 405 +      {
 406 +	prdg_vertex_p u = PRDGE_SINK (e);
 407 +      
 408 +	if (PRDGV_COLOR (u) == VERTEX_WHITE)
 409 +	  {
 410 +	    PRDGV_PRED (u) = v;
 411 +	    dfs_rdgp_1 (g, u, t, scc);
 412 +	  }
 413 +      }
 414 +
 415 +  PRDGV_COLOR (v) = VERTEX_BLACK;
 416 +  (*t)++;
 417 +  PRDGV_F (v) = *t;
 418 +}
 419 +
 420 +/* Depth First Search.  This is an adaptation of the depth first search
 421 +   described in Cormen et al., "Introduction to Algorithms", MIT Press.
 422 +   Returns the max of "finishing times" for the partition graph G.  */
 423 +
 424 +static int
 425 +dfs_rdgp (prdg_p g)
 426 +{
 427 +  unsigned int i;
 428 +  /* t represents the max of finishing times.  */
 429 +  unsigned int t = 0;
 430 +  prdg_vertex_p v;
 431 +
 432 +  for (i = 0; VEC_iterate (prdg_vertex_p, PRDG_V (g), i, v); i++)
 433 +    {
 434 +      PRDGV_COLOR (v) = VERTEX_WHITE;
 435 +      PRDGV_PRED (v) = NULL;
 436 +    }
 437 +
 438 +  for (i = 0; VEC_iterate (prdg_vertex_p, PRDG_V (g), i, v); i++)
 439 +    if (PRDGV_COLOR (v) == VERTEX_WHITE)
 440 +      dfs_rdgp_1 (g, v, &t, 0);
 441 +  
 442 +  return t;
 443 +}
 444 +
 445 +/* Comparison function to compare "finishing times" of
 446 +   two vertices.  */
 447 +
 448 +static bool
 449 +rdgp_vertex_less_than_p (const prdg_vertex_p a,
 450 +                         const prdg_vertex_p b)
 451 +{
 452 +  return (PRDGV_F (a) < PRDGV_F (b));
 453 +}
 454 +
 455 +/* Helper function for the computation of strongly connected components.  */
 456 +
 457 +static unsigned int
 458 +scc_rdgp_1 (prdg_p g, int max_f)
 459 +{
 460 +  unsigned int i;
 461 +  unsigned int t = 0;
 462 +  unsigned int scc = 0;
 463 +  prdg_vertex_p v;
 464 +  VEC (prdg_vertex_p, heap) *sorted_vertices;
 465 +
 466 +  for (i = 0; VEC_iterate (prdg_vertex_p, PRDG_V (g), i, v); i++)
 467 +    {
 468 +      PRDGV_COLOR (v) = VERTEX_WHITE;
 469 +      PRDGV_PRED (v) = NULL;
 470 +    }
 471 +  
 472 +  /* Here we build a vector containing the vertices sorted by increasing
 473 +     finishing times F (computed by DFS).  This sorted insertion defeats
 474 +     the linear-time complexity O(V+E) of the SCC algorithm.  Instead we
 475 +     could use an array of pointers to vertices, indexed by the finishing
 476 +     time F of the corresponding vertex.  Such an array would have a size
 477 +     equal to 'max_f', with holes.  */
 478 +  
 479 +  sorted_vertices = VEC_alloc (prdg_vertex_p, heap, max_f);
 480 +  
 481 +  for (i = 0; VEC_iterate (prdg_vertex_p, PRDG_V (g), i, v); i++)
 482 +    {
 483 +      unsigned int idx = VEC_lower_bound (prdg_vertex_p, sorted_vertices,
 484 +					  v, rdgp_vertex_less_than_p);
 485 +
 486 +      VEC_safe_insert (prdg_vertex_p, heap, sorted_vertices, idx, v);
 487 +    }
 488 +
 489 +  gcc_assert (VEC_length (prdg_vertex_p, sorted_vertices));
 490 +  
 491 +  while (VEC_length (prdg_vertex_p, sorted_vertices))
 492 +    {
 493 +      v = VEC_pop (prdg_vertex_p, sorted_vertices);
 494 +    
 495 +      if (PRDGV_COLOR (v) == VERTEX_WHITE)
 496 +	dfs_rdgp_1 (g, v, &t, ++scc);
 497 +    }
 498 +
 499 +  VEC_free (prdg_vertex_p, heap, sorted_vertices);
 500 +  
 501 +  return scc;
 502 +}
 503 +
 504 +/* Change the directions of all edges.  */
 505 +
 506 +static void
 507 +transpose_rdgp (prdg_p g)
 508 +{
 509 +  unsigned int i;
 510 +  prdg_edge_p e;
 511 +  
 512 +  for (i = 0; VEC_iterate (prdg_edge_p, PRDG_E (g), i, e); i++)
 513 +    {
 514 +      prdg_vertex_p tmp = PRDGE_SINK (e);
 515 +
 516 +      PRDGE_SINK (e) = PRDGE_SOURCE (e);
 517 +      PRDGE_SOURCE (e) = tmp;
 518 +    }
 519 +}
 520 +
 521 +/* Computes the strongly connected components of G.  */
 522 +
 523 +static unsigned int
 524 +scc_rdgp (prdg_p g)
 525 +{
 526 +  unsigned int nb_sccs;
 527 +  int max_f;
 528 +  
 529 +  max_f = dfs_rdgp (g);
 530 +  transpose_rdgp (g);
 531 +  nb_sccs = scc_rdgp_1 (g, max_f);
 532 +  transpose_rdgp (g);
 533 +
 534 +  return nb_sccs;
 535 +}
 536 +
 537 +/* Returns true when vertex V is in partition P.  */
 538 +
 539 +static bool
 540 +vertex_in_partition_p (rdg_vertex_p v, int p)
 541 +{
 542 +  int i;
 543 +  int vp;
 544 +  
 545 +  for (i = 0; VEC_iterate (int, RDGV_PARTITIONS (v), i, vp); i++)
 546 +    if (vp == p)
 547 +      return true;
 548 +  
 549 +  return false;
 550 +}
 551 +
 552 +/* Returns true when vertex V is in SCC S.  */
 553 +
 554 +static bool
 555 +vertex_in_scc_p (rdg_vertex_p v, int s)
 556 +{
 557 +  int i;
 558 +  int vs;
 559 +  
 560 +  for (i = 0; VEC_iterate (int, RDGV_SCCS (v), i, vs); i++)
 561 +    if (vs == s)
 562 +      return true;
 563 +  
 564 +  return false;
 565 +}
 566 +
 567 +/* Allocates a new partition vertex.  */
 568 +
 569 +static prdg_vertex_p
 570 +new_prdg_vertex (unsigned int p)
 571 +{
 572 +  prdg_vertex_p v;
 573 +  
 574 +  v = XNEW (struct prdg_vertex);
 575 +  PRDGV_N (v) = p;
 576 +  PRDGV_COLOR (v) = 0;
 577 +  PRDGV_D (v) = 0;
 578 +  PRDGV_F (v) = 0;
 579 +  PRDGV_PRED (v) = NULL;
 580 +  PRDGV_SCC (v) = 0;
 581 +  PRDGV_PV (v) = VEC_alloc (rdg_vertex_p, heap, RDG_VS);
 582 +  
 583 +  return v;
 584 +}
 585 +
 586 +/* Free a partition vertex.  */
 587 +
 588 +static void
 589 +free_prdg_vertex (prdg_vertex_p v)
 590 +{
 591 +  VEC_free (rdg_vertex_p, heap, PRDGV_PV (v));
 592 +  free (v);
 593 +}
 594 +
 595 +/* Allocates a new partition edge for the RDG edge RE, directed from
 596 +   SOURCE to SINK (the argument order used by the callers).  */
 597 +static prdg_edge_p
 598 +new_prdg_edge (rdg_edge_p re,
 599 +               prdg_vertex_p source,
 600 +               prdg_vertex_p sink)
 601 +{
 602 +  prdg_edge_p e = XNEW (struct prdg_edge);
 603 +
 604 +  PRDGE_RDG_EDGE (e) = re;
 605 +  PRDGE_SOURCE (e) = source;
 606 +  PRDGE_SINK (e) = sink;
 607 +
 608 +  return e;
 609 +}
 610 +
 611 +/* Free a partition edge.  */
 612 +
 613 +static void
 614 +free_prdg_edge (prdg_edge_p e)
 615 +{
 616 +  free (e);
 617 +}
 618 +
 619 +/* Allocates a new partition graph.  */
 620 +
 621 +static prdg_p
 622 +new_prdg (rdg_p rdg)
 623 +{
 624 +  prdg_p rdgp = XNEW (struct prdg);
 625 +
 626 +  PRDG_RDG (rdgp) = rdg;
 627 +  PRDG_V (rdgp) = VEC_alloc (prdg_vertex_p, heap, RDG_VS);
 628 +  PRDG_E (rdgp) = VEC_alloc (prdg_edge_p, heap, RDG_VS);
 629 +
 630 +  return rdgp;
 631 +}
 632 +
 633 +/* Free the vertices and edges of a partition graph (but not G itself).  */
 634 +
 635 +static void
 636 +free_prdg (prdg_p g)
 637 +{
 638 +  unsigned int i;
 639 +  prdg_vertex_p v;
 640 +  prdg_edge_p e;
 641 +  
 642 +  for (i = 0; VEC_iterate (prdg_vertex_p, PRDG_V (g), i, v); i++)
 643 +    free_prdg_vertex (v);
 644 +  
 645 +  for (i = 0; VEC_iterate (prdg_edge_p, PRDG_E (g), i, e); i++)
 646 +    free_prdg_edge (e);
 647 +  
 648 +  VEC_free (prdg_vertex_p, heap, PRDG_V (g));
 649 +  VEC_free (prdg_edge_p, heap, PRDG_E (g));
 650 +}
 651 +
 652 +/* Builds a strongly connected components partition graph of G.  */
 653 +
 654 +static prdg_p
 655 +build_scc_graph (prdg_p g)
 656 +{
 657 +  prdg_p sccg;
 658 +  unsigned int nb_sccs;
 659 +  unsigned int i, j;
 660 +  
 661 +  /* Computes the SCC of g.  */
 662 +  nb_sccs = scc_rdgp (g);
 663 +
 664 +  /* Builds a new partition graph of the SCC of g.  */
 665 +  sccg = new_prdg (PRDG_RDG (g));
 666 +  
 667 +  /* Create SCC vertices.  */
 668 +  for (i = 0; i < nb_sccs; i++)
 669 +    {
 670 +      unsigned int current_scc = i + 1;
 671 +      unsigned int nbv = RDG_NBV (PRDG_RDG (sccg));
 672 +      prdg_vertex_p v = new_prdg_vertex (current_scc);
 673 +      
 674 +      for (j = 0; j < nbv; j++)
 675 +	{
 676 +	  rdg_vertex_p rdg_v = RDG_VERTEX (PRDG_RDG (sccg), j);
 677 +        
 678 +	  if (vertex_in_scc_p (rdg_v, current_scc))
 679 +	    VEC_safe_push (rdg_vertex_p, heap, PRDGV_PV (v), rdg_v);
 680 +	}
 681 +      
 682 +      PRDGV_SCC (v) = current_scc;
 683 +      VEC_safe_push (prdg_vertex_p, heap, PRDG_V (sccg), v);
 684 +    }
 685 +  
 686 +  /* Create SCC edges.  */
 687 +  for (i = 0; i < RDG_NBE (PRDG_RDG (g)); i++)
 688 +    {
 689 +      rdg_edge_p e = RDG_EDGE (PRDG_RDG (g), i);
 690 +    
 691 +      /* Here we take only into account data dependences.  */
 692 +      if (!RDGE_SCALAR_P (e))
 693 +	{
 694 +	  prdg_edge_p pe;
 695 +	  int source_idx = VEC_index (int, RDGV_SCCS (RDGE_SOURCE (e)), 0);
 696 +	  int sink_idx = VEC_index (int, RDGV_SCCS (RDGE_SINK (e)), 0);
 697 +          
 698 +	  gcc_assert (source_idx && sink_idx);   
 699 +          
 700 +	  pe = new_prdg_edge (e, PRDG_VERTEX (sccg, source_idx - 1),
 701 +			      PRDG_VERTEX (sccg, sink_idx - 1));
 702 +	 
 703 +	  VEC_safe_push (prdg_edge_p, heap, sccg->edges, pe);
 704 +	}	
 705 +    }
 706 +    
 707 +  return sccg;
 708 +}
 709 +
 710 +/* Returns true if the vertex V can be recomputed, meaning that neither
 711 +   V nor any vertex reached by following scalar in-edges backwards
 712 +   contains a data reference (has_dd_p).  */
 713 +
 714 +static bool
 715 +can_recompute_vertex_p (rdg_vertex_p v)
 716 +{
 717 +  rdg_edge_p in_edge;
 718 +  unsigned int i;
 719 +  
 720 +  if (RDGV_DD_P (v))
 721 +    return false;
 722 +  
 723 +  for (i = 0; VEC_iterate (rdg_edge_p, RDGV_IN (v), i, in_edge); i++)
 724 +    if (RDGE_SCALAR_P (in_edge))
 725 +      if (!can_recompute_vertex_p (RDGE_SOURCE (in_edge)))
 726 +        return false;
 727 +
 728 +  return true;
 729 +}
 730 +
 731 +/* Create one partition in RDG starting from vertex V with a number p.  */ 
 732 +
 733 +static void
 734 +one_prdg (rdg_p rdg, rdg_vertex_p v, int p)
 735 +{
 736 +  rdg_edge_p o_edge, i_edge;
 737 +  unsigned int i;
 738 +  
 739 +  if (vertex_in_partition_p (v, p))
 740 +    return;
 741 +
 742 +  VEC_safe_push (int, heap, RDGV_PARTITIONS (v), p);
 743 +
 744 +  for (i = 0; VEC_iterate (rdg_edge_p, RDGV_IN (v), i, i_edge); i++)
 745 +    if (RDGE_SCALAR_P (i_edge))
 746 +      one_prdg (rdg, RDGE_SOURCE (i_edge), p);
 747 +
 748 +  if (!can_recompute_vertex_p (v))
 749 +    for (i = 0; VEC_iterate (rdg_edge_p, RDGV_OUT (v), i, o_edge); i++)
 750 +      if (RDGE_SCALAR_P (o_edge))
 751 +        one_prdg (rdg, RDGE_SINK (o_edge), p);
 752 +}
 753 +
 754 +/* Returns true if partitions are correct: P is nonzero, every vertex is
 755 +   in a partition, and every vertex with a data reference is in just one.  */
 756 +
 757 +static bool
 758 +correct_partitions_p (rdg_p rdg, int p)
 759 +{
 760 +  unsigned int i;
 761 +
 762 +  if (!p)
 763 +    return false;
 764 +
 765 +  for (i = 0; i < RDG_NBV (rdg); i++)
 766 +    {
 767 +      rdg_vertex_p v = RDG_VERTEX (rdg, i);
 768 +      unsigned int n = VEC_length (int, RDGV_PARTITIONS (v));
 769 +
 770 +      /* Test the length itself; "!VEC_length (...) == 1" negates first.  */
 771 +      if (n == 0 || (RDGV_DD_P (v) && n != 1))
 772 +        return false;
 773 +    }
 774 +
 775 +  return true;
 776 +}
 777 +
 778 +/* Marks each vertex that contains an ARRAY_REF with the number of the
 779 +   partition it belongs to.  Returns the number of partitions.
 780 +   This number is at least 1.  */
 781 +
 782 +static unsigned int
 783 +mark_partitions (rdg_p rdg)
 784 +{
 785 +  rdg_vertex_p rdg_v;
 786 +  unsigned int i;
 787 +  int k, p = 0;
 788 +
 789 +  /* Clear all existing partitions.  */
 790 +  for (i = 0; i < RDG_NBV (rdg); i++)
 791 +    VEC_truncate (int, RDGV_PARTITIONS (RDG_VERTEX (rdg,i)), 0);
 792 +  
 793 +  /* If there are no dd_vertices, put all in one single partition.  */
 794 +  if (VEC_length (rdg_vertex_p, RDG_DDV (rdg)) == 0)
 795 +    {
 796 +      /* Mark all vertices with p=1.  */
 797 +      for (i = 0; i < RDG_NBV (rdg); i++)
 798 +        VEC_safe_push (int, heap, RDGV_PARTITIONS (RDG_VERTEX (rdg, i)), 1);
 799 +
 800 +      return 1;
 801 +    }
 802 +    
 803 +  /* Mark each vertex with its own color and propagate.  */
 804 +  for (i = 0; VEC_iterate (rdg_vertex_p, RDG_DDV (rdg), i, rdg_v); i++)
 805 +    if (VEC_length (int, RDGV_PARTITIONS (rdg_v)) == 0)
 806 +      one_prdg (rdg, rdg_v, ++p);
 807 +  
 808 +  /* Add the vertices that are not in a partition in all partitions.
 809 +     Those vertices do not contain any ARRAY_REF (otherwise, they would
 810 +     have been added by the previous loop on dd_vertices).  */
 811 +  for (i = 0; i < RDG_NBV (rdg); i++)
 812 +    if (VEC_length (int, RDGV_PARTITIONS (RDG_VERTEX (rdg, i))) == 0)
 813 +      for (k = 1; k <= p; k++)
 814 +        VEC_safe_push (int, heap, RDGV_PARTITIONS (RDG_VERTEX (rdg, i)), k);
 815 +    
 816 +  gcc_assert (correct_partitions_p (rdg, p));
 817 +  
 818 +  return p;
 819 +}
 820 +
 821 +/* Builds a partition graph of an RDG.  This partition represents the
 822 +   maximal distribution of the loops if we break all dependences of level l
 823 +   greater than 0 that are of dimension l.  Note that this graph can have
 824 +   new cycles that were not visible in the RDG.
 825 + 
 826 +   The principle of this partition is twofold:
 827 +    - we allow the recomputation of scalar values;
 828 +    - we do not allow the recomputation of array references, because this
 829 +      is what we try to distribute to parallelize the iterations of the loop.
 830 + 
 831 +   Vertices that contain an array reference (has_dd_p == true) are in one 
 832 +   and only one partition.  Vertices that do not contain any array reference
 833 +   are in one or more partitions.
 834 + 
 835 +   If there are no ARRAY_REF statements in the rdg, all the vertices
 836 +   are put in one single partition.  */
 837 +
 838 +static prdg_p
 839 +build_prdg (rdg_p rdg)
 840 +{
 841 +  unsigned int i, j;
 842 +  rdg_vertex_p rdg_v;  
 843 +  prdg_p rdgp = new_prdg (rdg);
 844 +  unsigned int nbp = mark_partitions (rdg);
 845 +  
 846 +  /* Create partition vertices.  */
 847 +  for (i = 0; i < nbp; i++)
 848 +    {
 849 +      unsigned int current_partition = i+1;
 850 +      prdg_vertex_p v = new_prdg_vertex (current_partition);
 851 +      
 852 +      for (j = 0; j < rdg->nb_vertices; j++)
 853 +	{
 854 +	  rdg_v = RDG_VERTEX (rdg, j);
 855 +        
 856 +	  if (vertex_in_partition_p (rdg_v, current_partition))
 857 +	    VEC_safe_push (rdg_vertex_p, heap, PRDGV_PV (v), rdg_v);
 858 +	}
 859 +
 860 +      VEC_safe_push (prdg_vertex_p, heap, PRDG_V (rdgp), v);
 861 +    }
 862 +
 863 +  /* Create partition edges.  */
 864 +  for (i = 0; i < rdg->nb_edges; i++)
 865 +    {
 866 +      rdg_edge_p e = RDG_EDGE (rdg, i);
 867 +    
 868 +      /* Here we take only into account data dependences.  */
 869 +      if (!RDGE_SCALAR_P (e))
 870 +	{
 871 +	  int so_idx = VEC_index (int, RDGV_PARTITIONS (RDGE_SOURCE (e)), 0);
 872 +          int si_idx = VEC_index (int, RDGV_PARTITIONS (RDGE_SINK (e)), 0);
 873 +	  prdg_edge_p pe = new_prdg_edge (e, PRDG_VERTEX (rdgp, so_idx-1),
 874 +					  PRDG_VERTEX (rdgp, si_idx-1));
 875 +
 876 +	  VEC_safe_push (prdg_edge_p, heap, PRDG_E (rdgp), pe);
 877 +	}
 878 +    }
 879 +  
 880 +  return rdgp;
 881 +}
 882 +
 883 +/* Print out a partition graph in DOT format and other information.  */
 884 +
 885 +static void
 886 +dump_prdg (FILE *outf, prdg_p rdgp)
 887 +{
 888 +  unsigned int p, i;
 889 +  prdg_vertex_p pv;
 890 +  prdg_edge_p pe;
 891 +  rdg_vertex_p v;
 892 +
 893 +  fprintf (outf, "<graphviz><![CDATA[\n");
 894 +  fprintf (outf, "digraph ");
 895 +  print_generic_expr (outf, RDG_IDX (PRDG_RDG (rdgp)), 0);
 896 +  fprintf (outf, " {\n");
 897 +    
 898 +  /* Print out vertices.  Each vertex represents a partition, so it
 899 +     can contain several statements.  */
 900 +  for (p = 0; VEC_iterate (prdg_vertex_p, PRDG_V (rdgp), p, pv); p++)
 901 +    {
 902 +      fprintf (outf, " P%d [ shape=rect,label = \" P%d(%d): ", 
 903 +	       PRDGV_N (pv), PRDGV_N (pv), PRDGV_SCC (pv));
 904 +    
 905 +      for (i = 0; VEC_iterate (rdg_vertex_p, PRDGV_PV (pv), i, v); i++)
 906 +	fprintf (outf, "S%d;", RDGV_N (v));
 907 +      
 908 +      fprintf (outf, "\"];\n");
 909 +    
 910 +      fprintf (outf, " v%d [ label = \" P%d(%d)",  PRDGV_N (pv), 
 911 +               PRDGV_N (pv), PRDGV_SCC (pv));   
 912 +      
 913 +      fprintf (outf, "\"];\n");
 914 +      
 915 +      fprintf (outf, "{rank=same; P%d; v%d; }\n",  PRDGV_N (pv), PRDGV_N (pv)); 
 916 +    } 
 917 +  
 918 +  for (i = 0; VEC_iterate (prdg_edge_p, PRDG_E (rdgp), i, pe); i++)
 919 +    fprintf (outf, "v%d -> v%d [label=\"%c:%d\" style=dotted];\n",
 920 +	     PRDGV_N (PRDGE_SOURCE (pe)),
 921 +	     PRDGV_N (PRDGE_SINK (pe)),
 922 +	     RDGE_TYPE (PRDGE_RDG_EDGE (pe)), 
 923 +	     RDGE_LEVEL (PRDGE_RDG_EDGE (pe)));
 924 +  
 925 +  fprintf (outf, "}\n");
 926 +  fprintf (outf, "]]></graphviz>\n");
 927 +}
 928 +
 929 +/* Print out loop information.  */
 930 +
 931 +static void
 932 +dump_loop_infos (FILE *outf, struct loop *loop_nest)
 933 +{
 934 +  fprintf (outf, " <location>\n");
 935 +  
 936 +  if (dist_loop_location == UNKNOWN_LOC)
 937 +    fprintf (outf, "  <filename>%s</filename>\n  <line>%d</line>\n",
 938 +             DECL_SOURCE_FILE (current_function_decl),
 939 +             DECL_SOURCE_LINE (current_function_decl));
 940 +  else
 941 +    fprintf (outf, "  <filename>%s</filename>\n  <line>%d</line>\n",
 942 +	     LOC_FILE (dist_loop_location), LOC_LINE (dist_loop_location));
 943 +  
 944 +  fprintf (outf, " </location>\n");
 945 +  fprintf (outf, " <depth>%d</depth>\n", loop_nest->depth);
 946 +  fprintf (outf, " <level>%d</level>\n", loop_nest->level);
 947 +  fprintf (outf, " <nodes>%d</nodes>\n", loop_nest->num_nodes);
 948 +  fprintf (outf, " <parallel>%d</parallel>\n", loop_nest->parallel_p);
 949 +}
 950 +
 951 +/* Dump RDG in DOT format, then list the vertices stored in RDG_DDV.  */
 952 +
 953 +static void
 954 +dump_rdg (FILE *outf, rdg_p rdg)
 955 +{
 956 +  unsigned int i;
 957 +  rdg_vertex_p vertex;
 958 +  
 959 +  fprintf (outf, "<graphviz><![CDATA[\n");
 960 +  fprintf (outf, "digraph ");
 961 +  print_generic_expr (outf, RDG_IDX (rdg), 0);
 962 +  fprintf (outf, " {\n");
 963 +  
 964 +  for (i = 0; i < RDG_NBV (rdg); i++)
 965 +    { 
 966 +      rdg_vertex_p v = RDG_VERTEX (rdg, i);
 967 +    
 968 +      fprintf (outf, " v%d [ label = \"", RDGV_N (v));
 969 +      fprintf (outf, "S%d : ", RDGV_N (v));
 970 +      print_generic_expr (outf, RDGV_STMT (v), 0);
 971 +      fprintf (outf, "\"");
 972 +      
 973 +      if (RDGV_DD_P (v))
 974 +	fprintf (outf, " shape=rect style=filled color=\".7 .3 1.0\"]");
 975 +      else
 976 +	fprintf (outf, " shape=rect]");	
 977 +      
 978 +      fprintf (outf, ";\n");
 979 +    }
 980 +  
 981 +  for (i = 0; i < RDG_NBE (rdg); i++)
 982 +    {
 983 +      struct rdg_edge *e = RDG_EDGE (rdg, i);
 984 +      struct rdg_vertex *sink = RDGE_SINK (e);
 985 +      struct rdg_vertex *source = RDGE_SOURCE (e);
 986 +      
 987 +      fprintf (outf, " v%d -> v%d", RDGV_N (source), RDGV_N (sink));      
 988 +      fprintf (outf, " [ label=\"%c:%d", RDGE_TYPE (e), RDGE_LEVEL (e));
 989 +      
 990 +      if (RDGE_SCALAR_P (e))
 991 +        fprintf (outf, " d=0");
 992 +      else
 993 +        fprintf (outf, " d=x");
 994 +      
 995 +      fprintf(outf, "\" ");
 996 +      
 997 +      /* TODO: Here, it is not the level that matters...
 998 +         In fact, it is the dimension of the dependence: a dependence
 999 +         of level=0 with a dimension=1 can be stored and
1000 +         then can be broken.  */
1001 +      if (RDGE_LEVEL (e) > 0)
1002 +        fprintf (outf, " style=dotted");
1003 +      
1004 +      fprintf(outf, "]\n");
1005 +    }
1006 +  
1007 +  fprintf (outf, "}\n");
1008 +  fprintf (outf, "]]></graphviz>\n");
1009 +  fprintf (outf, "<dd_vertices>\n");
1010 +  
1011 +  for (i = 0; VEC_iterate (rdg_vertex_p, RDG_DDV (rdg), i, vertex); i++)
1012 +    {
1013 +      fprintf (outf, "<dd_vertex s=\"s%d\">", RDGV_N (vertex));
1014 +      print_generic_expr (outf, RDGV_STMT (vertex), 0);
1015 +      fprintf (outf, "</dd_vertex>\n");
1016 +    }
1017 +
1018 +  fprintf (outf, "</dd_vertices>\n");
1019 +}
1020 +
1021 +/* Look up STMT among the vertices of RDG.  Return the first vertex
1022 +   holding it, or NULL when no vertex does.  */
1023 +
1024 +static rdg_vertex_p
1025 +find_vertex_with_stmt (rdg_p rdg, tree stmt)
1026 +{
1027 +  unsigned int k;
1028 +
1029 +  /* Linear scan; the first match wins.  */
1030 +  for (k = 0; k < RDG_NBV (rdg); k++)
1031 +    if (RDGV_STMT (RDG_VERTEX (rdg, k)) == stmt)
1032 +      return RDG_VERTEX (rdg, k);
1033 +
1034 +  return NULL;
1035 +}
1036 +
1037 +/* Returns true if STMT is a control statement of the loop of RDG,
1038 +   i.e. a label or the loop exit condition.  */
1039 +
1040 +static bool
1041 +loop_nest_control_p (rdg_p rdg, tree stmt)
1042 +{
1043 +  bool is_label = TREE_CODE (stmt) == LABEL_EXPR;
1044 +  bool is_exit_test = stmt == RDG_EXIT_COND (rdg);
1045 +
1046 +  /* Either property alone is enough to classify STMT as loop
1047 +     control.  */
1048 +  return is_label || is_exit_test;
1049 +}
1050 +
1051 +/* Count the statements of the loop of RDG that become RDG vertices.  */
1052 +
1053 +static int
1054 +number_of_vertices (rdg_p rdg)
1055 +{
1056 +  struct loop *nest = RDG_LOOP (rdg);
1057 +  basic_block *body = get_loop_body (nest);
1058 +  unsigned int count = 0;
1059 +  unsigned int b;
1060 +
1061 +  for (b = 0; b < nest->num_nodes; b++)
1062 +    {
1063 +      basic_block block = body[b];
1064 +      block_stmt_iterator it;
1065 +
1066 +      /* Only basic blocks whose loop_father is the nest itself are
1067 +         counted; blocks belonging to another loop are skipped.  */
1068 +      if (block->loop_father != nest)
1069 +        continue;
1070 +
1071 +      /* Every statement that is not loop control is one vertex.  */
1072 +      for (it = bsi_start (block); !bsi_end_p (it); bsi_next (&it))
1073 +        if (!loop_nest_control_p (rdg, bsi_stmt (it)))
1074 +          count++;
1075 +    }
1076 +
1077 +  free (body);
1078 +  return count;
1079 +}
1080 +
1081 +/* Returns true if STMT is the statement of at least one data
1082 +   reference of the vector DATAREFS.  */
1083 +
1084 +static bool
1085 +contains_dr_p (tree stmt, VEC (data_reference_p, heap) *datarefs)
1086 +{
1087 +  bool found = false;
1088 +  data_reference_p ref;
1089 +  unsigned int k;
1090 +
1091 +  for (k = 0; !found && VEC_iterate (data_reference_p, datarefs, k, ref); k++)
1092 +    found = DR_STMT (ref) == stmt;
1093 +
1094 +  return found;
1095 +}
1096 +
1097 +/* Create vertices of a RDG, one per non-control statement of the loop.  */
1098 +
1099 +static void
1100 +create_vertices (rdg_p rdg)
1101 +{
1102 +  basic_block *bbs, bb;
1103 +  unsigned int i, vertex_index = 0;
1104 +  block_stmt_iterator bsi;
1105 +  struct loop *loop_nest = RDG_LOOP (rdg);
1106 +
1107 +  RDG_NBV (rdg) = number_of_vertices (rdg);
1108 +  RDG_V (rdg) = XCNEWVEC (struct rdg_vertex, RDG_NBV (rdg));
1109 +  bbs = get_loop_body (loop_nest);
1110 +
1111 +  for (i = 0; i < loop_nest->num_nodes; i++)
1112 +    {
1113 +      bb = bbs[i];
1114 +
1115 +      /* number_of_vertices sized RDG_V from the blocks owned directly by
1116 +         the loop; skip the others so that RDG_V is never overrun.  */
1117 +      if (bb->loop_father != loop_nest)
1118 +        continue;
1119 +      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1120 +        {
1121 +          tree stmt = bsi_stmt (bsi);
1122 +
1123 +          if (!loop_nest_control_p (rdg, stmt))
1124 +            {
1125 +              rdg_vertex_p v = RDG_VERTEX (rdg, vertex_index);
1126 +              RDGV_STMT (v) = stmt;
1127 +              RDGV_N (v) = vertex_index;
1128 +              RDGV_BB (v) = i;
1129 +              RDGV_COLOR (v) = 0;
1130 +              RDGV_DD_P (v) = contains_dr_p (stmt, RDG_DR (rdg));
1131 +              RDGV_IN (v) = VEC_alloc (rdg_edge_p, heap, RDG_VS);
1132 +              RDGV_OUT (v) = VEC_alloc (rdg_edge_p, heap, RDG_VS);
1133 +              RDGV_PARTITIONS (v) = VEC_alloc (int, heap, RDG_VS);
1134 +              RDGV_SCCS (v) = VEC_alloc (int, heap, RDG_VS);
1135 +              vertex_index++;
1136 +            }
1137 +        }
1138 +    }
1139 +  free (bbs);
1140 +}
1141 +
1142 +/* Checks whether STMT is a modify expression we can deal with: its
1143 +   left-hand side must be an SSA_NAME, ARRAY_REF or INDIRECT_REF.  */
1144 +
1145 +static bool
1146 +correct_modify_expr_p (tree stmt)
1147 +{
1148 +  enum tree_code lhs_code;
1149 +
1150 +  /* Anything that is not an assignment is rejected outright.  */
1151 +  if (TREE_CODE (stmt) != MODIFY_EXPR)
1152 +    return false;
1153 +
1154 +  lhs_code = TREE_CODE (TREE_OPERAND (stmt, 0));
1155 +
1156 +  /* Accept exactly the three supported kinds of left-hand side.  */
1157 +  if (lhs_code == SSA_NAME)
1158 +    return true;
1159 +
1160 +  if (lhs_code == ARRAY_REF)
1161 +    return true;
1162 +
1163 +  return lhs_code == INDIRECT_REF;
1164 +}
1165 +
1166 +/* Checks the statements of a loop body: returns false when some
1167 +   MODIFY_EXPR is rejected by correct_modify_expr_p.  */
1168 +
1169 +static bool
1170 +check_statements (struct loop *loop_nest)
1171 +{
1172 +  basic_block *bbs = get_loop_body (loop_nest);
1173 +  block_stmt_iterator bsi;
1174 +  unsigned int i;
1175 +  bool ok = true;
1176 +
1177 +  /* Stop scanning at the first bad statement, but always reach the
1178 +     free below: returning from inside the loops would leak BBS.  */
1179 +  for (i = 0; ok && i < loop_nest->num_nodes; i++)
1180 +    {
1181 +      basic_block bb = bbs[i];
1182 +
1183 +      for (bsi = bsi_start (bb); ok && !bsi_end_p (bsi); bsi_next (&bsi))
1184 +        {
1185 +          tree stmt = bsi_stmt (bsi);
1186 +
1187 +          if (TREE_CODE (stmt) == MODIFY_EXPR && !correct_modify_expr_p (stmt))
1188 +            ok = false;
1189 +        }
1190 +    }
1191 +
1192 +  free (bbs);
1193 +  return ok;
1194 +}
1195 +
1196 +/* Computes the number of uses, within RDG, of the lvalue of STMT.  */
1197 +
1198 +static int
1199 +number_of_lvalue_uses (rdg_p rdg, tree stmt)
1200 +{
1201 +  use_operand_p use;
1202 +  imm_use_iterator iter;
1203 +  tree defined;
1204 +  int uses = 0;
1205 +
1206 +  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1207 +
1208 +  defined = TREE_OPERAND (stmt, 0);
1209 +
1210 +  /* Left-hand sides that are not SSA names are reported as having
1211 +     no uses.  */
1212 +  if (TREE_CODE (defined) != SSA_NAME)
1213 +    return 0;
1214 +
1215 +  /* A use counts only when its statement is a vertex of RDG.  */
1216 +  FOR_EACH_IMM_USE_FAST (use, iter, defined)
1217 +    if (find_vertex_with_stmt (rdg, USE_STMT (use)))
1218 +      uses++;
1219 +
1220 +  return uses;
1221 +}
1222 +
1223 +/* Computes the number of scalar dependences to add to the RDG.  */
1224 +
1225 +static int
1226 +number_of_scalar_dependences (rdg_p rdg)
1227 +{
1228 +  unsigned int total = 0;
1229 +  unsigned int v;
1230 +
1231 +  for (v = 0; v < RDG_NBV (rdg); v++)
1232 +    {
1233 +      tree def_stmt = RDGV_STMT (RDG_VERTEX (rdg, v));
1234 +      /* Only assignments are passed to number_of_lvalue_uses.  */
1235 +      if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1236 +        continue;
1237 +      total += number_of_lvalue_uses (rdg, def_stmt);
1238 +    }
1239 +  return total;
1240 +}
1241 +
1242 +/* Computes the number of data dependences to add to the RDG.  */
1243 +
1244 +static int
1245 +number_of_data_dependences (rdg_p rdg)
1246 +{
1247 +  unsigned int k, total = 0;
1248 +  ddr_p relation;
1249 +
1250 +  /* Each relation whose DDR_ARE_DEPENDENT is NULL_TREE contributes
1251 +     one dependence per distance vector; the others contribute none.  */
1252 +  for (k = 0; VEC_iterate (ddr_p, RDG_DDR (rdg), k, relation); k++)
1253 +    total += (DDR_ARE_DEPENDENT (relation) == NULL_TREE
1254 +              ? DDR_NUM_DIST_VECTS (relation) : 0);
1255 +  return total;
1256 +}
1257 +
1258 +/* Gets the dependence level with a distance vector: the 1-based
1259 +   position of the first strictly positive entry, or 0 if none.  */
1260 +
1261 +static unsigned int
1262 +get_dependence_level (lambda_vector dist_vect, unsigned int length)
1263 +{
1264 +  unsigned int pos;
1265 +
1266 +  /* Only the first LENGTH entries of DIST_VECT are examined.  */
1267 +  for (pos = 0; pos < length; pos++)
1268 +    if (dist_vect[pos] > 0)
1269 +      return pos + 1;
1270 +
1271 +  /* 0 means a lexicographic dependence */
1272 +  return 0;
1273 +}
1274 +
1275 +/* Fill edge INDEX_OF_EDGE of RDG from vector INDEX_OF_VECTOR of DDR.  */
1276 +
1277 +static void
1278 +update_edge_with_ddv (ddr_p ddr, unsigned int index_of_vector, rdg_p rdg,
1279 +                      unsigned int index_of_edge)
1280 +{
1281 +  data_reference_p dra;
1282 +  data_reference_p drb;
1283 +  rdg_edge_p edge = RDG_EDGE (rdg, index_of_edge);
1284 +  rdg_vertex_p va;
1285 +  rdg_vertex_p vb;
1286 +  
1287 +  /* Invert data references according to the direction of the 
1288 +     dependence, so that DRA is always the source and DRB the sink.  */
1289 +  if (DDR_REVERSE_P (ddr))
1290 +    {
1291 +      dra = DDR_B (ddr);
1292 +      drb = DDR_A (ddr);
1293 +    }
1294 +  else
1295 +    {
1296 +      dra = DDR_A (ddr);
1297 +      drb = DDR_B (ddr);
1298 +    }
1299 +  
1300 +  /* Locate the vertices containing the statements that contain
1301 +     the data references.  */
1302 +  va = find_vertex_with_stmt (rdg, DR_STMT (dra));
1303 +  vb = find_vertex_with_stmt (rdg, DR_STMT (drb));
1304 +  gcc_assert (va && vb);
1305 +
1306 +  /* Update source and sink of the dependence.  */
1307 +  RDGE_SOURCE (edge) = va;
1308 +  RDGE_SINK (edge) = vb;
1309 +  RDGE_SOURCE_REF (edge) = DR_REF (dra);
1310 +  RDGE_SINK_REF (edge) = DR_REF (drb);
1311 +  
1312 +  /* Determines the type of the data dependence from the read/write
     kind of the source and sink references.  */
1313 +  if (DR_IS_READ (dra) && DR_IS_READ (drb))
1314 +    RDGE_TYPE (edge) = input_dd;
1315 +  else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
1316 +    RDGE_TYPE (edge) = output_dd;
1317 +  else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
1318 +    RDGE_TYPE (edge) = flow_dd;
1319 +  else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
1320 +    RDGE_TYPE (edge) = anti_dd;
1321 +
1322 +  RDGE_LEVEL (edge) = get_dependence_level (DDR_DIST_VECT (ddr, 
1323 +                                                           index_of_vector), 
1324 +					    DDR_NB_LOOPS (ddr));
1325 +  RDGE_COLOR (edge) = 0;
1326 +  RDGE_SCALAR_P (edge) = false;
1327 +  
1328 +  VEC_safe_push (rdg_edge_p, heap, RDGV_OUT (va), edge);
1329 +  VEC_safe_push (rdg_edge_p, heap, RDGV_IN (vb), edge);
1330 +}
1331 +
1332 +/* Creates all the edges of a RDG: data edges first, then scalar ones.  */
1333 +
1334 +static void
1335 +create_edges (rdg_p rdg)
1336 +{
1337 +  unsigned int i;
1338 +  unsigned int j;
1339 +  unsigned int edge_index;
1340 +  unsigned int data_edges;
1341 +  unsigned int scalar_edges;
1342 +  struct data_dependence_relation *ddr;
1343 +
1344 +  scalar_edges = number_of_scalar_dependences (rdg);  
1345 +  data_edges = number_of_data_dependences (rdg);
1346 +  
1347 +  if (scalar_edges == 0 && data_edges == 0)
1348 +    {
1349 +      RDG_NBE (rdg) = 0;
1350 +      RDG_E (rdg) = NULL;
1351 +      return;
1352 +    }  
1353 +  
1354 +  /* Allocate a single array holding both data edges and scalar edges.  */
1355 +  RDG_NBE (rdg) = scalar_edges + data_edges;
1356 +  RDG_E (rdg) = XCNEWVEC (struct rdg_edge, RDG_NBE (rdg));
1357 +
1358 +  /* Create data edges, one per distance vector.  */
1359 +  edge_index = 0;
1360 +  
1361 +  for (i = 0; VEC_iterate (ddr_p, RDG_DDR (rdg), i, ddr); i++)
1362 +    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 
1363 +      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
1364 +        update_edge_with_ddv (ddr, j, rdg, edge_index++);
1365 +          
1366 +  /* Create scalar edges.  The principle is as follows: for each vertex,
1367 +     if the vertex represents a MODIFY_EXPR (an assignment), we create one
1368 +     edge for each use of the SSA_NAME on the LHS.  This edge
1369 +     represents a flow scalar dependence of level 0.  */
1370 +  
1371 +  for (i = 0; i < RDG_NBV (rdg); i++)
1372 +    {
1373 +      rdg_vertex_p def_vertex = RDG_VERTEX (rdg, i);
1374 +      tree stmt = RDGV_STMT (def_vertex);
1375 +
1376 +      if (TREE_CODE (stmt) == MODIFY_EXPR)
1377 +        {
1378 +          tree lhs = TREE_OPERAND (stmt, 0);
1379 +	
1380 +          if (TREE_CODE (lhs) == SSA_NAME)
1381 +            {
1382 +              use_operand_p imm_use_p;
1383 +              imm_use_iterator iterator;
1384 +           
1385 +              FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, lhs)
1386 +                {
1387 +                  rdg_vertex_p use_vertex;
1388 +		  
1389 +		  use_vertex = find_vertex_with_stmt (rdg, 
1390 +						      USE_STMT (imm_use_p));
1391 +		  
1392 +		  /* If use_vertex != NULL, it means that there is a vertex
1393 +		     in the RDG that uses the value defined in 
1394 +		     def_vertex.  */
1395 +                  if (use_vertex) 
1396 +		    {
1397 +		      rdg_edge_p edge = RDG_EDGE (rdg, edge_index);
1398 +		    
1399 +		      RDGE_LEVEL (edge) = 0;
1400 +		      RDGE_SINK (edge) = use_vertex;
1401 +                      RDGE_SOURCE (edge) = def_vertex;
1402 +                      RDGE_SINK_REF (edge) = *(imm_use_p->use);
1403 +                      RDGE_SOURCE_REF (edge) = lhs;
1404 +                      RDGE_COLOR (edge) = 0;
1405 +                      RDGE_TYPE (edge) = flow_dd;
1406 +		      RDGE_SCALAR_P (edge) = true;
1407 +		      VEC_safe_push (rdg_edge_p, heap, 
1408 +                                     RDGV_IN (use_vertex), edge);
1409 +		      VEC_safe_push (rdg_edge_p, heap, 
1410 +                                     RDGV_OUT (def_vertex), edge);
1411 +                      edge_index++;
1412 +                    }
1413 +                }
1414 +            }  
1415 +        }
1416 +    }
1417 +
1418 +  gcc_assert (edge_index == RDG_NBE (rdg));
1419 +}
1420 +
1421 +/* Get the loop index of a loop nest, or NULL_TREE if it cannot be found.  */
1422 +
1423 +static tree
1424 +get_loop_index (struct loop *loop_nest)
1425 +{
1426 +  tree exit_cond = get_loop_exit_condition (loop_nest);
1427 +  tree cmp, first, second, candidate;
1428 +
1429 +  /* The exit condition must exist and be a COND_EXPR whose predicate
1430 +     is a comparison.  */
1431 +  if (exit_cond == NULL_TREE || TREE_CODE (exit_cond) != COND_EXPR)
1432 +    return NULL_TREE;
1433 +
1434 +  cmp = TREE_OPERAND (exit_cond, 0);
1435 +  if (!COMPARISON_CLASS_P (cmp))
1436 +    return NULL_TREE;
1437 +
1438 +  first = TREE_OPERAND (cmp, 0);
1439 +  second = TREE_OPERAND (cmp, 1);
1440 +
1441 +  /* The candidate index is the operand compared against a
1442 +     loop-invariant one; the first operand is tested first.  */
1443 +  if (expr_invariant_in_loop_p (loop_nest, first))
1444 +    candidate = second;
1445 +  else if (expr_invariant_in_loop_p (loop_nest, second))
1446 +    candidate = first;
1447 +  else
1448 +    return NULL_TREE;
1449 +
1450 +  /* Only an SSA name qualifies as the loop index.  */
1451 +  return TREE_CODE (candidate) == SSA_NAME ? candidate : NULL_TREE;
1452 +}
1453 +
1454 +/* Returns true if the dependences are all computable.  */
1455 +
1456 +static bool
1457 +known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
1458 +{
1459 +  bool ok = true;
1460 +  unsigned int k;
1461 +  ddr_p rel;
1462 +
1463 +  /* Stop at the first relation marked chrec_dont_know.  */
1464 +  for (k = 0; ok && VEC_iterate (ddr_p, dependence_relations, k, rel); k++)
1465 +    ok = DDR_ARE_DEPENDENT (rel) != chrec_dont_know;
1466 +  return ok;
1467 +}
1468 +
1469 +/* Returns the number of phi-nodes of BB whose result is a GIMPLE register.  */
1470 +
1471 +static int
1472 +number_of_phi_nodes (basic_block bb)
1473 +{
1474 +  int count = 0;
1475 +  tree node;
1476 +
1477 +  /* Walk the PHI chain of BB; other phi-nodes are not counted.  */
1478 +  for (node = phi_nodes (bb); node; node = PHI_CHAIN (node))
1479 +    count += is_gimple_reg (PHI_RESULT (node)) ? 1 : 0;
1480 +
1481 +  return count;
1482 +}
1483 +
1484 +/* Returns the phi-node containing the loop index.
1485 +   Right now it just returns the first valid phi-node it finds.  */
1486 +
1487 +static tree
1488 +get_index_phi_node (struct loop *loop_nest)
1489 +{
1490 +  tree node = phi_nodes (loop_nest->header);
1491 +
1492 +  /* "Valid" means that the result is a GIMPLE register.  */
1493 +  while (node && !is_gimple_reg (PHI_RESULT (node)))
1494 +    node = PHI_CHAIN (node);
1495 +
1496 +  return node;
1497 +}
1498 +/* Write MSG to the dump file, if any, as a <loop_check> element.  */
1499 +static void
1500 +dump_check_info (const char *msg)
1501 +{
1502 +  if (dump_file)
1503 +    fprintf (dump_file, "<loop_check>%s</loop_check>\n", msg);
1504 +}
1505 +
1506 +/* Checks if the loop fits the constraints we impose on a loop.  */
1507 +
1508 +static bool
1509 +loop_is_good_p (struct loop *loop_nest)
1510 +{
1511 +  if (loop_nest->depth != 1)
1512 +    {
1513 +      /* Right now, we only deal with loop nests that
1514 +         are at depth = 1.  */
1515 +      dump_check_info ("depth != 1");
1516 +      return false;
1517 +    }
1518 +  else if (loop_nest->inner)
1519 +    {
1520 +      /* Right now, only consider single loop nests.  */
1521 +      dump_check_info ("Loop has inner loop(s)");      
1522 +      return false;
1523 +    }
1524 +  else if (!loop_nest->single_exit)
1525 +    {
1526 +      /* Only consider loops with a single exit.  */
1527 +      dump_check_info ("More than one exit");
1528 +      return false;
1529 +    }
1530 +  else if (!get_loop_exit_condition (loop_nest))
1531 +    {
1532 +      /* The exit condition is too difficult to analyze.  */
1533 +      dump_check_info ("Cannot determine loop exit condition");
1534 +      return false;
1535 +    }
1536 +  else if (loop_nest->num_nodes != 2)
1537 +    {
1538 +      /* If we have two basic blocks, it means that we have the loop body
1539 +         plus the basic block containing the exit label.  */
1540 +      /* If we have more than two basic blocks, it means that
1541 +         we have some complicated control flow.  */
1542 +      dump_check_info ("Complicated control flow");
1543 +      return false;
1544 +    }
1545 +  else if (EDGE_COUNT (loop_nest->header->preds) != 2)
1546 +    {
1547 +      /* The header must have exactly two incoming edges.  */
1548 +      dump_check_info ("Too many incoming edges");
1549 +      return false;
1550 +    }
1551 +  else if (!empty_block_p (loop_nest->latch))
1552 +    {
1553 +      /* The loop exit condition must be at the end of the loop, the loop
1554 +         header has to contain all the executable statements and the
1555 +         latch has to be empty.  */
1556 +      dump_check_info ("Bad loop form");
1557 +      return false;
1558 +    }
1559 +  else if (empty_block_p (loop_nest->header))
1560 +    {
1561 +      /* The loop must contain some statements.  */
1562 +      dump_check_info ("Empty loop body");
1563 +      return false;
1564 +    }  
1565 +  else if (number_of_phi_nodes (loop_nest->header) > 1)
1566 +    {
1567 +      /* We consider that we should have no more than 1 PHI node
1568 +         representing the loop index.  */
1569 +      dump_check_info ("More than one PHI node");
1570 +      return false; 
1571 +    }
1572 +  else if (!check_statements (loop_nest))
1573 +    {
1574 +      /* Some lvalues are not correct and so cannot be handled
1575 +         right now.  */
1576 +      dump_check_info ("Bad statement(s) in loop body");
1577 +      return false;
1578 +    }
1579 +  else if (!get_loop_index (loop_nest))
1580 +    {
1581 +      /* We are not able to find out the loop index.  */
1582 +      dump_check_info ("Cannot find loop index");
1583 +      return false;
1584 +    }
1585 +  else if (!get_index_phi_node (loop_nest))
1586 +    {
1587 +      /* Cannot find the PHI node of the loop index.  */
1588 +      dump_check_info ("Cannot find loop index PHI node");
1589 +      return false;
1590 +    }
1591 +  
1592 +  /* Note that we do not have to check whether the statements inside
1593 +     the loop body have side effects or not because this check
1594 +     is going to be done by the data dependence analyzer.  */
1595 +  dump_check_info ("OK");
1596 +
1597 +  return true;
1598 +}
1599 +
1600 +/* Build the Reduced Dependence Graph of LOOP_NEST: a vertex per statement and
1601 +   an edge per data or scalar dependence; NULL if a dependence is unknown.  */
1602 +
1603 +static rdg_p
1604 +build_rdg (struct loop *loop_nest)
1605 +{
1606 +  rdg_p rdg;
1607 +  VEC (ddr_p, heap) *dependence_relations;
1608 +  VEC (data_reference_p, heap) *datarefs;
1609 +  unsigned int i;
1610 +  rdg_vertex_p vertex;
1611 +  
1612 +  /* Compute array data dependence relations.  */
1613 +  dependence_relations = VEC_alloc (ddr_p, heap, RDG_VS * RDG_VS) ;
1614 +  datarefs = VEC_alloc (data_reference_p, heap, RDG_VS);
1615 +  compute_data_dependences_for_loop (loop_nest, 
1616 +                                     false,
1617 +                                     &datarefs,
1618 +                                     &dependence_relations);
1619 +  
1620 +  /* Check if all the array dependences are known (computable).  */
1621 +  if (!known_dependences_p (dependence_relations))
1622 +    {
1623 +      dump_check_info ("Dependences: not computable");
1624 +      free_dependence_relations (dependence_relations);
1625 +      free_data_refs (datarefs);
1626 +      return NULL;
1627 +    }
1628 +  else
1629 +    dump_check_info ("Dependences: OK");
1630 +  
1631 +  /* OK, now we know that we can build our Reduced Dependence Graph
1632 +     where each vertex is a statement and where each edge is a data
1633 +     dependence between two references in statements.  */
1634 +  rdg = XNEW (struct rdg);
1635 +  RDG_LOOP (rdg) = loop_nest;
1636 +  RDG_EXIT_COND (rdg) = get_loop_exit_condition (loop_nest);
1637 +  RDG_IDX (rdg) = get_loop_index (loop_nest);
1638 +  RDG_IDX_UPDATE (rdg) = SSA_NAME_DEF_STMT (RDG_IDX (rdg));
1639 +  RDG_IDX_PHI (rdg) = get_index_phi_node (loop_nest);
1640 +
1641 +  RDG_DDR (rdg) = dependence_relations;
1642 +  RDG_DR (rdg) = datarefs;
1643 +  
1644 +  create_vertices (rdg);
1645 +  create_edges (rdg);
1646 +
1647 +  RDG_DDV (rdg) = VEC_alloc (rdg_vertex_p, heap, RDG_VS);
1648 +  
1649 +  for (i = 0; i < RDG_NBV (rdg); i++)
1650 +    {
1651 +      vertex = RDG_VERTEX (rdg, i);
1652 +
1653 +      if (RDGV_DD_P (vertex))
1654 +	VEC_safe_push (rdg_vertex_p, heap, RDG_DDV (rdg), vertex);
1655 +    }
1656 +    
1657 +  return rdg;
1658 +}
1659 +
1660 +/* Free the RDG.  */
1661 +static void
1662 +free_rdg (rdg_p rdg)
1663 +{
1664 +  unsigned int i;
1665 +
1666 +  free_dependence_relations (RDG_DDR (rdg));
1667 +  free_data_refs (RDG_DR (rdg));
1668 +
1669 +  for (i = 0; i < RDG_NBV (rdg); i++)
1670 +    {
1671 +      rdg_vertex_p v = RDG_VERTEX (rdg, i);
1672 +
1673 +      VEC_free (rdg_edge_p, heap, RDGV_IN (v));
1674 +      VEC_free (rdg_edge_p, heap, RDGV_OUT (v));
1675 +      VEC_free (int, heap, RDGV_PARTITIONS (v));
1676 +      VEC_free (int, heap, RDGV_SCCS (v));
1677 +    }
1678 +
1679 +  /* create_vertices allocates RDG_V even when there is no vertex, and
1680 +     create_edges leaves RDG_E NULL when there is no edge, so both are
1681 +     released unconditionally (freeing NULL is a no-op).  */
1682 +  free (RDG_V (rdg));
1683 +  free (RDG_E (rdg));
1684 +
1685 +  VEC_free (rdg_vertex_p, heap, RDG_DDV (rdg));
1686 +  free (rdg);
1687 +}
1688 +
1689 +/* Sort topologically the PRDG vertices, by decreasing PRDGV_F.  */
1690 +
1691 +static VEC (prdg_vertex_p, heap)*
1692 +topological_sort (prdg_p g)
1693 +{
1694 +  unsigned int max_f, i;
1695 +  prdg_vertex_p *vertices;
1696 +  prdg_vertex_p v;
1697 +  VEC (prdg_vertex_p, heap) *sorted_vertices;
1698 +  
1699 +  /* Depth First Search; its result is used as the largest PRDGV_F.  */
1700 +  max_f = dfs_rdgp (g);
1701 +  
1702 +  /* Allocate array of vertices, indexed by PRDGV_F.  */
1703 +  vertices = XCNEWVEC (prdg_vertex_p, max_f+1);
1704 +  
1705 +  /* Allocate a vector for sorted vertices.  */ 
1706 +  sorted_vertices = VEC_alloc (prdg_vertex_p, heap, RDG_VS);
1707 +  
1708 +  /* All vertices are set to NULL.  */
1709 +  for (i = 0; i <= max_f; i++)
1710 +    vertices[i] = NULL;
1711 +  
1712 +  /* Iterate on each vertex of the PRDG and put each vertex at
1713 +     the slot given by its PRDGV_F.  */
1714 +  for (i = 0; VEC_iterate (prdg_vertex_p, PRDG_V (g), i, v); i++)
1715 +    vertices[PRDGV_F (v)] = v;
1716 +
1717 +  /* Push non-NULL vertices by decreasing PRDGV_F; slot 0 is never visited.  */
1718 +  for (i = max_f; i > 0; i--)
1719 +    if (vertices[i])
1720 +      VEC_safe_push (prdg_vertex_p, heap, sorted_vertices, vertices[i]);
1721 +  
1722 +  free (vertices);
1723 +  
1724 +  return sorted_vertices;
1725 +}
1726 +
1727 +static void
1728 +open_loop_dump (struct loop *loop_nest)
1729 +{
1730 +  /* Open the <LOOP> element only when a dump file is available.  */
1731 +  if (!dump_file)
1732 +    return;
1733 +  fprintf (dump_file , "<LOOP num=\"%d\">\n", loop_nest->num);
1734 +  dump_loop_infos (dump_file, loop_nest);
1735 +}
1736 +/* Write the closing </LOOP> tag to the dump file, if any.  */
1737 +static void
1738 +close_loop_dump (void)
1739 +{
1740 +  if (dump_file)
1741 +    fprintf (dump_file, "</LOOP>\n");
1742 +}
1743 +
1744 +/* Build, dump and free the dependence graphs used to distribute LOOP_NEST.  */
1745 +
1746 +static void
1747 +do_distribution (struct loop *loop_nest)
1748 +{
1749 +  rdg_p rdg; /* Reduced dependence graph.  */
1750 +  prdg_p rdgp; /* Graph of RDG partitions.  */
1751 +  prdg_p sccg; /* Graph of Strongly Connected Components.  */
1752 +  VEC (prdg_vertex_p, heap) *dloops; /* Distributed loops.  */
1753 +
1754 +  open_loop_dump (loop_nest);
1755 +
1756 +  /* Check whether a RDG can be built for this loop nest or not.  */
1757 +  if (!loop_is_good_p (loop_nest))
1758 +    {
1759 +      close_loop_dump ();
1760 +      return;
1761 +    }
1762 +
1763 +  rdg = build_rdg (loop_nest);
1764 +  
1765 +  if (!rdg)
1766 +    {
1767 +      close_loop_dump ();
1768 +      return;
1769 +    }
1770 +  
1771 +  if (dump_file)
1772 +    {
1773 +      fprintf (dump_file, "<rdg>\n");
1774 +      dump_rdg (dump_file, rdg);
1775 +      fprintf (dump_file, "</rdg>\n");
1776 +    }
1777 +
1778 +  rdgp = build_prdg (rdg);      
1779 +  
1780 +  if (dump_file)
1781 +    {
1782 +      fprintf (dump_file, "<prdg>\n");
1783 +      dump_prdg (dump_file, rdgp);
1784 +      fprintf (dump_file, "</prdg>\n");
1785 +    }
1786 +  
1787 +  sccg = build_scc_graph (rdgp);
1788 +
1789 +  if (dump_file)
1790 +    {
1791 +      fprintf (dump_file, "<sccp>\n");
1792 +      dump_prdg (dump_file, sccg);
1793 +      fprintf (dump_file, "</sccp>\n");
1794 +    }
1795 +  
1796 +  dloops = topological_sort (sccg);
1797 +
1798 +  if (dump_file)
1799 +    {
1800 +      prdg_vertex_p v;
1801 +      int i;
1802 +      
1803 +      fprintf (dump_file, "<topological_sort>\n");
1804 +      
1805 +      for (i = 0; VEC_iterate (prdg_vertex_p, dloops, i, v); i++)
1806 +        fprintf (dump_file, "  <dloop n=\"%d\">P%d</dloop>\n", 
1807 +                 i, PRDGV_N (v));
1808 +      
1809 +      fprintf (dump_file, "</topological_sort>\n");
1810 +    }
1811 +  
1812 +  free_rdg (rdg);
1813 +  free_prdg (rdgp);
1814 +  free_prdg (sccg);
1815 +  VEC_free (prdg_vertex_p, heap, dloops);
1816 +
1817 +  close_loop_dump ();
1818 +}
1819 +
1820 +/* Iterates on the loops to distribute.  */
1821 +
1822 +void
1823 +distribute_loops (struct loops *loops)
1824 +{
1825 +  unsigned int i;
1826 +  unsigned int dist_loops; /* Number of loops before distribution.  */
1827 +
1828 +  dist_loops = loops->num;
1829 +
1830 +  /* XCNEWVEC returns zero-filled storage, so every entry already
1831 +     reads as false; no explicit initialization loop is needed.  */
1832 +  treated_loops = XCNEWVEC (bool, dist_loops);
1833 +
1834 +  for (i = 1; i < dist_loops; i++)
1835 +    {
1836 +      struct loop *loop_nest = loops->parray[i];
1837 +
1838 +      treated_loops[i] = true;
1839 +
1840 +      if (!loop_nest)
1841 +        continue;
1842 +
1843 +      dist_loop_location = find_loop_location (loop_nest);
1844 +
1845 +      do_distribution (loop_nest);
1846 +    }
1847 +
1848 +  free (treated_loops);
1849 +}
1850 +
1851 +/* Function executed by the pass for each function.  */
1852 +
1853 +static unsigned int
1854 +tree_loop_distribution (void)
1855 +{
1856 +  const char *fn_name;
1857 +
1858 +  /* Without loop structures there is nothing to look at.  */
1859 +  if (!current_loops)
1860 +    return 0;
1861 +
1862 +  if (dump_file)
1863 +    fprintf (dump_file, "<distribute_loops>\n");
1864 +
1865 +  /* When dumping, emit the whole current function first, wrapped in
1866 +     a CDATA section.  */
1867 +  if (dump_file && current_function_decl)
1868 +    {
1869 +      fn_name = lang_hooks.decl_printable_name (current_function_decl, 2);
1870 +      fprintf (dump_file, "<function name=\"%s\"><![CDATA[\n", fn_name);
1871 +      dump_function_to_file (current_function_decl, dump_file, dump_flags);
1872 +      fprintf (dump_file, "]]></function>\n");
1873 +    }
1874 +
1875 +  distribute_loops (current_loops);
1876 +
1877 +  /* Close the element opened above.  */
1878 +  if (dump_file)
1879 +    fprintf (dump_file, "</distribute_loops>\n");
1880 +
1881 +  return 0;
1882 +}
1883 +/* Gate of the pass: true when flag_tree_loop_distribution is non-zero.  */
1884 +static bool
1885 +gate_tree_loop_distribution (void)
1886 +{
1887 +  return flag_tree_loop_distribution != 0;
1888 +}
1889 +/* Descriptor of the "ldist" pass: its gate, execute hook and properties.  */
1890 +struct tree_opt_pass pass_loop_distribution =
1891 +{
1892 +  "ldist",			/* name */
1893 +  gate_tree_loop_distribution,  /* gate */
1894 +  tree_loop_distribution,       /* execute */
1895 +  NULL,				/* sub */
1896 +  NULL,				/* next */
1897 +  0,				/* static_pass_number */
1898 +  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
1899 +  PROP_cfg | PROP_ssa,		/* properties_required */
1900 +  0,				/* properties_provided */
1901 +  0,				/* properties_destroyed */
1902 +  0,				/* todo_flags_start */
1903 +  TODO_verify_loops,            /* todo_flags_finish */
1904 +  0                             /* letter */  
1905 +};
1906 Index: tree-pass.h
1907 ===================================================================
1908 --- tree-pass.h	(revision 113325)
1909 +++ tree-pass.h	(working copy)
1910 @@ -250,6 +250,7 @@ extern struct tree_opt_pass pass_scev_cp
1911  extern struct tree_opt_pass pass_empty_loop;
1912  extern struct tree_opt_pass pass_record_bounds;
1913  extern struct tree_opt_pass pass_if_conversion;
1914 +extern struct tree_opt_pass pass_loop_distribution;
1915  extern struct tree_opt_pass pass_vectorize;
1916  extern struct tree_opt_pass pass_complete_unroll;
1917  extern struct tree_opt_pass pass_loop_prefetch;
1918 Index: timevar.def
1919 ===================================================================
1920 --- timevar.def	(revision 113325)
1921 +++ timevar.def	(working copy)
1922 @@ -108,6 +108,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH    , "
1923  DEFTIMEVAR (TV_COMPLETE_UNROLL       , "complete unrolling")
1924  DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
1925  DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
1926 +DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
1927  DEFTIMEVAR (TV_TREE_PREFETCH	     , "tree prefetching")
1928  DEFTIMEVAR (TV_TREE_LOOP_IVOPTS	     , "tree iv optimization")
1929  DEFTIMEVAR (TV_TREE_LOOP_INIT	     , "tree loop init")
1930 Index: tree-data-ref.c
1931 ===================================================================
1932 --- tree-data-ref.c	(revision 113325)
1933 +++ tree-data-ref.c	(working copy)
1934 @@ -2134,7 +2134,8 @@ initialize_data_dependence_relation (str
1935    DDR_LOOP_NEST (res) = loop_nest;
1936    DDR_DIR_VECTS (res) = NULL;
1937    DDR_DIST_VECTS (res) = NULL;
1938 -
1939 +  DDR_REVERSE_P (res) = false;
1940 +  
1941    for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1942      {
1943        struct subscript *subscript;
1944 @@ -3675,7 +3676,8 @@ build_classic_dist_vector (struct data_d
1945  	  build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
1946  				       save_v, &init_b, &index_carry);
1947  	  save_dist_v (ddr, save_v);
1948 -
1949 +	  DDR_REVERSE_P (ddr) = true;
1950 +    
1951  	  /* In this case there is a dependence forward for all the
1952  	     outer loops:
1953  
1954 Index: tree-data-ref.h
1955 ===================================================================
1956 --- tree-data-ref.h	(revision 113325)
1957 +++ tree-data-ref.h	(working copy)
1958 @@ -237,6 +237,9 @@ struct data_dependence_relation
1959  
1960    /* The classic distance vector.  */
1961    VEC (lambda_vector, heap) *dist_vects;
1962 +  
1963 +  /* Is the dependence reversed with respect to the lexicographic order?  */
1964 +  bool reverse_p;
1965  };
1966  
1967  typedef struct data_dependence_relation *ddr_p;
1968 @@ -266,7 +269,7 @@ DEF_VEC_ALLOC_P(ddr_p,heap);
1969    VEC_index (lambda_vector, DDR_DIR_VECTS (DDR), I)
1970  #define DDR_DIST_VECT(DDR, I) \
1971    VEC_index (lambda_vector, DDR_DIST_VECTS (DDR), I)
1972 -
1973 +#define DDR_REVERSE_P(DDR) ((DDR)->reverse_p)  /* True when DDR is reversed w.r.t. the lexicographic order; usable as an lvalue.  */
1974  
1975  
1976  extern tree find_data_references_in_loop (struct loop *,
1977 Index: common.opt
1978 ===================================================================
1979 --- common.opt	(revision 113325)
1980 +++ common.opt	(working copy)
1981 @@ -953,6 +953,10 @@ ftree-loop-linear
1982  Common Report Var(flag_tree_loop_linear)
1983  Enable linear loop transforms on trees
1984  
1985 +ftree-loop-distribution
1986 +Common Report Var(flag_tree_loop_distribution)
1987 +Enable loop distribution on trees
1988 +
1989  ftree-loop-ivcanon
1990  Common Report Var(flag_tree_loop_ivcanon) Init(1)
1991  Create canonical induction variables in loops
1992 Index: tree-flow.h
1993 ===================================================================
1994 --- tree-flow.h	(revision 113325)
1995 +++ tree-flow.h	(working copy)
1996 @@ -928,6 +928,9 @@ bool sra_type_can_be_decomposed_p (tree)
1997  /* In tree-loop-linear.c  */
1998  extern void linear_transform_loops (struct loops *);
1999  
2000 +/* In tree-loop-distribution.c  */
2001 +extern void distribute_loops (struct loops *);
2002 +
2003  /* In tree-ssa-loop-ivopts.c  */
2004  bool expr_invariant_in_loop_p (struct loop *, tree);
2005  bool multiplier_allowed_in_address_p (HOST_WIDE_INT);
2006 Index: Makefile.in
2007 ===================================================================
2008 --- Makefile.in	(revision 113325)
2009 +++ Makefile.in	(working copy)
2010 @@ -975,6 +975,7 @@ OBJS-common = \
2011   tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o	   \
2012   tree-ssa-math-opts.o							   \
2013   tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o		   \
2014 + tree-loop-distribution.o											\
2015   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
2016   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
2017   cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
2018 @@ -2086,6 +2087,11 @@ tree-loop-linear.o: tree-loop-linear.c $
2019     $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
2020     tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
2021     $(TARGET_H) tree-chrec.h
2022 +tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
2023 +   $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
2024 +   $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
2025 +   tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) \
2026 +   $(TARGET_H) tree-chrec.h
2027  tree-stdarg.o: tree-stdarg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
2028     $(TREE_H) $(FUNCTION_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) tree-pass.h \
2029     tree-stdarg.h $(TARGET_H) langhooks.h
2030 Index: passes.c
2031 ===================================================================
2032 --- passes.c	(revision 113325)
2033 +++ passes.c	(working copy)
2034 @@ -599,6 +599,7 @@ init_optimization_passes (void)
2035    NEXT_PASS (pass_empty_loop);
2036    NEXT_PASS (pass_record_bounds);
2037    NEXT_PASS (pass_linear_transform);
2038 +  NEXT_PASS (pass_loop_distribution);
2039    NEXT_PASS (pass_iv_canon);
2040    NEXT_PASS (pass_if_conversion);
2041    NEXT_PASS (pass_vectorize);

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2006-09-07 12:40:21, 592.2 KB) [[attachment:ExampleGCC_back.pdf]]
  • [get | view] (2006-09-07 12:40:07, 3472.2 KB) [[attachment:ExampleGCC_middle.pdf]]
  • [get | view] (2006-09-07 12:39:01, 1012.4 KB) [[attachment:Intro2GCCc.pdf]]
  • [get | view] (2006-09-07 12:39:21, 279.9 KB) [[attachment:Intro2GCCf.pdf]]
  • [get | view] (2006-09-07 12:42:27, 17.0 KB) [[attachment:clip-patch-against-gcc-4.1.0-release.patch]]
  • [get | view] (2006-09-07 09:54:52, 1431.4 KB) [[attachment:gcc_course.pdf]]
  • [get | view] (2006-11-06 19:52:29, 878.5 KB) [[attachment:gcc_graphite.pdf]]
  • [get | view] (2006-09-07 12:56:20, 76.5 KB) [[attachment:gcc_summit_optimization_abstracts.pdf]]
  • [get | view] (2006-09-07 12:42:07, 57.3 KB) [[attachment:loop-distribution-patch-against-gcc-4.1.0-release.patch]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.