gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "expr.h"
  31 #include "output.h"
  32
  33 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  34
  35 bool
  36 just_once_each_iteration_p (struct loop *loop, basic_block bb)
  37 {
  38   /* It must be executed at least once each iteration.  */
  39   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  40     return false;
  41
  42   /* And just once.  */
  43   if (bb->loop_father != loop)
  44     return false;
  45
  46   /* But this was not enough.  We might have some irreducible loop here.  */
  47   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  48     return false;
  49
  50   return true;
  51 }
  52
  53 /* Structure representing edge of a graph.  */
  54
  55 struct edge
  56 {
  57   int src, dest;        /* Source and destination.  */
  58   struct edge *pred_next, *succ_next;
  59                         /* Next edge in predecessor and successor lists.  */
  60   void *data;           /* Data attached to the edge.  */
  61 };
  62
  63 /* Structure representing vertex of a graph.  */
  64
  65 struct vertex
  66 {
  67   struct edge *pred, *succ;
  68                         /* Lists of predecessors and successors.  */
  69   int component;        /* Number of dfs restarts before reaching the
  70                            vertex.  */
  71   int post;             /* Postorder number.  */
  72 };
  73
  74 /* Structure representing a graph.  */
  75
  76 struct graph
  77 {
  78   int n_vertices;       /* Number of vertices.  */
  79   struct vertex *vertices;
  80                         /* The vertices.  */
  81 };
  82
  83 /* Dumps graph G into F.  */
  84
  85 extern void dump_graph (FILE *, struct graph *);
  86 void dump_graph (FILE *f, struct graph *g)
  87 {
  88   int i;
  89   struct edge *e;
  90
  91   for (i = 0; i < g->n_vertices; i++)
  92     {
  93       if (!g->vertices[i].pred
  94           && !g->vertices[i].succ)
  95         continue;
  96
  97       fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component);
  98       for (e = g->vertices[i].pred; e; e = e->pred_next)
  99         fprintf (f, " %d", e->src);
 100       fprintf (f, "\n");
 101
 102       fprintf (f, "\t->");
 103       for (e = g->vertices[i].succ; e; e = e->succ_next)
 104         fprintf (f, " %d", e->dest);
 105       fprintf (f, "\n");
 106     }
 107 }
 108
 109 /* Creates a new graph with N_VERTICES vertices.  */
 110
 111 static struct graph *
 112 new_graph (int n_vertices)
 113 {
 114   struct graph *g = xmalloc (sizeof (struct graph));
 115
 116   g->n_vertices = n_vertices;
 117   g->vertices = xcalloc (n_vertices, sizeof (struct vertex));
 118
 119   return g;
 120 }
 121
 122 /* Adds an edge from F to T to graph G, with DATA attached.  */
 123
 124 static void
 125 add_edge (struct graph *g, int f, int t, void *data)
 126 {
 127   struct edge *e = xmalloc (sizeof (struct edge));
 128
 129   e->src = f;
 130   e->dest = t;
 131   e->data = data;
 132
 133   e->pred_next = g->vertices[t].pred;
 134   g->vertices[t].pred = e;
 135
 136   e->succ_next = g->vertices[f].succ;
 137   g->vertices[f].succ = e;
 138 }
 139
 140 /* Runs dfs search over vertices of G, from NQ vertices in queue QS.
 141    The vertices in postorder are stored into QT.  If FORWARD is false,
 142    backward dfs is run.  */
 143
 144 static void
 145 dfs (struct graph *g, int *qs, int nq, int *qt, bool forward)
 146 {
 147   int i, tick = 0, v, comp = 0, top;
 148   struct edge *e;
 149   struct edge **stack = xmalloc (sizeof (struct edge *) * g->n_vertices);
 150
 151   for (i = 0; i < g->n_vertices; i++)
 152     {
 153       g->vertices[i].component = -1;
 154       g->vertices[i].post = -1;
 155     }
 156
 157 #define FST_EDGE(V) (forward ? g->vertices[(V)].succ : g->vertices[(V)].pred)
 158 #define NEXT_EDGE(E) (forward ? (E)->succ_next : (E)->pred_next)
 159 #define EDGE_SRC(E) (forward ? (E)->src : (E)->dest)
 160 #define EDGE_DEST(E) (forward ? (E)->dest : (E)->src)
 161
 162   for (i = 0; i < nq; i++)
 163     {
 164       v = qs[i];
 165       if (g->vertices[v].post != -1)
 166         continue;
 167
 168       g->vertices[v].component = comp++;
 169       e = FST_EDGE (v);
 170       top = 0;
 171
 172       while (1)
 173         {
 174           while (e && g->vertices[EDGE_DEST (e)].component != -1)
 175             e = NEXT_EDGE (e);
 176
 177           if (!e)
 178             {
 179               if (qt)
 180                 qt[tick] = v;
 181               g->vertices[v].post = tick++;
 182
 183               if (!top)
 184                 break;
 185
 186               e = stack[--top];
 187               v = EDGE_SRC (e);
 188               e = NEXT_EDGE (e);
 189               continue;
 190             }
 191
 192           stack[top++] = e;
 193           v = EDGE_DEST (e);
 194           e = FST_EDGE (v);
 195           g->vertices[v].component = comp - 1;
 196         }
 197     }
 198
 199   free (stack);
 200 }
 201
 202 /* Marks the edge E in graph G irreducible if it connects two vertices in the
 203    same scc.  */
 204
 205 static void
 206 check_irred (struct graph *g, struct edge *e)
 207 {
 208   edge real = e->data;
 209
 210   /* All edges should lead from a component with higher number to the
 211      one with lower one.  */
 212   gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component);
 213
 214   if (g->vertices[e->src].component != g->vertices[e->dest].component)
 215     return;
 216
 217   real->flags |= EDGE_IRREDUCIBLE_LOOP;
 218   if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 219     real->src->flags |= BB_IRREDUCIBLE_LOOP;
 220 }
 221
 222 /* Runs CALLBACK for all edges in G.  */
 223
 224 static void
 225 for_each_edge (struct graph *g,
 226                void (callback) (struct graph *, struct edge *))
 227 {
 228   struct edge *e;
 229   int i;
 230
 231   for (i = 0; i < g->n_vertices; i++)
 232     for (e = g->vertices[i].succ; e; e = e->succ_next)
 233       callback (g, e);
 234 }
 235
 236 /* Releases the memory occupied by G.  */
 237
 238 static void
 239 free_graph (struct graph *g)
 240 {
 241   struct edge *e, *n;
 242   int i;
 243
 244   for (i = 0; i < g->n_vertices; i++)
 245     for (e = g->vertices[i].succ; e; e = n)
 246       {
 247         n = e->succ_next;
 248         free (e);
 249       }
 250   free (g->vertices);
 251   free (g);
 252 }
 253
 254 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
 255    throw away all latch edges and mark blocks inside any remaining cycle.
 256    Everything is a bit complicated due to fact we do not want to do this
 257    for parts of cycles that only "pass" through some loop -- i.e. for
 258    each cycle, we want to mark blocks that belong directly to innermost
 259    loop containing the whole cycle.
 260
 261    LOOPS is the loop tree.  */
 262
 263 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
 264 #define BB_REPR(BB) ((BB)->index + 1)
 265
 266 void
 267 mark_irreducible_loops (struct loops *loops)
 268 {
 269   basic_block act;
 270   edge e;
 271   edge_iterator ei;
 272   int i, src, dest;
 273   struct graph *g;
 274   int *queue1 = xmalloc ((last_basic_block + loops->num) * sizeof (int));
 275   int *queue2 = xmalloc ((last_basic_block + loops->num) * sizeof (int));
 276   int nq, depth;
 277   struct loop *cloop;
 278
 279   /* Reset the flags.  */
 280   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
 281     {
 282       act->flags &= ~BB_IRREDUCIBLE_LOOP;
 283       FOR_EACH_EDGE (e, ei, act->succs)
 284         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
 285     }
 286
 287   /* Create the edge lists.  */
 288   g = new_graph (last_basic_block + loops->num);
 289
 290   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
 291     FOR_EACH_EDGE (e, ei, act->succs)
 292       {
 293         /* Ignore edges to exit.  */
 294         if (e->dest == EXIT_BLOCK_PTR)
 295           continue;
 296
 297         /* And latch edges.  */
 298         if (e->dest->loop_father->header == e->dest
 299             && e->dest->loop_father->latch == act)
 300           continue;
 301
 302         /* Edges inside a single loop should be left where they are.  Edges
 303            to subloop headers should lead to representative of the subloop,
 304            but from the same place.
 305
 306            Edges exiting loops should lead from representative
 307            of the son of nearest common ancestor of the loops in that
 308            act lays.  */
 309
 310         src = BB_REPR (act);
 311         dest = BB_REPR (e->dest);
 312
 313         if (e->dest->loop_father->header == e->dest)
 314           dest = LOOP_REPR (e->dest->loop_father);
 315
 316         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 317           {
 318             depth = find_common_loop (act->loop_father,
 319                                       e->dest->loop_father)->depth + 1;
 320             if (depth == act->loop_father->depth)
 321               cloop = act->loop_father;
 322             else
 323               cloop = act->loop_father->pred[depth];
 324
 325             src = LOOP_REPR (cloop);
 326           }
 327
 328         add_edge (g, src, dest, e);
 329       }
 330
 331   /* Find the strongly connected components.  Use the algorithm of Tarjan --
 332      first determine the postorder dfs numbering in reversed graph, then
 333      run the dfs on the original graph in the order given by decreasing
 334      numbers assigned by the previous pass.  */
 335   nq = 0;
 336   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
 337     {
 338       queue1[nq++] = BB_REPR (act);
 339     }
 340   for (i = 1; i < (int) loops->num; i++)
 341     if (loops->parray[i])
 342       queue1[nq++] = LOOP_REPR (loops->parray[i]);
 343   dfs (g, queue1, nq, queue2, false);
 344   for (i = 0; i < nq; i++)
 345     queue1[i] = queue2[nq - i - 1];
 346   dfs (g, queue1, nq, NULL, true);
 347
 348   /* Mark the irreducible loops.  */
 349   for_each_edge (g, check_irred);
 350
 351   free_graph (g);
 352   free (queue1);
 353   free (queue2);
 354
 355   loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;
 356 }
 357
 358 /* Counts number of insns inside LOOP.  */
 359 int
 360 num_loop_insns (struct loop *loop)
 361 {
 362   basic_block *bbs, bb;
 363   unsigned i, ninsns = 0;
 364   rtx insn;
 365
 366   bbs = get_loop_body (loop);
 367   for (i = 0; i < loop->num_nodes; i++)
 368     {
 369       bb = bbs[i];
 370       ninsns++;
 371       for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
 372         if (INSN_P (insn))
 373           ninsns++;
 374     }
 375   free(bbs);
 376
 377   return ninsns;
 378 }
 379
 380 /* Counts number of insns executed on average per iteration LOOP.  */
 381 int
 382 average_num_loop_insns (struct loop *loop)
 383 {
 384   basic_block *bbs, bb;
 385   unsigned i, binsns, ninsns, ratio;
 386   rtx insn;
 387
 388   ninsns = 0;
 389   bbs = get_loop_body (loop);
 390   for (i = 0; i < loop->num_nodes; i++)
 391     {
 392       bb = bbs[i];
 393
 394       binsns = 1;
 395       for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
 396         if (INSN_P (insn))
 397           binsns++;
 398
 399       ratio = loop->header->frequency == 0
 400               ? BB_FREQ_MAX
 401               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 402       ninsns += binsns * ratio;
 403     }
 404   free(bbs);
 405
 406   ninsns /= BB_FREQ_MAX;
 407   if (!ninsns)
 408     ninsns = 1; /* To avoid division by zero.  */
 409
 410   return ninsns;
 411 }
 412
 413 /* Returns expected number of LOOP iterations.
 414    Compute upper bound on number of iterations in case they do not fit integer
 415    to help loop peeling heuristics.  Use exact counts if at all possible.  */
 416 unsigned
 417 expected_loop_iterations (const struct loop *loop)
 418 {
 419   edge e;
 420   edge_iterator ei;
 421
 422   if (loop->header->count)
 423     {
 424       gcov_type count_in, count_latch, expected;
 425
 426       count_in = 0;
 427       count_latch = 0;
 428
 429       FOR_EACH_EDGE (e, ei, loop->header->preds)
 430         if (e->src == loop->latch)
 431           count_latch = e->count;
 432         else
 433           count_in += e->count;
 434
 435       if (count_in == 0)
 436         expected = count_latch * 2;
 437       else
 438         expected = (count_latch + count_in - 1) / count_in;
 439
 440       /* Avoid overflows.  */
 441       return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 442     }
 443   else
 444     {
 445       int freq_in, freq_latch;
 446
 447       freq_in = 0;
 448       freq_latch = 0;
 449
 450       FOR_EACH_EDGE (e, ei, loop->header->preds)
 451         if (e->src == loop->latch)
 452           freq_latch = EDGE_FREQUENCY (e);
 453         else
 454           freq_in += EDGE_FREQUENCY (e);
 455
 456       if (freq_in == 0)
 457         return freq_latch * 2;
 458
 459       return (freq_latch + freq_in - 1) / freq_in;
 460     }
 461 }
 462
 463 /* Returns the maximum level of nesting of subloops of LOOP.  */
 464
 465 unsigned
 466 get_loop_level (const struct loop *loop)
 467 {
 468   const struct loop *ploop;
 469   unsigned mx = 0, l;
 470
 471   for (ploop = loop->inner; ploop; ploop = ploop->next)
 472     {
 473       l = get_loop_level (ploop);
 474       if (l >= mx)
 475         mx = l + 1;
 476     }
 477   return mx;
 478 }
 479
 480 /* Returns estimate on cost of computing SEQ.  */
 481
 482 static unsigned
 483 seq_cost (rtx seq)
 484 {
 485   unsigned cost = 0;
 486   rtx set;
 487
 488   for (; seq; seq = NEXT_INSN (seq))
 489     {
 490       set = single_set (seq);
 491       if (set)
 492         cost += rtx_cost (set, SET);
 493       else
 494         cost++;
 495     }
 496
 497   return cost;
 498 }
 499
 500 /* The properties of the target.  */
 501
 502 unsigned target_avail_regs;     /* Number of available registers.  */
 503 unsigned target_res_regs;       /* Number of reserved registers.  */
 504 unsigned target_small_cost;     /* The cost for register when there is a free one.  */
 505 unsigned target_pres_cost;      /* The cost for register when there are not too many
 506                                    free ones.  */
 507 unsigned target_spill_cost;     /* The cost for register when we need to spill.  */
 508
 509 /* Initialize the constants for computing set costs.  */
 510
 511 void
 512 init_set_costs (void)
 513 {
 514   rtx seq;
 515   rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
 516   rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
 517   rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
 518   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 519   unsigned i;
 520
 521   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 522     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 523         && !fixed_regs[i])
 524       target_avail_regs++;
 525
 526   target_res_regs = 3;
 527
 528   /* These are really just heuristic values.  */
 529
 530   start_sequence ();
 531   emit_move_insn (reg1, reg2);
 532   seq = get_insns ();
 533   end_sequence ();
 534   target_small_cost = seq_cost (seq);
 535   target_pres_cost = 2 * target_small_cost;
 536
 537   start_sequence ();
 538   emit_move_insn (mem, reg1);
 539   emit_move_insn (reg2, mem);
 540   seq = get_insns ();
 541   end_sequence ();
 542   target_spill_cost = seq_cost (seq);
 543 }
 544
 545 /* Calculates cost for having SIZE new loop global variables.  REGS_USED is the
 546    number of global registers used in loop.  N_USES is the number of relevant
 547    variable uses.  */
 548
 549 unsigned
 550 global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses)
 551 {
 552   unsigned regs_needed = regs_used + size;
 553   unsigned cost = 0;
 554
 555   if (regs_needed + target_res_regs <= target_avail_regs)
 556     cost += target_small_cost * size;
 557   else if (regs_needed <= target_avail_regs)
 558     cost += target_pres_cost * size;
 559   else
 560     {
 561       cost += target_pres_cost * size;
 562       cost += target_spill_cost * n_uses * (regs_needed - target_avail_regs) / regs_needed;
 563     }
 564
 565   return cost;
 566 }
 567
 568 /* Sets EDGE_LOOP_EXIT flag for all exits of LOOPS.  */
 569
 570 void
 571 mark_loop_exit_edges (struct loops *loops)
 572 {
 573   basic_block bb;
 574   edge e;
 575
 576   if (loops->num <= 1)
 577     return;
 578
 579   FOR_EACH_BB (bb)
 580     {
 581       edge_iterator ei;
 582
 583       /* Do not mark exits from the fake outermost loop.  */
 584       if (!bb->loop_father->outer)
 585         continue;
 586
 587       FOR_EACH_EDGE (e, ei, bb->succs)
 588         {
 589           if (loop_exit_edge_p (bb->loop_father, e))
 590             e->flags |= EDGE_LOOP_EXIT;
 591           else
 592             e->flags &= ~EDGE_LOOP_EXIT;
 593         }
 594     }
 595 }
 596