]> gcc.gnu.org Git - gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
bt-load.c, [...]: Use JUMP_P, LABEL_P, REG_P, MEM_P, NONJUMP_INSN_P, and INSN_P where...
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
/* The infinite cost; used where no finite cost can be computed.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
99
100
/* Representation of an induction variable: a value with a constant step
   per loop iteration.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  NULL_TREE when the
			   value is loop invariant (zero step).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
112
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant; zero until the invariant is
			   recorded (see record_invariant).  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
123
/* Information attached to loop (kept in loop->aux).  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
129
/* Types of uses of induction variables.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
138
/* The candidate - cost pair: the cost of expressing one use through one
   candidate.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
150
/* A use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
169
/* The position where the increment of the iv candidate is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
177
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
};
195
/* The data used by the induction variable optimizations.  */

/* Heap-allocated vectors of pointers to uses and candidates.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
205
/* All per-loop state of the ivopts pass.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop, cached
     by niter_for_exit (elements are struct nfe_cache_elt).  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
239
/* An assignment of iv candidates to uses, together with the cost
   bookkeeping needed to evaluate and incrementally modify it.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};
277
/* Difference of two iv candidate assignments, kept as a linked list of
   per-use changes so a tentative modification can be rolled back.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
294
/* Bound on number of candidates below that all candidates are considered
   for each use.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
316
317 /* Number of uses recorded in DATA. */
318
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
321 {
322 return VEC_length (iv_use_p, data->iv_uses);
323 }
324
325 /* Ith use recorded in DATA. */
326
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
329 {
330 return VEC_index (iv_use_p, data->iv_uses, i);
331 }
332
333 /* Number of candidates recorded in DATA. */
334
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
337 {
338 return VEC_length (iv_cand_p, data->iv_candidates);
339 }
340
341 /* Ith candidate recorded in DATA. */
342
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
345 {
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
347 }
348
349 /* The data for LOOP. */
350
351 static inline struct loop_data *
352 loop_data (struct loop *loop)
353 {
354 return loop->aux;
355 }
356
357 /* The single loop exit if it dominates the latch, NULL otherwise. */
358
359 static edge
360 single_dom_exit (struct loop *loop)
361 {
362 edge exit = loop->single_exit;
363
364 if (!exit)
365 return NULL;
366
367 if (!just_once_each_iteration_p (loop, exit->src))
368 return NULL;
369
370 return exit;
371 }
372
373 /* Dumps information about the induction variable IV to FILE. */
374
375 extern void dump_iv (FILE *, struct iv *);
376 void
377 dump_iv (FILE *file, struct iv *iv)
378 {
379 if (iv->ssa_name)
380 {
381 fprintf (file, "ssa name ");
382 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
383 fprintf (file, "\n");
384 }
385
386 fprintf (file, " type ");
387 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
388 fprintf (file, "\n");
389
390 if (iv->step)
391 {
392 fprintf (file, " base ");
393 print_generic_expr (file, iv->base, TDF_SLIM);
394 fprintf (file, "\n");
395
396 fprintf (file, " step ");
397 print_generic_expr (file, iv->step, TDF_SLIM);
398 fprintf (file, "\n");
399 }
400 else
401 {
402 fprintf (file, " invariant ");
403 print_generic_expr (file, iv->base, TDF_SLIM);
404 fprintf (file, "\n");
405 }
406
407 if (iv->base_object)
408 {
409 fprintf (file, " base object ");
410 print_generic_expr (file, iv->base_object, TDF_SLIM);
411 fprintf (file, "\n");
412 }
413
414 if (iv->biv_p)
415 fprintf (file, " is a biv\n");
416 }
417
418 /* Dumps information about the USE to FILE. */
419
420 extern void dump_use (FILE *, struct iv_use *);
421 void
422 dump_use (FILE *file, struct iv_use *use)
423 {
424 fprintf (file, "use %d\n", use->id);
425
426 switch (use->type)
427 {
428 case USE_NONLINEAR_EXPR:
429 fprintf (file, " generic\n");
430 break;
431
432 case USE_OUTER:
433 fprintf (file, " outside\n");
434 break;
435
436 case USE_ADDRESS:
437 fprintf (file, " address\n");
438 break;
439
440 case USE_COMPARE:
441 fprintf (file, " compare\n");
442 break;
443
444 default:
445 gcc_unreachable ();
446 }
447
448 fprintf (file, " in statement ");
449 print_generic_expr (file, use->stmt, TDF_SLIM);
450 fprintf (file, "\n");
451
452 fprintf (file, " at position ");
453 if (use->op_p)
454 print_generic_expr (file, *use->op_p, TDF_SLIM);
455 fprintf (file, "\n");
456
457 dump_iv (file, use->iv);
458
459 if (use->related_cands)
460 {
461 fprintf (file, " related candidates ");
462 dump_bitmap (file, use->related_cands);
463 }
464 }
465
466 /* Dumps information about the uses to FILE. */
467
468 extern void dump_uses (FILE *, struct ivopts_data *);
469 void
470 dump_uses (FILE *file, struct ivopts_data *data)
471 {
472 unsigned i;
473 struct iv_use *use;
474
475 for (i = 0; i < n_iv_uses (data); i++)
476 {
477 use = iv_use (data, i);
478
479 dump_use (file, use);
480 fprintf (file, "\n");
481 }
482 }
483
484 /* Dumps information about induction variable candidate CAND to FILE. */
485
486 extern void dump_cand (FILE *, struct iv_cand *);
487 void
488 dump_cand (FILE *file, struct iv_cand *cand)
489 {
490 struct iv *iv = cand->iv;
491
492 fprintf (file, "candidate %d%s\n",
493 cand->id, cand->important ? " (important)" : "");
494
495 if (!iv)
496 {
497 fprintf (file, " final value replacement\n");
498 return;
499 }
500
501 switch (cand->pos)
502 {
503 case IP_NORMAL:
504 fprintf (file, " incremented before exit test\n");
505 break;
506
507 case IP_END:
508 fprintf (file, " incremented at end\n");
509 break;
510
511 case IP_ORIGINAL:
512 fprintf (file, " original biv\n");
513 break;
514 }
515
516 dump_iv (file, iv);
517 }
518
519 /* Returns the info for ssa version VER. */
520
521 static inline struct version_info *
522 ver_info (struct ivopts_data *data, unsigned ver)
523 {
524 return data->version_info + ver;
525 }
526
527 /* Returns the info for ssa name NAME. */
528
529 static inline struct version_info *
530 name_info (struct ivopts_data *data, tree name)
531 {
532 return ver_info (data, SSA_NAME_VERSION (name));
533 }
534
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  If so, returns true and stores X (sign-extended from BITS bits)
   to *X; otherwise returns false.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits.  The shift is done in two steps so it is
     well defined even when BITS equals the width of HOST_WIDE_INT.  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits); this is an inverse because the
     multiplicative order of any odd number modulo 2^bits divides
     2^(bits - 1).  The loop accumulates
     b^(2^0 + 2^1 + ... + 2^(bits - 2)) by repeated squaring.  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  /* Verify that VAL indeed solves the equation modulo 2^bits.  */
  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend VAL from BITS bits so callers get a signed result.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
585
586 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
587 emitted in LOOP. */
588
589 static bool
590 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
591 {
592 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
593
594 gcc_assert (bb);
595
596 if (sbb == loop->latch)
597 return true;
598
599 if (sbb != bb)
600 return false;
601
602 return stmt == last_stmt (bb);
603 }
604
605 /* Returns true if STMT if after the place where the original induction
606 variable CAND is incremented. */
607
608 static bool
609 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
610 {
611 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
612 basic_block stmt_bb = bb_for_stmt (stmt);
613 block_stmt_iterator bsi;
614
615 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
616 return false;
617
618 if (stmt_bb != cand_bb)
619 return true;
620
621 /* Scan the block from the end, since the original ivs are usually
622 incremented at the end of the loop body. */
623 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
624 {
625 if (bsi_stmt (bsi) == cand->incremented_at)
626 return false;
627 if (bsi_stmt (bsi) == stmt)
628 return true;
629 }
630 }
631
632 /* Returns true if STMT if after the place where the induction variable
633 CAND is incremented in LOOP. */
634
635 static bool
636 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
637 {
638 switch (cand->pos)
639 {
640 case IP_END:
641 return false;
642
643 case IP_NORMAL:
644 return stmt_after_ip_normal_pos (loop, stmt);
645
646 case IP_ORIGINAL:
647 return stmt_after_ip_original_pos (cand, stmt);
648
649 default:
650 gcc_unreachable ();
651 }
652 }
653
/* Element of the hash table in that we cache the numbers of iterations
   obtained from exits of the loop (see niter_for_exit).  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations; meaningful only if valid_p.  */
  struct tree_niter_desc niter;
};
668
669 /* Hash function for nfe_cache_elt E. */
670
671 static hashval_t
672 nfe_hash (const void *e)
673 {
674 const struct nfe_cache_elt *elt = e;
675
676 return htab_hash_pointer (elt->exit);
677 }
678
679 /* Equality function for nfe_cache_elt E1 and edge E2. */
680
681 static int
682 nfe_eq (const void *e1, const void *e2)
683 {
684 const struct nfe_cache_elt *elt1 = e1;
685
686 return elt1->exit == e2;
687 }
688
689 /* Returns structure describing number of iterations determined from
690 EXIT of DATA->current_loop, or NULL if something goes wrong. */
691
692 static struct tree_niter_desc *
693 niter_for_exit (struct ivopts_data *data, edge exit)
694 {
695 struct nfe_cache_elt *nfe_desc;
696 PTR *slot;
697
698 slot = htab_find_slot_with_hash (data->niters, exit,
699 htab_hash_pointer (exit),
700 INSERT);
701
702 if (!*slot)
703 {
704 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
705 nfe_desc->exit = exit;
706 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
707 exit, &nfe_desc->niter);
708 *slot = nfe_desc;
709 }
710 else
711 nfe_desc = *slot;
712
713 if (!nfe_desc->valid_p)
714 return NULL;
715
716 return &nfe_desc->niter;
717 }
718
719 /* Returns structure describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
721 goes wrong. */
722
723 static struct tree_niter_desc *
724 niter_for_single_dom_exit (struct ivopts_data *data)
725 {
726 edge exit = single_dom_exit (data->current_loop);
727
728 if (!exit)
729 return NULL;
730
731 return niter_for_exit (data, exit);
732 }
733
734 /* Initializes data structures used by the iv optimization pass, stored
735 in DATA. LOOPS is the loop tree. */
736
737 static void
738 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
739 {
740 unsigned i;
741
742 data->version_info_size = 2 * num_ssa_names;
743 data->version_info = xcalloc (data->version_info_size,
744 sizeof (struct version_info));
745 data->relevant = BITMAP_ALLOC (NULL);
746 data->important_candidates = BITMAP_ALLOC (NULL);
747 data->max_inv_id = 0;
748 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
749
750 for (i = 1; i < loops->num; i++)
751 if (loops->parray[i])
752 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
753
754 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
755 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
756 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
757 }
758
759 /* Returns a memory object to that EXPR points. In case we are able to
760 determine that it does not point to any such object, NULL is returned. */
761
762 static tree
763 determine_base_object (tree expr)
764 {
765 enum tree_code code = TREE_CODE (expr);
766 tree base, obj, op0, op1;
767
768 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
769 return NULL_TREE;
770
771 switch (code)
772 {
773 case INTEGER_CST:
774 return NULL_TREE;
775
776 case ADDR_EXPR:
777 obj = TREE_OPERAND (expr, 0);
778 base = get_base_address (obj);
779
780 if (!base)
781 return expr;
782
783 if (TREE_CODE (base) == INDIRECT_REF)
784 return determine_base_object (TREE_OPERAND (base, 0));
785
786 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
787
788 case PLUS_EXPR:
789 case MINUS_EXPR:
790 op0 = determine_base_object (TREE_OPERAND (expr, 0));
791 op1 = determine_base_object (TREE_OPERAND (expr, 1));
792
793 if (!op1)
794 return op0;
795
796 if (!op0)
797 return (code == PLUS_EXPR
798 ? op1
799 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
800
801 return fold (build (code, ptr_type_node, op0, op1));
802
803 case NOP_EXPR:
804 case CONVERT_EXPR:
805 return determine_base_object (TREE_OPERAND (expr, 0));
806
807 default:
808 return fold_convert (ptr_type_node, expr);
809 }
810 }
811
812 /* Allocates an induction variable with given initial value BASE and step STEP
813 for loop LOOP. */
814
815 static struct iv *
816 alloc_iv (tree base, tree step)
817 {
818 struct iv *iv = xcalloc (1, sizeof (struct iv));
819
820 if (step && integer_zerop (step))
821 step = NULL_TREE;
822
823 iv->base = base;
824 iv->base_object = determine_base_object (base);
825 iv->step = step;
826 iv->biv_p = false;
827 iv->have_use_for = false;
828 iv->use_id = 0;
829 iv->ssa_name = NULL_TREE;
830
831 return iv;
832 }
833
834 /* Sets STEP and BASE for induction variable IV. */
835
836 static void
837 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
838 {
839 struct version_info *info = name_info (data, iv);
840
841 gcc_assert (!info->iv);
842
843 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
844 info->iv = alloc_iv (base, step);
845 info->iv->ssa_name = iv;
846 }
847
848 /* Finds induction variable declaration for VAR. */
849
850 static struct iv *
851 get_iv (struct ivopts_data *data, tree var)
852 {
853 basic_block bb;
854
855 if (!name_info (data, var)->iv)
856 {
857 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
858
859 if (!bb
860 || !flow_bb_inside_loop_p (data->current_loop, bb))
861 set_iv (data, var, var, NULL_TREE);
862 }
863
864 return name_info (data, var)->iv;
865 }
866
867 /* Determines the step of a biv defined in PHI. */
868
869 static tree
870 determine_biv_step (tree phi)
871 {
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi), base, step;
874 tree type = TREE_TYPE (name);
875
876 if (!is_gimple_reg (name))
877 return NULL_TREE;
878
879 if (!simple_iv (loop, phi, name, &base, &step))
880 return NULL_TREE;
881
882 if (!step)
883 return build_int_cst (type, 0);
884
885 return step;
886 }
887
888 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
889
890 static bool
891 abnormal_ssa_name_p (tree exp)
892 {
893 if (!exp)
894 return false;
895
896 if (TREE_CODE (exp) != SSA_NAME)
897 return false;
898
899 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
900 }
901
902 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
903 abnormal phi node. Callback for for_each_index. */
904
905 static bool
906 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
907 void *data ATTRIBUTE_UNUSED)
908 {
909 if (TREE_CODE (base) == ARRAY_REF)
910 {
911 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
912 return false;
913 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
914 return false;
915 }
916
917 return !abnormal_ssa_name_p (*index);
918 }
919
920 /* Returns true if EXPR contains a ssa name that occurs in an
921 abnormal phi node. */
922
923 static bool
924 contains_abnormal_ssa_name_p (tree expr)
925 {
926 enum tree_code code = TREE_CODE (expr);
927 enum tree_code_class class = TREE_CODE_CLASS (code);
928
929 if (code == SSA_NAME)
930 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
931
932 if (code == INTEGER_CST
933 || is_gimple_min_invariant (expr))
934 return false;
935
936 if (code == ADDR_EXPR)
937 return !for_each_index (&TREE_OPERAND (expr, 0),
938 idx_contains_abnormal_ssa_name_p,
939 NULL);
940
941 switch (class)
942 {
943 case tcc_binary:
944 case tcc_comparison:
945 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
946 return true;
947
948 /* Fallthru. */
949 case tcc_unary:
950 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
951 return true;
952
953 break;
954
955 default:
956 gcc_unreachable ();
957 }
958
959 return false;
960 }
961
962 /* Finds basic ivs. */
963
964 static bool
965 find_bivs (struct ivopts_data *data)
966 {
967 tree phi, step, type, base;
968 bool found = false;
969 struct loop *loop = data->current_loop;
970
971 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
972 {
973 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
974 continue;
975
976 step = determine_biv_step (phi);
977
978 if (!step)
979 continue;
980 if (cst_and_fits_in_hwi (step)
981 && int_cst_value (step) == 0)
982 continue;
983
984 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
985 if (contains_abnormal_ssa_name_p (base))
986 continue;
987
988 type = TREE_TYPE (PHI_RESULT (phi));
989 base = fold_convert (type, base);
990 step = fold_convert (type, step);
991
992 /* FIXME: We do not handle induction variables whose step does
993 not satisfy cst_and_fits_in_hwi. */
994 if (!cst_and_fits_in_hwi (step))
995 continue;
996
997 set_iv (data, PHI_RESULT (phi), base, step);
998 found = true;
999 }
1000
1001 return found;
1002 }
1003
1004 /* Marks basic ivs. */
1005
1006 static void
1007 mark_bivs (struct ivopts_data *data)
1008 {
1009 tree phi, var;
1010 struct iv *iv, *incr_iv;
1011 struct loop *loop = data->current_loop;
1012 basic_block incr_bb;
1013
1014 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1015 {
1016 iv = get_iv (data, PHI_RESULT (phi));
1017 if (!iv)
1018 continue;
1019
1020 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1021 incr_iv = get_iv (data, var);
1022 if (!incr_iv)
1023 continue;
1024
1025 /* If the increment is in the subloop, ignore it. */
1026 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1027 if (incr_bb->loop_father != data->current_loop
1028 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1029 continue;
1030
1031 iv->biv_p = true;
1032 incr_iv->biv_p = true;
1033 }
1034 }
1035
1036 /* Checks whether STMT defines a linear induction variable and stores its
1037 parameters to BASE and STEP. */
1038
1039 static bool
1040 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1041 tree *base, tree *step)
1042 {
1043 tree lhs;
1044 struct loop *loop = data->current_loop;
1045
1046 *base = NULL_TREE;
1047 *step = NULL_TREE;
1048
1049 if (TREE_CODE (stmt) != MODIFY_EXPR)
1050 return false;
1051
1052 lhs = TREE_OPERAND (stmt, 0);
1053 if (TREE_CODE (lhs) != SSA_NAME)
1054 return false;
1055
1056 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1057 return false;
1058
1059 /* FIXME: We do not handle induction variables whose step does
1060 not satisfy cst_and_fits_in_hwi. */
1061 if (!zero_p (*step)
1062 && !cst_and_fits_in_hwi (*step))
1063 return false;
1064
1065 if (contains_abnormal_ssa_name_p (*base))
1066 return false;
1067
1068 return true;
1069 }
1070
1071 /* Finds general ivs in statement STMT. */
1072
1073 static void
1074 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1075 {
1076 tree base, step;
1077
1078 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1079 return;
1080
1081 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1082 }
1083
1084 /* Finds general ivs in basic block BB. */
1085
1086 static void
1087 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1088 {
1089 block_stmt_iterator bsi;
1090
1091 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1092 find_givs_in_stmt (data, bsi_stmt (bsi));
1093 }
1094
1095 /* Finds general ivs. */
1096
1097 static void
1098 find_givs (struct ivopts_data *data)
1099 {
1100 struct loop *loop = data->current_loop;
1101 basic_block *body = get_loop_body_in_dom_order (loop);
1102 unsigned i;
1103
1104 for (i = 0; i < loop->num_nodes; i++)
1105 find_givs_in_bb (data, body[i]);
1106 free (body);
1107 }
1108
1109 /* For each ssa name defined in LOOP determines whether it is an induction
1110 variable and if so, its initial value and step. */
1111
1112 static bool
1113 find_induction_variables (struct ivopts_data *data)
1114 {
1115 unsigned i;
1116 bitmap_iterator bi;
1117
1118 if (!find_bivs (data))
1119 return false;
1120
1121 find_givs (data);
1122 mark_bivs (data);
1123
1124 if (dump_file && (dump_flags & TDF_DETAILS))
1125 {
1126 struct tree_niter_desc *niter;
1127
1128 niter = niter_for_single_dom_exit (data);
1129
1130 if (niter)
1131 {
1132 fprintf (dump_file, " number of iterations ");
1133 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1134 fprintf (dump_file, "\n");
1135
1136 fprintf (dump_file, " may be zero if ");
1137 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1138 fprintf (dump_file, "\n");
1139 fprintf (dump_file, "\n");
1140 };
1141
1142 fprintf (dump_file, "Induction variables:\n\n");
1143
1144 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1145 {
1146 if (ver_info (data, i)->iv)
1147 dump_iv (dump_file, ver_info (data, i)->iv);
1148 }
1149 }
1150
1151 return true;
1152 }
1153
1154 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1155
1156 static struct iv_use *
1157 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1158 tree stmt, enum use_type use_type)
1159 {
1160 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1161
1162 use->id = n_iv_uses (data);
1163 use->type = use_type;
1164 use->iv = iv;
1165 use->stmt = stmt;
1166 use->op_p = use_p;
1167 use->related_cands = BITMAP_ALLOC (NULL);
1168
1169 /* To avoid showing ssa name in the dumps, if it was not reset by the
1170 caller. */
1171 iv->ssa_name = NULL_TREE;
1172
1173 if (dump_file && (dump_flags & TDF_DETAILS))
1174 dump_use (dump_file, use);
1175
1176 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1177
1178 return use;
1179 }
1180
1181 /* Checks whether OP is a loop-level invariant and if so, records it.
1182 NONLINEAR_USE is true if the invariant is used in a way we do not
1183 handle specially. */
1184
static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  /* Only gimple registers held in SSA names are of interest.  */
  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  /* A name defined inside the current loop is not a loop invariant.
     No defining block (e.g. a default definition) counts as outside.  */
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  /* Assign an invariant id on the first encounter; ids start at 1 so
     zero means "not yet recorded".  */
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
1207
1208 /* Checks whether the use OP is interesting and if so, records it
1209 as TYPE. */
1210
static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  /* If a use for this iv was already recorded, possibly upgrade it:
     USE_NONLINEAR_EXPR subsumes USE_OUTER, never the other way round.  */
  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  /* A zero-step iv is really a loop invariant, not an interesting use.  */
  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record a private copy of the iv; record_use may clear its ssa_name.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
1258
1259 /* Checks whether the use OP is interesting and if so, records it. */
1260
static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  /* A plain value use is the nonlinear-expression case of the generic
     routine above.  */
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}
1266
1267 /* Records a definition of induction variable OP that is used outside of the
1268 loop. */
1269
static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  /* A value live after the loop is the USE_OUTER case of the generic
     routine above.  */
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
1275
1276 /* Checks whether the condition *COND_P in STMT is interesting
1277 and if so, records it. */
1278
static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  /* const_iv stands in for an invariant operand; only its (NULL) step
     is ever inspected below.  */
  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  /* A bare SSA-name condition is handled as NAME compared against 0.
     Note that op1_p then points to the local ZERO, which is fine since
     it is only dereferenced within this function.  */
  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      /* Fall back to recording the operands as ordinary value uses.  */
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Exactly one operand is an iv here; record the compare use based on it.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
1337
1338 /* Returns true if expression EXPR is obviously invariant in LOOP,
1339 i.e. if all its operands are defined outside of the LOOP. */
1340
1341 bool
1342 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1343 {
1344 basic_block def_bb;
1345 unsigned i, len;
1346
1347 if (is_gimple_min_invariant (expr))
1348 return true;
1349
1350 if (TREE_CODE (expr) == SSA_NAME)
1351 {
1352 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1353 if (def_bb
1354 && flow_bb_inside_loop_p (loop, def_bb))
1355 return false;
1356
1357 return true;
1358 }
1359
1360 if (!EXPR_P (expr))
1361 return false;
1362
1363 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1364 for (i = 0; i < len; i++)
1365 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1366 return false;
1367
1368 return true;
1369 }
1370
1371 /* Cumulates the steps of indices into DATA and replaces their values with the
1372 initial ones. Returns false when the value of the index cannot be determined.
1373 Callback for for_each_index. */
1374
struct ifs_ivopts_data
{
  /* Global information for the pass.  */
  struct ivopts_data *ivopts_data;

  /* The statement containing the reference being walked.  */
  tree stmt;

  /* Accumulated step of the reference; *step_p stays NULL_TREE until the
     first index with a nonzero step is seen.  */
  tree *step_p;
};
1381
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, type, iv_type, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* (Mis)aligned indirect references cannot be handled here.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index is invariant; nothing to accumulate.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index by the initial value of its iv.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  iv_type = TREE_TYPE (iv->base);
  type = build_pointer_type (TREE_TYPE (base));
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (type, 1);

  /* NOTE(review): the narrow-iv branch yields a step in TYPE while the
     other branch converts to IV_TYPE, yet the MULT below is done in TYPE
     for both -- presumably the precisions agree on the second path;
     confirm.  */
  if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
    iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
					  type, iv->base, iv->step, dta->stmt);
  else
    iv_step = fold_convert (iv_type, iv->step);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  /* Add this index's contribution (element size times iv step) to the
     overall step of the reference.  */
  step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
					    *dta->step_p, step);

  return true;
}
1464
1465 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1466 object is passed to it in DATA. */
1467
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470 void *data)
1471 {
1472 find_interesting_uses_op (data, *idx);
1473 if (TREE_CODE (base) == ARRAY_REF)
1474 {
1475 find_interesting_uses_op (data, array_ref_element_size (base));
1476 find_interesting_uses_op (data, array_ref_low_bound (base));
1477 }
1478 return true;
1479 }
1480
1481 /* Returns true if memory reference REF may be unaligned. */
1482
static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  /* NOTE(review): TOFFSET (the variable part of the offset returned by
     get_inner_reference) is not examined here, so a variable offset that
     breaks alignment would go undetected -- confirm this is safe for all
     callers.  */
  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
1511
1512 /* Finds addresses in *OP_P inside STMT. */
1513
static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = unshare_expr (*op_p), step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  /* Fold the indices into BASE's initial value and collect the combined
     step; an undeterminable or zero step means no address iv here.  */
  ifs_ivopts_data.ivopts_data = data;
  ifs_ivopts_data.stmt = stmt;
  ifs_ivopts_data.step_p = &step;
  if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
      || zero_p (step))
    goto fail;

  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

  /* Canonicalize BASE to a pointer value: strip one indirection or take
     the address of the object.  */
  if (TREE_CODE (base) == INDIRECT_REF)
    base = TREE_OPERAND (base, 0);
  else
    base = build_addr (base);

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Could not record an address use; at least record the indices as
     ordinary value uses.  */
  for_each_index (op_p, idx_record_use, data);
}
1558
1559 /* Finds and records invariants used in STMT. */
1560
1561 static void
1562 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1563 {
1564 use_optype uses = NULL;
1565 unsigned i, n;
1566 tree op;
1567
1568 if (TREE_CODE (stmt) == PHI_NODE)
1569 n = PHI_NUM_ARGS (stmt);
1570 else
1571 {
1572 uses = STMT_USE_OPS (stmt);
1573 n = NUM_USES (uses);
1574 }
1575
1576 for (i = 0; i < n; i++)
1577 {
1578 if (TREE_CODE (stmt) == PHI_NODE)
1579 op = PHI_ARG_DEF (stmt, i);
1580 else
1581 op = USE_OP (uses, i);
1582
1583 record_invariant (data, op, false);
1584 }
1585 }
1586
1587 /* Finds interesting uses of induction variables in the statement STMT. */
1588
static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  use_optype uses = NULL;
  unsigned i, n;

  find_invariants_stmt (data, stmt);

  /* Conditions are handled entirely by the compare-use machinery.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      /* Dispatch on the shape of the right-hand side.  */
      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the store side (if any) is an address use too.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      /* A store of a gimple value: address use on the left, value use on
	 the right.  */
      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  /* A header PHI that defines an iv needs no separate value uses.  */
  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  if (TREE_CODE (stmt) == PHI_NODE)
    n = PHI_NUM_ARGS (stmt);
  else
    {
      uses = STMT_USE_OPS (stmt);
      n = NUM_USES (uses);
    }

  /* Fallback: record every iv operand of the statement as a generic
     nonlinear value use.  */
  for (i = 0; i < n; i++)
    {
      if (TREE_CODE (stmt) == PHI_NODE)
	op = PHI_ARG_DEF (stmt, i);
      else
	op = USE_OP (uses, i);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1688
1689 /* Finds interesting uses of induction variables outside of loops
1690 on loop exit edge EXIT. */
1691
1692 static void
1693 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1694 {
1695 tree phi, def;
1696
1697 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1698 {
1699 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1700 find_interesting_uses_outer (data, def);
1701 }
1702 }
1703
1704 /* Finds uses of the induction variables that are interesting. */
1705
static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Successor edges leaving the loop are exits; values live across
	 them are outer uses.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      /* Then scan every PHI and every statement of the block.  */
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      /* Dump the invariants recorded along the way.  */
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
1759
1760 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1761 is true, assume we are inside an address. */
1762
static tree
strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);
  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      /* A nonzero host-representable constant is extracted entirely into
	 *OFFSET, leaving a zero behind.  */
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets; rebuild the
	 smallest expression that keeps the non-constant parts.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset (op0, false, &off0);
      op1 = strip_offset (op1, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      /* Only meaningful inside an address; extract the constant part of
	 the index scaled by the element size, then fall through to
	 recurse into the base (operand 0).  */
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset (op1, false, &off1);
      *offset = off1 * st;
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;
      break;

    case ADDR_EXPR:
      inside_addr = true;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset (op0, inside_addr, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  /* Rebuild the node with the stripped operand(s), sharing the rest.  */
  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  return fold_convert (orig_type, expr);
}
1858
1859 /* Returns variant of TYPE that can be used as base for different uses.
1860 For integer types, we return unsigned variant of the type, which
1861 avoids problems with overflows. For pointer types, we return void *. */
1862
1863 static tree
1864 generic_type_for (tree type)
1865 {
1866 if (POINTER_TYPE_P (type))
1867 return ptr_type_node;
1868
1869 if (TYPE_UNSIGNED (type))
1870 return type;
1871
1872 return unsigned_type_for (type);
1873 }
1874
1875 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1876 position to POS. If USE is not NULL, the candidate is set as related to
1877 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1878 replacement of the final value of the iv by a direct computation. */
1879
static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* Canonicalize BASE and STEP to the generic (unsigned / void *) type
     so that equivalent candidates compare equal below.  */
  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* Look for an already-recorded candidate with the same position, base
     and step; pseudocandidates (NULL base and step) match only each
     other.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  /* Not found -- record a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      /* IP_ORIGINAL candidates keep the existing variable; others get a
	 fresh temporary (var_after is split later if needed).  */
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* Importance is sticky: a re-added candidate may only be upgraded.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
1978
1979 /* Returns true if incrementing the induction variable at the end of the LOOP
1980 is allowed.
1981
1982 The purpose is to avoid splitting latch edge with a biv increment, thus
1983 creating a jump, possibly confusing other optimization passes and leaving
1984 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1985 is not available (so we do not have a better alternative), or if the latch
1986 edge is already nonempty. */
1987
1988 static bool
1989 allow_ip_end_pos_p (struct loop *loop)
1990 {
1991 if (!ip_normal_pos (loop))
1992 return true;
1993
1994 if (!empty_block_p (ip_end_pos (loop)))
1995 return true;
1996
1997 return false;
1998 }
1999
2000 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2001 position to POS. If USE is not NULL, the candidate is set as related to
2002 it. The candidate computation is scheduled on all available positions. */
2003
2004 static void
2005 add_candidate (struct ivopts_data *data,
2006 tree base, tree step, bool important, struct iv_use *use)
2007 {
2008 if (ip_normal_pos (data->current_loop))
2009 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2010 if (ip_end_pos (data->current_loop)
2011 && allow_ip_end_pos_p (data->current_loop))
2012 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2013 }
2014
2015 /* Add a standard "0 + 1 * iteration" iv candidate for a
2016 type with SIZE bits. */
2017
2018 static void
2019 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2020 unsigned int size)
2021 {
2022 tree type = lang_hooks.types.type_for_size (size, true);
2023 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2024 true, NULL);
2025 }
2026
2027 /* Adds standard iv candidates. */
2028
static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  /* A plain counter in the natural integer type.  */
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
2038
2039
2040 /* Adds candidates bases on the old induction variable IV. */
2041
static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  The candidate is incremented at the statement that
	 defines the value flowing in from the latch edge.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2068
2069 /* Adds candidates based on the old induction variables. */
2070
static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  /* Walk all relevant ssa-name versions; only genuine (nonzero-step)
     bivs yield candidates.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
2085
2086 /* Adds candidates based on the value of the induction variable IV and USE. */
2087
2088 static void
2089 add_iv_value_candidates (struct ivopts_data *data,
2090 struct iv *iv, struct iv_use *use)
2091 {
2092 add_candidate (data, iv->base, iv->step, false, use);
2093
2094 /* The same, but with initial value zero. */
2095 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2096 iv->step, false, use);
2097 }
2098
2099 /* Adds candidates based on the address IV and USE. */
2100
static void
add_address_candidates (struct ivopts_data *data,
			struct iv *iv, struct iv_use *use)
{
  tree base, abase;
  unsigned HOST_WIDE_INT offset;

  /* First, the trivial choices.  */
  add_iv_value_candidates (data, iv, use);

  /* Second, try removing the COMPONENT_REFs.  */
  if (TREE_CODE (iv->base) == ADDR_EXPR)
    {
      /* Peel field accesses and constant-index array accesses off the
	 addressed object.  */
      base = TREE_OPERAND (iv->base, 0);
      while (TREE_CODE (base) == COMPONENT_REF
	     || (TREE_CODE (base) == ARRAY_REF
		 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
	base = TREE_OPERAND (base, 0);

      if (base != TREE_OPERAND (iv->base, 0))
	{
	  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
	  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

	  /* Re-form a pointer-valued base from what remains.  */
	  if (TREE_CODE (base) == INDIRECT_REF)
	    base = TREE_OPERAND (base, 0);
	  else
	    base = build_addr (base);
	  add_candidate (data, base, iv->step, false, use);
	}
    }

  /* Third, try removing the constant offset.  */
  abase = iv->base;
  base = strip_offset (abase, false, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
2139
2140 /* Possibly adds pseudocandidate for replacing the final value of USE by
2141 a direct computation. */
2142
2143 static void
2144 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2145 {
2146 struct tree_niter_desc *niter;
2147
2148 /* We must know where we exit the loop and how many times does it roll. */
2149 niter = niter_for_single_dom_exit (data);
2150 if (!niter
2151 || !zero_p (niter->may_be_zero))
2152 return;
2153
2154 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2155 }
2156
2157 /* Adds candidates based on the uses. */
2158
static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      /* What to add depends on the kind of the use.  */
      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	case USE_OUTER:
	  add_iv_value_candidates (data, use->iv, use);

	  /* Additionally, add the pseudocandidate for the possibility to
	     replace the final value by a direct computation.  */
	  add_iv_outer_candidates (data, use);
	  break;

	case USE_ADDRESS:
	  add_address_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
2196
2197 /* Record important candidates and add them to related_cands bitmaps
2198 if needed. */
2199
static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  /* Collect the important candidates into a single bitmap.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  /* With few enough candidates we can afford to examine every
     (use, candidate) pair.  */
  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
2235
2236 /* Finds the candidates for the induction variables. */
2237
static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  This must come last, since it
     inspects the full candidate set built above.  */
  record_important_candidates (data);
}
2253
2254 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2255 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2256 we allocate a simple list to every use. */
2257
static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  /* Count the related candidates to size the per-use table.  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
    }
}
2287
2288 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2289 on invariants DEPENDS_ON and that the value used in expressing it
2290 is VALUE.*/
2291
static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on, tree value)
{
  unsigned i, s;

  /* Infinite cost pairs are simply not recorded; free DEPENDS_ON since
     ownership was passed to us.  */
  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  /* Direct indexing by candidate id when the table covers everything.  */
  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* Otherwise the table is an open-addressed hash keyed by candidate id;
     probe linearly (with wrap-around) for a free slot.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table was sized to hold all related candidates, so a slot must
     always be found.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2331
2332 /* Gets cost of (USE, CANDIDATE) pair. */
2333
static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  /* Direct indexing mirrors the storage scheme of set_use_iv_cost.  */
  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* Linear probe with wrap-around; NULL when the pair was never set
     (e.g. its cost was infinite).  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2365
2366 /* Returns estimate on cost of computing SEQ. */
2367
2368 static unsigned
2369 seq_cost (rtx seq)
2370 {
2371 unsigned cost = 0;
2372 rtx set;
2373
2374 for (; seq; seq = NEXT_INSN (seq))
2375 {
2376 set = single_set (seq);
2377 if (set)
2378 cost += rtx_cost (set, SET);
2379 else
2380 cost++;
2381 }
2382
2383 return cost;
2384 }
2385
2386 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2387 static rtx
2388 produce_memory_decl_rtl (tree obj, int *regno)
2389 {
2390 rtx x;
2391
2392 gcc_assert (obj);
2393 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2394 {
2395 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2396 x = gen_rtx_SYMBOL_REF (Pmode, name);
2397 }
2398 else
2399 x = gen_raw_REG (Pmode, (*regno)++);
2400
2401 return gen_rtx_MEM (DECL_MODE (obj), x);
2402 }
2403
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Dig through component references to the base object whose address
	 is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* *ws = 0 stops walk_tree from descending into this node.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      /* Leave decls that already have rtl alone.  */
      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register -- fake a memory
	 location for them; everything else gets a fake pseudo.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Record the decl in decl_rtl_to_reset so the fake DECL_RTL set here
	 can be undone once the cost measurement is finished.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2461
/* Determines cost of the computation of EXPR, by expanding it to RTL and
   summing the costs of the generated insns.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  /* Give every referenced decl a fake DECL_RTL first, so the expansion
     below does not touch real declarations.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  /* If the result ended up in memory, also account for addressing it.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
2485
2486 /* Returns variable containing the value of candidate CAND at statement AT. */
2487
2488 static tree
2489 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2490 {
2491 if (stmt_after_increment (loop, cand, stmt))
2492 return cand->var_after;
2493 else
2494 return cand->var_before;
2495 }
2496
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  Returns NULL_TREE when USE cannot be
   expressed from CAND (insufficient precision, non-constant steps, or a
   step ratio that cannot be computed).  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return NULL_TREE;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Do the arithmetic in an unsigned variant of the use's type.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
    }

  /* Both steps must be known constants to compute the ratio below.  */
  if (!cst_and_fits_in_hwi (cstep)
      || !cst_and_fits_in_hwi (ustep))
    return NULL_TREE;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  /* ratioi = ustep / cstep in the given precision; fails when cstep does
     not divide ustep.  */
  if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
    {
      /* TODO maybe consider case when ustep divides cstep and the ratio is
	 a power of 2 (so that the division is fast to execute)?  We would
	 need to be much more careful with overflows etc. then.  */
      return NULL_TREE;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (ratioi == 1)
    {
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
    }
  else if (ratioi == -1)
    {
      delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
    }
  else
    {
      ratio = build_int_cst_type (uutype, ratioi);
      delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
      expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
      expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
    }

  return fold_convert (utype, expr);
}
2593
2594 /* Determines the expression by that USE is expressed from induction variable
2595 CAND in LOOP. */
2596
2597 static tree
2598 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2599 {
2600 return get_computation_at (loop, use, cand, use->stmt);
2601 }
2602
2603 /* Returns cost of addition in MODE. */
2604
2605 static unsigned
2606 add_cost (enum machine_mode mode)
2607 {
2608 static unsigned costs[NUM_MACHINE_MODES];
2609 rtx seq;
2610 unsigned cost;
2611
2612 if (costs[mode])
2613 return costs[mode];
2614
2615 start_sequence ();
2616 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2617 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2618 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2619 NULL_RTX);
2620 seq = get_insns ();
2621 end_sequence ();
2622
2623 cost = seq_cost (seq);
2624 if (!cost)
2625 cost = 1;
2626
2627 costs[mode] = cost;
2628
2629 if (dump_file && (dump_flags & TDF_DETAILS))
2630 fprintf (dump_file, "Addition in %s costs %d\n",
2631 GET_MODE_NAME (mode), cost);
2632 return cost;
2633 }
2634
/* Entry in a hashtable of already known costs for multiplication.
   Keyed by (CST, MODE); COST caches the measured result.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
2642
2643 /* Counts hash value for the ENTRY. */
2644
2645 static hashval_t
2646 mbc_entry_hash (const void *entry)
2647 {
2648 const struct mbc_entry *e = entry;
2649
2650 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2651 }
2652
2653 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2654
2655 static int
2656 mbc_entry_eq (const void *entry1, const void *entry2)
2657 {
2658 const struct mbc_entry *e1 = entry1;
2659 const struct mbc_entry *e2 = entry2;
2660
2661 return (e1->mode == e2->mode
2662 && e1->cst == e2->cst);
2663 }
2664
/* Returns cost of multiplication by constant CST in MODE.  Results are
   memoized in a hash table keyed by (CST, MODE).  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  /* Lazily create the cache on first use.  */
  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Cache miss -- fill the slot with a fresh entry; the cost is stored
     into it after measuring below.  */
  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  /* Expand pseudo * CST into a throw-away sequence and sum insn costs.  */
  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
	       NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
2704
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  /* One-time probing of the target: determine the valid displacement range
     and the index multipliers accepted in addresses.  */
  if (!valid_mult)
    {
      HOST_WIDE_INT i;

      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);

      /* Find the largest valid positive offset by doubling until the
	 address is rejected.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* Likewise for the most negative valid offset.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (-i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, " min offset %d\n", (int) min_offset);
	  fprintf (dump_file, " max offset %d\n", (int) max_offset);
	}

      /* Record which multipliers in [-MAX_RATIO, MAX_RATIO] are valid in
	 a MULT address; keep one valid multiplier in RAT for the probe
	 addresses built below.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      rat = 1;
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (memory_address_p (Pmode, addr))
	    {
	      SET_BIT (valid_mult, i + MAX_RATIO);
	      rat = i;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Sign-extend OFFSET from the width of Pmode.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
	     && TEST_BIT (valid_mult, ratio + MAX_RATIO));

  /* A multiplier the addressing mode cannot absorb has to be computed
     explicitly.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An out-of-range offset without a symbol needs an explicit addition,
     which also makes a variable part present.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  /* Look up / fill the cost cache indexed by the four shape flags.  */
  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      acost = 0;

      /* Build a representative address of the requested shape.  */
      addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  GEN_INT (off)));
	}
      else if (offset_p)
	base = GEN_INT (off);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      /* Legitimize the address and measure the cost of doing so plus the
	 cost of the resulting address itself.  */
      start_sequence ();
      addr = memory_address (Pmode, addr);
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
2852
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

/* Pass data for find_depends; walk_tree callbacks take only one user
   pointer, so this travels through a file-scope variable.  */
static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  /* Skip names without an invariant id, or with nonlinear uses.  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* Allocate the result bitmap lazily.  */
  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2876
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  /* Baseline costs of the basic constant kinds, measured once.  */
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      /* Cost of materializing an arbitrary integer constant.  */
      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							   2000));

      /* Cost of taking the address of a static variable (a symbol).  */
      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      /* Cost of a symbol plus a constant offset.  */
      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_var_cost:\n");
	  fprintf (dump_file, " integer %d\n", (int) integer_cost);
	  fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, " address %d\n", (int) address_cost);
	  fprintf (dump_file, " other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Collect the invariants EXPR depends on, when the caller asked.  */
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  /* A bare variable needs no extra computation.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  /* Recurse into operands of simple arithmetic; any other shape gets
     the generic spill cost.  */
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_var_cost (data, op0, NULL);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_var_cost (data, op1, NULL);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      /* Multiplication by a constant has a dedicated cost; a fully
	 variable multiplication falls back to the spill cost.  */
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
3011
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  /* A variable offset, a position not on a byte boundary, or a base that
     is not a plain variable cannot be split statically; treat the whole
     address as a variable part.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      /* Static and external variables are addressed via a symbol.  */
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}
3056
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  /* E1 - 0: just split E1 itself into its parts.  */
  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  /* General case: force both addresses to registers and subtract.  */
  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
3094
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  /* Peel constant offsets off both operands and account them directly
     in *OFFSET.  */
  e1 = strip_offset (e1, false, &off1);
  e2 = strip_offset (e2, false, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Address differences are handled by a dedicated helper.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
				depends_on);
  *symbol_present = false;

  /* Equal operands cancel out entirely.  */
  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return 0;
    }
  *var_present = true;
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  /* 0 - E2 costs a negation, modeled as multiplication by -1.  */
  if (zero_p (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}
3145
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  /* Non-constant steps cannot be related by a constant ratio.  */
  if (!cst_and_fits_in_hwi (ustep)
      || !cst_and_fits_in_hwi (cstep))
    return INFTY;

  /* Bases that are constants too large for a HOST_WIDE_INT are handled
     by the generic expand-and-measure fallback.  */
  if (TREE_CODE (ubase) == INTEGER_CST
      && !cst_and_fits_in_hwi (ubase))
    goto fallback;

  if (TREE_CODE (cbase) == INTEGER_CST
      && !cst_and_fits_in_hwi (cbase))
    goto fallback;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
    return INFTY;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (TREE_CODE (cbase) == INTEGER_CST)
    {
      /* Fold the constant cbase contribution directly into the offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3296
3297 /* Determines the cost of the computation by that USE is expressed
3298 from induction variable CAND. If ADDRESS_P is true, we just need
3299 to create an address from it, otherwise we want to get it into
3300 register. A set of invariants we depend on is stored in
3301 DEPENDS_ON. */
3302
3303 static unsigned
3304 get_computation_cost (struct ivopts_data *data,
3305 struct iv_use *use, struct iv_cand *cand,
3306 bool address_p, bitmap *depends_on)
3307 {
3308 return get_computation_cost_at (data,
3309 use, cand, address_p, depends_on, use->stmt);
3310 }
3311
3312 /* Determines cost of basing replacement of USE on CAND in a generic
3313 expression. */
3314
3315 static bool
3316 determine_use_iv_cost_generic (struct ivopts_data *data,
3317 struct iv_use *use, struct iv_cand *cand)
3318 {
3319 bitmap depends_on;
3320 unsigned cost;
3321
3322 /* The simple case first -- if we need to express value of the preserved
3323 original biv, the cost is 0. This also prevents us from counting the
3324 cost of increment twice -- once at this use and once in the cost of
3325 the candidate. */
3326 if (cand->pos == IP_ORIGINAL
3327 && cand->incremented_at == use->stmt)
3328 {
3329 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3330 return true;
3331 }
3332
3333 cost = get_computation_cost (data, use, cand, false, &depends_on);
3334 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3335
3336 return cost != INFTY;
3337 }
3338
3339 /* Determines cost of basing replacement of USE on CAND in an address. */
3340
3341 static bool
3342 determine_use_iv_cost_address (struct ivopts_data *data,
3343 struct iv_use *use, struct iv_cand *cand)
3344 {
3345 bitmap depends_on;
3346 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3347
3348 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3349
3350 return cost != INFTY;
3351 }
3352
3353 /* Computes value of induction variable IV in iteration NITER. */
3354
3355 static tree
3356 iv_value (struct iv *iv, tree niter)
3357 {
3358 tree val;
3359 tree type = TREE_TYPE (iv->base);
3360
3361 niter = fold_convert (type, niter);
3362 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3363
3364 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3365 }
3366
3367 /* Computes value of candidate CAND at position AT in iteration NITER. */
3368
3369 static tree
3370 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3371 {
3372 tree val = iv_value (cand->iv, niter);
3373 tree type = TREE_TYPE (cand->iv->base);
3374
3375 if (stmt_after_increment (loop, cand, at))
3376 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3377
3378 return val;
3379 }
3380
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  /* Express the period as a mask of the low
     (precision - trailing zeros of step) bits.  */
  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
3403
3404 /* Returns the comparison operator used when eliminating the iv USE. */
3405
3406 static enum tree_code
3407 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3408 {
3409 struct loop *loop = data->current_loop;
3410 basic_block ex_bb;
3411 edge exit;
3412
3413 ex_bb = bb_for_stmt (use->stmt);
3414 exit = EDGE_SUCC (ex_bb, 0);
3415 if (flow_bb_inside_loop_p (loop, exit->dest))
3416 exit = EDGE_SUCC (ex_bb, 1);
3417
3418 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3419 }
3420
3421 /* Check whether it is possible to express the condition in USE by comparison
3422 of candidate CAND. If so, store the value compared with to BOUND. */
3423
3424 static bool
3425 may_eliminate_iv (struct ivopts_data *data,
3426 struct iv_use *use, struct iv_cand *cand, tree *bound)
3427 {
3428 basic_block ex_bb;
3429 edge exit;
3430 struct tree_niter_desc *niter;
3431 tree nit, nit_type;
3432 tree wider_type, period, per_type;
3433 struct loop *loop = data->current_loop;
3434
3435 /* For now works only for exits that dominate the loop latch. TODO -- extend
3436 for other conditions inside loop body. */
3437 ex_bb = bb_for_stmt (use->stmt);
3438 if (use->stmt != last_stmt (ex_bb)
3439 || TREE_CODE (use->stmt) != COND_EXPR)
3440 return false;
3441 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3442 return false;
3443
3444 exit = EDGE_SUCC (ex_bb, 0);
3445 if (flow_bb_inside_loop_p (loop, exit->dest))
3446 exit = EDGE_SUCC (ex_bb, 1);
3447 if (flow_bb_inside_loop_p (loop, exit->dest))
3448 return false;
3449
3450 niter = niter_for_exit (data, exit);
3451 if (!niter
3452 || !zero_p (niter->may_be_zero))
3453 return false;
3454
3455 nit = niter->niter;
3456 nit_type = TREE_TYPE (nit);
3457
3458 /* Determine whether we may use the variable to test whether niter iterations
3459 elapsed. This is the case iff the period of the induction variable is
3460 greater than the number of iterations. */
3461 period = iv_period (cand->iv);
3462 if (!period)
3463 return false;
3464 per_type = TREE_TYPE (period);
3465
3466 wider_type = TREE_TYPE (period);
3467 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3468 wider_type = per_type;
3469 else
3470 wider_type = nit_type;
3471
3472 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3473 fold_convert (wider_type, period),
3474 fold_convert (wider_type, nit)))))
3475 return false;
3476
3477 *bound = cand_value_at (loop, cand, use->stmt, nit);
3478 return true;
3479 }
3480
/* Determines cost of basing replacement of USE on CAND in a condition.
   Records the resulting cost pair in DATA; returns false iff the cost
   is infinite (USE cannot be based on CAND).  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  /* Best case: the exit test can be replaced entirely by comparing CAND
     against BOUND (its value when the loop has run its course); then the
     only cost is materializing BOUND.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      /* Find the operand of the comparison that is not the iv ...  */
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
	op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
	{
	  /* ... and record the invariants its base expression uses, so
	     that they are accounted for in the register pressure.  */
	  op = get_iv (data, op)->base;
	  fd_ivopts_data = data;
	  walk_tree (&op, find_depends, &depends_on, NULL);
	}
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
3528
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
			 tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  /* Only handled for loops with a single exit that dominates the latch.  */
  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  /* The exit must dominate the use, so the final value is valid there.  */
  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
			      bb_for_stmt (use->stmt)));

  /* The iteration count must be known, and unconditionally valid
     (may_be_zero must fold to zero).  */
  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  /* The final value is the value of the iv after NITER iterations.  */
  *value = iv_value (use->iv, niter->niter);

  return true;
}
3556
/* Determines cost of replacing final value of USE using CAND.  Records
   the cost pair in DATA; returns false iff the cost is infinite.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  /* A pseudocandidate (no iv) means replacing the use by the final value
     computed directly from the number of iterations.  */
  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The computation is executed once after the loop, not once per
	 iteration, so scale the cost down accordingly.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
3617
3618 /* Determines cost of basing replacement of USE on CAND. Returns false
3619 if USE cannot be based on CAND. */
3620
3621 static bool
3622 determine_use_iv_cost (struct ivopts_data *data,
3623 struct iv_use *use, struct iv_cand *cand)
3624 {
3625 switch (use->type)
3626 {
3627 case USE_NONLINEAR_EXPR:
3628 return determine_use_iv_cost_generic (data, use, cand);
3629
3630 case USE_OUTER:
3631 return determine_use_iv_cost_outer (data, use, cand);
3632
3633 case USE_ADDRESS:
3634 return determine_use_iv_cost_address (data, use, cand);
3635
3636 case USE_COMPARE:
3637 return determine_use_iv_cost_condition (data, use, cand);
3638
3639 default:
3640 gcc_unreachable ();
3641 }
3642 }
3643
/* Determines costs of basing the use of the iv on an iv candidate.
   Fills the per-use cost maps for every (use, candidate) pair that is
   considered, and prunes uninteresting candidates from the per-use
   related_cands sets.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Small problem -- evaluate every candidate for every use.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Large problem -- only evaluate the candidates recorded as
	     related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip unused map slots and infinite-cost pairs.  */
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3718
/* Determines cost of the candidate CAND and stores it in cand->cost.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  /* Pseudocandidates (final-value replacement) cost nothing to keep.  */
  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  /* NOTE(review): this uses the global `current_loop' while the rest of
     the function uses data->current_loop -- presumably equivalent here;
     confirm.  */
  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}
3756
3757 /* Determines costs of computation of the candidates. */
3758
3759 static void
3760 determine_iv_costs (struct ivopts_data *data)
3761 {
3762 unsigned i;
3763
3764 if (dump_file && (dump_flags & TDF_DETAILS))
3765 {
3766 fprintf (dump_file, "Candidate costs:\n");
3767 fprintf (dump_file, " cand\tcost\n");
3768 }
3769
3770 for (i = 0; i < n_iv_cands (data); i++)
3771 {
3772 struct iv_cand *cand = iv_cand (data, i);
3773
3774 determine_iv_cost (data, cand);
3775
3776 if (dump_file && (dump_flags & TDF_DETAILS))
3777 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3778 }
3779
3780 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, "\n");
3782 }
3783
3784 /* Calculates cost for having SIZE induction variables. */
3785
3786 static unsigned
3787 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3788 {
3789 return global_cost_for_size (size,
3790 loop_data (data->current_loop)->regs_used,
3791 n_iv_uses (data));
3792 }
3793
/* For each size of the induction variable set determine the penalty.
   Estimates the number of registers used by the loop and stores it in
   loop_data (loop)->regs_used for later use by the global cost model.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count the loop phi nodes that represent real (non-virtual) registers
     that are not induction variables.  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      /* Bivs are accounted for separately as candidates.  */
      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add the invariants (values defined outside of the loop) that have
     nonlinear uses inside it.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
3869
3870 /* Returns true if A is a cheaper cost pair than B. */
3871
3872 static bool
3873 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3874 {
3875 if (!a)
3876 return false;
3877
3878 if (!b)
3879 return true;
3880
3881 if (a->cost < b->cost)
3882 return true;
3883
3884 if (a->cost > b->cost)
3885 return false;
3886
3887 /* In case the costs are the same, prefer the cheaper candidate. */
3888 if (a->cand->cost < b->cand->cost)
3889 return true;
3890
3891 return false;
3892 }
3893
3894 /* Computes the cost field of IVS structure. */
3895
3896 static void
3897 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3898 {
3899 unsigned cost = 0;
3900
3901 cost += ivs->cand_use_cost;
3902 cost += ivs->cand_cost;
3903 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3904
3905 ivs->cost = cost;
3906 }
3907
/* Set USE not to be expressed by any candidate in IVS.  Undoes all the
   bookkeeping iv_ca_set_cp performed for the current cost pair of USE,
   then recomputes the total cost.  A no-op if USE has no cost pair.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  struct cost_pair *cp;
  bitmap_iterator bi;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* USE becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use of the candidate, drop the candidate from
     the set and stop accounting for it.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
    }

  ivs->cand_use_cost -= cp->cost;

  /* Release the invariants the cost pair depended on; each invariant
     whose use count drops to zero frees a register.  */
  deps = cp->depends_on;

  if (deps)
    {
      EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	{
	  ivs->n_invariant_uses[iid]--;
	  if (ivs->n_invariant_uses[iid] == 0)
	    ivs->n_regs--;
	}
    }

  iv_ca_recount_cost (data, ivs);
}
3954
/* Set cost pair for USE in set IVS to CP.  Removes any previous cost
   pair of USE first; CP may be NULL, in which case USE just becomes
   unexpressed.  Keeps all the counters of IVS in sync and recomputes
   the total cost.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  bitmap_iterator bi;

  /* Nothing to do when the pair is unchanged.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      /* USE becomes expressed.  */
      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of the candidate -- add it to the set and start
	 accounting for its cost and register.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	}

      ivs->cand_use_cost += cp->cost;

      /* Account for the invariants the cost pair depends on; each newly
	 used invariant occupies a register.  */
      deps = cp->depends_on;

      if (deps)
	{
	  EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	    {
	      ivs->n_invariant_uses[iid]++;
	      if (ivs->n_invariant_uses[iid] == 1)
		ivs->n_regs++;
	    }
	}

      iv_ca_recount_cost (data, ivs);
    }
}
4005
4006 /* Extend set IVS by expressing USE by some of the candidates in it
4007 if possible. */
4008
4009 static void
4010 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4011 struct iv_use *use)
4012 {
4013 struct cost_pair *best_cp = NULL, *cp;
4014 bitmap_iterator bi;
4015 unsigned i;
4016
4017 gcc_assert (ivs->upto >= use->id);
4018
4019 if (ivs->upto == use->id)
4020 {
4021 ivs->upto++;
4022 ivs->bad_uses++;
4023 }
4024
4025 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4026 {
4027 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4028
4029 if (cheaper_cost_pair (cp, best_cp))
4030 best_cp = cp;
4031 }
4032
4033 iv_ca_set_cp (data, ivs, use, best_cp);
4034 }
4035
4036 /* Get cost for assignment IVS. */
4037
4038 static unsigned
4039 iv_ca_cost (struct iv_ca *ivs)
4040 {
4041 return (ivs->bad_uses ? INFTY : ivs->cost);
4042 }
4043
4044 /* Returns true if all dependences of CP are among invariants in IVS. */
4045
4046 static bool
4047 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4048 {
4049 unsigned i;
4050 bitmap_iterator bi;
4051
4052 if (!cp->depends_on)
4053 return true;
4054
4055 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4056 {
4057 if (ivs->n_invariant_uses[i] == 0)
4058 return false;
4059 }
4060
4061 return true;
4062 }
4063
4064 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4065 it before NEXT_CHANGE. */
4066
4067 static struct iv_ca_delta *
4068 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4069 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4070 {
4071 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4072
4073 change->use = use;
4074 change->old_cp = old_cp;
4075 change->new_cp = new_cp;
4076 change->next_change = next_change;
4077
4078 return change;
4079 }
4080
4081 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4082 are rewritten. */
4083
4084 static struct iv_ca_delta *
4085 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4086 {
4087 struct iv_ca_delta *last;
4088
4089 if (!l2)
4090 return l1;
4091
4092 if (!l1)
4093 return l2;
4094
4095 for (last = l1; last->next_change; last = last->next_change)
4096 continue;
4097 last->next_change = l2;
4098
4099 return l1;
4100 }
4101
4102 /* Returns candidate by that USE is expressed in IVS. */
4103
4104 static struct cost_pair *
4105 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4106 {
4107 return ivs->cand_for_use[use->id];
4108 }
4109
4110 /* Reverse the list of changes DELTA, forming the inverse to it. */
4111
4112 static struct iv_ca_delta *
4113 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4114 {
4115 struct iv_ca_delta *act, *next, *prev = NULL;
4116 struct cost_pair *tmp;
4117
4118 for (act = delta; act; act = next)
4119 {
4120 next = act->next_change;
4121 act->next_change = prev;
4122 prev = act;
4123
4124 tmp = act->old_cp;
4125 act->old_cp = act->new_cp;
4126 act->new_cp = tmp;
4127 }
4128
4129 return prev;
4130 }
4131
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  Note that for FORWARD == false, DELTA is reversed
   (swapping old and new pairs) before application and reversed back
   afterwards, so the list is left exactly as it was passed in.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      /* The delta must describe the current state of IVS.  */
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  /* Restore the original list order (and old/new orientation).  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
4156
4157 /* Returns true if CAND is used in IVS. */
4158
4159 static bool
4160 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4161 {
4162 return ivs->n_cand_uses[cand->id] > 0;
4163 }
4164
4165 /* Returns number of induction variable candidates in the set IVS. */
4166
4167 static unsigned
4168 iv_ca_n_cands (struct iv_ca *ivs)
4169 {
4170 return ivs->n_cands;
4171 }
4172
4173 /* Free the list of changes DELTA. */
4174
4175 static void
4176 iv_ca_delta_free (struct iv_ca_delta **delta)
4177 {
4178 struct iv_ca_delta *act, *next;
4179
4180 for (act = *delta; act; act = next)
4181 {
4182 next = act->next_change;
4183 free (act);
4184 }
4185
4186 *delta = NULL;
4187 }
4188
4189 /* Allocates new iv candidates assignment. */
4190
4191 static struct iv_ca *
4192 iv_ca_new (struct ivopts_data *data)
4193 {
4194 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4195
4196 nw->upto = 0;
4197 nw->bad_uses = 0;
4198 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4199 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4200 nw->cands = BITMAP_ALLOC (NULL);
4201 nw->n_cands = 0;
4202 nw->n_regs = 0;
4203 nw->cand_use_cost = 0;
4204 nw->cand_cost = 0;
4205 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4206 nw->cost = 0;
4207
4208 return nw;
4209 }
4210
4211 /* Free memory occupied by the set IVS. */
4212
4213 static void
4214 iv_ca_free (struct iv_ca **ivs)
4215 {
4216 free ((*ivs)->cand_for_use);
4217 free ((*ivs)->n_cand_uses);
4218 BITMAP_FREE ((*ivs)->cands);
4219 free ((*ivs)->n_invariant_uses);
4220 free (*ivs);
4221 *ivs = NULL;
4222 }
4223
/* Dumps IVS to FILE: the total cost, the candidate set, and the ids of
   the invariants the set currently uses.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  /* Print the used invariants as a comma-separated list; PREF carries
     the separator from the second item on.  */
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}
4243
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  The set IVS itself is left
   unchanged (the delta is committed and reverted internally).  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* Skip uses CAND cannot express, whose dependences are not
	 satisfied, or where CAND would not be an improvement.  */
      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the modified set, then undo the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4288
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Returns INFTY (with
   *DELTA freed) if some use expressed by CAND cannot be expressed by any
   other candidate in the set.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only uses currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      /* Look for the cheapest remaining candidate in the set that can
	 express this use; the candidate pool scanned depends on whether
	 all candidates were considered for each use.  */
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement exists -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then undo the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4372
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  Greedy: repeatedly removes the candidate whose
   removal improves the cost the most, recursing until no removal helps.
   IVS itself is left unchanged.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single removal that improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helps -- we are done.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The recursion is
     evaluated on the temporarily-narrowed set, which is restored before
     returning; the combined delta describes all removals.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
4421
4422 /* Tries to extend the sets IVS in the best possible way in order
4423 to express the USE. */
4424
4425 static bool
4426 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4427 struct iv_use *use)
4428 {
4429 unsigned best_cost, act_cost;
4430 unsigned i;
4431 bitmap_iterator bi;
4432 struct iv_cand *cand;
4433 struct iv_ca_delta *best_delta = NULL, *act_delta;
4434 struct cost_pair *cp;
4435
4436 iv_ca_add_use (data, ivs, use);
4437 best_cost = iv_ca_cost (ivs);
4438
4439 cp = iv_ca_cand_for_use (ivs, use);
4440 if (cp)
4441 {
4442 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4443 iv_ca_set_no_cp (data, ivs, use);
4444 }
4445
4446 /* First try important candidates. Only if it fails, try the specific ones.
4447 Rationale -- in loops with many variables the best choice often is to use
4448 just one generic biv. If we added here many ivs specific to the uses,
4449 the optimization algorithm later would be likely to get stuck in a local
4450 minimum, thus causing us to create too many ivs. The approach from
4451 few ivs to more seems more likely to be successful -- starting from few
4452 ivs, replacing an expensive use by a specific iv should always be a
4453 win. */
4454 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4455 {
4456 cand = iv_cand (data, i);
4457
4458 if (iv_ca_cand_used_p (ivs, cand))
4459 continue;
4460
4461 cp = get_use_iv_cost (data, use, cand);
4462 if (!cp)
4463 continue;
4464
4465 iv_ca_set_cp (data, ivs, use, cp);
4466 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4467 iv_ca_set_no_cp (data, ivs, use);
4468 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4469
4470 if (act_cost < best_cost)
4471 {
4472 best_cost = act_cost;
4473
4474 iv_ca_delta_free (&best_delta);
4475 best_delta = act_delta;
4476 }
4477 else
4478 iv_ca_delta_free (&act_delta);
4479 }
4480
4481 if (best_cost == INFTY)
4482 {
4483 for (i = 0; i < use->n_map_members; i++)
4484 {
4485 cp = use->cost_map + i;
4486 cand = cp->cand;
4487 if (!cand)
4488 continue;
4489
4490 /* Already tried this. */
4491 if (cand->important)
4492 continue;
4493
4494 if (iv_ca_cand_used_p (ivs, cand))
4495 continue;
4496
4497 act_delta = NULL;
4498 iv_ca_set_cp (data, ivs, use, cp);
4499 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4500 iv_ca_set_no_cp (data, ivs, use);
4501 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4502 cp, act_delta);
4503
4504 if (act_cost < best_cost)
4505 {
4506 best_cost = act_cost;
4507
4508 if (best_delta)
4509 iv_ca_delta_free (&best_delta);
4510 best_delta = act_delta;
4511 }
4512 else
4513 iv_ca_delta_free (&act_delta);
4514 }
4515 }
4516
4517 iv_ca_delta_commit (data, ivs, best_delta, true);
4518 iv_ca_delta_free (&best_delta);
4519
4520 return (best_cost != INFTY);
4521 }
4522
4523 /* Finds an initial assignment of candidates to uses. */
4524
4525 static struct iv_ca *
4526 get_initial_solution (struct ivopts_data *data)
4527 {
4528 struct iv_ca *ivs = iv_ca_new (data);
4529 unsigned i;
4530
4531 for (i = 0; i < n_iv_uses (data); i++)
4532 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4533 {
4534 iv_ca_free (&ivs);
4535 return NULL;
4536 }
4537
4538 return ivs;
4539 }
4540
/* Tries to improve set of induction variables IVS.  Returns true if an
   improvement was found and committed, false if IVS is locally optimal.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      /* An empty delta means CAND improves no use.  */
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the best change found; its measured cost must agree with the
     cost of the resulting set.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
4597
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.
   Returns the resulting set (with use->selected filled in for each use),
   or NULL if no valid assignment exists.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate local improvements until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the selected candidate on each use for the rewrite phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
4644
4645 /* Creates a new induction variable corresponding to CAND. */
4646
4647 static void
4648 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4649 {
4650 block_stmt_iterator incr_pos;
4651 tree base;
4652 bool after = false;
4653
4654 if (!cand->iv)
4655 return;
4656
4657 switch (cand->pos)
4658 {
4659 case IP_NORMAL:
4660 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4661 break;
4662
4663 case IP_END:
4664 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4665 after = true;
4666 break;
4667
4668 case IP_ORIGINAL:
4669 /* Mark that the iv is preserved. */
4670 name_info (data, cand->var_before)->preserve_biv = true;
4671 name_info (data, cand->var_after)->preserve_biv = true;
4672
4673 /* Rewrite the increment so that it uses var_before directly. */
4674 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4675
4676 return;
4677 }
4678
4679 gimple_add_tmp_var (cand->var_before);
4680 add_referenced_tmp_var (cand->var_before);
4681
4682 base = unshare_expr (cand->iv->base);
4683
4684 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4685 &incr_pos, after, &cand->var_before, &cand->var_after);
4686 }
4687
4688 /* Creates new induction variables described in SET. */
4689
4690 static void
4691 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4692 {
4693 unsigned i;
4694 struct iv_cand *cand;
4695 bitmap_iterator bi;
4696
4697 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4698 {
4699 cand = iv_cand (data, i);
4700 create_new_iv (data, cand);
4701 }
4702 }
4703
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
	{
	  /* Prevent the ssa name defined by the statement from being removed.  */
	  SET_PHI_RESULT (stmt, NULL);
	}
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      /* An ordinary statement -- remove it through its iterator.  */
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi);
    }
}
4726
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && TREE_CODE (use->stmt) == MODIFY_EXPR
      && TREE_OPERAND (use->stmt, 0) == cand->var_after)
    {
      op = TREE_OPERAND (use->stmt, 1);

      /* Be a bit careful.  In case variable is expressed in some
	 complicated way, rewrite it so that we may get rid of this
	 complicated expression.  */
      if ((TREE_CODE (op) == PLUS_EXPR
	   || TREE_CODE (op) == MINUS_EXPR)
	  && TREE_OPERAND (op, 0) == cand->var_before
	  && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
	return;
    }

  /* Express the value of the use in terms of CAND.  */
  comp = unshare_expr (get_computation (data->current_loop,
					use, cand));
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Find the insertion point just past the labels at the start of
	 the block containing the phi; BSI ends up on the last label (or
	 at the block start), PBSI one position after it.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value, and make that assignment the defining statement of TGT.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      /* For an assignment, just replace its right-hand side.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
4805
4806 /* Replaces ssa name in index IDX by its basic variable. Callback for
4807 for_each_index. */
4808
4809 static bool
4810 idx_remove_ssa_names (tree base, tree *idx,
4811 void *data ATTRIBUTE_UNUSED)
4812 {
4813 tree *op;
4814
4815 if (TREE_CODE (*idx) == SSA_NAME)
4816 *idx = SSA_NAME_VAR (*idx);
4817
4818 if (TREE_CODE (base) == ARRAY_REF)
4819 {
4820 op = &TREE_OPERAND (base, 2);
4821 if (*op
4822 && TREE_CODE (*op) == SSA_NAME)
4823 *op = SSA_NAME_VAR (*op);
4824 op = &TREE_OPERAND (base, 3);
4825 if (*op
4826 && TREE_CODE (*op) == SSA_NAME)
4827 *op = SSA_NAME_VAR (*op);
4828 }
4829
4830 return true;
4831 }
4832
4833 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4834
4835 static tree
4836 unshare_and_remove_ssa_names (tree ref)
4837 {
4838 ref = unshare_expr (ref);
4839 for_each_index (&ref, idx_remove_ssa_names, NULL);
4840
4841 return ref;
4842 }
4843
/* Rewrites base of memory access OP with expression WITH in statement
   pointed to by BSI.  */

static void
rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
{
  tree bvar, var, new_name, copy, name;
  tree orig;

  var = bvar = get_base_address (*op);

  /* Without a base object, or when the replacement is not an ssa name,
     no alias bookkeeping is needed -- just perform the rewrite.  */
  if (!var || TREE_CODE (with) != SSA_NAME)
    goto do_rewrite;

  gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
  /* Strip the dereference, so that VAR becomes the pointer (or decl)
     being accessed.  */
  if (TREE_CODE (var) == INDIRECT_REF)
    var = TREE_OPERAND (var, 0);
  if (TREE_CODE (var) == SSA_NAME)
    {
      name = var;
      var = SSA_NAME_VAR (var);
    }
  else if (DECL_P (var))
    name = NULL_TREE;
  else
    goto do_rewrite;

  /* We need to add a memory tag for the variable.  But we do not want
     to add it to the temporary used for the computations, since this leads
     to problems in redundancy elimination when there are common parts
     in two computations referring to the different arrays.  So we copy
     the variable to a new temporary.  */
  copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);

  if (name)
    /* Base was already an ssa name -- duplicate it together with its
       alias information.  */
    new_name = duplicate_ssa_name (name, copy);
  else
    {
      /* Base was a plain decl; create a fresh pointer temporary and
	 give it the decl's memory tag (or a new type alias).  */
      tree tag = var_ann (var)->type_mem_tag;
      tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
      add_referenced_tmp_var (new_ptr);
      if (tag)
	var_ann (new_ptr)->type_mem_tag = tag;
      else
	add_type_alias (new_ptr, var);
      new_name = make_ssa_name (new_ptr, copy);
    }

  /* Emit the copy before the statement being rewritten and use the
     copied name as the new base.  */
  TREE_OPERAND (copy, 0) = new_name;
  update_stmt (copy);
  bsi_insert_before (bsi, copy, BSI_SAME_STMT);
  with = new_name;

do_rewrite:

  orig = NULL_TREE;
  gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);

  /* Reuse the original reference already recorded on an INDIRECT_REF;
     otherwise build one from *OP with ssa names stripped.  */
  if (TREE_CODE (*op) == INDIRECT_REF)
    orig = REF_ORIGINAL (*op);
  if (!orig)
    orig = unshare_and_remove_ssa_names (*op);

  *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);

  /* Record the original reference, for purposes of alias analysis.  */
  REF_ORIGINAL (*op) = orig;

  /* Virtual operands in the original statement may have to be renamed
     because of the replacement.  */
  mark_new_vars_to_rename (bsi_stmt (*bsi));
}
4918
4919 /* Rewrites USE (address that is an iv) using candidate CAND. */
4920
4921 static void
4922 rewrite_use_address (struct ivopts_data *data,
4923 struct iv_use *use, struct iv_cand *cand)
4924 {
4925 tree comp = unshare_expr (get_computation (data->current_loop,
4926 use, cand));
4927 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4928 tree stmts;
4929 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4930
4931 if (stmts)
4932 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4933
4934 rewrite_address_base (&bsi, use->op_p, op);
4935 }
4936
4937 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4938 candidate CAND. */
4939
4940 static void
4941 rewrite_use_compare (struct ivopts_data *data,
4942 struct iv_use *use, struct iv_cand *cand)
4943 {
4944 tree comp;
4945 tree *op_p, cond, op, stmts, bound;
4946 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4947 enum tree_code compare;
4948 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
4949
4950 bound = cp->value;
4951 if (bound)
4952 {
4953 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4954 tree var_type = TREE_TYPE (var);
4955
4956 compare = iv_elimination_compare (data, use);
4957 bound = fold_convert (var_type, bound);
4958 op = force_gimple_operand (unshare_expr (bound), &stmts,
4959 true, NULL_TREE);
4960
4961 if (stmts)
4962 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4963
4964 *use->op_p = build2 (compare, boolean_type_node, var, op);
4965 update_stmt (use->stmt);
4966 return;
4967 }
4968
4969 /* The induction variable elimination failed; just express the original
4970 giv. */
4971 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4972
4973 cond = *use->op_p;
4974 op_p = &TREE_OPERAND (cond, 0);
4975 if (TREE_CODE (*op_p) != SSA_NAME
4976 || zero_p (get_iv (data, *op_p)->step))
4977 op_p = &TREE_OPERAND (cond, 1);
4978
4979 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4980 if (stmts)
4981 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4982
4983 *op_p = op;
4984 }
4985
4986 /* Ensure that operand *OP_P may be used at the end of EXIT without
4987 violating loop closed ssa form. */
4988
4989 static void
4990 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4991 {
4992 basic_block def_bb;
4993 struct loop *def_loop;
4994 tree phi, use;
4995
4996 use = USE_FROM_PTR (op_p);
4997 if (TREE_CODE (use) != SSA_NAME)
4998 return;
4999
5000 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5001 if (!def_bb)
5002 return;
5003
5004 def_loop = def_bb->loop_father;
5005 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5006 return;
5007
5008 /* Try finding a phi node that copies the value out of the loop. */
5009 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5010 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5011 break;
5012
5013 if (!phi)
5014 {
5015 /* Create such a phi node. */
5016 tree new_name = duplicate_ssa_name (use, NULL);
5017
5018 phi = create_phi_node (new_name, exit->dest);
5019 SSA_NAME_DEF_STMT (new_name) = phi;
5020 add_phi_arg (phi, use, exit);
5021 }
5022
5023 SET_USE (op_p, PHI_RESULT (phi));
5024 }
5025
5026 /* Ensure that operands of STMT may be used at the end of EXIT without
5027 violating loop closed ssa form. */
5028
5029 static void
5030 protect_loop_closed_ssa_form (edge exit, tree stmt)
5031 {
5032 use_optype uses;
5033 vuse_optype vuses;
5034 v_may_def_optype v_may_defs;
5035 unsigned i;
5036
5037 uses = STMT_USE_OPS (stmt);
5038 for (i = 0; i < NUM_USES (uses); i++)
5039 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5040
5041 vuses = STMT_VUSE_OPS (stmt);
5042 for (i = 0; i < NUM_VUSES (vuses); i++)
5043 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5044
5045 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5046 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5047 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5048 }
5049
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
   so that they are emitted on the correct place, and so that the loop closed
   ssa form is preserved.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  /* The statements must only execute when the loop exits through EXIT;
     split the exit block if it has other predecessors.  */
  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  /* STMTS may be either a single statement or a STATEMENT_LIST; insert
     each statement and fix up loop closed ssa form after it.  */
  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
	{
	  bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
	  protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
	}
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  /* Phi nodes whose argument on EXIT is OP are now redundant; replace
     each by a plain assignment of OP to its result.  NEXT is cached
     because remove_statement unlinks PHI from the chain.  */
  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
	{
	  def = PHI_RESULT (phi);
	  remove_statement (phi, false);
	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
			 def, op);
	  SSA_NAME_DEF_STMT (def) = stmt;
	  bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
	}
    }
}
5101
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* Determine the ssa name whose final value is used outside the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      /* CAND without an iv is the pseudocandidate carrying the final
	 value computed during cost analysis; otherwise express the
	 value of CAND at the exit test.  */
      if (!cand->iv)
	{
	  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
	  value = cp->value;
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      value = unshare_expr (value);
      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the uses of TGT in the exit phis to the computed
	 value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      /* Any statements needed to compute the value are emitted on the
	 exit edge.  */
      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5172
5173 /* Rewrites USE using candidate CAND. */
5174
5175 static void
5176 rewrite_use (struct ivopts_data *data,
5177 struct iv_use *use, struct iv_cand *cand)
5178 {
5179 switch (use->type)
5180 {
5181 case USE_NONLINEAR_EXPR:
5182 rewrite_use_nonlinear_expr (data, use, cand);
5183 break;
5184
5185 case USE_OUTER:
5186 rewrite_use_outer (data, use, cand);
5187 break;
5188
5189 case USE_ADDRESS:
5190 rewrite_use_address (data, use, cand);
5191 break;
5192
5193 case USE_COMPARE:
5194 rewrite_use_compare (data, use, cand);
5195 break;
5196
5197 default:
5198 gcc_unreachable ();
5199 }
5200 update_stmt (use->stmt);
5201 }
5202
5203 /* Rewrite the uses using the selected induction variables. */
5204
5205 static void
5206 rewrite_uses (struct ivopts_data *data)
5207 {
5208 unsigned i;
5209 struct iv_cand *cand;
5210 struct iv_use *use;
5211
5212 for (i = 0; i < n_iv_uses (data); i++)
5213 {
5214 use = iv_use (data, i);
5215 cand = use->selected;
5216 gcc_assert (cand);
5217
5218 rewrite_use (data, use, cand);
5219 }
5220 }
5221
5222 /* Removes the ivs that are not used after rewriting. */
5223
5224 static void
5225 remove_unused_ivs (struct ivopts_data *data)
5226 {
5227 unsigned j;
5228 bitmap_iterator bi;
5229
5230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5231 {
5232 struct version_info *info;
5233
5234 info = ver_info (data, j);
5235 if (info->iv
5236 && !zero_p (info->iv->step)
5237 && !info->inv_id
5238 && !info->iv->have_use_for
5239 && !info->preserve_biv)
5240 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5241 }
5242 }
5243
5244 /* Frees data allocated by the optimization of a single loop. */
5245
5246 static void
5247 free_loop_data (struct ivopts_data *data)
5248 {
5249 unsigned i, j;
5250 bitmap_iterator bi;
5251 tree obj;
5252
5253 htab_empty (data->niters);
5254
5255 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5256 {
5257 struct version_info *info;
5258
5259 info = ver_info (data, i);
5260 if (info->iv)
5261 free (info->iv);
5262 info->iv = NULL;
5263 info->has_nonlin_use = false;
5264 info->preserve_biv = false;
5265 info->inv_id = 0;
5266 }
5267 bitmap_clear (data->relevant);
5268 bitmap_clear (data->important_candidates);
5269
5270 for (i = 0; i < n_iv_uses (data); i++)
5271 {
5272 struct iv_use *use = iv_use (data, i);
5273
5274 free (use->iv);
5275 BITMAP_FREE (use->related_cands);
5276 for (j = 0; j < use->n_map_members; j++)
5277 if (use->cost_map[j].depends_on)
5278 BITMAP_FREE (use->cost_map[j].depends_on);
5279 free (use->cost_map);
5280 free (use);
5281 }
5282 VEC_truncate (iv_use_p, data->iv_uses, 0);
5283
5284 for (i = 0; i < n_iv_cands (data); i++)
5285 {
5286 struct iv_cand *cand = iv_cand (data, i);
5287
5288 if (cand->iv)
5289 free (cand->iv);
5290 free (cand);
5291 }
5292 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5293
5294 if (data->version_info_size < num_ssa_names)
5295 {
5296 data->version_info_size = 2 * num_ssa_names;
5297 free (data->version_info);
5298 data->version_info = xcalloc (data->version_info_size,
5299 sizeof (struct version_info));
5300 }
5301
5302 data->max_inv_id = 0;
5303
5304 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5305 SET_DECL_RTL (obj, NULL_RTX);
5306
5307 VEC_truncate (tree, decl_rtl_to_reset, 0);
5308 }
5309
5310 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5311 loop tree. */
5312
5313 static void
5314 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5315 {
5316 unsigned i;
5317
5318 for (i = 1; i < loops->num; i++)
5319 if (loops->parray[i])
5320 {
5321 free (loops->parray[i]->aux);
5322 loops->parray[i]->aux = NULL;
5323 }
5324
5325 free_loop_data (data);
5326 free (data->version_info);
5327 BITMAP_FREE (data->relevant);
5328 BITMAP_FREE (data->important_candidates);
5329 htab_delete (data->niters);
5330
5331 VEC_free (tree, heap, decl_rtl_to_reset);
5332 VEC_free (iv_use_p, heap, data->iv_uses);
5333 VEC_free (iv_cand_p, heap, data->iv_candidates);
5334 }
5335
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up if there are too many,
     since the algorithms below would be too expensive.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Per-loop data is released whether or not the loop was changed.  */
  free_loop_data (data);

  return changed;
}
5407
/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  Descend to
     the leftmost innermost loop first.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  This walks the loop tree in a
     post-order fashion: after a loop, visit its next sibling's innermost
     descendant, or else go back up to the parent.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
	{
	  loop = loop->next;
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;
    }

  /* FIXME.  IV opts introduces new aliases and call-clobbered
     variables, which need to be renamed.  However, when we call the
     renamer, not all statements will be scanned for operands.  In
     particular, the newly introduced aliases may appear in statements
     that are considered "unmodified", so the renamer will not get a
     chance to rename those operands.

     Work around this problem by forcing an operand re-scan on every
     statement.  This will not be necessary once the new operand
     scanner is implemented.  */
  if (need_ssa_update_p ())
    {
      basic_block bb;
      block_stmt_iterator si;
      FOR_EACH_BB (bb)
	for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	  update_stmt (bsi_stmt (si));
    }

  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  tree_ssa_iv_optimize_finalize (loops, &data);
}
This page took 0.280924 seconds and 6 git commands to generate.