gcc/unroll.c

   1 /* Try to unroll loops, and split induction variables.
   2    Copyright (C) 1992, 93, 94, 95, 97, 1998 Free Software Foundation, Inc.
   3    Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* Try to unroll a loop, and split induction variables.
  23
  24    Loops for which the number of iterations can be calculated exactly are
  25    handled specially.  If the number of iterations times the insn_count is
  26    less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
  27    Otherwise, we try to unroll the loop a number of times modulo the number
  28    of iterations, so that only one exit test will be needed.  It is unrolled
  29    a number of times approximately equal to MAX_UNROLLED_INSNS divided by
  30    the insn count.
  31
  32    Otherwise, if the number of iterations can be calculated exactly at
  33    run time, and the loop is always entered at the top, then we try to
  34    precondition the loop.  That is, at run time, calculate how many times
  35    the loop will execute, and then execute the loop body a few times so
  36    that the remaining iterations will be some multiple of 4 (or 2 if the
  37    loop is large).  Then fall through to a loop unrolled 4 (or 2) times,
  38    with only one exit test needed at the end of the loop.
  39
  40    Otherwise, if the number of iterations can not be calculated exactly,
  41    not even at run time, then we still unroll the loop a number of times
  42    approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
  43    but there must be an exit test after each copy of the loop body.
  44
  45    For each induction variable, which is dead outside the loop (replaceable)
  46    or for which we can easily calculate the final value, if we can easily
  47    calculate its value at each place where it is set as a function of the
  48    current loop unroll count and the variable's value at loop entry, then
  49    the induction variable is split into `N' different variables, one for
  50    each copy of the loop body.  One variable is live across the backward
  51    branch, and the others are all calculated as a function of this variable.
  52    This helps eliminate data dependencies, and leads to further opportunities
  53    for cse.  */
  54
  55 /* Possible improvements follow:  */
  56
  57 /* ??? Add an extra pass somewhere to determine whether unrolling will
  58    give any benefit.  E.g. after generating all unrolled insns, compute the
  59    cost of all insns and compare against cost of insns in rolled loop.
  60
  61    - On traditional architectures, unrolling a non-constant bound loop
  62      is a win if there is a giv whose only use is in memory addresses, the
  63      memory addresses can be split, and hence giv increments can be
  64      eliminated.
  65    - It is also a win if the loop is executed many times, and preconditioning
  66      can be performed for the loop.
  67    Add code to check for these and similar cases.  */
  68
  69 /* ??? Improve control of which loops get unrolled.  Could use profiling
  70    info to only unroll the most commonly executed loops.  Perhaps have
  71    a user specifyable option to control the amount of code expansion,
  72    or the percent of loops to consider for unrolling.  Etc.  */
  73
  74 /* ??? Look at the register copies inside the loop to see if they form a
  75    simple permutation.  If so, iterate the permutation until it gets back to
  76    the start state.  This is how many times we should unroll the loop, for
  77    best results, because then all register copies can be eliminated.
  78    For example, the lisp nreverse function should be unrolled 3 times
  79    while (this)
  80      {
  81        next = this->cdr;
  82        this->cdr = prev;
  83        prev = this;
  84        this = next;
  85      }
  86
  87    ??? The number of times to unroll the loop may also be based on data
  88    references in the loop.  For example, if we have a loop that references
  89    x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times.  */
  90
  91 /* ??? Add some simple linear equation solving capability so that we can
  92    determine the number of loop iterations for more complex loops.
  93    For example, consider this loop from gdb
  94    #define SWAP_TARGET_AND_HOST(buffer,len)
  95      {
  96        char tmp;
  97        char *p = (char *) buffer;
  98        char *q = ((char *) buffer) + len - 1;
  99        int iterations = (len + 1) >> 1;
 100        int i;
 101        for (p; p < q; p++, q--;)
 102          {
 103            tmp = *q;
 104            *q = *p;
 105            *p = tmp;
 106          }
 107      }
 108    Note that:
 109      start value = p = &buffer + current_iteration
 110      end value   = q = &buffer + len - 1 - current_iteration
 111    Given the loop exit test of "p < q", then there must be "q - p" iterations,
 112    set equal to zero and solve for number of iterations:
 113      q - p = len - 1 - 2*current_iteration = 0
 114      current_iteration = (len - 1) / 2
 115    Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
 116    iterations of this loop.  */
 117
 118 /* ??? Currently, no labels are marked as loop invariant when doing loop
 119    unrolling.  This is because an insn inside the loop, that loads the address
 120    of a label inside the loop into a register, could be moved outside the loop
 121    by the invariant code motion pass if labels were invariant.  If the loop
 122    is subsequently unrolled, the code will be wrong because each unrolled
 123    body of the loop will use the same address, whereas each actually needs a
 124    different address.  A case where this happens is when a loop containing
 125    a switch statement is unrolled.
 126
 127    It would be better to let labels be considered invariant.  When we
 128    unroll loops here, check to see if any insns using a label local to the
 129    loop were moved before the loop.  If so, then correct the problem, by
 130    moving the insn back into the loop, or perhaps replicate the insn before
 131    the loop, one copy for each time the loop is unrolled.  */
 132
 133 /* The prime factors looked for when trying to unroll a loop by some
 134    number which is modulo the total number of iterations.  Just checking
 135    for these 4 prime factors will find at least one factor for 75% of
 136    all numbers theoretically.  Practically speaking, this will succeed
 137    almost all of the time since loops are generally a multiple of 2
 138    and/or 5.  */
 139
 140 #define NUM_FACTORS 4
 141
 142 struct _factor { int factor, count; } factors[NUM_FACTORS]
 143   = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
 144
 145 /* Describes the different types of loop unrolling performed.  */
 146
 147 enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };
 148
 149 #include "config.h"
 150 #include "system.h"
 151 #include "rtl.h"
 152 #include "insn-config.h"
 153 #include "integrate.h"
 154 #include "regs.h"
 155 #include "recog.h"
 156 #include "flags.h"
 157 #include "expr.h"
 158 #include "loop.h"
 159
 160 /* This controls which loops are unrolled, and by how much we unroll
 161    them.  */
 162
 163 #ifndef MAX_UNROLLED_INSNS
 164 #define MAX_UNROLLED_INSNS 100
 165 #endif
 166
 167 /* Indexed by register number, if non-zero, then it contains a pointer
 168    to a struct induction for a DEST_REG giv which has been combined with
 169    one of more address givs.  This is needed because whenever such a DEST_REG
 170    giv is modified, we must modify the value of all split address givs
 171    that were combined with this DEST_REG giv.  */
 172
 173 static struct induction **addr_combined_regs;
 174
 175 /* Indexed by register number, if this is a splittable induction variable,
 176    then this will hold the current value of the register, which depends on the
 177    iteration number.  */
 178
 179 static rtx *splittable_regs;
 180
 181 /* Indexed by register number, if this is a splittable induction variable,
 182    then this will hold the number of instructions in the loop that modify
 183    the induction variable.  Used to ensure that only the last insn modifying
 184    a split iv will update the original iv of the dest.  */
 185
 186 static int *splittable_regs_updates;
 187
 188 /* Values describing the current loop's iteration variable.  These are set up
 189    by loop_iterations, and used by precondition_loop_p.  */
 190
 191 static rtx loop_iteration_var;
 192 static rtx loop_initial_value;
 193 static rtx loop_increment;
 194 static rtx loop_final_value;
 195 static enum rtx_code loop_comparison_code;
 196
 197 /* Forward declarations.  */
 198
 199 static void init_reg_map PROTO((struct inline_remap *, int));
 200 static int precondition_loop_p PROTO((rtx *, rtx *, rtx *, rtx, rtx));
 201 static rtx calculate_giv_inc PROTO((rtx, rtx, int));
 202 static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
 203 static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
 204 static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
 205                                   enum unroll_types, rtx, rtx, rtx, rtx));
 206 void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
 207 static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
 208 static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
 209 static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
 210                                        rtx, rtx, rtx, int));
 211 static int reg_dead_after_loop PROTO((rtx, rtx, rtx));
 212 static rtx fold_rtx_mult_add PROTO((rtx, rtx, rtx, enum machine_mode));
 213 static int verify_addresses PROTO((struct induction *, rtx, int));
 214 static rtx remap_split_bivs PROTO((rtx));
 215
 216 /* Try to unroll one loop and split induction variables in the loop.
 217
 218    The loop is described by the arguments LOOP_END, INSN_COUNT, and
 219    LOOP_START.  END_INSERT_BEFORE indicates where insns should be added
 220    which need to be executed when the loop falls through.  STRENGTH_REDUCTION_P
 221    indicates whether information generated in the strength reduction pass
 222    is available.
 223
 224    This function is intended to be called from within `strength_reduce'
 225    in loop.c.  */
 226
 227 void
 228 unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
 229              strength_reduce_p)
 230      rtx loop_end;
 231      int insn_count;
 232      rtx loop_start;
 233      rtx end_insert_before;
 234      int strength_reduce_p;
 235 {
 236   int i, j, temp;
 237   int unroll_number = 1;
 238   rtx copy_start, copy_end;
 239   rtx insn, sequence, pattern, tem;
 240   int max_labelno, max_insnno;
 241   rtx insert_before;
 242   struct inline_remap *map;
 243   char *local_label;
 244   char *local_regno;
 245   int maxregnum;
 246   int new_maxregnum;
 247   rtx exit_label = 0;
 248   rtx start_label;
 249   struct iv_class *bl;
 250   int splitting_not_safe = 0;
 251   enum unroll_types unroll_type;
 252   int loop_preconditioned = 0;
 253   rtx safety_label;
 254   /* This points to the last real insn in the loop, which should be either
 255      a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
 256      jumps).  */
 257   rtx last_loop_insn;
 258
 259   /* Don't bother unrolling huge loops.  Since the minimum factor is
 260      two, loops greater than one half of MAX_UNROLLED_INSNS will never
 261      be unrolled.  */
 262   if (insn_count > MAX_UNROLLED_INSNS / 2)
 263     {
 264       if (loop_dump_stream)
 265         fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
 266       return;
 267     }
 268
 269   /* When emitting debugger info, we can't unroll loops with unequal numbers
 270      of block_beg and block_end notes, because that would unbalance the block
 271      structure of the function.  This can happen as a result of the
 272      "if (foo) bar; else break;" optimization in jump.c.  */
 273   /* ??? Gcc has a general policy that -g is never supposed to change the code
 274      that the compiler emits, so we must disable this optimization always,
 275      even if debug info is not being output.  This is rare, so this should
 276      not be a significant performance problem.  */
 277
 278   if (1 /* write_symbols != NO_DEBUG */)
 279     {
 280       int block_begins = 0;
 281       int block_ends = 0;
 282
 283       for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
 284         {
 285           if (GET_CODE (insn) == NOTE)
 286             {
 287               if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
 288                 block_begins++;
 289               else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
 290                 block_ends++;
 291             }
 292         }
 293
 294       if (block_begins != block_ends)
 295         {
 296           if (loop_dump_stream)
 297             fprintf (loop_dump_stream,
 298                      "Unrolling failure: Unbalanced block notes.\n");
 299           return;
 300         }
 301     }
 302
 303   /* Determine type of unroll to perform.  Depends on the number of iterations
 304      and the size of the loop.  */
 305
 306   /* If there is no strength reduce info, then set loop_n_iterations to zero.
 307      This can happen if strength_reduce can't find any bivs in the loop.
 308      A value of zero indicates that the number of iterations could not be
 309      calculated.  */
 310
 311   if (! strength_reduce_p)
 312     loop_n_iterations = 0;
 313
 314   if (loop_dump_stream && loop_n_iterations > 0)
 315     fprintf (loop_dump_stream,
 316              "Loop unrolling: %d iterations.\n", loop_n_iterations);
 317
 318   /* Find and save a pointer to the last nonnote insn in the loop.  */
 319
 320   last_loop_insn = prev_nonnote_insn (loop_end);
 321
 322   /* Calculate how many times to unroll the loop.  Indicate whether or
 323      not the loop is being completely unrolled.  */
 324
 325   if (loop_n_iterations == 1)
 326     {
 327       /* If number of iterations is exactly 1, then eliminate the compare and
 328          branch at the end of the loop since they will never be taken.
 329          Then return, since no other action is needed here.  */
 330
 331       /* If the last instruction is not a BARRIER or a JUMP_INSN, then
 332          don't do anything.  */
 333
 334       if (GET_CODE (last_loop_insn) == BARRIER)
 335         {
 336           /* Delete the jump insn.  This will delete the barrier also.  */
 337           delete_insn (PREV_INSN (last_loop_insn));
 338         }
 339       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 340         {
 341 #ifdef HAVE_cc0
 342           /* The immediately preceding insn is a compare which must be
 343              deleted.  */
 344           delete_insn (last_loop_insn);
 345           delete_insn (PREV_INSN (last_loop_insn));
 346 #else
 347           /* The immediately preceding insn may not be the compare, so don't
 348              delete it.  */
 349           delete_insn (last_loop_insn);
 350 #endif
 351         }
 352       return;
 353     }
 354   else if (loop_n_iterations > 0
 355       && loop_n_iterations * insn_count < MAX_UNROLLED_INSNS)
 356     {
 357       unroll_number = loop_n_iterations;
 358       unroll_type = UNROLL_COMPLETELY;
 359     }
 360   else if (loop_n_iterations > 0)
 361     {
 362       /* Try to factor the number of iterations.  Don't bother with the
 363          general case, only using 2, 3, 5, and 7 will get 75% of all
 364          numbers theoretically, and almost all in practice.  */
 365
 366       for (i = 0; i < NUM_FACTORS; i++)
 367         factors[i].count = 0;
 368
 369       temp = loop_n_iterations;
 370       for (i = NUM_FACTORS - 1; i >= 0; i--)
 371         while (temp % factors[i].factor == 0)
 372           {
 373             factors[i].count++;
 374             temp = temp / factors[i].factor;
 375           }
 376
 377       /* Start with the larger factors first so that we generally
 378          get lots of unrolling.  */
 379
 380       unroll_number = 1;
 381       temp = insn_count;
 382       for (i = 3; i >= 0; i--)
 383         while (factors[i].count--)
 384           {
 385             if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
 386               {
 387                 unroll_number *= factors[i].factor;
 388                 temp *= factors[i].factor;
 389               }
 390             else
 391               break;
 392           }
 393
 394       /* If we couldn't find any factors, then unroll as in the normal
 395          case.  */
 396       if (unroll_number == 1)
 397         {
 398           if (loop_dump_stream)
 399             fprintf (loop_dump_stream,
 400                      "Loop unrolling: No factors found.\n");
 401         }
 402       else
 403         unroll_type = UNROLL_MODULO;
 404     }
 405
 406
 407   /* Default case, calculate number of times to unroll loop based on its
 408      size.  */
 409   if (unroll_number == 1)
 410     {
 411       if (8 * insn_count < MAX_UNROLLED_INSNS)
 412         unroll_number = 8;
 413       else if (4 * insn_count < MAX_UNROLLED_INSNS)
 414         unroll_number = 4;
 415       else
 416         unroll_number = 2;
 417
 418       unroll_type = UNROLL_NAIVE;
 419     }
 420
 421   /* Now we know how many times to unroll the loop.  */
 422
 423   if (loop_dump_stream)
 424     fprintf (loop_dump_stream,
 425              "Unrolling loop %d times.\n", unroll_number);
 426
 427
 428   if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
 429     {
 430       /* Loops of these types should never start with a jump down to
 431          the exit condition test.  For now, check for this case just to
 432          be sure.  UNROLL_NAIVE loops can be of this form, this case is
 433          handled below.  */
 434       insn = loop_start;
 435       while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
 436         insn = NEXT_INSN (insn);
 437       if (GET_CODE (insn) == JUMP_INSN)
 438         abort ();
 439     }
 440
 441   if (unroll_type == UNROLL_COMPLETELY)
 442     {
 443       /* Completely unrolling the loop:  Delete the compare and branch at
 444          the end (the last two instructions).   This delete must done at the
 445          very end of loop unrolling, to avoid problems with calls to
 446          back_branch_in_range_p, which is called by find_splittable_regs.
 447          All increments of splittable bivs/givs are changed to load constant
 448          instructions.  */
 449
 450       copy_start = loop_start;
 451
 452       /* Set insert_before to the instruction immediately after the JUMP_INSN
 453          (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
 454          the loop will be correctly handled by copy_loop_body.  */
 455       insert_before = NEXT_INSN (last_loop_insn);
 456
 457       /* Set copy_end to the insn before the jump at the end of the loop.  */
 458       if (GET_CODE (last_loop_insn) == BARRIER)
 459         copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
 460       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 461         {
 462 #ifdef HAVE_cc0
 463           /* The instruction immediately before the JUMP_INSN is a compare
 464              instruction which we do not want to copy.  */
 465           copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
 466 #else
 467           /* The instruction immediately before the JUMP_INSN may not be the
 468              compare, so we must copy it.  */
 469           copy_end = PREV_INSN (last_loop_insn);
 470 #endif
 471         }
 472       else
 473         {
 474           /* We currently can't unroll a loop if it doesn't end with a
 475              JUMP_INSN.  There would need to be a mechanism that recognizes
 476              this case, and then inserts a jump after each loop body, which
 477              jumps to after the last loop body.  */
 478           if (loop_dump_stream)
 479             fprintf (loop_dump_stream,
 480                      "Unrolling failure: loop does not end with a JUMP_INSN.\n");
 481           return;
 482         }
 483     }
 484   else if (unroll_type == UNROLL_MODULO)
 485     {
 486       /* Partially unrolling the loop:  The compare and branch at the end
 487          (the last two instructions) must remain.  Don't copy the compare
 488          and branch instructions at the end of the loop.  Insert the unrolled
 489          code immediately before the compare/branch at the end so that the
 490          code will fall through to them as before.  */
 491
 492       copy_start = loop_start;
 493
 494       /* Set insert_before to the jump insn at the end of the loop.
 495          Set copy_end to before the jump insn at the end of the loop.  */
 496       if (GET_CODE (last_loop_insn) == BARRIER)
 497         {
 498           insert_before = PREV_INSN (last_loop_insn);
 499           copy_end = PREV_INSN (insert_before);
 500         }
 501       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 502         {
 503 #ifdef HAVE_cc0
 504           /* The instruction immediately before the JUMP_INSN is a compare
 505              instruction which we do not want to copy or delete.  */
 506           insert_before = PREV_INSN (last_loop_insn);
 507           copy_end = PREV_INSN (insert_before);
 508 #else
 509           /* The instruction immediately before the JUMP_INSN may not be the
 510              compare, so we must copy it.  */
 511           insert_before = last_loop_insn;
 512           copy_end = PREV_INSN (last_loop_insn);
 513 #endif
 514         }
 515       else
 516         {
 517           /* We currently can't unroll a loop if it doesn't end with a
 518              JUMP_INSN.  There would need to be a mechanism that recognizes
 519              this case, and then inserts a jump after each loop body, which
 520              jumps to after the last loop body.  */
 521           if (loop_dump_stream)
 522             fprintf (loop_dump_stream,
 523                      "Unrolling failure: loop does not end with a JUMP_INSN.\n");
 524           return;
 525         }
 526     }
 527   else
 528     {
 529       /* Normal case: Must copy the compare and branch instructions at the
 530          end of the loop.  */
 531
 532       if (GET_CODE (last_loop_insn) == BARRIER)
 533         {
 534           /* Loop ends with an unconditional jump and a barrier.
 535              Handle this like above, don't copy jump and barrier.
 536              This is not strictly necessary, but doing so prevents generating
 537              unconditional jumps to an immediately following label.
 538
 539              This will be corrected below if the target of this jump is
 540              not the start_label.  */
 541
 542           insert_before = PREV_INSN (last_loop_insn);
 543           copy_end = PREV_INSN (insert_before);
 544         }
 545       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 546         {
 547           /* Set insert_before to immediately after the JUMP_INSN, so that
 548              NOTEs at the end of the loop will be correctly handled by
 549              copy_loop_body.  */
 550           insert_before = NEXT_INSN (last_loop_insn);
 551           copy_end = last_loop_insn;
 552         }
 553       else
 554         {
 555           /* We currently can't unroll a loop if it doesn't end with a
 556              JUMP_INSN.  There would need to be a mechanism that recognizes
 557              this case, and then inserts a jump after each loop body, which
 558              jumps to after the last loop body.  */
 559           if (loop_dump_stream)
 560             fprintf (loop_dump_stream,
 561                      "Unrolling failure: loop does not end with a JUMP_INSN.\n");
 562           return;
 563         }
 564
 565       /* If copying exit test branches because they can not be eliminated,
 566          then must convert the fall through case of the branch to a jump past
 567          the end of the loop.  Create a label to emit after the loop and save
 568          it for later use.  Do not use the label after the loop, if any, since
 569          it might be used by insns outside the loop, or there might be insns
 570          added before it later by final_[bg]iv_value which must be after
 571          the real exit label.  */
 572       exit_label = gen_label_rtx ();
 573
 574       insn = loop_start;
 575       while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
 576         insn = NEXT_INSN (insn);
 577
 578       if (GET_CODE (insn) == JUMP_INSN)
 579         {
 580           /* The loop starts with a jump down to the exit condition test.
 581              Start copying the loop after the barrier following this
 582              jump insn.  */
 583           copy_start = NEXT_INSN (insn);
 584
 585           /* Splitting induction variables doesn't work when the loop is
 586              entered via a jump to the bottom, because then we end up doing
 587              a comparison against a new register for a split variable, but
 588              we did not execute the set insn for the new register because
 589              it was skipped over.  */
 590           splitting_not_safe = 1;
 591           if (loop_dump_stream)
 592             fprintf (loop_dump_stream,
 593                      "Splitting not safe, because loop not entered at top.\n");
 594         }
 595       else
 596         copy_start = loop_start;
 597     }
 598
 599   /* This should always be the first label in the loop.  */
 600   start_label = NEXT_INSN (copy_start);
 601   /* There may be a line number note and/or a loop continue note here.  */
 602   while (GET_CODE (start_label) == NOTE)
 603     start_label = NEXT_INSN (start_label);
 604   if (GET_CODE (start_label) != CODE_LABEL)
 605     {
 606       /* This can happen as a result of jump threading.  If the first insns in
 607          the loop test the same condition as the loop's backward jump, or the
 608          opposite condition, then the backward jump will be modified to point
 609          to elsewhere, and the loop's start label is deleted.
 610
 611          This case currently can not be handled by the loop unrolling code.  */
 612
 613       if (loop_dump_stream)
 614         fprintf (loop_dump_stream,
 615                  "Unrolling failure: unknown insns between BEG note and loop label.\n");
 616       return;
 617     }
 618   if (LABEL_NAME (start_label))
 619     {
 620       /* The jump optimization pass must have combined the original start label
 621          with a named label for a goto.  We can't unroll this case because
 622          jumps which go to the named label must be handled differently than
 623          jumps to the loop start, and it is impossible to differentiate them
 624          in this case.  */
 625       if (loop_dump_stream)
 626         fprintf (loop_dump_stream,
 627                  "Unrolling failure: loop start label is gone\n");
 628       return;
 629     }
 630
 631   if (unroll_type == UNROLL_NAIVE
 632       && GET_CODE (last_loop_insn) == BARRIER
 633       && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
 634     {
 635       /* In this case, we must copy the jump and barrier, because they will
 636          not be converted to jumps to an immediately following label.  */
 637
 638       insert_before = NEXT_INSN (last_loop_insn);
 639       copy_end = last_loop_insn;
 640     }
 641
 642   if (unroll_type == UNROLL_NAIVE
 643       && GET_CODE (last_loop_insn) == JUMP_INSN
 644       && start_label != JUMP_LABEL (last_loop_insn))
 645     {
 646       /* ??? The loop ends with a conditional branch that does not branch back
 647          to the loop start label.  In this case, we must emit an unconditional
 648          branch to the loop exit after emitting the final branch.
 649          copy_loop_body does not have support for this currently, so we
 650          give up.  It doesn't seem worthwhile to unroll anyways since
 651          unrolling would increase the number of branch instructions
 652          executed.  */
 653       if (loop_dump_stream)
 654         fprintf (loop_dump_stream,
 655                  "Unrolling failure: final conditional branch not to loop start\n");
 656       return;
 657     }
 658
 659   /* Allocate a translation table for the labels and insn numbers.
 660      They will be filled in as we copy the insns in the loop.  */
 661
 662   max_labelno = max_label_num ();
 663   max_insnno = get_max_uid ();
 664
 665   map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
 666
 667   map->integrating = 0;
 668
 669   /* Allocate the label map.  */
 670
 671   if (max_labelno > 0)
 672     {
 673       map->label_map = (rtx *) alloca (max_labelno * sizeof (rtx));
 674
 675       local_label = (char *) alloca (max_labelno);
 676       bzero (local_label, max_labelno);
 677     }
 678   else
 679     map->label_map = 0;
 680
 681   /* Search the loop and mark all local labels, i.e. the ones which have to
 682      be distinct labels when copied.  For all labels which might be
 683      non-local, set their label_map entries to point to themselves.
 684      If they happen to be local their label_map entries will be overwritten
 685      before the loop body is copied.  The label_map entries for local labels
 686      will be set to a different value each time the loop body is copied.  */
 687
 688   for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
 689     {
 690       if (GET_CODE (insn) == CODE_LABEL)
 691         local_label[CODE_LABEL_NUMBER (insn)] = 1;
 692       else if (GET_CODE (insn) == JUMP_INSN)
 693         {
 694           if (JUMP_LABEL (insn))
 695             set_label_in_map (map,
 696                               CODE_LABEL_NUMBER (JUMP_LABEL (insn)),
 697                               JUMP_LABEL (insn));
 698           else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
 699                    || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
 700             {
 701               rtx pat = PATTERN (insn);
 702               int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
 703               int len = XVECLEN (pat, diff_vec_p);
 704               rtx label;
 705
 706               for (i = 0; i < len; i++)
 707                 {
 708                   label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
 709                   set_label_in_map (map,
 710                                     CODE_LABEL_NUMBER (label),
 711                                     label);
 712                 }
 713             }
 714         }
 715     }
 716
 717   /* Allocate space for the insn map.  */
 718
 719   map->insn_map = (rtx *) alloca (max_insnno * sizeof (rtx));
 720
 721   /* Set this to zero, to indicate that we are doing loop unrolling,
 722      not function inlining.  */
 723   map->inline_target = 0;
 724
 725   /* The register and constant maps depend on the number of registers
 726      present, so the final maps can't be created until after
 727      find_splittable_regs is called.  However, they are needed for
 728      preconditioning, so we create temporary maps when preconditioning
 729      is performed.  */
 730
 731   /* The preconditioning code may allocate two new pseudo registers.  */
 732   maxregnum = max_reg_num ();
 733
 734   /* Allocate and zero out the splittable_regs and addr_combined_regs
 735      arrays.  These must be zeroed here because they will be used if
 736      loop preconditioning is performed, and must be zero for that case.
 737
 738      It is safe to do this here, since the extra registers created by the
 739      preconditioning code and find_splittable_regs will never be used
 740      to access the splittable_regs[] and addr_combined_regs[] arrays.  */
 741
 742   splittable_regs = (rtx *) alloca (maxregnum * sizeof (rtx));
 743   bzero ((char *) splittable_regs, maxregnum * sizeof (rtx));
 744   splittable_regs_updates = (int *) alloca (maxregnum * sizeof (int));
 745   bzero ((char *) splittable_regs_updates, maxregnum * sizeof (int));
 746   addr_combined_regs
 747     = (struct induction **) alloca (maxregnum * sizeof (struct induction *));
 748   bzero ((char *) addr_combined_regs, maxregnum * sizeof (struct induction *));
 749   /* We must limit it to max_reg_before_loop, because only these pseudo
 750      registers have valid regno_first_uid info.  Any register created after
 751      that is unlikely to be local to the loop anyways.  */
 752   local_regno = (char *) alloca (max_reg_before_loop);
 753   bzero (local_regno, max_reg_before_loop);
 754
 755   /* Mark all local registers, i.e. the ones which are referenced only
 756      inside the loop.  */
 757   if (INSN_UID (copy_end) < max_uid_for_loop)
 758   {
 759     int copy_start_luid = INSN_LUID (copy_start);
 760     int copy_end_luid = INSN_LUID (copy_end);
 761
 762     /* If a register is used in the jump insn, we must not duplicate it
 763        since it will also be used outside the loop.  */
 764     if (GET_CODE (copy_end) == JUMP_INSN)
 765       copy_end_luid--;
 766     /* If copy_start points to the NOTE that starts the loop, then we must
 767        use the next luid, because invariant pseudo-regs moved out of the loop
 768        have their lifetimes modified to start here, but they are not safe
 769        to duplicate.  */
 770     if (copy_start == loop_start)
 771       copy_start_luid++;
 772
 773     /* If a pseudo's lifetime is entirely contained within this loop, then we
 774        can use a different pseudo in each unrolled copy of the loop.  This
 775        results in better code.  */
 776     for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; ++j)
 777       if (REGNO_FIRST_UID (j) > 0 && REGNO_FIRST_UID (j) <= max_uid_for_loop
 778           && uid_luid[REGNO_FIRST_UID (j)] >= copy_start_luid
 779           && REGNO_LAST_UID (j) > 0 && REGNO_LAST_UID (j) <= max_uid_for_loop
 780           && uid_luid[REGNO_LAST_UID (j)] <= copy_end_luid)
 781         {
 782           /* However, we must also check for loop-carried dependencies.
 783              If the value the pseudo has at the end of iteration X is
 784              used by iteration X+1, then we can not use a different pseudo
 785              for each unrolled copy of the loop.  */
 786           /* A pseudo is safe if regno_first_uid is a set, and this
 787              set dominates all instructions from regno_first_uid to
 788              regno_last_uid.  */
 789           /* ??? This check is simplistic.  We would get better code if
 790              this check was more sophisticated.  */
 791           if (set_dominates_use (j, REGNO_FIRST_UID (j), REGNO_LAST_UID (j),
 792                                  copy_start, copy_end))
 793             local_regno[j] = 1;
 794
 795           if (loop_dump_stream)
 796             {
 797               if (local_regno[j])
 798                 fprintf (loop_dump_stream, "Marked reg %d as local\n", j);
 799               else
 800                 fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
 801                          j);
 802             }
 803         }
 804   }
 805
 806   /* If this loop requires exit tests when unrolled, check to see if we
 807      can precondition the loop so as to make the exit tests unnecessary.
 808      Just like variable splitting, this is not safe if the loop is entered
 809      via a jump to the bottom.  Also, can not do this if no strength
 810      reduce info, because precondition_loop_p uses this info.  */
 811
 812   /* Must copy the loop body for preconditioning before the following
 813      find_splittable_regs call since that will emit insns which need to
 814      be after the preconditioned loop copies, but immediately before the
 815      unrolled loop copies.  */
 816
 817   /* Also, it is not safe to split induction variables for the preconditioned
 818      copies of the loop body.  If we split induction variables, then the code
 819      assumes that each induction variable can be represented as a function
 820      of its initial value and the loop iteration number.  This is not true
 821      in this case, because the last preconditioned copy of the loop body
 822      could be any iteration from the first up to the `unroll_number-1'th,
 823      depending on the initial value of the iteration variable.  Therefore
 824      we can not split induction variables here, because we can not calculate
 825      their value.  Hence, this code must occur before find_splittable_regs
 826      is called.  */
 827
 828   if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
 829     {
 830       rtx initial_value, final_value, increment;
 831
 832       if (precondition_loop_p (&initial_value, &final_value, &increment,
 833                                loop_start, loop_end))
 834         {
 835           register rtx diff ;
 836           enum machine_mode mode;
 837           rtx *labels;
 838           int abs_inc, neg_inc;
 839
 840           map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
 841
 842           map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx));
 843           map->const_age_map = (unsigned *) alloca (maxregnum
 844                                                     * sizeof (unsigned));
 845           map->const_equiv_map_size = maxregnum;
 846           global_const_equiv_map = map->const_equiv_map;
 847           global_const_equiv_map_size = maxregnum;
 848
 849           init_reg_map (map, maxregnum);
 850
 851           /* Limit loop unrolling to 4, since this will make 7 copies of
 852              the loop body.  */
 853           if (unroll_number > 4)
 854             unroll_number = 4;
 855
 856           /* Save the absolute value of the increment, and also whether or
 857              not it is negative.  */
 858           neg_inc = 0;
 859           abs_inc = INTVAL (increment);
 860           if (abs_inc < 0)
 861             {
 862               abs_inc = - abs_inc;
 863               neg_inc = 1;
 864             }
 865
 866           start_sequence ();
 867
 868           /* Decide what mode to do these calculations in.  Choose the larger
 869              of final_value's mode and initial_value's mode, or a full-word if
 870              both are constants.  */
 871           mode = GET_MODE (final_value);
 872           if (mode == VOIDmode)
 873             {
 874               mode = GET_MODE (initial_value);
 875               if (mode == VOIDmode)
 876                 mode = word_mode;
 877             }
 878           else if (mode != GET_MODE (initial_value)
 879                    && (GET_MODE_SIZE (mode)
 880                        < GET_MODE_SIZE (GET_MODE (initial_value))))
 881             mode = GET_MODE (initial_value);
 882
 883           /* Calculate the difference between the final and initial values.
 884              Final value may be a (plus (reg x) (const_int 1)) rtx.
 885              Let the following cse pass simplify this if initial value is
 886              a constant.
 887
 888              We must copy the final and initial values here to avoid
 889              improperly shared rtl.  */
 890
 891           diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
 892                                copy_rtx (initial_value), NULL_RTX, 0,
 893                                OPTAB_LIB_WIDEN);
 894
 895           /* Now calculate (diff % (unroll * abs (increment))) by using an
 896              and instruction.  */
 897           diff = expand_binop (GET_MODE (diff), and_optab, diff,
 898                                GEN_INT (unroll_number * abs_inc - 1),
 899                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 900
 901           /* Now emit a sequence of branches to jump to the proper precond
 902              loop entry point.  */
 903
 904           labels = (rtx *) alloca (sizeof (rtx) * unroll_number);
 905           for (i = 0; i < unroll_number; i++)
 906             labels[i] = gen_label_rtx ();
 907
 908           /* Check for the case where the initial value is greater than or
 909              equal to the final value.  In that case, we want to execute
 910              exactly one loop iteration.  The code below will fail for this
 911              case.  This check does not apply if the loop has a NE
 912              comparison at the end.  */
 913
 914           if (loop_comparison_code != NE)
 915             {
 916               emit_cmp_insn (initial_value, final_value, neg_inc ? LE : GE,
 917                              NULL_RTX, mode, 0, 0);
 918               if (neg_inc)
 919                 emit_jump_insn (gen_ble (labels[1]));
 920               else
 921                 emit_jump_insn (gen_bge (labels[1]));
 922               JUMP_LABEL (get_last_insn ()) = labels[1];
 923               LABEL_NUSES (labels[1])++;
 924             }
 925
 926           /* Assuming the unroll_number is 4, and the increment is 2, then
 927              for a negative increment:  for a positive increment:
 928              diff = 0,1   precond 0     diff = 0,7   precond 0
 929              diff = 2,3   precond 3     diff = 1,2   precond 1
 930              diff = 4,5   precond 2     diff = 3,4   precond 2
 931              diff = 6,7   precond 1     diff = 5,6   precond 3  */
 932
 933           /* We only need to emit (unroll_number - 1) branches here, the
 934              last case just falls through to the following code.  */
 935
 936           /* ??? This would give better code if we emitted a tree of branches
 937              instead of the current linear list of branches.  */
 938
 939           for (i = 0; i < unroll_number - 1; i++)
 940             {
 941               int cmp_const;
 942               enum rtx_code cmp_code;
 943
 944               /* For negative increments, must invert the constant compared
 945                  against, except when comparing against zero.  */
 946               if (i == 0)
 947                 {
 948                   cmp_const = 0;
 949                   cmp_code = EQ;
 950                 }
 951               else if (neg_inc)
 952                 {
 953                   cmp_const = unroll_number - i;
 954                   cmp_code = GE;
 955                 }
 956               else
 957                 {
 958                   cmp_const = i;
 959                   cmp_code = LE;
 960                 }
 961
 962               emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const),
 963                              cmp_code, NULL_RTX, mode, 0, 0);
 964
 965               if (i == 0)
 966                 emit_jump_insn (gen_beq (labels[i]));
 967               else if (neg_inc)
 968                 emit_jump_insn (gen_bge (labels[i]));
 969               else
 970                 emit_jump_insn (gen_ble (labels[i]));
 971               JUMP_LABEL (get_last_insn ()) = labels[i];
 972               LABEL_NUSES (labels[i])++;
 973             }
 974
 975           /* If the increment is greater than one, then we need another branch,
 976              to handle other cases equivalent to 0.  */
 977
 978           /* ??? This should be merged into the code above somehow to help
 979              simplify the code here, and reduce the number of branches emitted.
 980              For the negative increment case, the branch here could easily
 981              be merged with the `0' case branch above.  For the positive
 982              increment case, it is not clear how this can be simplified.  */
 983
 984           if (abs_inc != 1)
 985             {
 986               int cmp_const;
 987               enum rtx_code cmp_code;
 988
 989               if (neg_inc)
 990                 {
 991                   cmp_const = abs_inc - 1;
 992                   cmp_code = LE;
 993                 }
 994               else
 995                 {
 996                   cmp_const = abs_inc * (unroll_number - 1) + 1;
 997                   cmp_code = GE;
 998                 }
 999
1000               emit_cmp_insn (diff, GEN_INT (cmp_const), cmp_code, NULL_RTX,
1001                              mode, 0, 0);
1002
1003               if (neg_inc)
1004                 emit_jump_insn (gen_ble (labels[0]));
1005               else
1006                 emit_jump_insn (gen_bge (labels[0]));
1007               JUMP_LABEL (get_last_insn ()) = labels[0];
1008               LABEL_NUSES (labels[0])++;
1009             }
1010
1011           sequence = gen_sequence ();
1012           end_sequence ();
1013           emit_insn_before (sequence, loop_start);
1014
1015           /* Only the last copy of the loop body here needs the exit
1016              test, so set copy_end to exclude the compare/branch here,
1017              and then reset it inside the loop when get to the last
1018              copy.  */
1019
1020           if (GET_CODE (last_loop_insn) == BARRIER)
1021             copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1022           else if (GET_CODE (last_loop_insn) == JUMP_INSN)
1023             {
1024 #ifdef HAVE_cc0
1025               /* The immediately preceding insn is a compare which we do not
1026                  want to copy.  */
1027               copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1028 #else
1029               /* The immediately preceding insn may not be a compare, so we
1030                  must copy it.  */
1031               copy_end = PREV_INSN (last_loop_insn);
1032 #endif
1033             }
1034           else
1035             abort ();
1036
1037           for (i = 1; i < unroll_number; i++)
1038             {
1039               emit_label_after (labels[unroll_number - i],
1040                                 PREV_INSN (loop_start));
1041
1042               bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1043               bzero ((char *) map->const_equiv_map, maxregnum * sizeof (rtx));
1044               bzero ((char *) map->const_age_map,
1045                      maxregnum * sizeof (unsigned));
1046               map->const_age = 0;
1047
1048               for (j = 0; j < max_labelno; j++)
1049                 if (local_label[j])
1050                   set_label_in_map (map, j, gen_label_rtx ());
1051
1052               for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1053                 if (local_regno[j])
1054                   {
1055                     map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1056                     record_base_value (REGNO (map->reg_map[j]),
1057                                        regno_reg_rtx[j]);
1058                   }
1059               /* The last copy needs the compare/branch insns at the end,
1060                  so reset copy_end here if the loop ends with a conditional
1061                  branch.  */
1062
1063               if (i == unroll_number - 1)
1064                 {
1065                   if (GET_CODE (last_loop_insn) == BARRIER)
1066                     copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1067                   else
1068                     copy_end = last_loop_insn;
1069                 }
1070
1071               /* None of the copies are the `last_iteration', so just
1072                  pass zero for that parameter.  */
1073               copy_loop_body (copy_start, copy_end, map, exit_label, 0,
1074                               unroll_type, start_label, loop_end,
1075                               loop_start, copy_end);
1076             }
1077           emit_label_after (labels[0], PREV_INSN (loop_start));
1078
1079           if (GET_CODE (last_loop_insn) == BARRIER)
1080             {
1081               insert_before = PREV_INSN (last_loop_insn);
1082               copy_end = PREV_INSN (insert_before);
1083             }
1084           else
1085             {
1086 #ifdef HAVE_cc0
1087               /* The immediately preceding insn is a compare which we do not
1088                  want to copy.  */
1089               insert_before = PREV_INSN (last_loop_insn);
1090               copy_end = PREV_INSN (insert_before);
1091 #else
1092               /* The immediately preceding insn may not be a compare, so we
1093                  must copy it.  */
1094               insert_before = last_loop_insn;
1095               copy_end = PREV_INSN (last_loop_insn);
1096 #endif
1097             }
1098
1099           /* Set unroll type to MODULO now.  */
1100           unroll_type = UNROLL_MODULO;
1101           loop_preconditioned = 1;
1102
1103 #ifdef HAIFA
1104           /* Fix the initial value for the loop as needed.  */
1105           if (loop_n_iterations <= 0)
1106             loop_start_value [uid_loop_num [INSN_UID (loop_start)]]
1107               = initial_value;
1108 #endif
1109         }
1110     }
1111
1112   /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1113      the loop unless all loops are being unrolled.  */
1114   if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1115     {
1116       if (loop_dump_stream)
1117         fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
1118       return;
1119     }
1120
1121   /* At this point, we are guaranteed to unroll the loop.  */
1122
1123   /* Keep track of the unroll factor for each loop.  */
1124   if (unroll_type == UNROLL_COMPLETELY)
1125     loop_unroll_factor [uid_loop_num [INSN_UID (loop_start)]] = -1;
1126   else
1127     loop_unroll_factor [uid_loop_num [INSN_UID (loop_start)]] = unroll_number;
1128
1129
1130   /* For each biv and giv, determine whether it can be safely split into
1131      a different variable for each unrolled copy of the loop body.
1132      We precalculate and save this info here, since computing it is
1133      expensive.
1134
1135      Do this before deleting any instructions from the loop, so that
1136      back_branch_in_range_p will work correctly.  */
1137
1138   if (splitting_not_safe)
1139     temp = 0;
1140   else
1141     temp = find_splittable_regs (unroll_type, loop_start, loop_end,
1142                                 end_insert_before, unroll_number);
1143
1144   /* find_splittable_regs may have created some new registers, so must
1145      reallocate the reg_map with the new larger size, and must realloc
1146      the constant maps also.  */
1147
1148   maxregnum = max_reg_num ();
1149   map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
1150
1151   init_reg_map (map, maxregnum);
1152
1153   /* Space is needed in some of the map for new registers, so new_maxregnum
1154      is an (over)estimate of how many registers will exist at the end.  */
1155   new_maxregnum = maxregnum + (temp * unroll_number * 2);
1156
1157   /* Must realloc space for the constant maps, because the number of registers
1158      may have changed.  */
1159
1160   map->const_equiv_map = (rtx *) alloca (new_maxregnum * sizeof (rtx));
1161   map->const_age_map = (unsigned *) alloca (new_maxregnum * sizeof (unsigned));
1162
1163   map->const_equiv_map_size = new_maxregnum;
1164   global_const_equiv_map = map->const_equiv_map;
1165   global_const_equiv_map_size = new_maxregnum;
1166
1167   /* Search the list of bivs and givs to find ones which need to be remapped
1168      when split, and set their reg_map entry appropriately.  */
1169
1170   for (bl = loop_iv_list; bl; bl = bl->next)
1171     {
1172       if (REGNO (bl->biv->src_reg) != bl->regno)
1173         map->reg_map[bl->regno] = bl->biv->src_reg;
1174 #if 0
1175       /* Currently, non-reduced/final-value givs are never split.  */
1176       for (v = bl->giv; v; v = v->next_iv)
1177         if (REGNO (v->src_reg) != bl->regno)
1178           map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1179 #endif
1180     }
1181
1182   /* Use our current register alignment and pointer flags.  */
1183   map->regno_pointer_flag = regno_pointer_flag;
1184   map->regno_pointer_align = regno_pointer_align;
1185
1186   /* If the loop is being partially unrolled, and the iteration variables
1187      are being split, and are being renamed for the split, then must fix up
1188      the compare/jump instruction at the end of the loop to refer to the new
1189      registers.  This compare isn't copied, so the registers used in it
1190      will never be replaced if it isn't done here.  */
1191
1192   if (unroll_type == UNROLL_MODULO)
1193     {
1194       insn = NEXT_INSN (copy_end);
1195       if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1196         PATTERN (insn) = remap_split_bivs (PATTERN (insn));
1197     }
1198
1199   /* For unroll_number - 1 times, make a copy of each instruction
1200      between copy_start and copy_end, and insert these new instructions
1201      before the end of the loop.  */
1202
1203   for (i = 0; i < unroll_number; i++)
1204     {
1205       bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1206       bzero ((char *) map->const_equiv_map, new_maxregnum * sizeof (rtx));
1207       bzero ((char *) map->const_age_map, new_maxregnum * sizeof (unsigned));
1208       map->const_age = 0;
1209
1210       for (j = 0; j < max_labelno; j++)
1211         if (local_label[j])
1212           set_label_in_map (map, j, gen_label_rtx ());
1213
1214       for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1215         if (local_regno[j])
1216           {
1217             map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1218             record_base_value (REGNO (map->reg_map[j]),
1219                                regno_reg_rtx[j]);
1220           }
1221
1222       /* If loop starts with a branch to the test, then fix it so that
1223          it points to the test of the first unrolled copy of the loop.  */
1224       if (i == 0 && loop_start != copy_start)
1225         {
1226           insn = PREV_INSN (copy_start);
1227           pattern = PATTERN (insn);
1228
1229           tem = get_label_from_map (map,
1230                                     CODE_LABEL_NUMBER
1231                                     (XEXP (SET_SRC (pattern), 0)));
1232           SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem);
1233
1234           /* Set the jump label so that it can be used by later loop unrolling
1235              passes.  */
1236           JUMP_LABEL (insn) = tem;
1237           LABEL_NUSES (tem)++;
1238         }
1239
1240       copy_loop_body (copy_start, copy_end, map, exit_label,
1241                       i == unroll_number - 1, unroll_type, start_label,
1242                       loop_end, insert_before, insert_before);
1243     }
1244
1245   /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1246      insn to be deleted.  This prevents any runaway delete_insn call from
1247      more insns that it should, as it always stops at a CODE_LABEL.  */
1248
1249   /* Delete the compare and branch at the end of the loop if completely
1250      unrolling the loop.  Deleting the backward branch at the end also
1251      deletes the code label at the start of the loop.  This is done at
1252      the very end to avoid problems with back_branch_in_range_p.  */
1253
1254   if (unroll_type == UNROLL_COMPLETELY)
1255     safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1256   else
1257     safety_label = emit_label_after (gen_label_rtx (), copy_end);
1258
1259   /* Delete all of the original loop instructions.  Don't delete the
1260      LOOP_BEG note, or the first code label in the loop.  */
1261
1262   insn = NEXT_INSN (copy_start);
1263   while (insn != safety_label)
1264     {
1265       if (insn != start_label)
1266         insn = delete_insn (insn);
1267       else
1268         insn = NEXT_INSN (insn);
1269     }
1270
1271   /* Can now delete the 'safety' label emitted to protect us from runaway
1272      delete_insn calls.  */
1273   if (INSN_DELETED_P (safety_label))
1274     abort ();
1275   delete_insn (safety_label);
1276
1277   /* If exit_label exists, emit it after the loop.  Doing the emit here
1278      forces it to have a higher INSN_UID than any insn in the unrolled loop.
1279      This is needed so that mostly_true_jump in reorg.c will treat jumps
1280      to this loop end label correctly, i.e. predict that they are usually
1281      not taken.  */
1282   if (exit_label)
1283     emit_label_after (exit_label, loop_end);
1284 }
1285 \f
1286 /* Return true if the loop can be safely, and profitably, preconditioned
1287    so that the unrolled copies of the loop body don't need exit tests.
1288
1289    This only works if final_value, initial_value and increment can be
1290    determined, and if increment is a constant power of 2.
1291    If increment is not a power of 2, then the preconditioning modulo
1292    operation would require a real modulo instead of a boolean AND, and this
1293    is not considered `profitable'.  */
1294
1295 /* ??? If the loop is known to be executed very many times, or the machine
1296    has a very cheap divide instruction, then preconditioning is a win even
1297    when the increment is not a power of 2.  Use RTX_COST to compute
1298    whether divide is cheap.  */
1299
1300 static int
1301 precondition_loop_p (initial_value, final_value, increment, loop_start,
1302                      loop_end)
1303      rtx *initial_value, *final_value, *increment;
1304      rtx loop_start, loop_end;
1305 {
1306
1307   if (loop_n_iterations > 0)
1308     {
1309       *initial_value = const0_rtx;
1310       *increment = const1_rtx;
1311       *final_value = GEN_INT (loop_n_iterations);
1312
1313       if (loop_dump_stream)
1314         fprintf (loop_dump_stream,
1315                  "Preconditioning: Success, number of iterations known, %d.\n",
1316                  loop_n_iterations);
1317       return 1;
1318     }
1319
1320   if (loop_initial_value == 0)
1321     {
1322       if (loop_dump_stream)
1323         fprintf (loop_dump_stream,
1324                  "Preconditioning: Could not find initial value.\n");
1325       return 0;
1326     }
1327   else if (loop_increment == 0)
1328     {
1329       if (loop_dump_stream)
1330         fprintf (loop_dump_stream,
1331                  "Preconditioning: Could not find increment value.\n");
1332       return 0;
1333     }
1334   else if (GET_CODE (loop_increment) != CONST_INT)
1335     {
1336       if (loop_dump_stream)
1337         fprintf (loop_dump_stream,
1338                  "Preconditioning: Increment not a constant.\n");
1339       return 0;
1340     }
1341   else if ((exact_log2 (INTVAL (loop_increment)) < 0)
1342            && (exact_log2 (- INTVAL (loop_increment)) < 0))
1343     {
1344       if (loop_dump_stream)
1345         fprintf (loop_dump_stream,
1346                  "Preconditioning: Increment not a constant power of 2.\n");
1347       return 0;
1348     }
1349
1350   /* Unsigned_compare and compare_dir can be ignored here, since they do
1351      not matter for preconditioning.  */
1352
1353   if (loop_final_value == 0)
1354     {
1355       if (loop_dump_stream)
1356         fprintf (loop_dump_stream,
1357                  "Preconditioning: EQ comparison loop.\n");
1358       return 0;
1359     }
1360
1361   /* Must ensure that final_value is invariant, so call invariant_p to
1362      check.  Before doing so, must check regno against max_reg_before_loop
1363      to make sure that the register is in the range covered by invariant_p.
1364      If it isn't, then it is most likely a biv/giv which by definition are
1365      not invariant.  */
1366   if ((GET_CODE (loop_final_value) == REG
1367        && REGNO (loop_final_value) >= max_reg_before_loop)
1368       || (GET_CODE (loop_final_value) == PLUS
1369           && REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop)
1370       || ! invariant_p (loop_final_value))
1371     {
1372       if (loop_dump_stream)
1373         fprintf (loop_dump_stream,
1374                  "Preconditioning: Final value not invariant.\n");
1375       return 0;
1376     }
1377
1378   /* Fail for floating point values, since the caller of this function
1379      does not have code to deal with them.  */
1380   if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT
1381       || GET_MODE_CLASS (GET_MODE (loop_initial_value)) == MODE_FLOAT)
1382     {
1383       if (loop_dump_stream)
1384         fprintf (loop_dump_stream,
1385                  "Preconditioning: Floating point final or initial value.\n");
1386       return 0;
1387     }
1388
1389   /* Now set initial_value to be the iteration_var, since that may be a
1390      simpler expression, and is guaranteed to be correct if all of the
1391      above tests succeed.
1392
1393      We can not use the initial_value as calculated, because it will be
1394      one too small for loops of the form "while (i-- > 0)".  We can not
1395      emit code before the loop_skip_over insns to fix this problem as this
1396      will then give a number one too large for loops of the form
1397      "while (--i > 0)".
1398
1399      Note that all loops that reach here are entered at the top, because
1400      this function is not called if the loop starts with a jump.  */
1401
1402   /* Fail if loop_iteration_var is not live before loop_start, since we need
1403      to test its value in the preconditioning code.  */
1404
1405   if (uid_luid[REGNO_FIRST_UID (REGNO (loop_iteration_var))]
1406       > INSN_LUID (loop_start))
1407     {
1408       if (loop_dump_stream)
1409         fprintf (loop_dump_stream,
1410                  "Preconditioning: Iteration var not live before loop start.\n");
1411       return 0;
1412     }
1413
1414   *initial_value = loop_iteration_var;
1415   *increment = loop_increment;
1416   *final_value = loop_final_value;
1417
1418   /* Success! */
1419   if (loop_dump_stream)
1420     fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1421   return 1;
1422 }
1423
1424
1425 /* All pseudo-registers must be mapped to themselves.  Two hard registers
1426    must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1427    REGNUM, to avoid function-inlining specific conversions of these
1428    registers.  All other hard regs can not be mapped because they may be
1429    used with different
1430    modes.  */
1431
1432 static void
1433 init_reg_map (map, maxregnum)
1434      struct inline_remap *map;
1435      int maxregnum;
1436 {
1437   int i;
1438
1439   for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1440     map->reg_map[i] = regno_reg_rtx[i];
1441   /* Just clear the rest of the entries.  */
1442   for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1443     map->reg_map[i] = 0;
1444
1445   map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1446     = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1447   map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1448     = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1449 }
1450 \f
1451 /* Strength-reduction will often emit code for optimized biv/givs which
1452    calculates their value in a temporary register, and then copies the result
1453    to the iv.  This procedure reconstructs the pattern computing the iv;
1454    verifying that all operands are of the proper form.
1455
1456    PATTERN must be the result of single_set.
1457    The return value is the amount that the giv is incremented by.  */
1458
1459 static rtx
1460 calculate_giv_inc (pattern, src_insn, regno)
1461      rtx pattern, src_insn;
1462      int regno;
1463 {
1464   rtx increment;
1465   rtx increment_total = 0;
1466   int tries = 0;
1467
1468  retry:
1469   /* Verify that we have an increment insn here.  First check for a plus
1470      as the set source.  */
1471   if (GET_CODE (SET_SRC (pattern)) != PLUS)
1472     {
1473       /* SR sometimes computes the new giv value in a temp, then copies it
1474          to the new_reg.  */
1475       src_insn = PREV_INSN (src_insn);
1476       pattern = PATTERN (src_insn);
1477       if (GET_CODE (SET_SRC (pattern)) != PLUS)
1478         abort ();
1479
1480       /* The last insn emitted is not needed, so delete it to avoid confusing
1481          the second cse pass.  This insn sets the giv unnecessarily.  */
1482       delete_insn (get_last_insn ());
1483     }
1484
1485   /* Verify that we have a constant as the second operand of the plus.  */
1486   increment = XEXP (SET_SRC (pattern), 1);
1487   if (GET_CODE (increment) != CONST_INT)
1488     {
1489       /* SR sometimes puts the constant in a register, especially if it is
1490          too big to be an add immed operand.  */
1491       src_insn = PREV_INSN (src_insn);
1492       increment = SET_SRC (PATTERN (src_insn));
1493
1494       /* SR may have used LO_SUM to compute the constant if it is too large
1495          for a load immed operand.  In this case, the constant is in operand
1496          one of the LO_SUM rtx.  */
1497       if (GET_CODE (increment) == LO_SUM)
1498         increment = XEXP (increment, 1);
1499
1500       /* Some ports store large constants in memory and add a REG_EQUAL
1501          note to the store insn.  */
1502       else if (GET_CODE (increment) == MEM)
1503         {
1504           rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
1505           if (note)
1506             increment = XEXP (note, 0);
1507         }
1508
1509       else if (GET_CODE (increment) == IOR
1510                || GET_CODE (increment) == ASHIFT
1511                || GET_CODE (increment) == PLUS)
1512         {
1513           /* The rs6000 port loads some constants with IOR.
1514              The alpha port loads some constants with ASHIFT and PLUS.  */
1515           rtx second_part = XEXP (increment, 1);
1516           enum rtx_code code = GET_CODE (increment);
1517
1518           src_insn = PREV_INSN (src_insn);
1519           increment = SET_SRC (PATTERN (src_insn));
1520           /* Don't need the last insn anymore.  */
1521           delete_insn (get_last_insn ());
1522
1523           if (GET_CODE (second_part) != CONST_INT
1524               || GET_CODE (increment) != CONST_INT)
1525             abort ();
1526
1527           if (code == IOR)
1528             increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
1529           else if (code == PLUS)
1530             increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
1531           else
1532             increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1533         }
1534
1535       if (GET_CODE (increment) != CONST_INT)
1536         abort ();
1537
1538       /* The insn loading the constant into a register is no longer needed,
1539          so delete it.  */
1540       delete_insn (get_last_insn ());
1541     }
1542
1543   if (increment_total)
1544     increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1545   else
1546     increment_total = increment;
1547
1548   /* Check that the source register is the same as the register we expected
1549      to see as the source.  If not, something is seriously wrong.  */
1550   if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1551       || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1552     {
1553       /* Some machines (e.g. the romp), may emit two add instructions for
1554          certain constants, so lets try looking for another add immediately
1555          before this one if we have only seen one add insn so far.  */
1556
1557       if (tries == 0)
1558         {
1559           tries++;
1560
1561           src_insn = PREV_INSN (src_insn);
1562           pattern = PATTERN (src_insn);
1563
1564           delete_insn (get_last_insn ());
1565
1566           goto retry;
1567         }
1568
1569       abort ();
1570     }
1571
1572   return increment_total;
1573 }
1574
1575 /* Copy REG_NOTES, except for insn references, because not all insn_map
1576    entries are valid yet.  We do need to copy registers now though, because
1577    the reg_map entries can change during copying.  */
1578
1579 static rtx
1580 initial_reg_note_copy (notes, map)
1581      rtx notes;
1582      struct inline_remap *map;
1583 {
1584   rtx copy;
1585
1586   if (notes == 0)
1587     return 0;
1588
1589   copy = rtx_alloc (GET_CODE (notes));
1590   PUT_MODE (copy, GET_MODE (notes));
1591
1592   if (GET_CODE (notes) == EXPR_LIST)
1593     XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map);
1594   else if (GET_CODE (notes) == INSN_LIST)
1595     /* Don't substitute for these yet.  */
1596     XEXP (copy, 0) = XEXP (notes, 0);
1597   else
1598     abort ();
1599
1600   XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1601
1602   return copy;
1603 }
1604
1605 /* Fixup insn references in copied REG_NOTES.  */
1606
1607 static void
1608 final_reg_note_copy (notes, map)
1609      rtx notes;
1610      struct inline_remap *map;
1611 {
1612   rtx note;
1613
1614   for (note = notes; note; note = XEXP (note, 1))
1615     if (GET_CODE (note) == INSN_LIST)
1616       XEXP (note, 0) = map->insn_map[INSN_UID (XEXP (note, 0))];
1617 }
1618
1619 /* Copy each instruction in the loop, substituting from map as appropriate.
1620    This is very similar to a loop in expand_inline_function.  */
1621
1622 static void
1623 copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
1624                 unroll_type, start_label, loop_end, insert_before,
1625                 copy_notes_from)
1626      rtx copy_start, copy_end;
1627      struct inline_remap *map;
1628      rtx exit_label;
1629      int last_iteration;
1630      enum unroll_types unroll_type;
1631      rtx start_label, loop_end, insert_before, copy_notes_from;
1632 {
1633   rtx insn, pattern;
1634   rtx set, tem, copy;
1635   int dest_reg_was_split, i;
1636 #ifdef HAVE_cc0
1637   rtx cc0_insn = 0;
1638 #endif
1639   rtx final_label = 0;
1640   rtx giv_inc, giv_dest_reg, giv_src_reg;
1641
1642   /* If this isn't the last iteration, then map any references to the
1643      start_label to final_label.  Final label will then be emitted immediately
1644      after the end of this loop body if it was ever used.
1645
1646      If this is the last iteration, then map references to the start_label
1647      to itself.  */
1648   if (! last_iteration)
1649     {
1650       final_label = gen_label_rtx ();
1651       set_label_in_map (map, CODE_LABEL_NUMBER (start_label),
1652                         final_label);
1653     }
1654   else
1655     set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);
1656
1657   start_sequence ();
1658
1659   insn = copy_start;
1660   do
1661     {
1662       insn = NEXT_INSN (insn);
1663
1664       map->orig_asm_operands_vector = 0;
1665
1666       switch (GET_CODE (insn))
1667         {
1668         case INSN:
1669           pattern = PATTERN (insn);
1670           copy = 0;
1671           giv_inc = 0;
1672
1673           /* Check to see if this is a giv that has been combined with
1674              some split address givs.  (Combined in the sense that
1675              `combine_givs' in loop.c has put two givs in the same register.)
1676              In this case, we must search all givs based on the same biv to
1677              find the address givs.  Then split the address givs.
1678              Do this before splitting the giv, since that may map the
1679              SET_DEST to a new register.  */
1680
1681           if ((set = single_set (insn))
1682               && GET_CODE (SET_DEST (set)) == REG
1683               && addr_combined_regs[REGNO (SET_DEST (set))])
1684             {
1685               struct iv_class *bl;
1686               struct induction *v, *tv;
1687               int regno = REGNO (SET_DEST (set));
1688
1689               v = addr_combined_regs[REGNO (SET_DEST (set))];
1690               bl = reg_biv_class[REGNO (v->src_reg)];
1691
1692               /* Although the giv_inc amount is not needed here, we must call
1693                  calculate_giv_inc here since it might try to delete the
1694                  last insn emitted.  If we wait until later to call it,
1695                  we might accidentally delete insns generated immediately
1696                  below by emit_unrolled_add.  */
1697
1698               giv_inc = calculate_giv_inc (set, insn, regno);
1699
1700               /* Now find all address giv's that were combined with this
1701                  giv 'v'.  */
1702               for (tv = bl->giv; tv; tv = tv->next_iv)
1703                 if (tv->giv_type == DEST_ADDR && tv->same == v)
1704                   {
1705                     int this_giv_inc;
1706
1707                     /* If this DEST_ADDR giv was not split, then ignore it.  */
1708                     if (*tv->location != tv->dest_reg)
1709                       continue;
1710
1711                     /* Scale this_giv_inc if the multiplicative factors of
1712                        the two givs are different.  */
1713                     this_giv_inc = INTVAL (giv_inc);
1714                     if (tv->mult_val != v->mult_val)
1715                       this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1716                                       * INTVAL (tv->mult_val));
1717
1718                     tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1719                     *tv->location = tv->dest_reg;
1720
1721                     if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1722                       {
1723                         /* Must emit an insn to increment the split address
1724                            giv.  Add in the const_adjust field in case there
1725                            was a constant eliminated from the address.  */
1726                         rtx value, dest_reg;
1727
1728                         /* tv->dest_reg will be either a bare register,
1729                            or else a register plus a constant.  */
1730                         if (GET_CODE (tv->dest_reg) == REG)
1731                           dest_reg = tv->dest_reg;
1732                         else
1733                           dest_reg = XEXP (tv->dest_reg, 0);
1734
1735                         /* Check for shared address givs, and avoid
1736                            incrementing the shared pseudo reg more than
1737                            once.  */
1738                         if (! tv->same_insn && ! tv->shared)
1739                           {
1740                             /* tv->dest_reg may actually be a (PLUS (REG)
1741                                (CONST)) here, so we must call plus_constant
1742                                to add the const_adjust amount before calling
1743                                emit_unrolled_add below.  */
1744                             value = plus_constant (tv->dest_reg,
1745                                                    tv->const_adjust);
1746
1747                             /* The constant could be too large for an add
1748                                immediate, so can't directly emit an insn
1749                                here.  */
1750                             emit_unrolled_add (dest_reg, XEXP (value, 0),
1751                                                XEXP (value, 1));
1752                           }
1753
1754                         /* Reset the giv to be just the register again, in case
1755                            it is used after the set we have just emitted.
1756                            We must subtract the const_adjust factor added in
1757                            above.  */
1758                         tv->dest_reg = plus_constant (dest_reg,
1759                                                       - tv->const_adjust);
1760                         *tv->location = tv->dest_reg;
1761                       }
1762                   }
1763             }
1764
1765           /* If this is a setting of a splittable variable, then determine
1766              how to split the variable, create a new set based on this split,
1767              and set up the reg_map so that later uses of the variable will
1768              use the new split variable.  */
1769
1770           dest_reg_was_split = 0;
1771
1772           if ((set = single_set (insn))
1773               && GET_CODE (SET_DEST (set)) == REG
1774               && splittable_regs[REGNO (SET_DEST (set))])
1775             {
1776               int regno = REGNO (SET_DEST (set));
1777
1778               dest_reg_was_split = 1;
1779
1780               /* Compute the increment value for the giv, if it wasn't
1781                  already computed above.  */
1782
1783               if (giv_inc == 0)
1784                 giv_inc = calculate_giv_inc (set, insn, regno);
1785               giv_dest_reg = SET_DEST (set);
1786               giv_src_reg = SET_DEST (set);
1787
1788               if (unroll_type == UNROLL_COMPLETELY)
1789                 {
1790                   /* Completely unrolling the loop.  Set the induction
1791                      variable to a known constant value.  */
1792
1793                   /* The value in splittable_regs may be an invariant
1794                      value, so we must use plus_constant here.  */
1795                   splittable_regs[regno]
1796                     = plus_constant (splittable_regs[regno], INTVAL (giv_inc));
1797
1798                   if (GET_CODE (splittable_regs[regno]) == PLUS)
1799                     {
1800                       giv_src_reg = XEXP (splittable_regs[regno], 0);
1801                       giv_inc = XEXP (splittable_regs[regno], 1);
1802                     }
1803                   else
1804                     {
1805                       /* The splittable_regs value must be a REG or a
1806                          CONST_INT, so put the entire value in the giv_src_reg
1807                          variable.  */
1808                       giv_src_reg = splittable_regs[regno];
1809                       giv_inc = const0_rtx;
1810                     }
1811                 }
1812               else
1813                 {
1814                   /* Partially unrolling loop.  Create a new pseudo
1815                      register for the iteration variable, and set it to
1816                      be a constant plus the original register.  Except
1817                      on the last iteration, when the result has to
1818                      go back into the original iteration var register.  */
1819
1820                   /* Handle bivs which must be mapped to a new register
1821                      when split.  This happens for bivs which need their
1822                      final value set before loop entry.  The new register
1823                      for the biv was stored in the biv's first struct
1824                      induction entry by find_splittable_regs.  */
1825
1826                   if (regno < max_reg_before_loop
1827                       && reg_iv_type[regno] == BASIC_INDUCT)
1828                     {
1829                       giv_src_reg = reg_biv_class[regno]->biv->src_reg;
1830                       giv_dest_reg = giv_src_reg;
1831                     }
1832
1833 #if 0
1834                   /* If non-reduced/final-value givs were split, then
1835                      this would have to remap those givs also.  See
1836                      find_splittable_regs.  */
1837 #endif
1838
1839                   splittable_regs[regno]
1840                     = GEN_INT (INTVAL (giv_inc)
1841                                + INTVAL (splittable_regs[regno]));
1842                   giv_inc = splittable_regs[regno];
1843
1844                   /* Now split the induction variable by changing the dest
1845                      of this insn to a new register, and setting its
1846                      reg_map entry to point to this new register.
1847
1848                      If this is the last iteration, and this is the last insn
1849                      that will update the iv, then reuse the original dest,
1850                      to ensure that the iv will have the proper value when
1851                      the loop exits or repeats.
1852
1853                      Using splittable_regs_updates here like this is safe,
1854                      because it can only be greater than one if all
1855                      instructions modifying the iv are always executed in
1856                      order.  */
1857
1858                   if (! last_iteration
1859                       || (splittable_regs_updates[regno]-- != 1))
1860                     {
1861                       tem = gen_reg_rtx (GET_MODE (giv_src_reg));
1862                       giv_dest_reg = tem;
1863                       map->reg_map[regno] = tem;
1864                       record_base_value (REGNO (tem), giv_src_reg);
1865                     }
1866                   else
1867                     map->reg_map[regno] = giv_src_reg;
1868                 }
1869
1870               /* The constant being added could be too large for an add
1871                  immediate, so can't directly emit an insn here.  */
1872               emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
1873               copy = get_last_insn ();
1874               pattern = PATTERN (copy);
1875             }
1876           else
1877             {
1878               pattern = copy_rtx_and_substitute (pattern, map);
1879               copy = emit_insn (pattern);
1880             }
1881           REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1882
1883 #ifdef HAVE_cc0
1884           /* If this insn is setting CC0, it may need to look at
1885              the insn that uses CC0 to see what type of insn it is.
1886              In that case, the call to recog via validate_change will
1887              fail.  So don't substitute constants here.  Instead,
1888              do it when we emit the following insn.
1889
1890              For example, see the pyr.md file.  That machine has signed and
1891              unsigned compares.  The compare patterns must check the
1892              following branch insn to see which what kind of compare to
1893              emit.
1894
1895              If the previous insn set CC0, substitute constants on it as
1896              well.  */
1897           if (sets_cc0_p (PATTERN (copy)) != 0)
1898             cc0_insn = copy;
1899           else
1900             {
1901               if (cc0_insn)
1902                 try_constants (cc0_insn, map);
1903               cc0_insn = 0;
1904               try_constants (copy, map);
1905             }
1906 #else
1907           try_constants (copy, map);
1908 #endif
1909
1910           /* Make split induction variable constants `permanent' since we
1911              know there are no backward branches across iteration variable
1912              settings which would invalidate this.  */
1913           if (dest_reg_was_split)
1914             {
1915               int regno = REGNO (SET_DEST (pattern));
1916
1917               if (regno < map->const_equiv_map_size
1918                   && map->const_age_map[regno] == map->const_age)
1919                 map->const_age_map[regno] = -1;
1920             }
1921           break;
1922
1923         case JUMP_INSN:
1924           pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1925           copy = emit_jump_insn (pattern);
1926           REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1927
1928           if (JUMP_LABEL (insn) == start_label && insn == copy_end
1929               && ! last_iteration)
1930             {
1931               /* This is a branch to the beginning of the loop; this is the
1932                  last insn being copied; and this is not the last iteration.
1933                  In this case, we want to change the original fall through
1934                  case to be a branch past the end of the loop, and the
1935                  original jump label case to fall_through.  */
1936
1937               if (invert_exp (pattern, copy))
1938                 {
1939                   if (! redirect_exp (&pattern,
1940                                       get_label_from_map (map,
1941                                                           CODE_LABEL_NUMBER
1942                                                           (JUMP_LABEL (insn))),
1943                                       exit_label, copy))
1944                     abort ();
1945                 }
1946               else
1947                 {
1948                   rtx jmp;
1949                   rtx lab = gen_label_rtx ();
1950                   /* Can't do it by reversing the jump (probably because we
1951                      couldn't reverse the conditions), so emit a new
1952                      jump_insn after COPY, and redirect the jump around
1953                      that.  */
1954                   jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
1955                   jmp = emit_barrier_after (jmp);
1956                   emit_label_after (lab, jmp);
1957                   LABEL_NUSES (lab) = 0;
1958                   if (! redirect_exp (&pattern,
1959                                       get_label_from_map (map,
1960                                                           CODE_LABEL_NUMBER
1961                                                           (JUMP_LABEL (insn))),
1962                                       lab, copy))
1963                     abort ();
1964                 }
1965             }
1966
1967 #ifdef HAVE_cc0
1968           if (cc0_insn)
1969             try_constants (cc0_insn, map);
1970           cc0_insn = 0;
1971 #endif
1972           try_constants (copy, map);
1973
1974           /* Set the jump label of COPY correctly to avoid problems with
1975              later passes of unroll_loop, if INSN had jump label set.  */
1976           if (JUMP_LABEL (insn))
1977             {
1978               rtx label = 0;
1979
1980               /* Can't use the label_map for every insn, since this may be
1981                  the backward branch, and hence the label was not mapped.  */
1982               if ((set = single_set (copy)))
1983                 {
1984                   tem = SET_SRC (set);
1985                   if (GET_CODE (tem) == LABEL_REF)
1986                     label = XEXP (tem, 0);
1987                   else if (GET_CODE (tem) == IF_THEN_ELSE)
1988                     {
1989                       if (XEXP (tem, 1) != pc_rtx)
1990                         label = XEXP (XEXP (tem, 1), 0);
1991                       else
1992                         label = XEXP (XEXP (tem, 2), 0);
1993                     }
1994                 }
1995
1996               if (label && GET_CODE (label) == CODE_LABEL)
1997                 JUMP_LABEL (copy) = label;
1998               else
1999                 {
2000                   /* An unrecognizable jump insn, probably the entry jump
2001                      for a switch statement.  This label must have been mapped,
2002                      so just use the label_map to get the new jump label.  */
2003                   JUMP_LABEL (copy)
2004                     = get_label_from_map (map,
2005                                           CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
2006                 }
2007
2008               /* If this is a non-local jump, then must increase the label
2009                  use count so that the label will not be deleted when the
2010                  original jump is deleted.  */
2011               LABEL_NUSES (JUMP_LABEL (copy))++;
2012             }
2013           else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
2014                    || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
2015             {
2016               rtx pat = PATTERN (copy);
2017               int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
2018               int len = XVECLEN (pat, diff_vec_p);
2019               int i;
2020
2021               for (i = 0; i < len; i++)
2022                 LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
2023             }
2024
2025           /* If this used to be a conditional jump insn but whose branch
2026              direction is now known, we must do something special.  */
2027           if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
2028             {
2029 #ifdef HAVE_cc0
2030               /* The previous insn set cc0 for us.  So delete it.  */
2031               delete_insn (PREV_INSN (copy));
2032 #endif
2033
2034               /* If this is now a no-op, delete it.  */
2035               if (map->last_pc_value == pc_rtx)
2036                 {
2037                   /* Don't let delete_insn delete the label referenced here,
2038                      because we might possibly need it later for some other
2039                      instruction in the loop.  */
2040                   if (JUMP_LABEL (copy))
2041                     LABEL_NUSES (JUMP_LABEL (copy))++;
2042                   delete_insn (copy);
2043                   if (JUMP_LABEL (copy))
2044                     LABEL_NUSES (JUMP_LABEL (copy))--;
2045                   copy = 0;
2046                 }
2047               else
2048                 /* Otherwise, this is unconditional jump so we must put a
2049                    BARRIER after it.  We could do some dead code elimination
2050                    here, but jump.c will do it just as well.  */
2051                 emit_barrier ();
2052             }
2053           break;
2054
2055         case CALL_INSN:
2056           pattern = copy_rtx_and_substitute (PATTERN (insn), map);
2057           copy = emit_call_insn (pattern);
2058           REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2059
2060           /* Because the USAGE information potentially contains objects other
2061              than hard registers, we need to copy it.  */
2062           CALL_INSN_FUNCTION_USAGE (copy)
2063             = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map);
2064
2065 #ifdef HAVE_cc0
2066           if (cc0_insn)
2067             try_constants (cc0_insn, map);
2068           cc0_insn = 0;
2069 #endif
2070           try_constants (copy, map);
2071
2072           /* Be lazy and assume CALL_INSNs clobber all hard registers.  */
2073           for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2074             map->const_equiv_map[i] = 0;
2075           break;
2076
2077         case CODE_LABEL:
2078           /* If this is the loop start label, then we don't need to emit a
2079              copy of this label since no one will use it.  */
2080
2081           if (insn != start_label)
2082             {
2083               copy = emit_label (get_label_from_map (map,
2084                                                      CODE_LABEL_NUMBER (insn)));
2085               map->const_age++;
2086             }
2087           break;
2088
2089         case BARRIER:
2090           copy = emit_barrier ();
2091           break;
2092
2093         case NOTE:
2094           /* VTOP notes are valid only before the loop exit test.  If placed
2095              anywhere else, loop may generate bad code.  */
2096
2097           if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2098               && (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
2099                   || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
2100             copy = emit_note (NOTE_SOURCE_FILE (insn),
2101                               NOTE_LINE_NUMBER (insn));
2102           else
2103             copy = 0;
2104           break;
2105
2106         default:
2107           abort ();
2108           break;
2109         }
2110
2111       map->insn_map[INSN_UID (insn)] = copy;
2112     }
2113   while (insn != copy_end);
2114
2115   /* Now finish coping the REG_NOTES.  */
2116   insn = copy_start;
2117   do
2118     {
2119       insn = NEXT_INSN (insn);
2120       if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
2121            || GET_CODE (insn) == CALL_INSN)
2122           && map->insn_map[INSN_UID (insn)])
2123         final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2124     }
2125   while (insn != copy_end);
2126
2127   /* There may be notes between copy_notes_from and loop_end.  Emit a copy of
2128      each of these notes here, since there may be some important ones, such as
2129      NOTE_INSN_BLOCK_END notes, in this group.  We don't do this on the last
2130      iteration, because the original notes won't be deleted.
2131
2132      We can't use insert_before here, because when from preconditioning,
2133      insert_before points before the loop.  We can't use copy_end, because
2134      there may be insns already inserted after it (which we don't want to
2135      copy) when not from preconditioning code.  */
2136
2137   if (! last_iteration)
2138     {
2139       for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2140         {
2141           if (GET_CODE (insn) == NOTE
2142               && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
2143             emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2144         }
2145     }
2146
2147   if (final_label && LABEL_NUSES (final_label) > 0)
2148     emit_label (final_label);
2149
2150   tem = gen_sequence ();
2151   end_sequence ();
2152   emit_insn_before (tem, insert_before);
2153 }
2154 \f
2155 /* Emit an insn, using the expand_binop to ensure that a valid insn is
2156    emitted.  This will correctly handle the case where the increment value
2157    won't fit in the immediate field of a PLUS insns.  */
2158
2159 void
2160 emit_unrolled_add (dest_reg, src_reg, increment)
2161      rtx dest_reg, src_reg, increment;
2162 {
2163   rtx result;
2164
2165   result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment,
2166                          dest_reg, 0, OPTAB_LIB_WIDEN);
2167
2168   if (dest_reg != result)
2169     emit_move_insn (dest_reg, result);
2170 }
2171 \f
2172 /* Searches the insns between INSN and LOOP_END.  Returns 1 if there
2173    is a backward branch in that range that branches to somewhere between
2174    LOOP_START and INSN.  Returns 0 otherwise.  */
2175
2176 /* ??? This is quadratic algorithm.  Could be rewritten to be linear.
2177    In practice, this is not a problem, because this function is seldom called,
2178    and uses a negligible amount of CPU time on average.  */
2179
2180 int
2181 back_branch_in_range_p (insn, loop_start, loop_end)
2182      rtx insn;
2183      rtx loop_start, loop_end;
2184 {
2185   rtx p, q, target_insn;
2186   rtx orig_loop_end = loop_end;
2187
2188   /* Stop before we get to the backward branch at the end of the loop.  */
2189   loop_end = prev_nonnote_insn (loop_end);
2190   if (GET_CODE (loop_end) == BARRIER)
2191     loop_end = PREV_INSN (loop_end);
2192
2193   /* Check in case insn has been deleted, search forward for first non
2194      deleted insn following it.  */
2195   while (INSN_DELETED_P (insn))
2196     insn = NEXT_INSN (insn);
2197
2198   /* Check for the case where insn is the last insn in the loop.  Deal
2199      with the case where INSN was a deleted loop test insn, in which case
2200      it will now be the NOTE_LOOP_END.  */
2201   if (insn == loop_end || insn == orig_loop_end)
2202     return 0;
2203
2204   for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2205     {
2206       if (GET_CODE (p) == JUMP_INSN)
2207         {
2208           target_insn = JUMP_LABEL (p);
2209
2210           /* Search from loop_start to insn, to see if one of them is
2211              the target_insn.  We can't use INSN_LUID comparisons here,
2212              since insn may not have an LUID entry.  */
2213           for (q = loop_start; q != insn; q = NEXT_INSN (q))
2214             if (q == target_insn)
2215               return 1;
2216         }
2217     }
2218
2219   return 0;
2220 }
2221
2222 /* Try to generate the simplest rtx for the expression
2223    (PLUS (MULT mult1 mult2) add1).  This is used to calculate the initial
2224    value of giv's.  */
2225
2226 static rtx
2227 fold_rtx_mult_add (mult1, mult2, add1, mode)
2228      rtx mult1, mult2, add1;
2229      enum machine_mode mode;
2230 {
2231   rtx temp, mult_res;
2232   rtx result;
2233
2234   /* The modes must all be the same.  This should always be true.  For now,
2235      check to make sure.  */
2236   if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2237       || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2238       || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2239     abort ();
2240
2241   /* Ensure that if at least one of mult1/mult2 are constant, then mult2
2242      will be a constant.  */
2243   if (GET_CODE (mult1) == CONST_INT)
2244     {
2245       temp = mult2;
2246       mult2 = mult1;
2247       mult1 = temp;
2248     }
2249
2250   mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2251   if (! mult_res)
2252     mult_res = gen_rtx_MULT (mode, mult1, mult2);
2253
2254   /* Again, put the constant second.  */
2255   if (GET_CODE (add1) == CONST_INT)
2256     {
2257       temp = add1;
2258       add1 = mult_res;
2259       mult_res = temp;
2260     }
2261
2262   result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2263   if (! result)
2264     result = gen_rtx_PLUS (mode, add1, mult_res);
2265
2266   return result;
2267 }
2268
2269 /* Searches the list of induction struct's for the biv BL, to try to calculate
2270    the total increment value for one iteration of the loop as a constant.
2271
2272    Returns the increment value as an rtx, simplified as much as possible,
2273    if it can be calculated.  Otherwise, returns 0.  */
2274
2275 rtx
2276 biv_total_increment (bl, loop_start, loop_end)
2277      struct iv_class *bl;
2278      rtx loop_start, loop_end;
2279 {
2280   struct induction *v;
2281   rtx result;
2282
2283   /* For increment, must check every instruction that sets it.  Each
2284      instruction must be executed only once each time through the loop.
2285      To verify this, we check that the the insn is always executed, and that
2286      there are no backward branches after the insn that branch to before it.
2287      Also, the insn must have a mult_val of one (to make sure it really is
2288      an increment).  */
2289
2290   result = const0_rtx;
2291   for (v = bl->biv; v; v = v->next_iv)
2292     {
2293       if (v->always_computable && v->mult_val == const1_rtx
2294           && ! back_branch_in_range_p (v->insn, loop_start, loop_end))
2295         result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2296       else
2297         return 0;
2298     }
2299
2300   return result;
2301 }
2302
2303 /* Determine the initial value of the iteration variable, and the amount
2304    that it is incremented each loop.  Use the tables constructed by
2305    the strength reduction pass to calculate these values.
2306
2307    Initial_value and/or increment are set to zero if their values could not
2308    be calculated.  */
2309
2310 void
2311 iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
2312      rtx iteration_var, *initial_value, *increment;
2313      rtx loop_start, loop_end;
2314 {
2315   struct iv_class *bl;
2316 #if 0
2317   struct induction *v;
2318 #endif
2319
2320   /* Clear the result values, in case no answer can be found.  */
2321   *initial_value = 0;
2322   *increment = 0;
2323
2324   /* The iteration variable can be either a giv or a biv.  Check to see
2325      which it is, and compute the variable's initial value, and increment
2326      value if possible.  */
2327
2328   /* If this is a new register, can't handle it since we don't have any
2329      reg_iv_type entry for it.  */
2330   if (REGNO (iteration_var) >= max_reg_before_loop)
2331     {
2332       if (loop_dump_stream)
2333         fprintf (loop_dump_stream,
2334                  "Loop unrolling: No reg_iv_type entry for iteration var.\n");
2335       return;
2336     }
2337
2338   /* Reject iteration variables larger than the host wide int size, since they
2339      could result in a number of iterations greater than the range of our
2340      `unsigned HOST_WIDE_INT' variable loop_n_iterations.  */
2341   else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
2342             > HOST_BITS_PER_WIDE_INT))
2343     {
2344       if (loop_dump_stream)
2345         fprintf (loop_dump_stream,
2346                  "Loop unrolling: Iteration var rejected because mode too large.\n");
2347       return;
2348     }
2349   else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
2350     {
2351       if (loop_dump_stream)
2352         fprintf (loop_dump_stream,
2353                  "Loop unrolling: Iteration var not an integer.\n");
2354       return;
2355     }
2356   else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT)
2357     {
2358       /* Grab initial value, only useful if it is a constant.  */
2359       bl = reg_biv_class[REGNO (iteration_var)];
2360       *initial_value = bl->initial_value;
2361
2362       *increment = biv_total_increment (bl, loop_start, loop_end);
2363     }
2364   else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT)
2365     {
2366 #if 1
2367       /* ??? The code below does not work because the incorrect number of
2368          iterations is calculated when the biv is incremented after the giv
2369          is set (which is the usual case).  This can probably be accounted
2370          for by biasing the initial_value by subtracting the amount of the
2371          increment that occurs between the giv set and the giv test.  However,
2372          a giv as an iterator is very rare, so it does not seem worthwhile
2373          to handle this.  */
2374       /* ??? An example failure is: i = 6; do {;} while (i++ < 9).  */
2375       if (loop_dump_stream)
2376         fprintf (loop_dump_stream,
2377                  "Loop unrolling: Giv iterators are not handled.\n");
2378       return;
2379 #else
2380       /* Initial value is mult_val times the biv's initial value plus
2381          add_val.  Only useful if it is a constant.  */
2382       v = reg_iv_info[REGNO (iteration_var)];
2383       bl = reg_biv_class[REGNO (v->src_reg)];
2384       *initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value,
2385                                           v->add_val, v->mode);
2386
2387       /* Increment value is mult_val times the increment value of the biv.  */
2388
2389       *increment = biv_total_increment (bl, loop_start, loop_end);
2390       if (*increment)
2391         *increment = fold_rtx_mult_add (v->mult_val, *increment, const0_rtx,
2392                                         v->mode);
2393 #endif
2394     }
2395   else
2396     {
2397       if (loop_dump_stream)
2398         fprintf (loop_dump_stream,
2399                  "Loop unrolling: Not basic or general induction var.\n");
2400       return;
2401     }
2402 }
2403
2404 /* Calculate the approximate final value of the iteration variable
2405    which has an loop exit test with code COMPARISON_CODE and comparison value
2406    of COMPARISON_VALUE.  Also returns an indication of whether the comparison
2407    was signed or unsigned, and the direction of the comparison.  This info is
2408    needed to calculate the number of loop iterations.  */
2409
2410 static rtx
2411 approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir)
2412      enum rtx_code comparison_code;
2413      rtx comparison_value;
2414      int *unsigned_p;
2415      int *compare_dir;
2416 {
2417   /* Calculate the final value of the induction variable.
2418      The exact final value depends on the branch operator, and increment sign.
2419      This is only an approximate value.  It will be wrong if the iteration
2420      variable is not incremented by one each time through the loop, and
2421      approx final value - start value % increment != 0.  */
2422
2423   *unsigned_p = 0;
2424   switch (comparison_code)
2425     {
2426     case LEU:
2427       *unsigned_p = 1;
2428     case LE:
2429       *compare_dir = 1;
2430       return plus_constant (comparison_value, 1);
2431     case GEU:
2432       *unsigned_p = 1;
2433     case GE:
2434       *compare_dir = -1;
2435       return plus_constant (comparison_value, -1);
2436     case EQ:
2437       /* Can not calculate a final value for this case.  */
2438       *compare_dir = 0;
2439       return 0;
2440     case LTU:
2441       *unsigned_p = 1;
2442     case LT:
2443       *compare_dir = 1;
2444       return comparison_value;
2445       break;
2446     case GTU:
2447       *unsigned_p = 1;
2448     case GT:
2449       *compare_dir = -1;
2450       return comparison_value;
2451     case NE:
2452       *compare_dir = 0;
2453       return comparison_value;
2454     default:
2455       abort ();
2456     }
2457 }
2458
2459 /* For each biv and giv, determine whether it can be safely split into
2460    a different variable for each unrolled copy of the loop body.  If it
2461    is safe to split, then indicate that by saving some useful info
2462    in the splittable_regs array.
2463
2464    If the loop is being completely unrolled, then splittable_regs will hold
2465    the current value of the induction variable while the loop is unrolled.
2466    It must be set to the initial value of the induction variable here.
2467    Otherwise, splittable_regs will hold the difference between the current
2468    value of the induction variable and the value the induction variable had
2469    at the top of the loop.  It must be set to the value 0 here.
2470
2471    Returns the total number of instructions that set registers that are
2472    splittable.  */
2473
2474 /* ?? If the loop is only unrolled twice, then most of the restrictions to
2475    constant values are unnecessary, since we can easily calculate increment
2476    values in this case even if nothing is constant.  The increment value
2477    should not involve a multiply however.  */
2478
2479 /* ?? Even if the biv/giv increment values aren't constant, it may still
2480    be beneficial to split the variable if the loop is only unrolled a few
2481    times, since multiplies by small integers (1,2,3,4) are very cheap.  */
2482
2483 static int
2484 find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
2485                      unroll_number)
2486      enum unroll_types unroll_type;
2487      rtx loop_start, loop_end;
2488      rtx end_insert_before;
2489      int unroll_number;
2490 {
2491   struct iv_class *bl;
2492   struct induction *v;
2493   rtx increment, tem;
2494   rtx biv_final_value;
2495   int biv_splittable;
2496   int result = 0;
2497
2498   for (bl = loop_iv_list; bl; bl = bl->next)
2499     {
2500       /* Biv_total_increment must return a constant value,
2501          otherwise we can not calculate the split values.  */
2502
2503       increment = biv_total_increment (bl, loop_start, loop_end);
2504       if (! increment || GET_CODE (increment) != CONST_INT)
2505         continue;
2506
2507       /* The loop must be unrolled completely, or else have a known number
2508          of iterations and only one exit, or else the biv must be dead
2509          outside the loop, or else the final value must be known.  Otherwise,
2510          it is unsafe to split the biv since it may not have the proper
2511          value on loop exit.  */
2512
2513       /* loop_number_exit_count is non-zero if the loop has an exit other than
2514          a fall through at the end.  */
2515
2516       biv_splittable = 1;
2517       biv_final_value = 0;
2518       if (unroll_type != UNROLL_COMPLETELY
2519           && (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2520               || unroll_type == UNROLL_NAIVE)
2521           && (uid_luid[REGNO_LAST_UID (bl->regno)] >= INSN_LUID (loop_end)
2522               || ! bl->init_insn
2523               || INSN_UID (bl->init_insn) >= max_uid_for_loop
2524               || (uid_luid[REGNO_FIRST_UID (bl->regno)]
2525                   < INSN_LUID (bl->init_insn))
2526               || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2527           && ! (biv_final_value = final_biv_value (bl, loop_start, loop_end)))
2528         biv_splittable = 0;
2529
2530       /* If any of the insns setting the BIV don't do so with a simple
2531          PLUS, we don't know how to split it.  */
2532       for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2533         if ((tem = single_set (v->insn)) == 0
2534             || GET_CODE (SET_DEST (tem)) != REG
2535             || REGNO (SET_DEST (tem)) != bl->regno
2536             || GET_CODE (SET_SRC (tem)) != PLUS)
2537           biv_splittable = 0;
2538
2539       /* If final value is non-zero, then must emit an instruction which sets
2540          the value of the biv to the proper value.  This is done after
2541          handling all of the givs, since some of them may need to use the
2542          biv's value in their initialization code.  */
2543
2544       /* This biv is splittable.  If completely unrolling the loop, save
2545          the biv's initial value.  Otherwise, save the constant zero.  */
2546
2547       if (biv_splittable == 1)
2548         {
2549           if (unroll_type == UNROLL_COMPLETELY)
2550             {
2551               /* If the initial value of the biv is itself (i.e. it is too
2552                  complicated for strength_reduce to compute), or is a hard
2553                  register, or it isn't invariant, then we must create a new
2554                  pseudo reg to hold the initial value of the biv.  */
2555
2556               if (GET_CODE (bl->initial_value) == REG
2557                   && (REGNO (bl->initial_value) == bl->regno
2558                       || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
2559                       || ! invariant_p (bl->initial_value)))
2560                 {
2561                   rtx tem = gen_reg_rtx (bl->biv->mode);
2562
2563                   record_base_value (REGNO (tem), bl->biv->add_val);
2564                   emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2565                                     loop_start);
2566
2567                   if (loop_dump_stream)
2568                     fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
2569                              bl->regno, REGNO (tem));
2570
2571                   splittable_regs[bl->regno] = tem;
2572                 }
2573               else
2574                 splittable_regs[bl->regno] = bl->initial_value;
2575             }
2576           else
2577             splittable_regs[bl->regno] = const0_rtx;
2578
2579           /* Save the number of instructions that modify the biv, so that
2580              we can treat the last one specially.  */
2581
2582           splittable_regs_updates[bl->regno] = bl->biv_count;
2583           result += bl->biv_count;
2584
2585           if (loop_dump_stream)
2586             fprintf (loop_dump_stream,
2587                      "Biv %d safe to split.\n", bl->regno);
2588         }
2589
2590       /* Check every giv that depends on this biv to see whether it is
2591          splittable also.  Even if the biv isn't splittable, givs which
2592          depend on it may be splittable if the biv is live outside the
2593          loop, and the givs aren't.  */
2594
2595       result += find_splittable_givs (bl, unroll_type, loop_start, loop_end,
2596                                      increment, unroll_number);
2597
2598       /* If final value is non-zero, then must emit an instruction which sets
2599          the value of the biv to the proper value.  This is done after
2600          handling all of the givs, since some of them may need to use the
2601          biv's value in their initialization code.  */
2602       if (biv_final_value)
2603         {
2604           /* If the loop has multiple exits, emit the insns before the
2605              loop to ensure that it will always be executed no matter
2606              how the loop exits.  Otherwise emit the insn after the loop,
2607              since this is slightly more efficient.  */
2608           if (! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
2609             emit_insn_before (gen_move_insn (bl->biv->src_reg,
2610                                              biv_final_value),
2611                               end_insert_before);
2612           else
2613             {
2614               /* Create a new register to hold the value of the biv, and then
2615                  set the biv to its final value before the loop start.  The biv
2616                  is set to its final value before loop start to ensure that
2617                  this insn will always be executed, no matter how the loop
2618                  exits.  */
2619               rtx tem = gen_reg_rtx (bl->biv->mode);
2620               record_base_value (REGNO (tem), bl->biv->add_val);
2621
2622               emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2623                                 loop_start);
2624               emit_insn_before (gen_move_insn (bl->biv->src_reg,
2625                                                biv_final_value),
2626                                 loop_start);
2627
2628               if (loop_dump_stream)
2629                 fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2630                          REGNO (bl->biv->src_reg), REGNO (tem));
2631
2632               /* Set up the mapping from the original biv register to the new
2633                  register.  */
2634               bl->biv->src_reg = tem;
2635             }
2636         }
2637     }
2638   return result;
2639 }
2640
2641 /* Return 1 if the first and last unrolled copy of the address giv V is valid
2642    for the instruction that is using it.  Do not make any changes to that
2643    instruction.  */
2644
2645 static int
2646 verify_addresses (v, giv_inc, unroll_number)
2647      struct induction *v;
2648      rtx giv_inc;
2649      int unroll_number;
2650 {
2651   int ret = 1;
2652   rtx orig_addr = *v->location;
2653   rtx last_addr = plus_constant (v->dest_reg,
2654                                  INTVAL (giv_inc) * (unroll_number - 1));
2655
2656   /* First check to see if either address would fail.   Handle the fact
2657      that we have may have a match_dup.  */
2658   if (! validate_replace_rtx (*v->location, v->dest_reg, v->insn)
2659       || ! validate_replace_rtx (*v->location, last_addr, v->insn))
2660     ret = 0;
2661
2662   /* Now put things back the way they were before.  This will always
2663    succeed.  */
2664   validate_change (v->insn, v->location, orig_addr, 0);
2665
2666   return ret;
2667 }
2668
2669 /* For every giv based on the biv BL, check to determine whether it is
2670    splittable.  This is a subroutine to find_splittable_regs ().
2671
2672    Return the number of instructions that set splittable registers.  */
2673
2674 static int
2675 find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
2676                       unroll_number)
2677      struct iv_class *bl;
2678      enum unroll_types unroll_type;
2679      rtx loop_start, loop_end;
2680      rtx increment;
2681      int unroll_number;
2682 {
2683   struct induction *v, *v2;
2684   rtx final_value;
2685   rtx tem;
2686   int result = 0;
2687
2688   /* Scan the list of givs, and set the same_insn field when there are
2689      multiple identical givs in the same insn.  */
2690   for (v = bl->giv; v; v = v->next_iv)
2691     for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2692       if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2693           && ! v2->same_insn)
2694         v2->same_insn = v;
2695
2696   for (v = bl->giv; v; v = v->next_iv)
2697     {
2698       rtx giv_inc, value;
2699
2700       /* Only split the giv if it has already been reduced, or if the loop is
2701          being completely unrolled.  */
2702       if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2703         continue;
2704
2705       /* The giv can be split if the insn that sets the giv is executed once
2706          and only once on every iteration of the loop.  */
2707       /* An address giv can always be split.  v->insn is just a use not a set,
2708          and hence it does not matter whether it is always executed.  All that
2709          matters is that all the biv increments are always executed, and we
2710          won't reach here if they aren't.  */
2711       if (v->giv_type != DEST_ADDR
2712           && (! v->always_computable
2713               || back_branch_in_range_p (v->insn, loop_start, loop_end)))
2714         continue;
2715
2716       /* The giv increment value must be a constant.  */
2717       giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2718                                    v->mode);
2719       if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
2720         continue;
2721
2722       /* The loop must be unrolled completely, or else have a known number of
2723          iterations and only one exit, or else the giv must be dead outside
2724          the loop, or else the final value of the giv must be known.
2725          Otherwise, it is not safe to split the giv since it may not have the
2726          proper value on loop exit.  */
2727
2728       /* The used outside loop test will fail for DEST_ADDR givs.  They are
2729          never used outside the loop anyways, so it is always safe to split a
2730          DEST_ADDR giv.  */
2731
2732       final_value = 0;
2733       if (unroll_type != UNROLL_COMPLETELY
2734           && (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2735               || unroll_type == UNROLL_NAIVE)
2736           && v->giv_type != DEST_ADDR
2737           /* The next part is true if the pseudo is used outside the loop.
2738              We assume that this is true for any pseudo created after loop
2739              starts, because we don't have a reg_n_info entry for them.  */
2740           && (REGNO (v->dest_reg) >= max_reg_before_loop
2741               || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
2742                   /* Check for the case where the pseudo is set by a shift/add
2743                      sequence, in which case the first insn setting the pseudo
2744                      is the first insn of the shift/add sequence.  */
2745                   && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2746                       || (REGNO_FIRST_UID (REGNO (v->dest_reg))
2747                           != INSN_UID (XEXP (tem, 0)))))
2748               /* Line above always fails if INSN was moved by loop opt.  */
2749               || (uid_luid[REGNO_LAST_UID (REGNO (v->dest_reg))]
2750                   >= INSN_LUID (loop_end)))
2751           && ! (final_value = v->final_value))
2752         continue;
2753
2754 #if 0
2755       /* Currently, non-reduced/final-value givs are never split.  */
2756       /* Should emit insns after the loop if possible, as the biv final value
2757          code below does.  */
2758
2759       /* If the final value is non-zero, and the giv has not been reduced,
2760          then must emit an instruction to set the final value.  */
2761       if (final_value && !v->new_reg)
2762         {
2763           /* Create a new register to hold the value of the giv, and then set
2764              the giv to its final value before the loop start.  The giv is set
2765              to its final value before loop start to ensure that this insn
2766              will always be executed, no matter how we exit.  */
2767           tem = gen_reg_rtx (v->mode);
2768           emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
2769           emit_insn_before (gen_move_insn (v->dest_reg, final_value),
2770                             loop_start);
2771
2772           if (loop_dump_stream)
2773             fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2774                      REGNO (v->dest_reg), REGNO (tem));
2775
2776           v->src_reg = tem;
2777         }
2778 #endif
2779
2780       /* This giv is splittable.  If completely unrolling the loop, save the
2781          giv's initial value.  Otherwise, save the constant zero for it.  */
2782
2783       if (unroll_type == UNROLL_COMPLETELY)
2784         {
2785           /* It is not safe to use bl->initial_value here, because it may not
2786              be invariant.  It is safe to use the initial value stored in
2787              the splittable_regs array if it is set.  In rare cases, it won't
2788              be set, so then we do exactly the same thing as
2789              find_splittable_regs does to get a safe value.  */
2790           rtx biv_initial_value;
2791
2792           if (splittable_regs[bl->regno])
2793             biv_initial_value = splittable_regs[bl->regno];
2794           else if (GET_CODE (bl->initial_value) != REG
2795                    || (REGNO (bl->initial_value) != bl->regno
2796                        && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2797             biv_initial_value = bl->initial_value;
2798           else
2799             {
2800               rtx tem = gen_reg_rtx (bl->biv->mode);
2801
2802               record_base_value (REGNO (tem), bl->biv->add_val);
2803               emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2804                                 loop_start);
2805               biv_initial_value = tem;
2806             }
2807           value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2808                                      v->add_val, v->mode);
2809         }
2810       else
2811         value = const0_rtx;
2812
2813       if (v->new_reg)
2814         {
2815           /* If a giv was combined with another giv, then we can only split
2816              this giv if the giv it was combined with was reduced.  This
2817              is because the value of v->new_reg is meaningless in this
2818              case.  */
2819           if (v->same && ! v->same->new_reg)
2820             {
2821               if (loop_dump_stream)
2822                 fprintf (loop_dump_stream,
2823                          "giv combined with unreduced giv not split.\n");
2824               continue;
2825             }
2826           /* If the giv is an address destination, it could be something other
2827              than a simple register, these have to be treated differently.  */
2828           else if (v->giv_type == DEST_REG)
2829             {
2830               /* If value is not a constant, register, or register plus
2831                  constant, then compute its value into a register before
2832                  loop start.  This prevents invalid rtx sharing, and should
2833                  generate better code.  We can use bl->initial_value here
2834                  instead of splittable_regs[bl->regno] because this code
2835                  is going before the loop start.  */
2836               if (unroll_type == UNROLL_COMPLETELY
2837                   && GET_CODE (value) != CONST_INT
2838                   && GET_CODE (value) != REG
2839                   && (GET_CODE (value) != PLUS
2840                       || GET_CODE (XEXP (value, 0)) != REG
2841                       || GET_CODE (XEXP (value, 1)) != CONST_INT))
2842                 {
2843                   rtx tem = gen_reg_rtx (v->mode);
2844                   record_base_value (REGNO (tem), v->add_val);
2845                   emit_iv_add_mult (bl->initial_value, v->mult_val,
2846                                     v->add_val, tem, loop_start);
2847                   value = tem;
2848                 }
2849
2850               splittable_regs[REGNO (v->new_reg)] = value;
2851             }
2852           else
2853             {
2854               /* Splitting address givs is useful since it will often allow us
2855                  to eliminate some increment insns for the base giv as
2856                  unnecessary.  */
2857
2858               /* If the addr giv is combined with a dest_reg giv, then all
2859                  references to that dest reg will be remapped, which is NOT
2860                  what we want for split addr regs. We always create a new
2861                  register for the split addr giv, just to be safe.  */
2862
2863               /* If we have multiple identical address givs within a
2864                  single instruction, then use a single pseudo reg for
2865                  both.  This is necessary in case one is a match_dup
2866                  of the other.  */
2867
2868               v->const_adjust = 0;
2869
2870               if (v->same_insn)
2871                 {
2872                   v->dest_reg = v->same_insn->dest_reg;
2873                   if (loop_dump_stream)
2874                     fprintf (loop_dump_stream,
2875                              "Sharing address givs in insn %d\n",
2876                              INSN_UID (v->insn));
2877                 }
2878               /* If multiple address GIVs have been combined with the
2879                  same dest_reg GIV, do not create a new register for
2880                  each.  */
2881               else if (unroll_type != UNROLL_COMPLETELY
2882                        && v->giv_type == DEST_ADDR
2883                        && v->same && v->same->giv_type == DEST_ADDR
2884                        && v->same->unrolled
2885                        /* combine_givs_p may return true for some cases
2886                           where the add and mult values are not equal.
2887                           To share a register here, the values must be
2888                           equal.  */
2889                        && rtx_equal_p (v->same->mult_val, v->mult_val)
2890                        && rtx_equal_p (v->same->add_val, v->add_val))
2891
2892                 {
2893                   v->dest_reg = v->same->dest_reg;
2894                   v->shared = 1;
2895                 }
2896               else if (unroll_type != UNROLL_COMPLETELY)
2897                 {
2898                   /* If not completely unrolling the loop, then create a new
2899                      register to hold the split value of the DEST_ADDR giv.
2900                      Emit insn to initialize its value before loop start.  */
2901
2902                   rtx tem = gen_reg_rtx (v->mode);
2903                   record_base_value (REGNO (tem), v->add_val);
2904                   v->unrolled = 1;
2905
2906                   /* If the address giv has a constant in its new_reg value,
2907                      then this constant can be pulled out and put in value,
2908                      instead of being part of the initialization code.  */
2909
2910                   if (GET_CODE (v->new_reg) == PLUS
2911                       && GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT)
2912                     {
2913                       v->dest_reg
2914                         = plus_constant (tem, INTVAL (XEXP (v->new_reg,1)));
2915
2916                       /* Only succeed if this will give valid addresses.
2917                          Try to validate both the first and the last
2918                          address resulting from loop unrolling, if
2919                          one fails, then can't do const elim here.  */
2920                       if (verify_addresses (v, giv_inc, unroll_number))
2921                         {
2922                           /* Save the negative of the eliminated const, so
2923                              that we can calculate the dest_reg's increment
2924                              value later.  */
2925                           v->const_adjust = - INTVAL (XEXP (v->new_reg, 1));
2926
2927                           v->new_reg = XEXP (v->new_reg, 0);
2928                           if (loop_dump_stream)
2929                             fprintf (loop_dump_stream,
2930                                      "Eliminating constant from giv %d\n",
2931                                      REGNO (tem));
2932                         }
2933                       else
2934                         v->dest_reg = tem;
2935                     }
2936                   else
2937                     v->dest_reg = tem;
2938
2939                   /* If the address hasn't been checked for validity yet, do so
2940                      now, and fail completely if either the first or the last
2941                      unrolled copy of the address is not a valid address
2942                      for the instruction that uses it.  */
2943                   if (v->dest_reg == tem
2944                       && ! verify_addresses (v, giv_inc, unroll_number))
2945                     {
2946                       for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2947                         if (v2->same_insn == v)
2948                           v2->same_insn = 0;
2949
2950                       if (loop_dump_stream)
2951                         fprintf (loop_dump_stream,
2952                                  "Invalid address for giv at insn %d\n",
2953                                  INSN_UID (v->insn));
2954                       continue;
2955                     }
2956
2957                   /* To initialize the new register, just move the value of
2958                      new_reg into it.  This is not guaranteed to give a valid
2959                      instruction on machines with complex addressing modes.
2960                      If we can't recognize it, then delete it and emit insns
2961                      to calculate the value from scratch.  */
2962                   emit_insn_before (gen_rtx_SET (VOIDmode, tem,
2963                                                  copy_rtx (v->new_reg)),
2964                                     loop_start);
2965                   if (recog_memoized (PREV_INSN (loop_start)) < 0)
2966                     {
2967                       rtx sequence, ret;
2968
2969                       /* We can't use bl->initial_value to compute the initial
2970                          value, because the loop may have been preconditioned.
2971                          We must calculate it from NEW_REG.  Try using
2972                          force_operand instead of emit_iv_add_mult.  */
2973                       delete_insn (PREV_INSN (loop_start));
2974
2975                       start_sequence ();
2976                       ret = force_operand (v->new_reg, tem);
2977                       if (ret != tem)
2978                         emit_move_insn (tem, ret);
2979                       sequence = gen_sequence ();
2980                       end_sequence ();
2981                       emit_insn_before (sequence, loop_start);
2982
2983                       if (loop_dump_stream)
2984                         fprintf (loop_dump_stream,
2985                                  "Invalid init insn, rewritten.\n");
2986                     }
2987                 }
2988               else
2989                 {
2990                   v->dest_reg = value;
2991
2992                   /* Check the resulting address for validity, and fail
2993                      if the resulting address would be invalid.  */
2994                   if (! verify_addresses (v, giv_inc, unroll_number))
2995                     {
2996                       for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2997                         if (v2->same_insn == v)
2998                           v2->same_insn = 0;
2999
3000                       if (loop_dump_stream)
3001                         fprintf (loop_dump_stream,
3002                                  "Invalid address for giv at insn %d\n",
3003                                  INSN_UID (v->insn));
3004                       continue;
3005                     }
3006                 }
3007
3008               /* Store the value of dest_reg into the insn.  This sharing
3009                  will not be a problem as this insn will always be copied
3010                  later.  */
3011
3012               *v->location = v->dest_reg;
3013
3014               /* If this address giv is combined with a dest reg giv, then
3015                  save the base giv's induction pointer so that we will be
3016                  able to handle this address giv properly.  The base giv
3017                  itself does not have to be splittable.  */
3018
3019               if (v->same && v->same->giv_type == DEST_REG)
3020                 addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
3021
3022               if (GET_CODE (v->new_reg) == REG)
3023                 {
3024                   /* This giv maybe hasn't been combined with any others.
3025                      Make sure that it's giv is marked as splittable here.  */
3026
3027                   splittable_regs[REGNO (v->new_reg)] = value;
3028
3029                   /* Make it appear to depend upon itself, so that the
3030                      giv will be properly split in the main loop above.  */
3031                   if (! v->same)
3032                     {
3033                       v->same = v;
3034                       addr_combined_regs[REGNO (v->new_reg)] = v;
3035                     }
3036                 }
3037
3038               if (loop_dump_stream)
3039                 fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
3040             }
3041         }
3042       else
3043         {
3044 #if 0
3045           /* Currently, unreduced giv's can't be split.  This is not too much
3046              of a problem since unreduced giv's are not live across loop
3047              iterations anyways.  When unrolling a loop completely though,
3048              it makes sense to reduce&split givs when possible, as this will
3049              result in simpler instructions, and will not require that a reg
3050              be live across loop iterations.  */
3051
3052           splittable_regs[REGNO (v->dest_reg)] = value;
3053           fprintf (stderr, "Giv %d at insn %d not reduced\n",
3054                    REGNO (v->dest_reg), INSN_UID (v->insn));
3055 #else
3056           continue;
3057 #endif
3058         }
3059
3060       /* Unreduced givs are only updated once by definition.  Reduced givs
3061          are updated as many times as their biv is.  Mark it so if this is
3062          a splittable register.  Don't need to do anything for address givs
3063          where this may not be a register.  */
3064
3065       if (GET_CODE (v->new_reg) == REG)
3066         {
3067           int count = 1;
3068           if (! v->ignore)
3069             count = reg_biv_class[REGNO (v->src_reg)]->biv_count;
3070
3071           splittable_regs_updates[REGNO (v->new_reg)] = count;
3072         }
3073
3074       result++;
3075
3076       if (loop_dump_stream)
3077         {
3078           int regnum;
3079
3080           if (GET_CODE (v->dest_reg) == CONST_INT)
3081             regnum = -1;
3082           else if (GET_CODE (v->dest_reg) != REG)
3083             regnum = REGNO (XEXP (v->dest_reg, 0));
3084           else
3085             regnum = REGNO (v->dest_reg);
3086           fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
3087                    regnum, INSN_UID (v->insn));
3088         }
3089     }
3090
3091   return result;
3092 }
3093 \f
3094 /* Try to prove that the register is dead after the loop exits.  Trace every
3095    loop exit looking for an insn that will always be executed, which sets
3096    the register to some value, and appears before the first use of the register
3097    is found.  If successful, then return 1, otherwise return 0.  */
3098
3099 /* ?? Could be made more intelligent in the handling of jumps, so that
3100    it can search past if statements and other similar structures.  */
3101
3102 static int
3103 reg_dead_after_loop (reg, loop_start, loop_end)
3104      rtx reg, loop_start, loop_end;
3105 {
3106   rtx insn, label;
3107   enum rtx_code code;
3108   int jump_count = 0;
3109   int label_count = 0;
3110   int this_loop_num = uid_loop_num[INSN_UID (loop_start)];
3111
3112   /* In addition to checking all exits of this loop, we must also check
3113      all exits of inner nested loops that would exit this loop.  We don't
3114      have any way to identify those, so we just give up if there are any
3115      such inner loop exits.  */
3116
3117   for (label = loop_number_exit_labels[this_loop_num]; label;
3118        label = LABEL_NEXTREF (label))
3119     label_count++;
3120
3121   if (label_count != loop_number_exit_count[this_loop_num])
3122     return 0;
3123
3124   /* HACK: Must also search the loop fall through exit, create a label_ref
3125      here which points to the loop_end, and append the loop_number_exit_labels
3126      list to it.  */
3127   label = gen_rtx_LABEL_REF (VOIDmode, loop_end);
3128   LABEL_NEXTREF (label) = loop_number_exit_labels[this_loop_num];
3129
3130   for ( ; label; label = LABEL_NEXTREF (label))
3131     {
3132       /* Succeed if find an insn which sets the biv or if reach end of
3133          function.  Fail if find an insn that uses the biv, or if come to
3134          a conditional jump.  */
3135
3136       insn = NEXT_INSN (XEXP (label, 0));
3137       while (insn)
3138         {
3139           code = GET_CODE (insn);
3140           if (GET_RTX_CLASS (code) == 'i')
3141             {
3142               rtx set;
3143
3144               if (reg_referenced_p (reg, PATTERN (insn)))
3145                 return 0;
3146
3147               set = single_set (insn);
3148               if (set && rtx_equal_p (SET_DEST (set), reg))
3149                 break;
3150             }
3151
3152           if (code == JUMP_INSN)
3153             {
3154               if (GET_CODE (PATTERN (insn)) == RETURN)
3155                 break;
3156               else if (! simplejump_p (insn)
3157                        /* Prevent infinite loop following infinite loops.  */
3158                        || jump_count++ > 20)
3159                 return 0;
3160               else
3161                 insn = JUMP_LABEL (insn);
3162             }
3163
3164           insn = NEXT_INSN (insn);
3165         }
3166     }
3167
3168   /* Success, the register is dead on all loop exits.  */
3169   return 1;
3170 }
3171
3172 /* Try to calculate the final value of the biv, the value it will have at
3173    the end of the loop.  If we can do it, return that value.  */
3174
3175 rtx
3176 final_biv_value (bl, loop_start, loop_end)
3177      struct iv_class *bl;
3178      rtx loop_start, loop_end;
3179 {
3180   rtx increment, tem;
3181
3182   /* ??? This only works for MODE_INT biv's.  Reject all others for now.  */
3183
3184   if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3185     return 0;
3186
3187   /* The final value for reversed bivs must be calculated differently than
3188       for ordinary bivs.  In this case, there is already an insn after the
3189      loop which sets this biv's final value (if necessary), and there are
3190      no other loop exits, so we can return any value.  */
3191   if (bl->reversed)
3192     {
3193       if (loop_dump_stream)
3194         fprintf (loop_dump_stream,
3195                  "Final biv value for %d, reversed biv.\n", bl->regno);
3196
3197       return const0_rtx;
3198     }
3199
3200   /* Try to calculate the final value as initial value + (number of iterations
3201      * increment).  For this to work, increment must be invariant, the only
3202      exit from the loop must be the fall through at the bottom (otherwise
3203      it may not have its final value when the loop exits), and the initial
3204      value of the biv must be invariant.  */
3205
3206   if (loop_n_iterations != 0
3207       && ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
3208       && invariant_p (bl->initial_value))
3209     {
3210       increment = biv_total_increment (bl, loop_start, loop_end);
3211
3212       if (increment && invariant_p (increment))
3213         {
3214           /* Can calculate the loop exit value, emit insns after loop
3215              end to calculate this value into a temporary register in
3216              case it is needed later.  */
3217
3218           tem = gen_reg_rtx (bl->biv->mode);
3219           record_base_value (REGNO (tem), bl->biv->add_val);
3220           /* Make sure loop_end is not the last insn.  */
3221           if (NEXT_INSN (loop_end) == 0)
3222             emit_note_after (NOTE_INSN_DELETED, loop_end);
3223           emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3224                             bl->initial_value, tem, NEXT_INSN (loop_end));
3225
3226           if (loop_dump_stream)
3227             fprintf (loop_dump_stream,
3228                      "Final biv value for %d, calculated.\n", bl->regno);
3229
3230           return tem;
3231         }
3232     }
3233
3234   /* Check to see if the biv is dead at all loop exits.  */
3235   if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
3236     {
3237       if (loop_dump_stream)
3238         fprintf (loop_dump_stream,
3239                  "Final biv value for %d, biv dead after loop exit.\n",
3240                  bl->regno);
3241
3242       return const0_rtx;
3243     }
3244
3245   return 0;
3246 }
3247
3248 /* Try to calculate the final value of the giv, the value it will have at
3249    the end of the loop.  If we can do it, return that value.  */
3250
3251 rtx
3252 final_giv_value (v, loop_start, loop_end)
3253      struct induction *v;
3254      rtx loop_start, loop_end;
3255 {
3256   struct iv_class *bl;
3257   rtx insn;
3258   rtx increment, tem;
3259   rtx insert_before, seq;
3260
3261   bl = reg_biv_class[REGNO (v->src_reg)];
3262
3263   /* The final value for givs which depend on reversed bivs must be calculated
3264      differently than for ordinary givs.  In this case, there is already an
3265      insn after the loop which sets this giv's final value (if necessary),
3266      and there are no other loop exits, so we can return any value.  */
3267   if (bl->reversed)
3268     {
3269       if (loop_dump_stream)
3270         fprintf (loop_dump_stream,
3271                  "Final giv value for %d, depends on reversed biv\n",
3272                  REGNO (v->dest_reg));
3273       return const0_rtx;
3274     }
3275
3276   /* Try to calculate the final value as a function of the biv it depends
3277      upon.  The only exit from the loop must be the fall through at the bottom
3278      (otherwise it may not have its final value when the loop exits).  */
3279
3280   /* ??? Can calculate the final giv value by subtracting off the
3281      extra biv increments times the giv's mult_val.  The loop must have
3282      only one exit for this to work, but the loop iterations does not need
3283      to be known.  */
3284
3285   if (loop_n_iterations != 0
3286       && ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
3287     {
3288       /* ?? It is tempting to use the biv's value here since these insns will
3289          be put after the loop, and hence the biv will have its final value
3290          then.  However, this fails if the biv is subsequently eliminated.
3291          Perhaps determine whether biv's are eliminable before trying to
3292          determine whether giv's are replaceable so that we can use the
3293          biv value here if it is not eliminable.  */
3294
3295       /* We are emitting code after the end of the loop, so we must make
3296          sure that bl->initial_value is still valid then.  It will still
3297          be valid if it is invariant.  */
3298
3299       increment = biv_total_increment (bl, loop_start, loop_end);
3300
3301       if (increment && invariant_p (increment)
3302           && invariant_p (bl->initial_value))
3303         {
3304           /* Can calculate the loop exit value of its biv as
3305              (loop_n_iterations * increment) + initial_value */
3306
3307           /* The loop exit value of the giv is then
3308              (final_biv_value - extra increments) * mult_val + add_val.
3309              The extra increments are any increments to the biv which
3310              occur in the loop after the giv's value is calculated.
3311              We must search from the insn that sets the giv to the end
3312              of the loop to calculate this value.  */
3313
3314           insert_before = NEXT_INSN (loop_end);
3315
3316           /* Put the final biv value in tem.  */
3317           tem = gen_reg_rtx (bl->biv->mode);
3318           record_base_value (REGNO (tem), bl->biv->add_val);
3319           emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3320                             bl->initial_value, tem, insert_before);
3321
3322           /* Subtract off extra increments as we find them.  */
3323           for (insn = NEXT_INSN (v->insn); insn != loop_end;
3324                insn = NEXT_INSN (insn))
3325             {
3326               struct induction *biv;
3327
3328               for (biv = bl->biv; biv; biv = biv->next_iv)
3329                 if (biv->insn == insn)
3330                   {
3331                     start_sequence ();
3332                     tem = expand_binop (GET_MODE (tem), sub_optab, tem,
3333                                         biv->add_val, NULL_RTX, 0,
3334                                         OPTAB_LIB_WIDEN);
3335                     seq = gen_sequence ();
3336                     end_sequence ();
3337                     emit_insn_before (seq, insert_before);
3338                   }
3339             }
3340
3341           /* Now calculate the giv's final value.  */
3342           emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
3343                             insert_before);
3344
3345           if (loop_dump_stream)
3346             fprintf (loop_dump_stream,
3347                      "Final giv value for %d, calc from biv's value.\n",
3348                      REGNO (v->dest_reg));
3349
3350           return tem;
3351         }
3352     }
3353
3354   /* Replaceable giv's should never reach here.  */
3355   if (v->replaceable)
3356     abort ();
3357
3358   /* Check to see if the biv is dead at all loop exits.  */
3359   if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
3360     {
3361       if (loop_dump_stream)
3362         fprintf (loop_dump_stream,
3363                  "Final giv value for %d, giv dead after loop exit.\n",
3364                  REGNO (v->dest_reg));
3365
3366       return const0_rtx;
3367     }
3368
3369   return 0;
3370 }
3371
3372
3373 /* Calculate the number of loop iterations.  Returns the exact number of loop
3374    iterations if it can be calculated, otherwise returns zero.  */
3375
3376 unsigned HOST_WIDE_INT
3377 loop_iterations (loop_start, loop_end)
3378      rtx loop_start, loop_end;
3379 {
3380   rtx comparison, comparison_value;
3381   rtx iteration_var, initial_value, increment, final_value;
3382   enum rtx_code comparison_code;
3383   HOST_WIDE_INT i;
3384   int increment_dir;
3385   int unsigned_compare, compare_dir, final_larger;
3386   unsigned long tempu;
3387   rtx last_loop_insn;
3388
3389   /* First find the iteration variable.  If the last insn is a conditional
3390      branch, and the insn before tests a register value, make that the
3391      iteration variable.  */
3392
3393   loop_initial_value = 0;
3394   loop_increment = 0;
3395   loop_final_value = 0;
3396   loop_iteration_var = 0;
3397
3398   /* We used to use pren_nonnote_insn here, but that fails because it might
3399      accidentally get the branch for a contained loop if the branch for this
3400      loop was deleted.  We can only trust branches immediately before the
3401      loop_end.  */
3402   last_loop_insn = PREV_INSN (loop_end);
3403
3404   comparison = get_condition_for_loop (last_loop_insn);
3405   if (comparison == 0)
3406     {
3407       if (loop_dump_stream)
3408         fprintf (loop_dump_stream,
3409                  "Loop unrolling: No final conditional branch found.\n");
3410       return 0;
3411     }
3412
3413   /* ??? Get_condition may switch position of induction variable and
3414      invariant register when it canonicalizes the comparison.  */
3415
3416   comparison_code = GET_CODE (comparison);
3417   iteration_var = XEXP (comparison, 0);
3418   comparison_value = XEXP (comparison, 1);
3419
3420   if (GET_CODE (iteration_var) != REG)
3421     {
3422       if (loop_dump_stream)
3423         fprintf (loop_dump_stream,
3424                  "Loop unrolling: Comparison not against register.\n");
3425       return 0;
3426     }
3427
3428   /* Loop iterations is always called before any new registers are created
3429      now, so this should never occur.  */
3430
3431   if (REGNO (iteration_var) >= max_reg_before_loop)
3432     abort ();
3433
3434   iteration_info (iteration_var, &initial_value, &increment,
3435                   loop_start, loop_end);
3436   if (initial_value == 0)
3437     /* iteration_info already printed a message.  */
3438     return 0;
3439
3440   /* If the comparison value is an invariant register, then try to find
3441      its value from the insns before the start of the loop.  */
3442
3443   if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
3444     {
3445       rtx insn, set;
3446
3447       for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
3448         {
3449           if (GET_CODE (insn) == CODE_LABEL)
3450             break;
3451
3452           else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
3453                    && reg_set_p (comparison_value, insn))
3454             {
3455               /* We found the last insn before the loop that sets the register.
3456                  If it sets the entire register, and has a REG_EQUAL note,
3457                  then use the value of the REG_EQUAL note.  */
3458               if ((set = single_set (insn))
3459                   && (SET_DEST (set) == comparison_value))
3460                 {
3461                   rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3462
3463                   /* Only use the REG_EQUAL note if it is a constant.
3464                      Other things, divide in particular, will cause
3465                      problems later if we use them.  */
3466                   if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3467                       && CONSTANT_P (XEXP (note, 0)))
3468                     comparison_value = XEXP (note, 0);
3469                 }
3470               break;
3471             }
3472         }
3473     }
3474
3475   final_value = approx_final_value (comparison_code, comparison_value,
3476                                     &unsigned_compare, &compare_dir);
3477
3478   /* Save the calculated values describing this loop's bounds, in case
3479      precondition_loop_p will need them later.  These values can not be
3480      recalculated inside precondition_loop_p because strength reduction
3481      optimizations may obscure the loop's structure.  */
3482
3483   loop_iteration_var = iteration_var;
3484   loop_initial_value = initial_value;
3485   loop_increment = increment;
3486   loop_final_value = final_value;
3487   loop_comparison_code = comparison_code;
3488
3489   if (increment == 0)
3490     {
3491       if (loop_dump_stream)
3492         fprintf (loop_dump_stream,
3493                  "Loop unrolling: Increment value can't be calculated.\n");
3494       return 0;
3495     }
3496   else if (GET_CODE (increment) != CONST_INT)
3497     {
3498       if (loop_dump_stream)
3499         fprintf (loop_dump_stream,
3500                  "Loop unrolling: Increment value not constant.\n");
3501       return 0;
3502     }
3503   else if (GET_CODE (initial_value) != CONST_INT)
3504     {
3505       if (loop_dump_stream)
3506         fprintf (loop_dump_stream,
3507                  "Loop unrolling: Initial value not constant.\n");
3508       return 0;
3509     }
3510   else if (final_value == 0)
3511     {
3512       if (loop_dump_stream)
3513         fprintf (loop_dump_stream,
3514                  "Loop unrolling: EQ comparison loop.\n");
3515       return 0;
3516     }
3517   else if (GET_CODE (final_value) != CONST_INT)
3518     {
3519       if (loop_dump_stream)
3520         fprintf (loop_dump_stream,
3521                  "Loop unrolling: Final value not constant.\n");
3522       return 0;
3523     }
3524
3525   /* ?? Final value and initial value do not have to be constants.
3526      Only their difference has to be constant.  When the iteration variable
3527      is an array address, the final value and initial value might both
3528      be addresses with the same base but different constant offsets.
3529      Final value must be invariant for this to work.
3530
3531      To do this, need some way to find the values of registers which are
3532      invariant.  */
3533
3534   /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1.  */
3535   if (unsigned_compare)
3536     final_larger
3537       = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3538          > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3539         - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3540            < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3541   else
3542     final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3543       - (INTVAL (final_value) < INTVAL (initial_value));
3544
3545   if (INTVAL (increment) > 0)
3546     increment_dir = 1;
3547   else if (INTVAL (increment) == 0)
3548     increment_dir = 0;
3549   else
3550     increment_dir = -1;
3551
3552   /* There are 27 different cases: compare_dir = -1, 0, 1;
3553      final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3554      There are 4 normal cases, 4 reverse cases (where the iteration variable
3555      will overflow before the loop exits), 4 infinite loop cases, and 15
3556      immediate exit (0 or 1 iteration depending on loop type) cases.
3557      Only try to optimize the normal cases.  */
3558
3559   /* (compare_dir/final_larger/increment_dir)
3560      Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3561      Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3562      Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3563      Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
3564
3565   /* ?? If the meaning of reverse loops (where the iteration variable
3566      will overflow before the loop exits) is undefined, then could
3567      eliminate all of these special checks, and just always assume
3568      the loops are normal/immediate/infinite.  Note that this means
3569      the sign of increment_dir does not have to be known.  Also,
3570      since it does not really hurt if immediate exit loops or infinite loops
3571      are optimized, then that case could be ignored also, and hence all
3572      loops can be optimized.
3573
3574      According to ANSI Spec, the reverse loop case result is undefined,
3575      because the action on overflow is undefined.
3576
3577      See also the special test for NE loops below.  */
3578
3579   if (final_larger == increment_dir && final_larger != 0
3580       && (final_larger == compare_dir || compare_dir == 0))
3581     /* Normal case.  */
3582     ;
3583   else
3584     {
3585       if (loop_dump_stream)
3586         fprintf (loop_dump_stream,
3587                  "Loop unrolling: Not normal loop.\n");
3588       return 0;
3589     }
3590
3591   /* Calculate the number of iterations, final_value is only an approximation,
3592      so correct for that.  Note that tempu and loop_n_iterations are
3593      unsigned, because they can be as large as 2^n - 1.  */
3594
3595   i = INTVAL (increment);
3596   if (i > 0)
3597     tempu = INTVAL (final_value) - INTVAL (initial_value);
3598   else if (i < 0)
3599     {
3600       tempu = INTVAL (initial_value) - INTVAL (final_value);
3601       i = -i;
3602     }
3603   else
3604     abort ();
3605
3606   /* For NE tests, make sure that the iteration variable won't miss the
3607      final value.  If tempu mod i is not zero, then the iteration variable
3608      will overflow before the loop exits, and we can not calculate the
3609      number of iterations.  */
3610   if (compare_dir == 0 && (tempu % i) != 0)
3611     return 0;
3612
3613   return tempu / i + ((tempu % i) != 0);
3614 }
3615
3616 /* Replace uses of split bivs with their split pseudo register.  This is
3617    for original instructions which remain after loop unrolling without
3618    copying.  */
3619
3620 static rtx
3621 remap_split_bivs (x)
3622      rtx x;
3623 {
3624   register enum rtx_code code;
3625   register int i;
3626   register char *fmt;
3627
3628   if (x == 0)
3629     return x;
3630
3631   code = GET_CODE (x);
3632   switch (code)
3633     {
3634     case SCRATCH:
3635     case PC:
3636     case CC0:
3637     case CONST_INT:
3638     case CONST_DOUBLE:
3639     case CONST:
3640     case SYMBOL_REF:
3641     case LABEL_REF:
3642       return x;
3643
3644     case REG:
3645 #if 0
3646       /* If non-reduced/final-value givs were split, then this would also
3647          have to remap those givs also.  */
3648 #endif
3649       if (REGNO (x) < max_reg_before_loop
3650           && reg_iv_type[REGNO (x)] == BASIC_INDUCT)
3651         return reg_biv_class[REGNO (x)]->biv->src_reg;
3652       break;
3653
3654     default:
3655       break;
3656     }
3657
3658   fmt = GET_RTX_FORMAT (code);
3659   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3660     {
3661       if (fmt[i] == 'e')
3662         XEXP (x, i) = remap_split_bivs (XEXP (x, i));
3663       if (fmt[i] == 'E')
3664         {
3665           register int j;
3666           for (j = 0; j < XVECLEN (x, i); j++)
3667             XVECEXP (x, i, j) = remap_split_bivs (XVECEXP (x, i, j));
3668         }
3669     }
3670   return x;
3671 }
3672
3673 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
3674    FIST_UID is always executed if LAST_UID is), then return 1.  Otherwise
3675    return 0.  COPY_START is where we can start looking for the insns
3676    FIRST_UID and LAST_UID.  COPY_END is where we stop looking for these
3677    insns.
3678
3679    If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
3680    must dominate LAST_UID.
3681
3682    If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
3683    may not dominate LAST_UID.
3684
3685    If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
3686    must dominate LAST_UID.  */
3687
3688 int
3689 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
3690      int regno;
3691      int first_uid;
3692      int last_uid;
3693      rtx copy_start;
3694      rtx copy_end;
3695 {
3696   int passed_jump = 0;
3697   rtx p = NEXT_INSN (copy_start);
3698
3699   while (INSN_UID (p) != first_uid)
3700     {
3701       if (GET_CODE (p) == JUMP_INSN)
3702         passed_jump= 1;
3703       /* Could not find FIRST_UID.  */
3704       if (p == copy_end)
3705         return 0;
3706       p = NEXT_INSN (p);
3707     }
3708
3709   /* Verify that FIRST_UID is an insn that entirely sets REGNO.  */
3710   if (GET_RTX_CLASS (GET_CODE (p)) != 'i'
3711       || ! dead_or_set_regno_p (p, regno))
3712     return 0;
3713
3714   /* FIRST_UID is always executed.  */
3715   if (passed_jump == 0)
3716     return 1;
3717
3718   while (INSN_UID (p) != last_uid)
3719     {
3720       /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
3721          can not be sure that FIRST_UID dominates LAST_UID.  */
3722       if (GET_CODE (p) == CODE_LABEL)
3723         return 0;
3724       /* Could not find LAST_UID, but we reached the end of the loop, so
3725          it must be safe.  */
3726       else if (p == copy_end)
3727         return 1;
3728       p = NEXT_INSN (p);
3729     }
3730
3731   /* FIRST_UID is always executed if LAST_UID is executed.  */
3732   return 1;
3733 }