gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "params.h"
 141 #include "toplev.h"
 142 #include "tree-chrec.h"
 143 #include "tree-data-ref.h"
 144 #include "tree-scalar-evolution.h"
 145 #include "input.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148
 149 /*************************************************************************
 150   Simple Loop Peeling Utilities
 151  *************************************************************************/
 152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 153   (struct loop *, struct loops *, edge);
 154 static void slpeel_update_phis_for_duplicate_loop
 155   (struct loop *, struct loop *, bool after);
 156 static void slpeel_update_phi_nodes_for_guard1
 157   (edge, struct loop *, bool, basic_block *, bitmap *);
 158 static void slpeel_update_phi_nodes_for_guard2
 159   (edge, struct loop *, bool, basic_block *);
 160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 161
 162 static void rename_use_op (use_operand_p);
 163 static void rename_variables_in_bb (basic_block);
 164 static void rename_variables_in_loop (struct loop *);
 165
 166 /*************************************************************************
 167   General Vectorization Utilities
 168  *************************************************************************/
 169 static void vect_set_dump_settings (void);
 170
 171 /* vect_dump will be set to stderr or dump_file if exist.  */
 172 FILE *vect_dump;
 173
 174 /* vect_verbosity_level set to an invalid value
 175    to mark that it's uninitialized.  */
 176 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 177
 178 /* Number of loops, at the beginning of vectorization.  */
 179 unsigned int vect_loops_num;
 180
 181 /* Loop location.  */
 182 static LOC vect_loop_location;
 183
 184 /* Bitmap of virtual variables to be renamed.  */
 185 bitmap vect_vnames_to_rename;
 186 \f
 187 /*************************************************************************
 188   Simple Loop Peeling Utilities
 189
 190   Utilities to support loop peeling for vectorization purposes.
 191  *************************************************************************/
 192
 193
 194 /* Renames the use *OP_P.  */
 195
 196 static void
 197 rename_use_op (use_operand_p op_p)
 198 {
 199   tree new_name;
 200
 201   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 202     return;
 203
 204   new_name = get_current_def (USE_FROM_PTR (op_p));
 205
 206   /* Something defined outside of the loop.  */
 207   if (!new_name)
 208     return;
 209
 210   /* An ordinary ssa name defined in the loop.  */
 211
 212   SET_USE (op_p, new_name);
 213 }
 214
 215
 216 /* Renames the variables in basic block BB.  */
 217
 218 static void
 219 rename_variables_in_bb (basic_block bb)
 220 {
 221   tree phi;
 222   block_stmt_iterator bsi;
 223   tree stmt;
 224   use_operand_p use_p;
 225   ssa_op_iter iter;
 226   edge e;
 227   edge_iterator ei;
 228   struct loop *loop = bb->loop_father;
 229
 230   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 231     {
 232       stmt = bsi_stmt (bsi);
 233       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 234                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 235         rename_use_op (use_p);
 236     }
 237
 238   FOR_EACH_EDGE (e, ei, bb->succs)
 239     {
 240       if (!flow_bb_inside_loop_p (loop, e->dest))
 241         continue;
 242       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 243         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 244     }
 245 }
 246
 247
 248 /* Renames variables in new generated LOOP.  */
 249
 250 static void
 251 rename_variables_in_loop (struct loop *loop)
 252 {
 253   unsigned i;
 254   basic_block *bbs;
 255
 256   bbs = get_loop_body (loop);
 257
 258   for (i = 0; i < loop->num_nodes; i++)
 259     rename_variables_in_bb (bbs[i]);
 260
 261   free (bbs);
 262 }
 263
 264
 265 /* Update the PHI nodes of NEW_LOOP.
 266
 267    NEW_LOOP is a duplicate of ORIG_LOOP.
 268    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 269    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 270    executes before it.  */
 271
 272 static void
 273 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 274                                        struct loop *new_loop, bool after)
 275 {
 276   tree new_ssa_name;
 277   tree phi_new, phi_orig;
 278   tree def;
 279   edge orig_loop_latch = loop_latch_edge (orig_loop);
 280   edge orig_entry_e = loop_preheader_edge (orig_loop);
 281   edge new_loop_exit_e = new_loop->single_exit;
 282   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 283   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 284
 285   /*
 286      step 1. For each loop-header-phi:
 287              Add the first phi argument for the phi in NEW_LOOP
 288             (the one associated with the entry of NEW_LOOP)
 289
 290      step 2. For each loop-header-phi:
 291              Add the second phi argument for the phi in NEW_LOOP
 292             (the one associated with the latch of NEW_LOOP)
 293
 294      step 3. Update the phis in the successor block of NEW_LOOP.
 295
 296         case 1: NEW_LOOP was placed before ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 298                 Updating the phis in the successor block can therefore be done
 299                 along with the scanning of the loop header phis, because the
 300                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 301                 phi nodes, organized in the same order.
 302
 303         case 2: NEW_LOOP was placed after ORIG_LOOP:
 304                 The successor block of NEW_LOOP is the original exit block of
 305                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 306                 We postpone updating these phis to a later stage (when
 307                 loop guards are added).
 308    */
 309
 310
 311   /* Scan the phis in the headers of the old and new loops
 312      (they are organized in exactly the same order).  */
 313
 314   for (phi_new = phi_nodes (new_loop->header),
 315        phi_orig = phi_nodes (orig_loop->header);
 316        phi_new && phi_orig;
 317        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 318     {
 319       /* step 1.  */
 320       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 321       add_phi_arg (phi_new, def, new_loop_entry_e);
 322
 323       /* step 2.  */
 324       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 325       if (TREE_CODE (def) != SSA_NAME)
 326         continue;
 327
 328       new_ssa_name = get_current_def (def);
 329       if (!new_ssa_name)
 330         {
 331           /* This only happens if there are no definitions
 332              inside the loop. use the phi_result in this case.  */
 333           new_ssa_name = PHI_RESULT (phi_new);
 334         }
 335
 336       /* An ordinary ssa name defined in the loop.  */
 337       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 338
 339       /* step 3 (case 1).  */
 340       if (!after)
 341         {
 342           gcc_assert (new_loop_exit_e == orig_entry_e);
 343           SET_PHI_ARG_DEF (phi_orig,
 344                            new_loop_exit_e->dest_idx,
 345                            new_ssa_name);
 346         }
 347     }
 348 }
 349
 350
 351 /* Update PHI nodes for a guard of the LOOP.
 352
 353    Input:
 354    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 355         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 356         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 357         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 358         We denote this bb NEW_MERGE_BB because before the guard code was added
 359         it had a single predecessor (the LOOP header), and now it became a merge
 360         point of two paths - the path that ends with the LOOP exit-edge, and
 361         the path that ends with GUARD_EDGE.
 362    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 363         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 364
 365    ===> The CFG before the guard-code was added:
 366         LOOP_header_bb:
 367           loop_body
 368           if (exit_loop) goto update_bb
 369           else           goto LOOP_header_bb
 370         update_bb:
 371
 372    ==> The CFG after the guard-code was added:
 373         guard_bb:
 374           if (LOOP_guard_condition) goto new_merge_bb
 375           else                      goto LOOP_header_bb
 376         LOOP_header_bb:
 377           loop_body
 378           if (exit_loop_condition) goto new_merge_bb
 379           else                     goto LOOP_header_bb
 380         new_merge_bb:
 381           goto update_bb
 382         update_bb:
 383
 384    ==> The CFG after this function:
 385         guard_bb:
 386           if (LOOP_guard_condition) goto new_merge_bb
 387           else                      goto LOOP_header_bb
 388         LOOP_header_bb:
 389           loop_body
 390           if (exit_loop_condition) goto new_exit_bb
 391           else                     goto LOOP_header_bb
 392         new_exit_bb:
 393         new_merge_bb:
 394           goto update_bb
 395         update_bb:
 396
 397    This function:
 398    1. creates and updates the relevant phi nodes to account for the new
 399       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 400       1.1. Create phi nodes at NEW_MERGE_BB.
 401       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 402            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 403    2. preserves loop-closed-ssa-form by creating the required phi nodes
 404       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 405
 406    There are two flavors to this function:
 407
 408    slpeel_update_phi_nodes_for_guard1:
 409      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 410      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 411      for variables that have phis in the loop header.
 412
 413    slpeel_update_phi_nodes_for_guard2:
 414      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 415      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 416      for variables that have phis in the loop exit.
 417
 418    I.E., the overall structure is:
 419
 420         loop1_preheader_bb:
 421                 guard1 (goto loop1/merg1_bb)
 422         loop1
 423         loop1_exit_bb:
 424                 guard2 (goto merge1_bb/merge2_bb)
 425         merge1_bb
 426         loop2
 427         loop2_exit_bb
 428         merge2_bb
 429         next_bb
 430
 431    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 432    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 433    that have phis in loop1->header).
 434
 435    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 436    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 437    that have phis in next_bb). It also adds some of these phis to
 438    loop1_exit_bb.
 439
 440    slpeel_update_phi_nodes_for_guard1 is always called before
 441    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 442    to create correct data-flow and loop-closed-ssa-form.
 443
 444    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 445    that change between iterations of a loop (and therefore have a phi-node
 446    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 447    phis for variables that are used out of the loop (and therefore have
 448    loop-closed exit phis). Some variables may be both updated between
 449    iterations and used after the loop. This is why in loop1_exit_bb we
 450    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 451    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 452
 453    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 454      an original loop. i.e., we have:
 455
 456            orig_loop
 457            guard_bb (goto LOOP/new_merge)
 458            new_loop <-- LOOP
 459            new_exit
 460            new_merge
 461            next_bb
 462
 463      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 464      have:
 465
 466            new_loop
 467            guard_bb (goto LOOP/new_merge)
 468            orig_loop <-- LOOP
 469            new_exit
 470            new_merge
 471            next_bb
 472
 473      The SSA names defined in the original loop have a current
 474      reaching definition that that records the corresponding new
 475      ssa-name used in the new duplicated loop copy.
 476   */
 477
 478 /* Function slpeel_update_phi_nodes_for_guard1
 479
 480    Input:
 481    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 482    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 483             information.
 484
 485    In the context of the overall structure, we have:
 486
 487         loop1_preheader_bb:
 488                 guard1 (goto loop1/merg1_bb)
 489 LOOP->  loop1
 490         loop1_exit_bb:
 491                 guard2 (goto merge1_bb/merge2_bb)
 492         merge1_bb
 493         loop2
 494         loop2_exit_bb
 495         merge2_bb
 496         next_bb
 497
 498    For each name updated between loop iterations (i.e - for each name that has
 499    an entry (loop-header) phi in LOOP) we create a new phi in:
 500    1. merge1_bb (to account for the edge from guard1)
 501    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 502 */
 503
 504 static void
 505 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 506                                     bool is_new_loop, basic_block *new_exit_bb,
 507                                     bitmap *defs)
 508 {
 509   tree orig_phi, new_phi;
 510   tree update_phi, update_phi2;
 511   tree guard_arg, loop_arg;
 512   basic_block new_merge_bb = guard_edge->dest;
 513   edge e = EDGE_SUCC (new_merge_bb, 0);
 514   basic_block update_bb = e->dest;
 515   basic_block orig_bb = loop->header;
 516   edge new_exit_e;
 517   tree current_new_name;
 518   tree name;
 519
 520   /* Create new bb between loop and new_merge_bb.  */
 521   *new_exit_bb = split_edge (loop->single_exit);
 522   add_bb_to_loop (*new_exit_bb, loop->outer);
 523
 524   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 525
 526   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 527        orig_phi && update_phi;
 528        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 529     {
 530       /* Virtual phi; Mark it for renaming. We actually want to call
 531          mar_sym_for_renaming, but since all ssa renaming datastructures
 532          are going to be freed before we get to call ssa_upate, we just
 533          record this name for now in a bitmap, and will mark it for
 534          renaming later.  */
 535       name = PHI_RESULT (orig_phi);
 536       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 537         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
 538
 539       /** 1. Handle new-merge-point phis  **/
 540
 541       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 542       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 543                                  new_merge_bb);
 544
 545       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 546             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 547       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 548       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 549
 550       add_phi_arg (new_phi, loop_arg, new_exit_e);
 551       add_phi_arg (new_phi, guard_arg, guard_edge);
 552
 553       /* 1.3. Update phi in successor block.  */
 554       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 555                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 556       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 557       update_phi2 = new_phi;
 558
 559
 560       /** 2. Handle loop-closed-ssa-form phis  **/
 561
 562       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 563       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 564                                  *new_exit_bb);
 565
 566       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 567       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 568
 569       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 570       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 571       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 572
 573       /* 2.4. Record the newly created name with set_current_def.
 574          We want to find a name such that
 575                 name = get_current_def (orig_loop_name)
 576          and to set its current definition as follows:
 577                 set_current_def (name, new_phi_name)
 578
 579          If LOOP is a new loop then loop_arg is already the name we're
 580          looking for. If LOOP is the original loop, then loop_arg is
 581          the orig_loop_name and the relevant name is recorded in its
 582          current reaching definition.  */
 583       if (is_new_loop)
 584         current_new_name = loop_arg;
 585       else
 586         {
 587           current_new_name = get_current_def (loop_arg);
 588           /* current_def is not available only if the variable does not
 589              change inside the loop, in which case we also don't care
 590              about recording a current_def for it because we won't be
 591              trying to create loop-exit-phis for it.  */
 592           if (!current_new_name)
 593             continue;
 594         }
 595       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 596
 597       set_current_def (current_new_name, PHI_RESULT (new_phi));
 598       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 599     }
 600
 601   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 602 }
 603
 604
 605 /* Function slpeel_update_phi_nodes_for_guard2
 606
 607    Input:
 608    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 609
 610    In the context of the overall structure, we have:
 611
 612         loop1_preheader_bb:
 613                 guard1 (goto loop1/merg1_bb)
 614         loop1
 615         loop1_exit_bb:
 616                 guard2 (goto merge1_bb/merge2_bb)
 617         merge1_bb
 618 LOOP->  loop2
 619         loop2_exit_bb
 620         merge2_bb
 621         next_bb
 622
 623    For each name used out side the loop (i.e - for each name that has an exit
 624    phi in next_bb) we create a new phi in:
 625    1. merge2_bb (to account for the edge from guard_bb)
 626    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 627    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 628       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 629 */
 630
 631 static void
 632 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 633                                     bool is_new_loop, basic_block *new_exit_bb)
 634 {
 635   tree orig_phi, new_phi;
 636   tree update_phi, update_phi2;
 637   tree guard_arg, loop_arg;
 638   basic_block new_merge_bb = guard_edge->dest;
 639   edge e = EDGE_SUCC (new_merge_bb, 0);
 640   basic_block update_bb = e->dest;
 641   edge new_exit_e;
 642   tree orig_def, orig_def_new_name;
 643   tree new_name, new_name2;
 644   tree arg;
 645
 646   /* Create new bb between loop and new_merge_bb.  */
 647   *new_exit_bb = split_edge (loop->single_exit);
 648   add_bb_to_loop (*new_exit_bb, loop->outer);
 649
 650   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 651
 652   for (update_phi = phi_nodes (update_bb); update_phi;
 653        update_phi = PHI_CHAIN (update_phi))
 654     {
 655       orig_phi = update_phi;
 656       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 657       /* This loop-closed-phi actually doesn't represent a use
 658          out of the loop - the phi arg is a constant.  */
 659       if (TREE_CODE (orig_def) != SSA_NAME)
 660         continue;
 661       orig_def_new_name = get_current_def (orig_def);
 662       arg = NULL_TREE;
 663
 664       /** 1. Handle new-merge-point phis  **/
 665
 666       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 667       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 668                                  new_merge_bb);
 669
 670       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 671             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 672       new_name = orig_def;
 673       new_name2 = NULL_TREE;
 674       if (orig_def_new_name)
 675         {
 676           new_name = orig_def_new_name;
 677           /* Some variables have both loop-entry-phis and loop-exit-phis.
 678              Such variables were given yet newer names by phis placed in
 679              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 680              new_name2 = get_current_def (get_current_def (orig_name)).  */
 681           new_name2 = get_current_def (new_name);
 682         }
 683
 684       if (is_new_loop)
 685         {
 686           guard_arg = orig_def;
 687           loop_arg = new_name;
 688         }
 689       else
 690         {
 691           guard_arg = new_name;
 692           loop_arg = orig_def;
 693         }
 694       if (new_name2)
 695         guard_arg = new_name2;
 696
 697       add_phi_arg (new_phi, loop_arg, new_exit_e);
 698       add_phi_arg (new_phi, guard_arg, guard_edge);
 699
 700       /* 1.3. Update phi in successor block.  */
 701       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 702       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 703       update_phi2 = new_phi;
 704
 705
 706       /** 2. Handle loop-closed-ssa-form phis  **/
 707
 708       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 709       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 710                                  *new_exit_bb);
 711
 712       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 713       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 714
 715       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 716       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 717       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 718
 719
 720       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 721
 722       /* 3.1. Find the relevant names that need an exit-phi in
 723          GUARD_BB, i.e. names for which
 724          slpeel_update_phi_nodes_for_guard1 had not already created a
 725          phi node. This is the case for names that are used outside
 726          the loop (and therefore need an exit phi) but are not updated
 727          across loop iterations (and therefore don't have a
 728          loop-header-phi).
 729
 730          slpeel_update_phi_nodes_for_guard1 is responsible for
 731          creating loop-exit phis in GUARD_BB for names that have a
 732          loop-header-phi.  When such a phi is created we also record
 733          the new name in its current definition.  If this new name
 734          exists, then guard_arg was set to this new name (see 1.2
 735          above).  Therefore, if guard_arg is not this new name, this
 736          is an indication that an exit-phi in GUARD_BB was not yet
 737          created, so we take care of it here.  */
 738       if (guard_arg == new_name2)
 739         continue;
 740       arg = guard_arg;
 741
 742       /* 3.2. Generate new phi node in GUARD_BB:  */
 743       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 744                                  guard_edge->src);
 745
 746       /* 3.3. GUARD_BB has one incoming edge:  */
 747       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 748       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 749
 750       /* 3.4. Update phi in successor of GUARD_BB:  */
 751       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 752                                                                 == guard_arg);
 753       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 754     }
 755
 756   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 757 }
 758
 759
 760 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 761    that starts at zero, increases by one and its limit is NITERS.
 762
 763    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 764
 765 void
 766 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 767 {
 768   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 769   tree orig_cond;
 770   edge exit_edge = loop->single_exit;
 771   block_stmt_iterator loop_cond_bsi;
 772   block_stmt_iterator incr_bsi;
 773   bool insert_after;
 774   tree begin_label = tree_block_label (loop->latch);
 775   tree exit_label = tree_block_label (loop->single_exit->dest);
 776   tree init = build_int_cst (TREE_TYPE (niters), 0);
 777   tree step = build_int_cst (TREE_TYPE (niters), 1);
 778   tree then_label;
 779   tree else_label;
 780   LOC loop_loc;
 781
 782   orig_cond = get_loop_exit_condition (loop);
 783   gcc_assert (orig_cond);
 784   loop_cond_bsi = bsi_for_stmt (orig_cond);
 785
 786   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 787   create_iv (init, step, NULL_TREE, loop,
 788              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 789
 790   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 791     {
 792       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 793       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 794       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 795     }
 796   else /* 'then' edge loops back.  */
 797     {
 798       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 799       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 800       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 801     }
 802
 803   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 804                      then_label, else_label);
 805   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 806
 807   /* Remove old loop exit test:  */
 808   bsi_remove (&loop_cond_bsi, true);
 809
 810   loop_loc = find_loop_location (loop);
 811   if (dump_file && (dump_flags & TDF_DETAILS))
 812     {
 813       if (loop_loc != UNKNOWN_LOC)
 814         fprintf (dump_file, "\nloop at %s:%d: ",
 815                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 816       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 817     }
 818
 819   loop->nb_iterations = niters;
 820 }
 821
 822
 823 /* Given LOOP this function generates a new copy of it and puts it
 824    on E which is either the entry or exit of LOOP.  */
 825
 826 static struct loop *
 827 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 828                                         edge e)
 829 {
 830   struct loop *new_loop;
 831   basic_block *new_bbs, *bbs;
 832   bool at_exit;
 833   bool was_imm_dom;
 834   basic_block exit_dest;
 835   tree phi, phi_arg;
 836
 837   at_exit = (e == loop->single_exit);
 838   if (!at_exit && e != loop_preheader_edge (loop))
 839     return NULL;
 840
 841   bbs = get_loop_body (loop);
 842
 843   /* Check whether duplication is possible.  */
 844   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 845     {
 846       free (bbs);
 847       return NULL;
 848     }
 849
 850   /* Generate new loop structure.  */
 851   new_loop = duplicate_loop (loops, loop, loop->outer);
 852   if (!new_loop)
 853     {
 854       free (bbs);
 855       return NULL;
 856     }
 857
 858   exit_dest = loop->single_exit->dest;
 859   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 860                                           exit_dest) == loop->header ?
 861                  true : false);
 862
 863   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 864
 865   copy_bbs (bbs, loop->num_nodes, new_bbs,
 866             &loop->single_exit, 1, &new_loop->single_exit, NULL,
 867             e->src);
 868
 869   /* Duplicating phi args at exit bbs as coming
 870      also from exit of duplicated loop.  */
 871   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 872     {
 873       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 874       if (phi_arg)
 875         {
 876           edge new_loop_exit_edge;
 877
 878           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 879             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 880           else
 881             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 882
 883           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 884         }
 885     }
 886
 887   if (at_exit) /* Add the loop copy at exit.  */
 888     {
 889       redirect_edge_and_branch_force (e, new_loop->header);
 890       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 891       if (was_imm_dom)
 892         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 893     }
 894   else /* Add the copy at entry.  */
 895     {
 896       edge new_exit_e;
 897       edge entry_e = loop_preheader_edge (loop);
 898       basic_block preheader = entry_e->src;
 899
 900       if (!flow_bb_inside_loop_p (new_loop,
 901                                   EDGE_SUCC (new_loop->header, 0)->dest))
 902         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 903       else
 904         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 905
 906       redirect_edge_and_branch_force (new_exit_e, loop->header);
 907       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 908                                new_exit_e->src);
 909
 910       /* We have to add phi args to the loop->header here as coming
 911          from new_exit_e edge.  */
 912       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 913         {
 914           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 915           if (phi_arg)
 916             add_phi_arg (phi, phi_arg, new_exit_e);
 917         }
 918
 919       redirect_edge_and_branch_force (entry_e, new_loop->header);
 920       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 921     }
 922
 923   free (new_bbs);
 924   free (bbs);
 925
 926   return new_loop;
 927 }
 928
 929
 930 /* Given the condition statement COND, put it as the last statement
 931    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 932    Assumes that this is the single exit of the guarded loop.
 933    Returns the skip edge.  */
 934
 935 static edge
 936 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 937                         basic_block dom_bb)
 938 {
 939   block_stmt_iterator bsi;
 940   edge new_e, enter_e;
 941   tree cond_stmt, then_label, else_label;
 942
 943   enter_e = EDGE_SUCC (guard_bb, 0);
 944   enter_e->flags &= ~EDGE_FALLTHRU;
 945   enter_e->flags |= EDGE_FALSE_VALUE;
 946   bsi = bsi_last (guard_bb);
 947
 948   then_label = build1 (GOTO_EXPR, void_type_node,
 949                        tree_block_label (exit_bb));
 950   else_label = build1 (GOTO_EXPR, void_type_node,
 951                        tree_block_label (enter_e->dest));
 952   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 953                      then_label, else_label);
 954   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 955   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 956   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 957   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 958   return new_e;
 959 }
 960
 961
 962 /* This function verifies that the following restrictions apply to LOOP:
 963    (1) it is innermost
 964    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 965    (3) it is single entry, single exit
 966    (4) its exit condition is the last stmt in the header
 967    (5) E is the entry/exit edge of LOOP.
 968  */
 969
 970 bool
 971 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 972 {
 973   edge exit_e = loop->single_exit;
 974   edge entry_e = loop_preheader_edge (loop);
 975   tree orig_cond = get_loop_exit_condition (loop);
 976   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 977
 978   if (need_ssa_update_p ())
 979     return false;
 980
 981   if (loop->inner
 982       /* All loops have an outer scope; the only case loop->outer is NULL is for
 983          the function itself.  */
 984       || !loop->outer
 985       || loop->num_nodes != 2
 986       || !empty_block_p (loop->latch)
 987       || !loop->single_exit
 988       /* Verify that new loop exit condition can be trivially modified.  */
 989       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 990       || (e != exit_e && e != entry_e))
 991     return false;
 992
 993   return true;
 994 }
 995
 996 #ifdef ENABLE_CHECKING
 997 void
 998 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 999                                  struct loop *second_loop)
1000 {
1001   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1002   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1003   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1004
1005   /* A guard that controls whether the second_loop is to be executed or skipped
1006      is placed in first_loop->exit.  first_loopt->exit therefore has two
1007      successors - one is the preheader of second_loop, and the other is a bb
1008      after second_loop.
1009    */
1010   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1011
1012   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1013         of second_loop.  */
1014
1015   /* The preheader of new_loop is expected to have two predecessors:
1016      first_loop->exit and the block that precedes first_loop.  */
1017
1018   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1019               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1020                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1021                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1022                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1023
1024   /* Verify that the other successor of first_loopt->exit is after the
1025      second_loop.  */
1026   /* TODO */
1027 }
1028 #endif
1029
1030 /* Function slpeel_tree_peel_loop_to_edge.
1031
1032    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1033    that is placed on the entry (exit) edge E of LOOP. After this transformation
1034    we have two loops one after the other - first-loop iterates FIRST_NITERS
1035    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1036
1037    Input:
1038    - LOOP: the loop to be peeled.
1039    - E: the exit or entry edge of LOOP.
1040         If it is the entry edge, we peel the first iterations of LOOP. In this
1041         case first-loop is LOOP, and second-loop is the newly created loop.
1042         If it is the exit edge, we peel the last iterations of LOOP. In this
1043         case, first-loop is the newly created loop, and second-loop is LOOP.
1044    - NITERS: the number of iterations that LOOP iterates.
1045    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1046    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1047         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1048         is false, the caller of this function may want to take care of this
1049         (this can be useful if we don't want new stmts added to first-loop).
1050
1051    Output:
1052    The function returns a pointer to the new loop-copy, or NULL if it failed
1053    to perform the transformation.
1054
1055    The function generates two if-then-else guards: one before the first loop,
1056    and the other before the second loop:
1057    The first guard is:
1058      if (FIRST_NITERS == 0) then skip the first loop,
1059      and go directly to the second loop.
1060    The second guard is:
1061      if (FIRST_NITERS == NITERS) then skip the second loop.
1062
1063    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1064    FORNOW the resulting code will not be in loop-closed-ssa form.
1065 */
1066
1067 struct loop*
1068 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1069                                edge e, tree first_niters,
1070                                tree niters, bool update_first_loop_count)
1071 {
1072   struct loop *new_loop = NULL, *first_loop, *second_loop;
1073   edge skip_e;
1074   tree pre_condition;
1075   bitmap definitions;
1076   basic_block bb_before_second_loop, bb_after_second_loop;
1077   basic_block bb_before_first_loop;
1078   basic_block bb_between_loops;
1079   basic_block new_exit_bb;
1080   edge exit_e = loop->single_exit;
1081   LOC loop_loc;
1082
1083   if (!slpeel_can_duplicate_loop_p (loop, e))
1084     return NULL;
1085
1086   /* We have to initialize cfg_hooks. Then, when calling
1087    cfg_hooks->split_edge, the function tree_split_edge
1088    is actually called and, when calling cfg_hooks->duplicate_block,
1089    the function tree_duplicate_bb is called.  */
1090   tree_register_cfg_hooks ();
1091
1092
1093   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1094         Resulting CFG would be:
1095
1096         first_loop:
1097         do {
1098         } while ...
1099
1100         second_loop:
1101         do {
1102         } while ...
1103
1104         orig_exit_bb:
1105    */
1106
1107   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1108     {
1109       loop_loc = find_loop_location (loop);
1110       if (dump_file && (dump_flags & TDF_DETAILS))
1111         {
1112           if (loop_loc != UNKNOWN_LOC)
1113             fprintf (dump_file, "\n%s:%d: note: ",
1114                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1115           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1116         }
1117       return NULL;
1118     }
1119
1120   if (e == exit_e)
1121     {
1122       /* NEW_LOOP was placed after LOOP.  */
1123       first_loop = loop;
1124       second_loop = new_loop;
1125     }
1126   else
1127     {
1128       /* NEW_LOOP was placed before LOOP.  */
1129       first_loop = new_loop;
1130       second_loop = loop;
1131     }
1132
1133   definitions = ssa_names_to_replace ();
1134   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1135   rename_variables_in_loop (new_loop);
1136
1137
1138   /* 2. Add the guard that controls whether the first loop is executed.
1139         Resulting CFG would be:
1140
1141         bb_before_first_loop:
1142         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1143                                GOTO first-loop
1144
1145         first_loop:
1146         do {
1147         } while ...
1148
1149         bb_before_second_loop:
1150
1151         second_loop:
1152         do {
1153         } while ...
1154
1155         orig_exit_bb:
1156    */
1157
1158   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1159   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1160   bb_before_second_loop = split_edge (first_loop->single_exit);
1161   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1162
1163   pre_condition =
1164     fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1165                  build_int_cst (TREE_TYPE (first_niters), 0));
1166   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1167                                   bb_before_second_loop, bb_before_first_loop);
1168   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1169                                       first_loop == new_loop,
1170                                       &new_exit_bb, &definitions);
1171
1172
1173   /* 3. Add the guard that controls whether the second loop is executed.
1174         Resulting CFG would be:
1175
1176         bb_before_first_loop:
1177         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1178                                GOTO first-loop
1179
1180         first_loop:
1181         do {
1182         } while ...
1183
1184         bb_between_loops:
1185         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1186                                     GOTO bb_before_second_loop
1187
1188         bb_before_second_loop:
1189
1190         second_loop:
1191         do {
1192         } while ...
1193
1194         bb_after_second_loop:
1195
1196         orig_exit_bb:
1197    */
1198
1199   bb_between_loops = new_exit_bb;
1200   bb_after_second_loop = split_edge (second_loop->single_exit);
1201   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1202
1203   pre_condition =
1204         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1205   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1206                                   bb_after_second_loop, bb_before_first_loop);
1207   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1208                                      second_loop == new_loop, &new_exit_bb);
1209
1210   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1211    */
1212   if (update_first_loop_count)
1213     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1214
1215   BITMAP_FREE (definitions);
1216   delete_update_ssa ();
1217
1218   return new_loop;
1219 }
1220
1221 /* Function vect_get_loop_location.
1222
1223    Extract the location of the loop in the source code.
1224    If the loop is not well formed for vectorization, an estimated
1225    location is calculated.
1226    Return the loop location if succeed and NULL if not.  */
1227
1228 LOC
1229 find_loop_location (struct loop *loop)
1230 {
1231   tree node = NULL_TREE;
1232   basic_block bb;
1233   block_stmt_iterator si;
1234
1235   if (!loop)
1236     return UNKNOWN_LOC;
1237
1238   node = get_loop_exit_condition (loop);
1239
1240   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1241       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1242     return EXPR_LOC (node);
1243
1244   /* If we got here the loop is probably not "well formed",
1245      try to estimate the loop location */
1246
1247   if (!loop->header)
1248     return UNKNOWN_LOC;
1249
1250   bb = loop->header;
1251
1252   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1253     {
1254       node = bsi_stmt (si);
1255       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1256         return EXPR_LOC (node);
1257     }
1258
1259   return UNKNOWN_LOC;
1260 }
1261
1262
1263 /*************************************************************************
1264   Vectorization Debug Information.
1265  *************************************************************************/
1266
1267 /* Function vect_set_verbosity_level.
1268
1269    Called from toplev.c upon detection of the
1270    -ftree-vectorizer-verbose=N option.  */
1271
1272 void
1273 vect_set_verbosity_level (const char *val)
1274 {
1275    unsigned int vl;
1276
1277    vl = atoi (val);
1278    if (vl < MAX_VERBOSITY_LEVEL)
1279      vect_verbosity_level = vl;
1280    else
1281      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1282 }
1283
1284
1285 /* Function vect_set_dump_settings.
1286
1287    Fix the verbosity level of the vectorizer if the
1288    requested level was not set explicitly using the flag
1289    -ftree-vectorizer-verbose=N.
1290    Decide where to print the debugging information (dump_file/stderr).
1291    If the user defined the verbosity level, but there is no dump file,
1292    print to stderr, otherwise print to the dump file.  */
1293
1294 static void
1295 vect_set_dump_settings (void)
1296 {
1297   vect_dump = dump_file;
1298
1299   /* Check if the verbosity level was defined by the user:  */
1300   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1301     {
1302       /* If there is no dump file, print to stderr.  */
1303       if (!dump_file)
1304         vect_dump = stderr;
1305       return;
1306     }
1307
1308   /* User didn't specify verbosity level:  */
1309   if (dump_file && (dump_flags & TDF_DETAILS))
1310     vect_verbosity_level = REPORT_DETAILS;
1311   else if (dump_file && (dump_flags & TDF_STATS))
1312     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1313   else
1314     vect_verbosity_level = REPORT_NONE;
1315
1316   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1317 }
1318
1319
1320 /* Function debug_loop_details.
1321
1322    For vectorization debug dumps.  */
1323
1324 bool
1325 vect_print_dump_info (enum verbosity_levels vl)
1326 {
1327   if (vl > vect_verbosity_level)
1328     return false;
1329
1330   if (!current_function_decl || !vect_dump)
1331     return false;
1332
1333   if (vect_loop_location == UNKNOWN_LOC)
1334     fprintf (vect_dump, "\n%s:%d: note: ",
1335                  DECL_SOURCE_FILE (current_function_decl),
1336                  DECL_SOURCE_LINE (current_function_decl));
1337   else
1338     fprintf (vect_dump, "\n%s:%d: note: ",
1339              LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1340
1341   return true;
1342 }
1343
1344
1345 /*************************************************************************
1346   Vectorization Utilities.
1347  *************************************************************************/
1348
1349 /* Function new_stmt_vec_info.
1350
1351    Create and initialize a new stmt_vec_info struct for STMT.  */
1352
1353 stmt_vec_info
1354 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1355 {
1356   stmt_vec_info res;
1357   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1358
1359   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1360   STMT_VINFO_STMT (res) = stmt;
1361   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1362   STMT_VINFO_RELEVANT_P (res) = 0;
1363   STMT_VINFO_LIVE_P (res) = 0;
1364   STMT_VINFO_VECTYPE (res) = NULL;
1365   STMT_VINFO_VEC_STMT (res) = NULL;
1366   STMT_VINFO_IN_PATTERN_P (res) = false;
1367   STMT_VINFO_RELATED_STMT (res) = NULL;
1368   STMT_VINFO_DATA_REF (res) = NULL;
1369   if (TREE_CODE (stmt) == PHI_NODE)
1370     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1371   else
1372     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1373   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1374
1375   return res;
1376 }
1377
1378
1379 /* Function new_loop_vec_info.
1380
1381    Create and initialize a new loop_vec_info struct for LOOP, as well as
1382    stmt_vec_info structs for all the stmts in LOOP.  */
1383
1384 loop_vec_info
1385 new_loop_vec_info (struct loop *loop)
1386 {
1387   loop_vec_info res;
1388   basic_block *bbs;
1389   block_stmt_iterator si;
1390   unsigned int i;
1391
1392   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1393
1394   bbs = get_loop_body (loop);
1395
1396   /* Create stmt_info for all stmts in the loop.  */
1397   for (i = 0; i < loop->num_nodes; i++)
1398     {
1399       basic_block bb = bbs[i];
1400       tree phi;
1401
1402       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1403         {
1404           stmt_ann_t ann = get_stmt_ann (phi);
1405           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1406         }
1407
1408       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1409         {
1410           tree stmt = bsi_stmt (si);
1411           stmt_ann_t ann;
1412
1413           ann = stmt_ann (stmt);
1414           set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1415         }
1416     }
1417
1418   LOOP_VINFO_LOOP (res) = loop;
1419   LOOP_VINFO_BBS (res) = bbs;
1420   LOOP_VINFO_EXIT_COND (res) = NULL;
1421   LOOP_VINFO_NITERS (res) = NULL;
1422   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1423   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1424   LOOP_VINFO_VECT_FACTOR (res) = 0;
1425   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1426   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1427   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1428   LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1429     = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1430
1431   return res;
1432 }
1433
1434
1435 /* Function destroy_loop_vec_info.
1436
1437    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1438    stmts in the loop.  */
1439
1440 void
1441 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1442 {
1443   struct loop *loop;
1444   basic_block *bbs;
1445   int nbbs;
1446   block_stmt_iterator si;
1447   int j;
1448
1449   if (!loop_vinfo)
1450     return;
1451
1452   loop = LOOP_VINFO_LOOP (loop_vinfo);
1453
1454   bbs = LOOP_VINFO_BBS (loop_vinfo);
1455   nbbs = loop->num_nodes;
1456
1457   for (j = 0; j < nbbs; j++)
1458     {
1459       basic_block bb = bbs[j];
1460       tree phi;
1461       stmt_vec_info stmt_info;
1462
1463       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1464         {
1465           stmt_ann_t ann = stmt_ann (phi);
1466
1467           stmt_info = vinfo_for_stmt (phi);
1468           free (stmt_info);
1469           set_stmt_info (ann, NULL);
1470         }
1471
1472       for (si = bsi_start (bb); !bsi_end_p (si); )
1473         {
1474           tree stmt = bsi_stmt (si);
1475           stmt_ann_t ann = stmt_ann (stmt);
1476           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1477
1478           if (stmt_info)
1479             {
1480               /* Check if this is a "pattern stmt" (introduced by the
1481                  vectorizer during the pattern recognition pass).  */
1482               bool remove_stmt_p = false;
1483               tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1484               if (orig_stmt)
1485                 {
1486                   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1487                   if (orig_stmt_info
1488                       && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1489                     remove_stmt_p = true;
1490                 }
1491
1492               /* Free stmt_vec_info.  */
1493               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1494               free (stmt_info);
1495               set_stmt_info (ann, NULL);
1496
1497               /* Remove dead "pattern stmts".  */
1498               if (remove_stmt_p)
1499                 bsi_remove (&si, true);
1500             }
1501           bsi_next (&si);
1502         }
1503     }
1504
1505   free (LOOP_VINFO_BBS (loop_vinfo));
1506   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1507   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1508   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1509
1510   free (loop_vinfo);
1511 }
1512
1513
1514 /* Function vect_force_dr_alignment_p.
1515
1516    Returns whether the alignment of a DECL can be forced to be aligned
1517    on ALIGNMENT bit boundary.  */
1518
1519 bool
1520 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1521 {
1522   if (TREE_CODE (decl) != VAR_DECL)
1523     return false;
1524
1525   if (DECL_EXTERNAL (decl))
1526     return false;
1527
1528   if (TREE_ASM_WRITTEN (decl))
1529     return false;
1530
1531   if (TREE_STATIC (decl))
1532     return (alignment <= MAX_OFILE_ALIGNMENT);
1533   else
1534     /* This is not 100% correct.  The absolute correct stack alignment
1535        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1536        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1537        However, until someone implements forced stack alignment, SSE
1538        isn't really usable without this.  */
1539     return (alignment <= PREFERRED_STACK_BOUNDARY);
1540 }
1541
1542
1543 /* Function get_vectype_for_scalar_type.
1544
1545    Returns the vector type corresponding to SCALAR_TYPE as supported
1546    by the target.  */
1547
1548 tree
1549 get_vectype_for_scalar_type (tree scalar_type)
1550 {
1551   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1552   int nbytes = GET_MODE_SIZE (inner_mode);
1553   int nunits;
1554   tree vectype;
1555
1556   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1557     return NULL_TREE;
1558
1559   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1560      is expected.  */
1561   nunits = UNITS_PER_SIMD_WORD / nbytes;
1562
1563   vectype = build_vector_type (scalar_type, nunits);
1564   if (vect_print_dump_info (REPORT_DETAILS))
1565     {
1566       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1567       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1568     }
1569
1570   if (!vectype)
1571     return NULL_TREE;
1572
1573   if (vect_print_dump_info (REPORT_DETAILS))
1574     {
1575       fprintf (vect_dump, "vectype: ");
1576       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1577     }
1578
1579   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1580       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1581     {
1582       if (vect_print_dump_info (REPORT_DETAILS))
1583         fprintf (vect_dump, "mode not supported by target.");
1584       return NULL_TREE;
1585     }
1586
1587   return vectype;
1588 }
1589
1590
1591 /* Function vect_supportable_dr_alignment
1592
1593    Return whether the data reference DR is supported with respect to its
1594    alignment.  */
1595
1596 enum dr_alignment_support
1597 vect_supportable_dr_alignment (struct data_reference *dr)
1598 {
1599   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1600   enum machine_mode mode = (int) TYPE_MODE (vectype);
1601
1602   if (aligned_access_p (dr))
1603     return dr_aligned;
1604
1605   /* Possibly unaligned access.  */
1606
1607   if (DR_IS_READ (dr))
1608     {
1609       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1610           && (!targetm.vectorize.builtin_mask_for_load
1611               || targetm.vectorize.builtin_mask_for_load ()))
1612         return dr_unaligned_software_pipeline;
1613
1614       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1615         /* Can't software pipeline the loads, but can at least do them.  */
1616         return dr_unaligned_supported;
1617     }
1618
1619   /* Unsupported.  */
1620   return dr_unaligned_unsupported;
1621 }
1622
1623
1624 /* Function vect_is_simple_use.
1625
1626    Input:
1627    LOOP - the loop that is being vectorized.
1628    OPERAND - operand of a stmt in LOOP.
1629    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1630
1631    Returns whether a stmt with OPERAND can be vectorized.
1632    Supportable operands are constants, loop invariants, and operands that are
1633    defined by the current iteration of the loop. Unsupportable operands are
1634    those that are defined by a previous iteration of the loop (as is the case
1635    in reduction/induction computations).  */
1636
1637 bool
1638 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1639                     tree *def, enum vect_def_type *dt)
1640 {
1641   basic_block bb;
1642   stmt_vec_info stmt_vinfo;
1643   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1644
1645   *def_stmt = NULL_TREE;
1646   *def = NULL_TREE;
1647
1648   if (vect_print_dump_info (REPORT_DETAILS))
1649     {
1650       fprintf (vect_dump, "vect_is_simple_use: operand ");
1651       print_generic_expr (vect_dump, operand, TDF_SLIM);
1652     }
1653
1654   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1655     {
1656       *dt = vect_constant_def;
1657       return true;
1658     }
1659
1660   if (TREE_CODE (operand) != SSA_NAME)
1661     {
1662       if (vect_print_dump_info (REPORT_DETAILS))
1663         fprintf (vect_dump, "not ssa-name.");
1664       return false;
1665     }
1666
1667   *def_stmt = SSA_NAME_DEF_STMT (operand);
1668   if (*def_stmt == NULL_TREE )
1669     {
1670       if (vect_print_dump_info (REPORT_DETAILS))
1671         fprintf (vect_dump, "no def_stmt.");
1672       return false;
1673     }
1674
1675   if (vect_print_dump_info (REPORT_DETAILS))
1676     {
1677       fprintf (vect_dump, "def_stmt: ");
1678       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1679     }
1680
1681   /* empty stmt is expected only in case of a function argument.
1682      (Otherwise - we expect a phi_node or a modify_expr).  */
1683   if (IS_EMPTY_STMT (*def_stmt))
1684     {
1685       tree arg = TREE_OPERAND (*def_stmt, 0);
1686       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1687         {
1688           *def = operand;
1689           *dt = vect_invariant_def;
1690           return true;
1691         }
1692
1693       if (vect_print_dump_info (REPORT_DETAILS))
1694         fprintf (vect_dump, "Unexpected empty stmt.");
1695       return false;
1696     }
1697
1698   bb = bb_for_stmt (*def_stmt);
1699   if (!flow_bb_inside_loop_p (loop, bb))
1700     *dt = vect_invariant_def;
1701   else
1702     {
1703       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1704       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1705     }
1706
1707   if (*dt == vect_unknown_def_type)
1708     {
1709       if (vect_print_dump_info (REPORT_DETAILS))
1710         fprintf (vect_dump, "Unsupported pattern.");
1711       return false;
1712     }
1713
1714   /* stmts inside the loop that have been identified as performing
1715      a reduction operation cannot have uses in the loop.  */
1716   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1717     {
1718       if (vect_print_dump_info (REPORT_DETAILS))
1719         fprintf (vect_dump, "reduction used in loop.");
1720       return false;
1721     }
1722
1723   if (vect_print_dump_info (REPORT_DETAILS))
1724     fprintf (vect_dump, "type of def: %d.",*dt);
1725
1726   switch (TREE_CODE (*def_stmt))
1727     {
1728     case PHI_NODE:
1729       *def = PHI_RESULT (*def_stmt);
1730       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1731                   || *dt == vect_invariant_def);
1732       break;
1733
1734     case MODIFY_EXPR:
1735       *def = TREE_OPERAND (*def_stmt, 0);
1736       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1737       break;
1738
1739     default:
1740       if (vect_print_dump_info (REPORT_DETAILS))
1741         fprintf (vect_dump, "unsupported defining stmt: ");
1742       return false;
1743     }
1744
1745   if (*dt == vect_induction_def)
1746     {
1747       if (vect_print_dump_info (REPORT_DETAILS))
1748         fprintf (vect_dump, "induction not supported.");
1749       return false;
1750     }
1751
1752   return true;
1753 }
1754
1755
1756 /* Function reduction_code_for_scalar_code
1757
1758    Input:
1759    CODE - tree_code of a reduction operations.
1760
1761    Output:
1762    REDUC_CODE - the corresponding tree-code to be used to reduce the
1763       vector of partial results into a single scalar result (which
1764       will also reside in a vector).
1765
1766    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1767
1768 bool
1769 reduction_code_for_scalar_code (enum tree_code code,
1770                                 enum tree_code *reduc_code)
1771 {
1772   switch (code)
1773   {
1774   case MAX_EXPR:
1775     *reduc_code = REDUC_MAX_EXPR;
1776     return true;
1777
1778   case MIN_EXPR:
1779     *reduc_code = REDUC_MIN_EXPR;
1780     return true;
1781
1782   case PLUS_EXPR:
1783     *reduc_code = REDUC_PLUS_EXPR;
1784     return true;
1785
1786   default:
1787     return false;
1788   }
1789 }
1790
1791
1792 /* Function vect_is_simple_reduction
1793
1794    Detect a cross-iteration def-use cucle that represents a simple
1795    reduction computation. We look for the following pattern:
1796
1797    loop_header:
1798      a1 = phi < a0, a2 >
1799      a3 = ...
1800      a2 = operation (a3, a1)
1801
1802    such that:
1803    1. operation is commutative and associative and it is safe to
1804       change the order of the computation.
1805    2. no uses for a2 in the loop (a2 is used out of the loop)
1806    3. no uses of a1 in the loop besides the reduction operation.
1807
1808    Condition 1 is tested here.
1809    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1810
1811 tree
1812 vect_is_simple_reduction (struct loop *loop, tree phi)
1813 {
1814   edge latch_e = loop_latch_edge (loop);
1815   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1816   tree def_stmt, def1, def2;
1817   enum tree_code code;
1818   int op_type;
1819   tree operation, op1, op2;
1820   tree type;
1821
1822   if (TREE_CODE (loop_arg) != SSA_NAME)
1823     {
1824       if (vect_print_dump_info (REPORT_DETAILS))
1825         {
1826           fprintf (vect_dump, "reduction: not ssa_name: ");
1827           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1828         }
1829       return NULL_TREE;
1830     }
1831
1832   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1833   if (!def_stmt)
1834     {
1835       if (vect_print_dump_info (REPORT_DETAILS))
1836         fprintf (vect_dump, "reduction: no def_stmt.");
1837       return NULL_TREE;
1838     }
1839
1840   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1841     {
1842       if (vect_print_dump_info (REPORT_DETAILS))
1843         {
1844           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1845         }
1846       return NULL_TREE;
1847     }
1848
1849   operation = TREE_OPERAND (def_stmt, 1);
1850   code = TREE_CODE (operation);
1851   if (!commutative_tree_code (code) || !associative_tree_code (code))
1852     {
1853       if (vect_print_dump_info (REPORT_DETAILS))
1854         {
1855           fprintf (vect_dump, "reduction: not commutative/associative: ");
1856           print_generic_expr (vect_dump, operation, TDF_SLIM);
1857         }
1858       return NULL_TREE;
1859     }
1860
1861   op_type = TREE_CODE_LENGTH (code);
1862   if (op_type != binary_op)
1863     {
1864       if (vect_print_dump_info (REPORT_DETAILS))
1865         {
1866           fprintf (vect_dump, "reduction: not binary operation: ");
1867           print_generic_expr (vect_dump, operation, TDF_SLIM);
1868         }
1869       return NULL_TREE;
1870     }
1871
1872   op1 = TREE_OPERAND (operation, 0);
1873   op2 = TREE_OPERAND (operation, 1);
1874   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1875     {
1876       if (vect_print_dump_info (REPORT_DETAILS))
1877         {
1878           fprintf (vect_dump, "reduction: uses not ssa_names: ");
1879           print_generic_expr (vect_dump, operation, TDF_SLIM);
1880         }
1881       return NULL_TREE;
1882     }
1883
1884   /* Check that it's ok to change the order of the computation.  */
1885   type = TREE_TYPE (operation);
1886   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1887       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1888     {
1889       if (vect_print_dump_info (REPORT_DETAILS))
1890         {
1891           fprintf (vect_dump, "reduction: multiple types: operation type: ");
1892           print_generic_expr (vect_dump, type, TDF_SLIM);
1893           fprintf (vect_dump, ", operands types: ");
1894           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1895           fprintf (vect_dump, ",");
1896           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1897         }
1898       return NULL_TREE;
1899     }
1900
1901   /* CHECKME: check for !flag_finite_math_only too?  */
1902   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1903     {
1904       /* Changing the order of operations changes the semantics.  */
1905       if (vect_print_dump_info (REPORT_DETAILS))
1906         {
1907           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1908           print_generic_expr (vect_dump, operation, TDF_SLIM);
1909         }
1910       return NULL_TREE;
1911     }
1912   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
1913     {
1914       /* Changing the order of operations changes the semantics.  */
1915       if (vect_print_dump_info (REPORT_DETAILS))
1916         {
1917           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1918           print_generic_expr (vect_dump, operation, TDF_SLIM);
1919         }
1920       return NULL_TREE;
1921     }
1922
1923   /* reduction is safe. we're dealing with one of the following:
1924      1) integer arithmetic and no trapv
1925      2) floating point arithmetic, and special flags permit this optimization.
1926    */
1927   def1 = SSA_NAME_DEF_STMT (op1);
1928   def2 = SSA_NAME_DEF_STMT (op2);
1929   if (!def1 || !def2)
1930     {
1931       if (vect_print_dump_info (REPORT_DETAILS))
1932         {
1933           fprintf (vect_dump, "reduction: no defs for operands: ");
1934           print_generic_expr (vect_dump, operation, TDF_SLIM);
1935         }
1936       return NULL_TREE;
1937     }
1938
1939   if (TREE_CODE (def1) == MODIFY_EXPR
1940       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1941       && def2 == phi)
1942     {
1943       if (vect_print_dump_info (REPORT_DETAILS))
1944         {
1945           fprintf (vect_dump, "detected reduction:");
1946           print_generic_expr (vect_dump, operation, TDF_SLIM);
1947         }
1948       return def_stmt;
1949     }
1950   else if (TREE_CODE (def2) == MODIFY_EXPR
1951       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1952       && def1 == phi)
1953     {
1954       /* Swap operands (just for simplicity - so that the rest of the code
1955          can assume that the reduction variable is always the last (second)
1956          argument).  */
1957       if (vect_print_dump_info (REPORT_DETAILS))
1958         {
1959           fprintf (vect_dump, "detected reduction: need to swap operands:");
1960           print_generic_expr (vect_dump, operation, TDF_SLIM);
1961         }
1962       swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
1963                                     &TREE_OPERAND (operation, 1));
1964       return def_stmt;
1965     }
1966   else
1967     {
1968       if (vect_print_dump_info (REPORT_DETAILS))
1969         {
1970           fprintf (vect_dump, "reduction: unknown pattern.");
1971           print_generic_expr (vect_dump, operation, TDF_SLIM);
1972         }
1973       return NULL_TREE;
1974     }
1975 }
1976
1977
1978 /* Function vect_is_simple_iv_evolution.
1979
1980    FORNOW: A simple evolution of an induction variables in the loop is
1981    considered a polynomial evolution with constant step.  */
1982
1983 bool
1984 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1985                              tree * step)
1986 {
1987   tree init_expr;
1988   tree step_expr;
1989
1990   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1991
1992   /* When there is no evolution in this loop, the evolution function
1993      is not "simple".  */
1994   if (evolution_part == NULL_TREE)
1995     return false;
1996
1997   /* When the evolution is a polynomial of degree >= 2
1998      the evolution function is not "simple".  */
1999   if (tree_is_chrec (evolution_part))
2000     return false;
2001
2002   step_expr = evolution_part;
2003   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2004                                                            loop_nb));
2005
2006   if (vect_print_dump_info (REPORT_DETAILS))
2007     {
2008       fprintf (vect_dump, "step: ");
2009       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2010       fprintf (vect_dump, ",  init: ");
2011       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2012     }
2013
2014   *init = init_expr;
2015   *step = step_expr;
2016
2017   if (TREE_CODE (step_expr) != INTEGER_CST)
2018     {
2019       if (vect_print_dump_info (REPORT_DETAILS))
2020         fprintf (vect_dump, "step unknown.");
2021       return false;
2022     }
2023
2024   return true;
2025 }
2026
2027
2028 /* Function vectorize_loops.
2029
2030    Entry Point to loop vectorization phase.  */
2031
2032 void
2033 vectorize_loops (struct loops *loops)
2034 {
2035   unsigned int i;
2036   unsigned int num_vectorized_loops = 0;
2037
2038   /* Fix the verbosity level if not defined explicitly by the user.  */
2039   vect_set_dump_settings ();
2040
2041   /* Allocate the bitmap that records which virtual variables that
2042      need to be renamed.  */
2043   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2044
2045   /*  ----------- Analyze loops. -----------  */
2046
2047   /* If some loop was duplicated, it gets bigger number
2048      than all previously defined loops. This fact allows us to run
2049      only over initial loops skipping newly generated ones.  */
2050   vect_loops_num = loops->num;
2051   for (i = 1; i < vect_loops_num; i++)
2052     {
2053       loop_vec_info loop_vinfo;
2054       struct loop *loop = loops->parray[i];
2055
2056       if (!loop)
2057         continue;
2058
2059       vect_loop_location = find_loop_location (loop);
2060       loop_vinfo = vect_analyze_loop (loop);
2061       loop->aux = loop_vinfo;
2062
2063       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2064         continue;
2065
2066       vect_transform_loop (loop_vinfo, loops);
2067       num_vectorized_loops++;
2068     }
2069   vect_loop_location = UNKNOWN_LOC;
2070
2071   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2072     fprintf (vect_dump, "vectorized %u loops in function.\n",
2073              num_vectorized_loops);
2074
2075   /*  ----------- Finalize. -----------  */
2076
2077   BITMAP_FREE (vect_vnames_to_rename);
2078
2079   for (i = 1; i < vect_loops_num; i++)
2080     {
2081       struct loop *loop = loops->parray[i];
2082       loop_vec_info loop_vinfo;
2083
2084       if (!loop)
2085         continue;
2086       loop_vinfo = loop->aux;
2087       destroy_loop_vec_info (loop_vinfo);
2088       loop->aux = NULL;
2089     }
2090 }