]> gcc.gnu.org Git - gcc.git/blame - gcc/tree-vect-loop-manip.c
coretypes.h: Include machmode.h...
[gcc.git] / gcc / tree-vect-loop-manip.c
CommitLineData
b8698a0f 1/* Vectorizer Specific Loop Manipulations
5624e564 2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
ebfd146a 26#include "tm.h"
60393bbc 27#include "hash-set.h"
40e23961 28#include "vec.h"
40e23961
MC
29#include "input.h"
30#include "alias.h"
31#include "symtab.h"
40e23961
MC
32#include "inchash.h"
33#include "tree.h"
34#include "fold-const.h"
35#include "predict.h"
60393bbc
AM
36#include "hard-reg-set.h"
37#include "input.h"
38#include "function.h"
39#include "dominance.h"
40#include "cfg.h"
41#include "cfganal.h"
ebfd146a 42#include "basic-block.h"
cf835838 43#include "gimple-pretty-print.h"
2fb9a547
AM
44#include "tree-ssa-alias.h"
45#include "internal-fn.h"
46#include "gimple-expr.h"
47#include "is-a.h"
18f429e2 48#include "gimple.h"
45b0be94 49#include "gimplify.h"
5be5c238 50#include "gimple-iterator.h"
18f429e2 51#include "gimplify-me.h"
442b4905
AM
52#include "gimple-ssa.h"
53#include "tree-cfg.h"
54#include "tree-phinodes.h"
55#include "ssa-iterators.h"
d8a2d370 56#include "stringpool.h"
442b4905 57#include "tree-ssanames.h"
e28030cf 58#include "tree-ssa-loop-manip.h"
442b4905 59#include "tree-into-ssa.h"
7a300452 60#include "tree-ssa.h"
7ee2468b 61#include "tree-pass.h"
ebfd146a 62#include "cfgloop.h"
718f9c0f 63#include "diagnostic-core.h"
ebfd146a
IR
64#include "tree-scalar-evolution.h"
65#include "tree-vectorizer.h"
66#include "langhooks.h"
67
68/*************************************************************************
69 Simple Loop Peeling Utilities
70
71 Utilities to support loop peeling for vectorization purposes.
72 *************************************************************************/
73
74
75/* Renames the use *OP_P. */
76
77static void
78rename_use_op (use_operand_p op_p)
79{
80 tree new_name;
81
82 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
83 return;
84
85 new_name = get_current_def (USE_FROM_PTR (op_p));
86
87 /* Something defined outside of the loop. */
88 if (!new_name)
89 return;
90
91 /* An ordinary ssa name defined in the loop. */
92
93 SET_USE (op_p, new_name);
94}
95
96
97/* Renames the variables in basic block BB. */
98
2cfc56b9 99static void
ebfd146a
IR
100rename_variables_in_bb (basic_block bb)
101{
ebfd146a
IR
102 gimple stmt;
103 use_operand_p use_p;
104 ssa_op_iter iter;
105 edge e;
106 edge_iterator ei;
107 struct loop *loop = bb->loop_father;
108
538dd0b7
DM
109 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
110 gsi_next (&gsi))
ebfd146a
IR
111 {
112 stmt = gsi_stmt (gsi);
113 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
114 rename_use_op (use_p);
115 }
116
2cfc56b9 117 FOR_EACH_EDGE (e, ei, bb->preds)
ebfd146a 118 {
2cfc56b9 119 if (!flow_bb_inside_loop_p (loop, e->src))
ebfd146a 120 continue;
538dd0b7
DM
121 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
122 gsi_next (&gsi))
123 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
ebfd146a
IR
124 }
125}
126
127
/* One deferred debug-stmt adjustment: uses of FROM that are dominated
   by BB (but not by FROM's definition) are to be rewritten to TO.
   TO may be NULL_TREE, in which case the debug binds are reset
   instead (see adjust_debug_stmts_now).  */
typedef struct
{
  tree from, to;	/* SSA name to replace and its replacement.  */
  basic_block bb;	/* Block that must dominate the adjusted uses.  */
} adjust_info;
133
684f25f4
AO
134/* A stack of values to be adjusted in debug stmts. We have to
135 process them LIFO, so that the closest substitution applies. If we
136 processed them FIFO, without the stack, we might substitute uses
137 with a PHI DEF that would soon become non-dominant, and when we got
138 to the suitable one, it wouldn't have anything to substitute any
139 more. */
ff4c81cc 140static vec<adjust_info, va_heap> adjust_vec;
684f25f4
AO
141
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  If AI->to is NULL_TREE the
   debug binds are reset instead of rewritten.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple stmt;
  /* Block holding the definition of the name being replaced; uses
     dominated by it are still valid and must not be touched.  */
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  /* Dominance info is required for the dominated_by_p queries below.  */
  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      /* Only debug statements are adjusted; real uses are handled by
	 the SSA renaming machinery elsewhere.  */
      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      /* Use must be dominated by the PHI's block but not by the
	 original definition (where the old name is still current).  */
      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    {
	      /* No replacement available: drop the bound value so the
		 debug stmt does not keep a stale reference.  */
	      gimple_debug_bind_reset_value (stmt);
	      update_stmt (stmt);
	    }
	}
    }
}
188
189/* Adjust debug stmts as scheduled before. */
190
191static void
192adjust_vec_debug_stmts (void)
193{
194 if (!MAY_HAVE_DEBUG_STMTS)
195 return;
196
9771b263 197 gcc_assert (adjust_vec.exists ());
684f25f4 198
9771b263 199 while (!adjust_vec.is_empty ())
684f25f4 200 {
9771b263
DN
201 adjust_debug_stmts_now (&adjust_vec.last ());
202 adjust_vec.pop ();
684f25f4
AO
203 }
204
9771b263 205 adjust_vec.release ();
684f25f4
AO
206}
207
208/* Adjust any debug stmts that referenced FROM values to use the
209 loop-closed TO, if the references are dominated by BB and not by
210 the definition of FROM. If adjust_vec is non-NULL, adjustments
211 will be postponed until adjust_vec_debug_stmts is called. */
212
213static void
214adjust_debug_stmts (tree from, tree to, basic_block bb)
215{
216 adjust_info ai;
217
a471762f
RG
218 if (MAY_HAVE_DEBUG_STMTS
219 && TREE_CODE (from) == SSA_NAME
a52ca739 220 && ! SSA_NAME_IS_DEFAULT_DEF (from)
a471762f 221 && ! virtual_operand_p (from))
684f25f4
AO
222 {
223 ai.from = from;
224 ai.to = to;
225 ai.bb = bb;
226
9771b263
DN
227 if (adjust_vec.exists ())
228 adjust_vec.safe_push (ai);
684f25f4
AO
229 else
230 adjust_debug_stmts_now (&ai);
231 }
232}
233
234/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
235 to adjust any debug stmts that referenced the old phi arg,
236 presumably non-loop-closed references left over from other
237 transformations. */
238
239static void
240adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
241{
242 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
243
244 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
245
246 if (MAY_HAVE_DEBUG_STMTS)
247 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
248 gimple_bb (update_phi));
249}
250
ebfd146a 251
ebfd146a
IR
252/* Update PHI nodes for a guard of the LOOP.
253
254 Input:
255 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
256 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
257 originates from the guard-bb, skips LOOP and reaches the (unique) exit
258 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
259 We denote this bb NEW_MERGE_BB because before the guard code was added
260 it had a single predecessor (the LOOP header), and now it became a merge
261 point of two paths - the path that ends with the LOOP exit-edge, and
262 the path that ends with GUARD_EDGE.
263 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
264 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
265
266 ===> The CFG before the guard-code was added:
267 LOOP_header_bb:
268 loop_body
269 if (exit_loop) goto update_bb
270 else goto LOOP_header_bb
271 update_bb:
272
273 ==> The CFG after the guard-code was added:
274 guard_bb:
275 if (LOOP_guard_condition) goto new_merge_bb
276 else goto LOOP_header_bb
277 LOOP_header_bb:
278 loop_body
279 if (exit_loop_condition) goto new_merge_bb
280 else goto LOOP_header_bb
281 new_merge_bb:
282 goto update_bb
283 update_bb:
284
285 ==> The CFG after this function:
286 guard_bb:
287 if (LOOP_guard_condition) goto new_merge_bb
288 else goto LOOP_header_bb
289 LOOP_header_bb:
290 loop_body
291 if (exit_loop_condition) goto new_exit_bb
292 else goto LOOP_header_bb
293 new_exit_bb:
294 new_merge_bb:
295 goto update_bb
296 update_bb:
297
298 This function:
299 1. creates and updates the relevant phi nodes to account for the new
300 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
301 1.1. Create phi nodes at NEW_MERGE_BB.
302 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
303 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
304 2. preserves loop-closed-ssa-form by creating the required phi nodes
305 at the exit of LOOP (i.e, in NEW_EXIT_BB).
306
307 There are two flavors to this function:
308
309 slpeel_update_phi_nodes_for_guard1:
310 Here the guard controls whether we enter or skip LOOP, where LOOP is a
311 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
312 for variables that have phis in the loop header.
313
314 slpeel_update_phi_nodes_for_guard2:
315 Here the guard controls whether we enter or skip LOOP, where LOOP is an
316 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
317 for variables that have phis in the loop exit.
318
319 I.E., the overall structure is:
320
321 loop1_preheader_bb:
322 guard1 (goto loop1/merge1_bb)
323 loop1
324 loop1_exit_bb:
325 guard2 (goto merge1_bb/merge2_bb)
326 merge1_bb
327 loop2
328 loop2_exit_bb
329 merge2_bb
330 next_bb
331
332 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
333 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
334 that have phis in loop1->header).
335
336 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
337 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
338 that have phis in next_bb). It also adds some of these phis to
339 loop1_exit_bb.
340
341 slpeel_update_phi_nodes_for_guard1 is always called before
342 slpeel_update_phi_nodes_for_guard2. They are both needed in order
343 to create correct data-flow and loop-closed-ssa-form.
344
345 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
346 that change between iterations of a loop (and therefore have a phi-node
347 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
b8698a0f
L
348 phis for variables that are used out of the loop (and therefore have
349 loop-closed exit phis). Some variables may be both updated between
ebfd146a
IR
350 iterations and used after the loop. This is why in loop1_exit_bb we
351 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
352 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
353
354 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
355 an original loop. i.e., we have:
356
357 orig_loop
358 guard_bb (goto LOOP/new_merge)
359 new_loop <-- LOOP
360 new_exit
361 new_merge
362 next_bb
363
364 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
365 have:
366
367 new_loop
368 guard_bb (goto LOOP/new_merge)
369 orig_loop <-- LOOP
370 new_exit
371 new_merge
372 next_bb
373
   The SSA names defined in the original loop have a current
   reaching definition that records the corresponding new
   ssa-name used in the new duplicated loop copy.
 */
378
/* Function slpeel_update_phi_nodes_for_guard1

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
        information.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
LOOP->  loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name updated between loop iterations (i.e - for each name that has
   an entry (loop-header) phi in LOOP) we create a new phi in:
   1. merge1_bb (to account for the edge from guard1)
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
*/

static void
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
				    bool is_new_loop, basic_block *new_exit_bb)
{
  gphi *orig_phi, *new_phi;
  gphi *update_phi, *update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = loop->header;
  edge new_exit_e;
  tree current_new_name;
  gphi_iterator gsi_orig, gsi_update;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  /* Walk the header PHIs of LOOP and the PHIs of UPDATE_BB in
     lockstep; this relies on the two PHI sequences corresponding one
     to one (NOTE(review): assumed from the caller's construction of
     UPDATE_BB — confirm against the peeling driver).  */
  for (gsi_orig = gsi_start_phis (orig_bb),
       gsi_update = gsi_start_phis (update_bb);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      source_location loop_locus, guard_locus;
      tree new_res;
      orig_phi = gsi_orig.phi ();
      update_phi = gsi_update.phi ();

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      /* The latch argument of the header PHI is the value live on the
	 loop's exit; the preheader argument is the value live when the
	 loop is skipped via the guard.  */
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
						      EDGE_SUCC (loop->latch,
								 0));
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
      guard_locus
	= gimple_phi_arg_location_from_edge (orig_phi,
					     loop_preheader_edge (loop));

      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
		  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* Virtual operands need no loop-closed exit PHIs.  */
      if (virtual_operand_p (PHI_RESULT (orig_phi)))
	continue;

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));

      /* 2.4. Record the newly created name with set_current_def.
         We want to find a name such that
            name = get_current_def (orig_loop_name)
         and to set its current definition as follows:
            set_current_def (name, new_phi_name)

         If LOOP is a new loop then loop_arg is already the name we're
         looking for.  If LOOP is the original loop, then loop_arg is
         the orig_loop_name and the relevant name is recorded in its
         current reaching definition.  */
      if (is_new_loop)
	current_new_name = loop_arg;
      else
	{
	  current_new_name = get_current_def (loop_arg);
	  /* current_def is not available only if the variable does not
	     change inside the loop, in which case we also don't care
	     about recording a current_def for it because we won't be
	     trying to create loop-exit-phis for it.  */
	  if (!current_new_name)
	    continue;
	}
      tree new_name = get_current_def (current_new_name);
      /* Because of peeled_chrec optimization it is possible that we have
	 set this earlier.  Verify the PHI has the same value.  */
      if (new_name)
	{
	  gimple phi = SSA_NAME_DEF_STMT (new_name);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI
		      && gimple_bb (phi) == *new_exit_bb
		      && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
			  == loop_arg));
	  continue;
	}

      set_current_def (current_new_name, PHI_RESULT (new_phi));
    }
}
517
518
/* Function slpeel_update_phi_nodes_for_guard2

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
LOOP->  loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name used outside the loop (i.e - for each name that has an exit
   phi in next_bb) we create a new phi in:
   1. merge2_bb (to account for the edge from guard_bb)
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
*/

static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
				    bool is_new_loop, basic_block *new_exit_bb)
{
  gphi *orig_phi, *new_phi;
  gphi *update_phi, *update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gphi_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree new_res;
      update_phi = gsi.phi ();
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
	 out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
	continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
	    of LOOP.  Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
	{
	  new_name = orig_def_new_name;
	  /* Some variables have both loop-entry-phis and loop-exit-phis.
	     Such variables were given yet newer names by phis placed in
	     guard_bb by slpeel_update_phi_nodes_for_guard1.  I.e:
	     new_name2 = get_current_def (get_current_def (orig_name)).  */
	  new_name2 = get_current_def (new_name);
	}

      /* Which name flows in from the loop exit and which from the
	 guard depends on whether LOOP is the new copy or the
	 original (see the IS_NEW_LOOP discussion above).  */
      if (is_new_loop)
	{
	  guard_arg = orig_def;
	  loop_arg = new_name;
	}
      else
	{
	  guard_arg = new_name;
	  loop_arg = orig_def;
	}
      /* A guard1-created exit PHI supersedes the guard argument.  */
      if (new_name2)
	guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));


      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
	 GUARD_BB, i.e. names for which
	 slpeel_update_phi_nodes_for_guard1 had not already created a
	 phi node.  This is the case for names that are used outside
	 the loop (and therefore need an exit phi) but are not updated
	 across loop iterations (and therefore don't have a
	 loop-header-phi).

	 slpeel_update_phi_nodes_for_guard1 is responsible for
	 creating loop-exit phis in GUARD_BB for names that have a
	 loop-header-phi.  When such a phi is created we also record
	 the new name in its current definition.  If this new name
	 exists, then guard_arg was set to this new name (see 1.2
	 above).  Therefore, if guard_arg is not this new name, this
	 is an indication that an exit-phi in GUARD_BB was not yet
	 created, so we take care of it here.  */
      if (guard_arg == new_name2)
	continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
		   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
		  == guard_arg);
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
				  PHI_RESULT (new_phi));
    }
}
674
675
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.
   The old exit condition is removed and replaced by a test of the
   new IV against NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  /* Build the counting IV 0, 1, 2, ... at the standard increment
     position of the loop.  */
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
	     &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  /* Make both operands of the new exit test gimple-valid right
     before the old exit condition.  */
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* If the exit edge is taken when the old condition is true, exit
     when IV >= NITERS; otherwise loop while IV < NITERS.  */
  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
				 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  /* Also release the stmt_vec_info attached to the removed stmt.  */
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
  loop->nb_iterations = niters;
}
731
5ce9450f
JJ
732/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
733 For all PHI arguments in FROM->dest and TO->dest from those
734 edges ensure that TO->dest PHI arguments have current_def
735 to that in from. */
736
737static void
738slpeel_duplicate_current_defs_from_edges (edge from, edge to)
739{
740 gimple_stmt_iterator gsi_from, gsi_to;
741
742 for (gsi_from = gsi_start_phis (from->dest),
743 gsi_to = gsi_start_phis (to->dest);
744 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
745 gsi_next (&gsi_from), gsi_next (&gsi_to))
746 {
747 gimple from_phi = gsi_stmt (gsi_from);
748 gimple to_phi = gsi_stmt (gsi_to);
749 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
750 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
751 if (TREE_CODE (from_arg) == SSA_NAME
752 && TREE_CODE (to_arg) == SSA_NAME
753 && get_current_def (to_arg) == NULL_TREE)
754 set_current_def (to_arg, get_current_def (from_arg));
755 }
756}
757
ebfd146a 758
b8698a0f 759/* Given LOOP this function generates a new copy of it and puts it
5ce9450f
JJ
760 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
761 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
762 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
763 entry or exit of LOOP. */
ebfd146a
IR
764
765struct loop *
5ce9450f
JJ
766slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
767 struct loop *scalar_loop, edge e)
ebfd146a
IR
768{
769 struct loop *new_loop;
770 basic_block *new_bbs, *bbs;
771 bool at_exit;
772 bool was_imm_dom;
b8698a0f 773 basic_block exit_dest;
ebfd146a 774 edge exit, new_exit;
ebfd146a 775
2cfc56b9
RB
776 exit = single_exit (loop);
777 at_exit = (e == exit);
ebfd146a
IR
778 if (!at_exit && e != loop_preheader_edge (loop))
779 return NULL;
780
5ce9450f
JJ
781 if (scalar_loop == NULL)
782 scalar_loop = loop;
783
784 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
785 get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
ebfd146a
IR
786
787 /* Check whether duplication is possible. */
5ce9450f 788 if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
ebfd146a
IR
789 {
790 free (bbs);
791 return NULL;
792 }
793
794 /* Generate new loop structure. */
5ce9450f
JJ
795 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
796 duplicate_subloops (scalar_loop, new_loop);
ebfd146a 797
2cfc56b9 798 exit_dest = exit->dest;
b8698a0f
L
799 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
800 exit_dest) == loop->header ?
ebfd146a
IR
801 true : false);
802
2cfc56b9
RB
803 /* Also copy the pre-header, this avoids jumping through hoops to
804 duplicate the loop entry PHI arguments. Create an empty
805 pre-header unconditionally for this. */
5ce9450f 806 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
2cfc56b9 807 edge entry_e = single_pred_edge (preheader);
5ce9450f
JJ
808 bbs[scalar_loop->num_nodes] = preheader;
809 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
ebfd146a 810
5ce9450f
JJ
811 exit = single_exit (scalar_loop);
812 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
ebfd146a 813 &exit, 1, &new_exit, NULL,
f14540b6 814 e->src, true);
5ce9450f
JJ
815 exit = single_exit (loop);
816 basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
ebfd146a 817
5ce9450f
JJ
818 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
819
820 if (scalar_loop != loop)
821 {
822 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
823 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
824 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
825 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
826 header) to have current_def set, so copy them over. */
827 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
828 exit);
829 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
830 0),
831 EDGE_SUCC (loop->latch, 0));
832 }
b8698a0f 833
ebfd146a
IR
834 if (at_exit) /* Add the loop copy at exit. */
835 {
5ce9450f
JJ
836 if (scalar_loop != loop)
837 {
538dd0b7 838 gphi_iterator gsi;
5ce9450f
JJ
839 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
840
841 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
842 gsi_next (&gsi))
843 {
538dd0b7 844 gphi *phi = gsi.phi ();
5ce9450f
JJ
845 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
846 location_t orig_locus
847 = gimple_phi_arg_location_from_edge (phi, e);
848
849 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
850 }
851 }
2cfc56b9
RB
852 redirect_edge_and_branch_force (e, new_preheader);
853 flush_pending_stmts (e);
854 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
ebfd146a 855 if (was_imm_dom)
5ce9450f 856 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
2cfc56b9
RB
857
858 /* And remove the non-necessary forwarder again. Keep the other
859 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
860 redirect_edge_pred (single_succ_edge (preheader),
861 single_pred (preheader));
2cfc56b9 862 delete_basic_block (preheader);
5ce9450f
JJ
863 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
864 loop_preheader_edge (scalar_loop)->src);
ebfd146a
IR
865 }
866 else /* Add the copy at entry. */
867 {
5ce9450f
JJ
868 if (scalar_loop != loop)
869 {
870 /* Remove the non-necessary forwarder of scalar_loop again. */
871 redirect_edge_pred (single_succ_edge (preheader),
872 single_pred (preheader));
873 delete_basic_block (preheader);
874 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
875 loop_preheader_edge (scalar_loop)->src);
876 preheader = split_edge (loop_preheader_edge (loop));
877 entry_e = single_pred_edge (preheader);
878 }
879
2cfc56b9
RB
880 redirect_edge_and_branch_force (entry_e, new_preheader);
881 flush_pending_stmts (entry_e);
882 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
883
884 redirect_edge_and_branch_force (new_exit, preheader);
885 flush_pending_stmts (new_exit);
886 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
887
888 /* And remove the non-necessary forwarder again. Keep the other
889 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
890 redirect_edge_pred (single_succ_edge (new_preheader),
891 single_pred (new_preheader));
2cfc56b9
RB
892 delete_basic_block (new_preheader);
893 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
894 loop_preheader_edge (new_loop)->src);
ebfd146a
IR
895 }
896
5ce9450f 897 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
2cfc56b9
RB
898 rename_variables_in_bb (new_bbs[i]);
899
5ce9450f
JJ
900 if (scalar_loop != loop)
901 {
902 /* Update new_loop->header PHIs, so that on the preheader
903 edge they are the ones from loop rather than scalar_loop. */
538dd0b7 904 gphi_iterator gsi_orig, gsi_new;
5ce9450f
JJ
905 edge orig_e = loop_preheader_edge (loop);
906 edge new_e = loop_preheader_edge (new_loop);
907
908 for (gsi_orig = gsi_start_phis (loop->header),
909 gsi_new = gsi_start_phis (new_loop->header);
910 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
911 gsi_next (&gsi_orig), gsi_next (&gsi_new))
912 {
538dd0b7
DM
913 gphi *orig_phi = gsi_orig.phi ();
914 gphi *new_phi = gsi_new.phi ();
5ce9450f
JJ
915 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
916 location_t orig_locus
917 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
918
919 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
920 }
921 }
922
ebfd146a
IR
923 free (new_bbs);
924 free (bbs);
925
2cfc56b9
RB
926#ifdef ENABLE_CHECKING
927 verify_dominators (CDI_DOMINATORS);
928#endif
929
ebfd146a
IR
930 return new_loop;
931}
932
933
934/* Given the condition statement COND, put it as the last statement
935 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
b8698a0f 936 Assumes that this is the single exit of the guarded loop.
86290011 937 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
ebfd146a
IR
938
939static edge
86290011
RG
940slpeel_add_loop_guard (basic_block guard_bb, tree cond,
941 gimple_seq cond_expr_stmt_list,
e78410bf
JH
942 basic_block exit_bb, basic_block dom_bb,
943 int probability)
ebfd146a
IR
944{
945 gimple_stmt_iterator gsi;
946 edge new_e, enter_e;
538dd0b7 947 gcond *cond_stmt;
ebfd146a
IR
948 gimple_seq gimplify_stmt_list = NULL;
949
950 enter_e = EDGE_SUCC (guard_bb, 0);
951 enter_e->flags &= ~EDGE_FALLTHRU;
952 enter_e->flags |= EDGE_FALSE_VALUE;
953 gsi = gsi_last_bb (guard_bb);
954
f7a06a98
RG
955 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
956 NULL_TREE);
86290011
RG
957 if (gimplify_stmt_list)
958 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
f7a06a98 959 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
86290011
RG
960 if (cond_expr_stmt_list)
961 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
ebfd146a
IR
962
963 gsi = gsi_last_bb (guard_bb);
964 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
965
966 /* Add new edge to connect guard block to the merge/loop-exit block. */
967 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
e78410bf
JH
968
969 new_e->count = guard_bb->count;
970 new_e->probability = probability;
971 new_e->count = apply_probability (enter_e->count, probability);
972 enter_e->count -= new_e->count;
973 enter_e->probability = inverse_probability (probability);
ebfd146a
IR
974 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
975 return new_e;
976}
977
978
979/* This function verifies that the following restrictions apply to LOOP:
980 (1) it is innermost
981 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
982 (3) it is single entry, single exit
983 (4) its exit condition is the last stmt in the header
984 (5) E is the entry/exit edge of LOOP.
985 */
986
987bool
988slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
989{
990 edge exit_e = single_exit (loop);
991 edge entry_e = loop_preheader_edge (loop);
538dd0b7 992 gcond *orig_cond = get_loop_exit_condition (loop);
ebfd146a
IR
993 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
994
ebfd146a
IR
995 if (loop->inner
996 /* All loops have an outer scope; the only case loop->outer is NULL is for
997 the function itself. */
998 || !loop_outer (loop)
999 || loop->num_nodes != 2
1000 || !empty_block_p (loop->latch)
1001 || !single_exit (loop)
1002 /* Verify that new loop exit condition can be trivially modified. */
1003 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1004 || (e != exit_e && e != entry_e))
1005 return false;
1006
1007 return true;
1008}
1009
#ifdef ENABLE_CHECKING
/* Consistency check on the CFG produced by loop peeling: the guard block
   at FIRST_LOOP's exit must have exactly two successors, and SECOND_LOOP's
   preheader must be reachable both from that guard and from the block
   preceding FIRST_LOOP.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.

     The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */
  basic_block pred_a = EDGE_PRED (loop2_entry_bb, 0)->src;
  basic_block pred_b = EDGE_PRED (loop2_entry_bb, 1)->src;
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((pred_a == loop1_exit_bb && pred_b == loop1_entry_bb)
		  || (pred_b == loop1_exit_bb && pred_a == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
1043
/* If the run time cost model check determines that vectorization is
   not profitable and hence scalar loop should be generated then set
   FIRST_NITERS to prologue peeled iterations. This will allow all the
   iterations to be executed in the prologue peeled scalar loop. */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree *first_niters,
			 struct loop *loop,
			 unsigned int th,
			 int probability)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  edge e_true, e_false, e_fallthru;
  gcond *cond_stmt;
  gimple_seq stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  /* The original (unchanged) scalar iteration count of LOOP.  */
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  /* Split the single predecessor edge twice, creating the diamond shape
     cond_bb -> { then_bb, bb_before_first_loop }:
     COND_BB will hold the profitability test, THEN_BB the adjusted
     iteration count computation.  */
  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge (e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge (e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  /* Edge taken when the loop IS profitable to vectorize (condition false):
     jump straight to bb_before_first_loop, skipping then_bb.  */
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  /* Distribute profile counts/frequencies over the new diamond according
     to PROBABILITY (probability of the scalar-loop path).  */
  e_true->probability = probability;
  e_false->probability = inverse_probability (probability);
  e_true->count = apply_probability (cond_bb->count, probability);
  e_false->count = cond_bb->count - e_true->count;
  then_bb->frequency = EDGE_FREQUENCY (e_true);
  then_bb->count = e_true->count;

  e_fallthru = EDGE_SUCC (then_bb, 0);
  e_fallthru->count = then_bb->count;

  /* Build and insert the run-time profitability check:
     scalar_loop_iters <= TH.  */
  gsi = gsi_last_bb (cond_bb);
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
		 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
					   NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* In THEN_BB, materialize the full scalar iteration count so the
     prologue loop executes all iterations.  */
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  /* Merge the two values with a PHI: full scalar count on the then-path,
     original FIRST_NITERS on the skip path.  */
  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
	       UNKNOWN_LOCATION);
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

  *first_niters = PHI_RESULT (newphi);
}
1120
ebfd146a
IR
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
	should be copied.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
                          during versioning and hence needs to occur during
			  prologue generation or whether cost model check
			  has not occurred during prologue generation and hence
			  needs to occur during epilogue generation.
   - BOUND1 is the upper bound on number of iterations of the first loop (if known)
   - BOUND2 is the upper bound on number of iterations of the second loop (if known)


   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
   then the generated condition is combined with COND_EXPR and the
   statements in COND_EXPR_STMT_LIST are emitted together with it.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

static struct loop *
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
			       edge e, tree *first_niters,
			       tree niters, bool update_first_loop_count,
			       unsigned int th, bool check_profitability,
			       tree cond_expr, gimple_seq cond_expr_stmt_list,
			       int bound1, int bound2)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);
  source_location loop_loc;
  /* There are many aspects to how likely the first loop is going to be executed.
     Without histogram we can't really do good job.  Simply set it to
     2/3, so the first loop is not reordered to the end of function and
     the hot path through stays short.  */
  int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int probability_of_second_loop;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We might have a queued need to update virtual SSA form.  As we
     delete the update SSA machinery below after doing a regular
     incremental SSA update during loop copying make sure we don't
     lose that fact.
     ??? Needing to update virtual SSA form by renaming is unfortunate
     but not all of the vectorizer code inserting new loads / stores
     properly assigns virtual operands to those statements.  */
  update_ssa (TODO_update_ssa_only_virtuals);

  /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
     in the exit bb and rename all the uses after the loop.  This simplifies
     the *guard[12] routines, which assume loop closed SSA form for all PHIs
     (but normally loop closed SSA form doesn't require virtual PHIs to be
     in the same form).  Doing this early simplifies the checking what
     uses should be renamed.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
	gphi *phi = gsi.phi ();
	/* Look for an existing virtual PHI in the exit block; if one is
	   found there is nothing to do.  Note GSI is deliberately reused
	   here to scan the exit block.  */
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	    break;
	if (gsi_end_p (gsi))
	  {
	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	    imm_use_iterator imm_iter;
	    gimple stmt;
	    use_operand_p use_p;

	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	    gimple_phi_set_result (new_phi, new_vop);
	    /* Redirect all uses of VOP outside the loop header to the new
	       loop-closed virtual PHI result.  */
	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	      if (stmt != new_phi && gimple_bb (stmt) != loop->header)
		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  SET_USE (use_p, new_vop);
	  }
	/* At most one virtual PHI exists in the header; done.  */
	break;
      }

  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
							   e)))
    {
      loop_loc = find_loop_location (loop);
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
		       "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  /* Prepare the vector that collects debug stmts needing adjustment;
     consumed by adjust_vec_debug_stmts at the end.  */
  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* 2.  Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
	 generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
	 this function is invoked for prologue generation
	 and the cost model check needs to be done at run
	 time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
	 this function is invoked for epilogue generation
	 and the cost model check needs to be done at run
	 time.  This check is combined with any pre-existing
	 check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  /* Loop copying insterted a forwarder block for us here.  */
  bb_before_second_loop = single_exit (first_loop)->dest;

  /* The second loop runs when the first-loop guard falls through, or when
     the first loop ran and its own exit guard fell through.  */
  probability_of_second_loop = (inverse_probability (first_guard_probability)
			        + combine_probabilities (second_guard_probability,
							 first_guard_probability));
  /* Theoretically preheader edge of first loop and exit edge should have
     same frequencies.  Loop exit probablities are however easy to get wrong.
     It is safer to copy value from original loop entry.  */
  bb_before_second_loop->frequency
    = combine_probabilities (bb_before_first_loop->frequency,
			     probability_of_second_loop);
  bb_before_second_loop->count
    = apply_probability (bb_before_first_loop->count,
			 probability_of_second_loop);
  single_succ_edge (bb_before_second_loop)->count
    = bb_before_second_loop->count;

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
      tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
      unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	limit = limit + 1;
      if (check_profitability
	  && th > limit)
	limit = th;
      /* Skip the vector loop entirely when fewer than LIMIT+1 iterations
	 remain (NITERSM1 is NITERS - 1, hence LT rather than LE).  */
      pre_condition =
	fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
		     build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
      if (cond_expr)
	{
	  pre_condition =
	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
			 pre_condition,
			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
				      cond_expr));
	}
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
	set_prologue_iterations (bb_before_first_loop, first_niters,
				 loop, th, first_guard_probability);

      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
		     build_int_cst (TREE_TYPE (*first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
				  cond_expr_stmt_list,
				  bb_before_second_loop, bb_before_first_loop,
				  inverse_probability (first_guard_probability));
  scale_loop_profile (first_loop, first_guard_probability,
		      check_profitability && (int)th > bound1 ? th : bound1);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
				      first_loop == new_loop,
				      &new_exit_bb);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
	fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop,
				  inverse_probability (second_guard_probability));
  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                     second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

  delete_update_ssa ();

  adjust_vec_debug_stmts ();

  return new_loop;
}
1489
1490/* Function vect_get_loop_location.
1491
1492 Extract the location of the loop in the source code.
1493 If the loop is not well formed for vectorization, an estimated
1494 location is calculated.
1495 Return the loop location if succeed and NULL if not. */
1496
b05e0233 1497source_location
ebfd146a
IR
1498find_loop_location (struct loop *loop)
1499{
1500 gimple stmt = NULL;
1501 basic_block bb;
1502 gimple_stmt_iterator si;
1503
1504 if (!loop)
b05e0233 1505 return UNKNOWN_LOCATION;
ebfd146a
IR
1506
1507 stmt = get_loop_exit_condition (loop);
1508
502498d5
JJ
1509 if (stmt
1510 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
1511 return gimple_location (stmt);
1512
1513 /* If we got here the loop is probably not "well formed",
1514 try to estimate the loop location */
1515
1516 if (!loop->header)
b05e0233 1517 return UNKNOWN_LOCATION;
ebfd146a
IR
1518
1519 bb = loop->header;
1520
1521 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1522 {
1523 stmt = gsi_stmt (si);
502498d5 1524 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
1525 return gimple_location (stmt);
1526 }
1527
b05e0233 1528 return UNKNOWN_LOCATION;
ebfd146a
IR
1529}
1530
1531
ebfd146a
IR
1532/* Function vect_can_advance_ivs_p
1533
b8698a0f
L
1534 In case the number of iterations that LOOP iterates is unknown at compile
1535 time, an epilog loop will be generated, and the loop induction variables
1536 (IVs) will be "advanced" to the value they are supposed to take just before
ebfd146a
IR
1537 the epilog loop. Here we check that the access function of the loop IVs
1538 and the expression that represents the loop bound are simple enough.
1539 These restrictions will be relaxed in the future. */
1540
b8698a0f 1541bool
ebfd146a
IR
1542vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1543{
1544 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1545 basic_block bb = loop->header;
1546 gimple phi;
538dd0b7 1547 gphi_iterator gsi;
ebfd146a
IR
1548
1549 /* Analyze phi functions of the loop header. */
1550
73fbfcad 1551 if (dump_enabled_p ())
e645e942 1552 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
ebfd146a
IR
1553 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1554 {
ebfd146a
IR
1555 tree evolution_part;
1556
538dd0b7 1557 phi = gsi.phi ();
73fbfcad 1558 if (dump_enabled_p ())
ebfd146a 1559 {
78c60e3d
SS
1560 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1561 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
e645e942 1562 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1563 }
1564
1565 /* Skip virtual phi's. The data dependences that are associated with
1566 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1567
ea057359 1568 if (virtual_operand_p (PHI_RESULT (phi)))
ebfd146a 1569 {
73fbfcad 1570 if (dump_enabled_p ())
78c60e3d 1571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1572 "virtual phi. skip.\n");
ebfd146a
IR
1573 continue;
1574 }
1575
1576 /* Skip reduction phis. */
1577
1578 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1579 {
73fbfcad 1580 if (dump_enabled_p ())
78c60e3d 1581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1582 "reduc phi. skip.\n");
ebfd146a
IR
1583 continue;
1584 }
1585
1586 /* Analyze the evolution function. */
1587
afb119be
RB
1588 evolution_part
1589 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
ebfd146a
IR
1590 if (evolution_part == NULL_TREE)
1591 {
73fbfcad 1592 if (dump_enabled_p ())
afb119be 1593 dump_printf (MSG_MISSED_OPTIMIZATION,
e645e942 1594 "No access function or evolution.\n");
ebfd146a
IR
1595 return false;
1596 }
b8698a0f
L
1597
1598 /* FORNOW: We do not transform initial conditions of IVs
ebfd146a
IR
1599 which evolution functions are a polynomial of degree >= 2. */
1600
1601 if (tree_is_chrec (evolution_part))
b8698a0f 1602 return false;
ebfd146a
IR
1603 }
1604
1605 return true;
1606}
1607
1608
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
            of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
              vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                coming out from LOOP on which there are uses of the LOOP ivs
		(this is the path from LOOP->exit to epilog_loop->preheader).

   The new definitions of the ivs are placed in LOOP->exit.
   The phi args associated with the edge UPDATE_E in the bb
   UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gphi *phi, *phi1;
  gphi_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header phis and the update_bb phis in lockstep
     (Assumption 2: they are in the same order).  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;
      stmt_vec_info stmt_info;

      phi = gsi.phi ();
      phi1 = gsi1.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Skip virtual phi's.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "virtual phi. skip.\n");
	  continue;
	}

      /* Skip reduction phis.  */
      stmt_info = vinfo_for_stmt (phi);
      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "reduc phi. skip.\n");
	  continue;
	}

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      /* Compute ni = init + niters * step; pointer IVs need
	 POINTER_PLUS arithmetic.  */
      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      /* Emit the advanced IV value at the end of the exit block.  */
      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
					  true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
1734
ebfd146a
IR
1735/* Function vect_do_peeling_for_loop_bound
1736
1737 Peel the last iterations of the loop represented by LOOP_VINFO.
b8698a0f 1738 The peeled iterations form a new epilog loop. Given that the loop now
ebfd146a
IR
1739 iterates NITERS times, the new epilog loop iterates
1740 NITERS % VECTORIZATION_FACTOR times.
b8698a0f
L
1741
1742 The original loop will later be made to iterate
86290011
RG
1743 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1744
1745 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1746 test. */
ebfd146a 1747
b8698a0f 1748void
f3c92486
RB
1749vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1750 tree ni_name, tree ratio_mult_vf_name,
368117e8 1751 unsigned int th, bool check_profitability)
ebfd146a 1752{
ebfd146a 1753 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 1754 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a
IR
1755 struct loop *new_loop;
1756 edge update_e;
1757 basic_block preheader;
1758 int loop_num;
d68d56b5 1759 int max_iter;
368117e8
RG
1760 tree cond_expr = NULL_TREE;
1761 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a 1762
73fbfcad 1763 if (dump_enabled_p ())
ccb3ad87 1764 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1765 "=== vect_do_peeling_for_loop_bound ===\n");
ebfd146a
IR
1766
1767 initialize_original_copy_tables ();
1768
b8698a0f 1769 loop_num = loop->num;
ebfd146a 1770
5ce9450f
JJ
1771 new_loop
1772 = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1773 &ratio_mult_vf_name, ni_name, false,
1774 th, check_profitability,
1775 cond_expr, cond_expr_stmt_list,
1776 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
ebfd146a
IR
1777 gcc_assert (new_loop);
1778 gcc_assert (loop_num == loop->num);
1779#ifdef ENABLE_CHECKING
1780 slpeel_verify_cfg_after_peeling (loop, new_loop);
1781#endif
1782
1783 /* A guard that controls whether the new_loop is to be executed or skipped
1784 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1785 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1786 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1787 is on the path where the LOOP IVs are used and need to be updated. */
1788
1789 preheader = loop_preheader_edge (new_loop)->src;
1790 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1791 update_e = EDGE_PRED (preheader, 0);
1792 else
1793 update_e = EDGE_PRED (preheader, 1);
1794
b8698a0f 1795 /* Update IVs of original loop as if they were advanced
ebfd146a 1796 by ratio_mult_vf_name steps. */
b8698a0f 1797 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
ebfd146a 1798
22458c5a
JH
1799 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1800 and this means N-2 loopback edge executions.
1801
1802 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1803 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1804 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1805 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1806 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
368117e8 1807 if (check_profitability)
22458c5a 1808 max_iter = MAX (max_iter, (int) th - 1);
807e902e 1809 record_niter_bound (new_loop, max_iter, false, true);
ccb3ad87 1810 dump_printf (MSG_NOTE,
78c60e3d
SS
1811 "Setting upper bound of nb iterations for epilogue "
1812 "loop to %d\n", max_iter);
7d5a99f4 1813
ebfd146a
IR
1814 /* After peeling we have to reset scalar evolution analyzer. */
1815 scev_reset ();
1816
1817 free_original_copy_tables ();
1818}
1819
1820
1821/* Function vect_gen_niters_for_prolog_loop
1822
1823 Set the number of iterations for the loop represented by LOOP_VINFO
1824 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1825 and the misalignment of DR - the data reference recorded in
b8698a0f 1826 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
ebfd146a
IR
1827 this loop, the data reference DR will refer to an aligned location.
1828
1829 The following computation is generated:
1830
1831 If the misalignment of DR is known at compile time:
1832 addr_mis = int mis = DR_MISALIGNMENT (dr);
1833 Else, compute address misalignment in bytes:
5aea1e76 1834 addr_mis = addr & (vectype_align - 1)
ebfd146a
IR
1835
1836 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1837
1838 (elem_size = element type size; an element is the scalar element whose type
1839 is the inner type of the vectype)
1840
1841 When the step of the data-ref in the loop is not 1 (as in interleaved data
1842 and SLP), the number of iterations of the prolog must be divided by the step
1843 (which is equal to the size of interleaved group).
1844
1845 The above formulas assume that VF == number of elements in the vector. This
1846 may not hold when there are multiple-types in the loop.
1847 In this case, for some data-references in the loop the VF does not represent
1848 the number of elements that fit in the vector. Therefore, instead of VF we
1849 use TYPE_VECTOR_SUBPARTS. */
1850
b8698a0f 1851static tree
e78410bf 1852vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
ebfd146a
IR
1853{
1854 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1855 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1856 tree var;
1857 gimple_seq stmts;
1858 tree iters, iters_name;
1859 edge pe;
1860 basic_block new_bb;
1861 gimple dr_stmt = DR_STMT (dr);
1862 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1863 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1864 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1865 tree niters_type = TREE_TYPE (loop_niters);
ebfd146a
IR
1866 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1867
b8698a0f 1868 pe = loop_preheader_edge (loop);
ebfd146a 1869
15e693cc 1870 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
ebfd146a 1871 {
15e693cc 1872 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a 1873
73fbfcad 1874 if (dump_enabled_p ())
ccb3ad87 1875 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1876 "known peeling = %d.\n", npeel);
ebfd146a 1877
720f5239 1878 iters = build_int_cst (niters_type, npeel);
15e693cc 1879 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a
IR
1880 }
1881 else
1882 {
1883 gimple_seq new_stmts = NULL;
d8ba5b19
RG
1884 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1885 tree offset = negative
1886 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
b8698a0f 1887 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
d8ba5b19 1888 &new_stmts, offset, loop);
96f9265a 1889 tree type = unsigned_type_for (TREE_TYPE (start_addr));
5aea1e76
UW
1890 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1891 HOST_WIDE_INT elem_size =
1892 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1893 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
ebfd146a
IR
1894 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1895 tree nelements_tree = build_int_cst (type, nelements);
1896 tree byte_misalign;
1897 tree elem_misalign;
1898
1899 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1900 gcc_assert (!new_bb);
b8698a0f 1901
5aea1e76 1902 /* Create: byte_misalign = addr & (vectype_align - 1) */
b8698a0f 1903 byte_misalign =
720f5239 1904 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
5aea1e76 1905 vectype_align_minus_1);
b8698a0f 1906
ebfd146a
IR
1907 /* Create: elem_misalign = byte_misalign / element_size */
1908 elem_misalign =
1909 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1910
1911 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
d8ba5b19
RG
1912 if (negative)
1913 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1914 else
1915 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
ebfd146a
IR
1916 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1917 iters = fold_convert (niters_type, iters);
e78410bf 1918 *bound = nelements;
ebfd146a
IR
1919 }
1920
1921 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1922 /* If the loop bound is known at compile time we already verified that it is
1923 greater than vf; since the misalignment ('iters') is at most vf, there's
1924 no need to generate the MIN_EXPR in this case. */
1925 if (TREE_CODE (loop_niters) != INTEGER_CST)
1926 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1927
73fbfcad 1928 if (dump_enabled_p ())
ebfd146a 1929 {
ccb3ad87 1930 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1931 "niters for prolog loop: ");
ccb3ad87 1932 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
e645e942 1933 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1934 }
1935
1936 var = create_tmp_var (niters_type, "prolog_loop_niters");
ebfd146a
IR
1937 stmts = NULL;
1938 iters_name = force_gimple_operand (iters, &stmts, false, var);
1939
1940 /* Insert stmt on loop preheader edge. */
1941 if (stmts)
1942 {
1943 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1944 gcc_assert (!new_bb);
1945 }
1946
b8698a0f 1947 return iters_name;
ebfd146a
IR
1948}
1949
1950
1951/* Function vect_update_init_of_dr
1952
1953 NITERS iterations were peeled from LOOP. DR represents a data reference
1954 in LOOP. This function updates the information recorded in DR to
b8698a0f 1955 account for the fact that the first NITERS iterations had already been
ebfd146a
IR
1956 executed. Specifically, it updates the OFFSET field of DR. */
1957
1958static void
1959vect_update_init_of_dr (struct data_reference *dr, tree niters)
1960{
1961 tree offset = DR_OFFSET (dr);
b8698a0f 1962
ebfd146a
IR
1963 niters = fold_build2 (MULT_EXPR, sizetype,
1964 fold_convert (sizetype, niters),
1965 fold_convert (sizetype, DR_STEP (dr)));
587aa063
RG
1966 offset = fold_build2 (PLUS_EXPR, sizetype,
1967 fold_convert (sizetype, offset), niters);
ebfd146a
IR
1968 DR_OFFSET (dr) = offset;
1969}
1970
1971
1972/* Function vect_update_inits_of_drs
1973
b8698a0f
L
1974 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1975 This function updates the information recorded for the data references in
1976 the loop to account for the fact that the first NITERS iterations had
ebfd146a
IR
1977 already been executed. Specifically, it updates the initial_condition of
1978 the access_function of all the data_references in the loop. */
1979
1980static void
1981vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1982{
1983 unsigned int i;
9771b263 1984 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
ebfd146a 1985 struct data_reference *dr;
78c60e3d 1986
73fbfcad 1987 if (dump_enabled_p ())
ccb3ad87 1988 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1989 "=== vect_update_inits_of_dr ===\n");
ebfd146a 1990
9771b263 1991 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1992 vect_update_init_of_dr (dr, niters);
1993}
1994
1995
1996/* Function vect_do_peeling_for_alignment
1997
1998 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1999 'niters' is set to the misalignment of one of the data references in the
2000 loop, thereby forcing it to refer to an aligned location at the beginning
2001 of the execution of this loop. The data reference for which we are
2002 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2003
2004void
f3c92486 2005vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
368117e8 2006 unsigned int th, bool check_profitability)
ebfd146a
IR
2007{
2008 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 2009 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
f3c92486 2010 tree niters_of_prolog_loop;
b61b1f17 2011 tree wide_prolog_niters;
ebfd146a 2012 struct loop *new_loop;
03fd03d5 2013 int max_iter;
e78410bf 2014 int bound = 0;
ebfd146a 2015
73fbfcad 2016 if (dump_enabled_p ())
9cc1fb4b
XDL
2017 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2018 "loop peeled for vectorization to enhance"
2019 " alignment\n");
ebfd146a
IR
2020
2021 initialize_original_copy_tables ();
2022
f3c92486
RB
2023 gimple_seq stmts = NULL;
2024 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5d2eb24b 2025 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
e78410bf
JH
2026 ni_name,
2027 &bound);
ebfd146a 2028
ebfd146a
IR
2029 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2030 new_loop =
5ce9450f
JJ
2031 slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2032 loop_preheader_edge (loop),
5d2eb24b 2033 &niters_of_prolog_loop, ni_name, true,
e78410bf 2034 th, check_profitability, NULL_TREE, NULL,
5ce9450f 2035 bound, 0);
ebfd146a
IR
2036
2037 gcc_assert (new_loop);
2038#ifdef ENABLE_CHECKING
2039 slpeel_verify_cfg_after_peeling (new_loop, loop);
2040#endif
22458c5a
JH
2041 /* For vectorization factor N, we need to copy at most N-1 values
2042 for alignment and this means N-2 loopback edge executions. */
2043 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
368117e8 2044 if (check_profitability)
22458c5a 2045 max_iter = MAX (max_iter, (int) th - 1);
807e902e 2046 record_niter_bound (new_loop, max_iter, false, true);
ccb3ad87 2047 dump_printf (MSG_NOTE,
78c60e3d
SS
2048 "Setting upper bound of nb iterations for prologue "
2049 "loop to %d\n", max_iter);
ebfd146a
IR
2050
2051 /* Update number of times loop executes. */
ebfd146a 2052 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
15e693cc 2053 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
95b3eff3
RB
2054 LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2055 TREE_TYPE (ni_name),
2056 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
ebfd146a 2057
5d2eb24b
IR
2058 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2059 wide_prolog_niters = niters_of_prolog_loop;
2060 else
2061 {
2062 gimple_seq seq = NULL;
2063 edge pe = loop_preheader_edge (loop);
2064 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2065 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
5d2eb24b
IR
2066 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2067 var);
2068 if (seq)
2069 {
2070 /* Insert stmt on loop preheader edge. */
2071 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2072 gcc_assert (!new_bb);
2073 }
2074 }
2075
ebfd146a 2076 /* Update the init conditions of the access functions of all data refs. */
b61b1f17 2077 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
ebfd146a
IR
2078
2079 /* After peeling we have to reset scalar evolution analyzer. */
2080 scev_reset ();
2081
2082 free_original_copy_tables ();
2083}
2084
2085
2086/* Function vect_create_cond_for_align_checks.
2087
2088 Create a conditional expression that represents the alignment checks for
2089 all of data references (array element references) whose alignment must be
2090 checked at runtime.
2091
2092 Input:
2093 COND_EXPR - input conditional expression. New conditions will be chained
2094 with logical AND operation.
2095 LOOP_VINFO - two fields of the loop information are used.
2096 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2097 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2098
2099 Output:
2100 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2101 expression.
2102 The returned value is the conditional expression to be used in the if
2103 statement that controls which version of the loop gets executed at runtime.
2104
2105 The algorithm makes two assumptions:
2106 1) The number of bytes "n" in a vector is a power of 2.
2107 2) An address "a" is aligned if a%n is zero and that this
2108 test can be done as a&(n-1) == 0. For example, for 16
2109 byte vectors the test is a&0xf == 0. */
2110
2111static void
2112vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2113 tree *cond_expr,
2114 gimple_seq *cond_expr_stmt_list)
2115{
2116 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
9771b263 2117 vec<gimple> may_misalign_stmts
ebfd146a
IR
2118 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2119 gimple ref_stmt;
2120 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2121 tree mask_cst;
2122 unsigned int i;
ebfd146a
IR
2123 tree int_ptrsize_type;
2124 char tmp_name[20];
2125 tree or_tmp_name = NULL_TREE;
83d5977e 2126 tree and_tmp_name;
ebfd146a
IR
2127 gimple and_stmt;
2128 tree ptrsize_zero;
2129 tree part_cond_expr;
2130
2131 /* Check that mask is one less than a power of 2, i.e., mask is
2132 all zeros followed by all ones. */
2133 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2134
96f9265a 2135 int_ptrsize_type = signed_type_for (ptr_type_node);
ebfd146a
IR
2136
2137 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2138 of the first vector of the i'th data reference. */
2139
9771b263 2140 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
ebfd146a
IR
2141 {
2142 gimple_seq new_stmt_list = NULL;
2143 tree addr_base;
83d5977e
RG
2144 tree addr_tmp_name;
2145 tree new_or_tmp_name;
ebfd146a 2146 gimple addr_stmt, or_stmt;
d8ba5b19
RG
2147 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2148 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2149 bool negative = tree_int_cst_compare
2150 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2151 tree offset = negative
2152 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
ebfd146a
IR
2153
2154 /* create: addr_tmp = (int)(address_of_first_vector) */
2155 addr_base =
2156 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
d8ba5b19 2157 offset, loop);
ebfd146a
IR
2158 if (new_stmt_list != NULL)
2159 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2160
83d5977e
RG
2161 sprintf (tmp_name, "addr2int%d", i);
2162 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03 2163 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
ebfd146a
IR
2164 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2165
2166 /* The addresses are OR together. */
2167
2168 if (or_tmp_name != NULL_TREE)
2169 {
2170 /* create: or_tmp = or_tmp | addr_tmp */
83d5977e
RG
2171 sprintf (tmp_name, "orptrs%d", i);
2172 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03
JJ
2173 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2174 or_tmp_name, addr_tmp_name);
ebfd146a
IR
2175 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2176 or_tmp_name = new_or_tmp_name;
2177 }
2178 else
2179 or_tmp_name = addr_tmp_name;
2180
2181 } /* end for i */
2182
2183 mask_cst = build_int_cst (int_ptrsize_type, mask);
2184
2185 /* create: and_tmp = or_tmp & mask */
83d5977e 2186 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
ebfd146a 2187
0d0e4a03
JJ
2188 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2189 or_tmp_name, mask_cst);
ebfd146a
IR
2190 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2191
2192 /* Make and_tmp the left operand of the conditional test against zero.
2193 if and_tmp has a nonzero bit then some address is unaligned. */
2194 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2195 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2196 and_tmp_name, ptrsize_zero);
2197 if (*cond_expr)
2198 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2199 *cond_expr, part_cond_expr);
2200 else
2201 *cond_expr = part_cond_expr;
2202}
2203
ebfd146a
IR
2204/* Function vect_create_cond_for_alias_checks.
2205
2206 Create a conditional expression that represents the run-time checks for
2207 overlapping of address ranges represented by a list of data references
2208 relations passed as input.
2209
2210 Input:
2211 COND_EXPR - input conditional expression. New conditions will be chained
a05a89fa
CH
2212 with logical AND operation. If it is NULL, then the function
2213 is used to return the number of alias checks.
ebfd146a
IR
2214 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2215 to be checked.
2216
2217 Output:
2218 COND_EXPR - conditional expression.
ebfd146a 2219
a05a89fa 2220 The returned COND_EXPR is the conditional expression to be used in the if
ebfd146a
IR
2221 statement that controls which version of the loop gets executed at runtime.
2222*/
2223
a05a89fa 2224void
4bdd44c4 2225vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
ebfd146a 2226{
93bdc3ed 2227 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
a05a89fa
CH
2228 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2229 tree part_cond_expr;
ebfd146a
IR
2230
2231 /* Create expression
36fc3799
RS
2232 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2233 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
b8698a0f 2234 &&
ebfd146a
IR
2235 ...
2236 &&
36fc3799
RS
2237 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2238 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
ebfd146a 2239
a05a89fa 2240 if (comp_alias_ddrs.is_empty ())
ebfd146a
IR
2241 return;
2242
a05a89fa 2243 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
ebfd146a 2244 {
93bdc3ed
CH
2245 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2246 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
a05a89fa
CH
2247 tree segment_length_a = dr_a.seg_len;
2248 tree segment_length_b = dr_b.seg_len;
ebfd146a 2249
a05a89fa 2250 tree addr_base_a
93bdc3ed 2251 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
a05a89fa 2252 tree addr_base_b
93bdc3ed 2253 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
ebfd146a 2254
73fbfcad 2255 if (dump_enabled_p ())
ebfd146a 2256 {
ccb3ad87 2257 dump_printf_loc (MSG_NOTE, vect_location,
a05a89fa
CH
2258 "create runtime check for data references ");
2259 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
ccb3ad87 2260 dump_printf (MSG_NOTE, " and ");
a05a89fa
CH
2261 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2262 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
2263 }
2264
a05a89fa
CH
2265 tree seg_a_min = addr_base_a;
2266 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
82d89471
BM
2267 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2268 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2269 [a, a+12) */
a05a89fa 2270 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
82d89471
BM
2271 {
2272 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2273 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2274 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2275 }
d8ba5b19 2276
a05a89fa
CH
2277 tree seg_b_min = addr_base_b;
2278 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2279 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
82d89471
BM
2280 {
2281 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2282 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2283 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2284 }
ebfd146a 2285
b8698a0f 2286 part_cond_expr =
ebfd146a 2287 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
36fc3799
RS
2288 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2289 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
b8698a0f 2290
ebfd146a
IR
2291 if (*cond_expr)
2292 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2293 *cond_expr, part_cond_expr);
2294 else
2295 *cond_expr = part_cond_expr;
2296 }
ebfd146a 2297
73fbfcad 2298 if (dump_enabled_p ())
ccb3ad87 2299 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2300 "created %u versioning for alias checks.\n",
a05a89fa
CH
2301 comp_alias_ddrs.length ());
2302
2303 comp_alias_ddrs.release ();
ebfd146a
IR
2304}
2305
2306
2307/* Function vect_loop_versioning.
b8698a0f 2308
ebfd146a
IR
2309 If the loop has data references that may or may not be aligned or/and
2310 has data reference relations whose independence was not proven then
2311 two versions of the loop need to be generated, one which is vectorized
2312 and one which isn't. A test is then generated to control which of the
2313 loops is executed. The test checks for the alignment of all of the
2314 data references that may or may not be aligned. An additional
2315 sequence of runtime tests is generated for each pairs of DDRs whose
b8698a0f
L
2316 independence was not proven. The vectorized version of loop is
2317 executed only if both alias and alignment tests are passed.
2318
ebfd146a 2319 The test generated to check which version of loop is executed
b8698a0f 2320 is modified to also check for profitability as indicated by the
86290011
RG
2321 cost model initially.
2322
2323 The versioning precondition(s) are placed in *COND_EXPR and
d68d56b5 2324 *COND_EXPR_STMT_LIST. */
ebfd146a
IR
2325
2326void
368117e8
RG
2327vect_loop_versioning (loop_vec_info loop_vinfo,
2328 unsigned int th, bool check_profitability)
ebfd146a
IR
2329{
2330 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 2331 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a 2332 basic_block condition_bb;
538dd0b7
DM
2333 gphi_iterator gsi;
2334 gimple_stmt_iterator cond_exp_gsi;
ebfd146a
IR
2335 basic_block merge_bb;
2336 basic_block new_exit_bb;
2337 edge new_exit_e, e;
538dd0b7 2338 gphi *orig_phi, *new_phi;
368117e8 2339 tree cond_expr = NULL_TREE;
d68d56b5 2340 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a
IR
2341 tree arg;
2342 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2343 gimple_seq gimplify_stmt_list = NULL;
2344 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
9cc1fb4b
XDL
2345 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2346 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
ebfd146a 2347
368117e8
RG
2348 if (check_profitability)
2349 {
2350 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2351 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2352 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2353 is_gimple_condexpr, NULL_TREE);
2354 }
ebfd146a 2355
9cc1fb4b 2356 if (version_align)
d68d56b5
RG
2357 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2358 &cond_expr_stmt_list);
ebfd146a 2359
9cc1fb4b 2360 if (version_alias)
4bdd44c4 2361 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
86290011 2362
d68d56b5
RG
2363 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2364 is_gimple_condexpr, NULL_TREE);
2365 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
ebfd146a
IR
2366
2367 initialize_original_copy_tables ();
5ce9450f
JJ
2368 if (scalar_loop)
2369 {
2370 edge scalar_e;
2371 basic_block preheader, scalar_preheader;
2372
2373 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2374 scale LOOP's frequencies instead. */
2375 loop_version (scalar_loop, cond_expr, &condition_bb,
2376 prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2377 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2378 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2379 while we need to move it above LOOP's preheader. */
2380 e = loop_preheader_edge (loop);
2381 scalar_e = loop_preheader_edge (scalar_loop);
2382 gcc_assert (empty_block_p (e->src)
2383 && single_pred_p (e->src));
2384 gcc_assert (empty_block_p (scalar_e->src)
2385 && single_pred_p (scalar_e->src));
2386 gcc_assert (single_pred_p (condition_bb));
2387 preheader = e->src;
2388 scalar_preheader = scalar_e->src;
2389 scalar_e = find_edge (condition_bb, scalar_preheader);
2390 e = single_pred_edge (preheader);
2391 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2392 scalar_preheader);
2393 redirect_edge_and_branch_force (scalar_e, preheader);
2394 redirect_edge_and_branch_force (e, condition_bb);
2395 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2396 single_pred (condition_bb));
2397 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2398 single_pred (scalar_preheader));
2399 set_immediate_dominator (CDI_DOMINATORS, preheader,
2400 condition_bb);
2401 }
2402 else
2403 loop_version (loop, cond_expr, &condition_bb,
2404 prob, prob, REG_BR_PROB_BASE - prob, true);
9cc1fb4b 2405
b05e0233 2406 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
9cc1fb4b
XDL
2407 && dump_enabled_p ())
2408 {
2409 if (version_alias)
2410 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2411 "loop versioned for vectorization because of "
2412 "possible aliasing\n");
2413 if (version_align)
2414 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2415 "loop versioned for vectorization to enhance "
2416 "alignment\n");
2417
2418 }
c3284718 2419 free_original_copy_tables ();
ebfd146a 2420
b8698a0f 2421 /* Loop versioning violates an assumption we try to maintain during
ebfd146a
IR
2422 vectorization - that the loop exit block has a single predecessor.
2423 After versioning, the exit block of both loop versions is the same
2424 basic block (i.e. it has two predecessors). Just in order to simplify
2425 following transformations in the vectorizer, we fix this situation
2426 here by adding a new (empty) block on the exit-edge of the loop,
5ce9450f
JJ
2427 with the proper loop-exit phis to maintain loop-closed-form.
2428 If loop versioning wasn't done from loop, but scalar_loop instead,
2429 merge_bb will have already just a single successor. */
b8698a0f 2430
ebfd146a 2431 merge_bb = single_exit (loop)->dest;
5ce9450f 2432 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
ebfd146a 2433 {
5ce9450f
JJ
2434 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2435 new_exit_bb = split_edge (single_exit (loop));
2436 new_exit_e = single_exit (loop);
2437 e = EDGE_SUCC (new_exit_bb, 0);
2438
2439 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2440 {
2441 tree new_res;
538dd0b7 2442 orig_phi = gsi.phi ();
b731b390 2443 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
5ce9450f
JJ
2444 new_phi = create_phi_node (new_res, new_exit_bb);
2445 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2446 add_phi_arg (new_phi, arg, new_exit_e,
2447 gimple_phi_arg_location_from_edge (orig_phi, e));
2448 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2449 }
b8698a0f 2450 }
ebfd146a
IR
2451
2452 /* End loop-exit-fixes after versioning. */
2453
d68d56b5 2454 if (cond_expr_stmt_list)
ebfd146a
IR
2455 {
2456 cond_exp_gsi = gsi_last_bb (condition_bb);
d68d56b5 2457 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
86290011 2458 GSI_SAME_STMT);
ebfd146a 2459 }
90eb75f2 2460 update_ssa (TODO_update_ssa);
ebfd146a 2461}
This page took 4.127539 seconds and 5 git commands to generate.