]> gcc.gnu.org Git - gcc.git/blame - gcc/tree-vectorizer.c
re PR tree-optimization/18179 (vectorizer: wrong alignment/step/initial-address compu...
[gcc.git] / gcc / tree-vectorizer.c
CommitLineData
79fe1b3b
DN
1/* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to the Free
19Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.  */
21
22/* Loop Vectorization Pass.
23
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
28
29 For example, the vectorizer transforms the following simple loop:
30
31 short a[N]; short b[N]; short c[N]; int i;
32
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
36
37 as if it was manually vectorized by rewriting the source code into:
38
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
43
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
50
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
55
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
6775f1f3
IR
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
79fe1b3b
DN
63
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
69
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
74
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
85
86 For example, say stmt S1 was vectorized into stmt VS1:
87
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
91
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
96
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
104
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
118
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121*/
122
123#include "config.h"
124#include "system.h"
125#include "coretypes.h"
126#include "tm.h"
127#include "errors.h"
128#include "ggc.h"
129#include "tree.h"
130#include "target.h"
131
132#include "rtl.h"
133#include "basic-block.h"
134#include "diagnostic.h"
135#include "tree-flow.h"
136#include "tree-dump.h"
137#include "timevar.h"
138#include "cfgloop.h"
139#include "cfglayout.h"
140#include "expr.h"
141#include "optabs.h"
a023975e 142#include "toplev.h"
79fe1b3b
DN
143#include "tree-chrec.h"
144#include "tree-data-ref.h"
145#include "tree-scalar-evolution.h"
146#include "tree-vectorizer.h"
147#include "tree-pass.h"
618bb89c 148#include "langhooks.h"
79fe1b3b 149
f88a8cfa
DN
150
151/*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
154
155/* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
63dfe6ff 168static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
335d3d54 169static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
63dfe6ff 170static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
d6901754 171static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
f88a8cfa
DN
172static void allocate_new_names (bitmap);
173static void rename_use_op (use_operand_p);
174static void rename_def_op (def_operand_p, tree);
175static void rename_variables_in_bb (basic_block);
176static void free_new_names (bitmap);
177static void rename_variables_in_loop (struct loop *);
1d8a9009
AT
178#ifdef ENABLE_CHECKING
179static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
180#endif
f88a8cfa
DN
181
182
183/*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
186
79fe1b3b
DN
187/* Main analysis functions. */
188static loop_vec_info vect_analyze_loop (struct loop *);
189static loop_vec_info vect_analyze_loop_form (struct loop *);
190static bool vect_analyze_data_refs (loop_vec_info);
191static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192static bool vect_analyze_scalar_cycles (loop_vec_info);
193static bool vect_analyze_data_ref_accesses (loop_vec_info);
194static bool vect_analyze_data_refs_alignment (loop_vec_info);
0dc0a70b 195static bool vect_compute_data_refs_alignment (loop_vec_info);
79fe1b3b
DN
196static bool vect_analyze_operations (loop_vec_info);
197
198/* Main code transformation functions. */
199static void vect_transform_loop (loop_vec_info, struct loops *);
79fe1b3b
DN
200static bool vect_transform_stmt (tree, block_stmt_iterator *);
201static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
0dc0a70b
DN
205static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
79fe1b3b
DN
207static void vect_align_data_ref (tree);
208static void vect_enhance_data_refs_alignment (loop_vec_info);
209
210/* Utility functions for the analyses. */
211static bool vect_is_simple_use (tree , struct loop *, tree *);
212static bool exist_non_indexing_operands_for_use_p (tree, tree);
213static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
2b0729ba 214static void vect_mark_relevant (varray_type *, tree);
79fe1b3b 215static bool vect_stmt_relevant_p (tree, loop_vec_info);
a023975e 216static tree vect_get_loop_niters (struct loop *, tree *);
6775f1f3 217static bool vect_compute_data_ref_alignment
79fe1b3b
DN
218 (struct data_reference *, loop_vec_info);
219static bool vect_analyze_data_ref_access (struct data_reference *);
220static bool vect_get_first_index (tree, tree *);
221static bool vect_can_force_dr_alignment_p (tree, unsigned int);
7ccf35ed
DN
222static struct data_reference * vect_analyze_pointer_ref_access
223 (tree, tree, bool);
d6901754 224static bool vect_can_advance_ivs_p (struct loop *);
6775f1f3
IR
225static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227static struct data_reference * vect_analyze_pointer_ref_access
228 (tree, tree, bool);
229static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232static tree vect_get_ptr_offset (tree, tree, tree *);
233static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
1de6a873
IR
235static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
236 tree *, tree *);
79fe1b3b
DN
237
238/* Utility functions for the code transformation. */
239static tree vect_create_destination_var (tree, tree);
7ccf35ed
DN
240static tree vect_create_data_ref_ptr
241 (tree, block_stmt_iterator *, tree, tree *, bool);
242static tree vect_create_index_for_vector_ref
243 (struct loop *, block_stmt_iterator *);
244static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
79fe1b3b
DN
245static tree get_vectype_for_scalar_type (tree);
246static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
247static tree vect_get_vec_def_for_operand (tree, tree);
248static tree vect_init_vector (tree, tree);
249static void vect_finish_stmt_generation
250 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
251
f88a8cfa
DN
252/* Utility function dealing with loop peeling (not peeling itself). */
253static void vect_generate_tmps_on_preheader
254 (loop_vec_info, tree *, tree *, tree *);
a023975e 255static tree vect_build_loop_niters (loop_vec_info);
63dfe6ff 256static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
a023975e 257static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
f88a8cfa
DN
258static void vect_update_inits_of_dr
259 (struct data_reference *, struct loop *, tree niters);
a023975e
OG
260static void vect_update_inits_of_drs (loop_vec_info, tree);
261static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
335d3d54 262static void vect_do_peeling_for_loop_bound
f88a8cfa 263 (loop_vec_info, tree *, struct loops *);
a023975e 264
79fe1b3b
DN
265/* Utilities for creation and deletion of vec_info structs. */
266loop_vec_info new_loop_vec_info (struct loop *loop);
267void destroy_loop_vec_info (loop_vec_info);
268stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
269
a023975e
OG
270static bool vect_debug_stats (struct loop *loop);
271static bool vect_debug_details (struct loop *loop);
272
273\f
f88a8cfa
DN
274/*************************************************************************
275 Simple Loop Peeling Utilities
276
277 Utilities to support loop peeling for vectorization purposes.
278 *************************************************************************/
a023975e
OG
279
280
281/* For each definition in DEFINITIONS this function allocates
282 new ssa name. */
283
284static void
285allocate_new_names (bitmap definitions)
286{
287 unsigned ver;
288 bitmap_iterator bi;
289
290 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
291 {
292 tree def = ssa_name (ver);
293 tree *new_name_ptr = xmalloc (sizeof (tree));
294
295 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
296
297 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
298 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
299
300 SSA_NAME_AUX (def) = new_name_ptr;
301 }
302}
303
304
305/* Renames the use *OP_P. */
306
307static void
308rename_use_op (use_operand_p op_p)
309{
310 tree *new_name_ptr;
311
312 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
313 return;
314
315 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
316
317 /* Something defined outside of the loop. */
318 if (!new_name_ptr)
319 return;
320
321 /* An ordinary ssa name defined in the loop. */
322
323 SET_USE (op_p, *new_name_ptr);
324}
325
326
327/* Renames the def *OP_P in statement STMT. */
328
329static void
330rename_def_op (def_operand_p op_p, tree stmt)
331{
332 tree *new_name_ptr;
333
334 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
335 return;
336
337 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
338
339 /* Something defined outside of the loop. */
340 if (!new_name_ptr)
341 return;
342
343 /* An ordinary ssa name defined in the loop. */
344
345 SET_DEF (op_p, *new_name_ptr);
346 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
347}
348
349
/* Renames the variables in basic block BB.

   Every def and use in BB - real operands, virtual operands (vuses,
   v_may_defs, v_must_defs) and phi results - is rewritten to the
   duplicate ssa name recorded in SSA_NAME_AUX by allocate_new_names.
   Additionally, for each successor of BB that is inside BB's loop, the
   phi arguments associated with the BB->successor edge are renamed;
   arguments on edges leaving the loop are left untouched here.  */

static void
rename_variables_in_bb (basic_block bb)
{
  tree phi;
  block_stmt_iterator bsi;
  tree stmt;
  stmt_ann_t ann;
  use_optype uses;
  vuse_optype vuses;
  def_optype defs;
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  unsigned i;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  /* Phi results are defs; their incoming arguments are handled when
     the predecessor blocks are processed (see the edge walk below).  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    rename_def_op (PHI_RESULT_PTR (phi), phi);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);
      /* Make sure the operand caches are up to date before walking them.  */
      get_stmt_operands (stmt);
      ann = stmt_ann (stmt);

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
        rename_use_op (USE_OP_PTR (uses, i));

      defs = DEF_OPS (ann);
      for (i = 0; i < NUM_DEFS (defs); i++)
        rename_def_op (DEF_OP_PTR (defs, i), stmt);

      vuses = VUSE_OPS (ann);
      for (i = 0; i < NUM_VUSES (vuses); i++)
        rename_use_op (VUSE_OP_PTR (vuses, i));

      /* A v_may_def both uses and defines its operand; rename both.  */
      v_may_defs = V_MAY_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
        {
          rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
          rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
        }

      /* Likewise a v_must_def has a killed operand and a result.  */
      v_must_defs = V_MUST_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
        {
          rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
          rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
        }
    }

  /* Rename the phi arguments contributed by BB to successors that stay
     inside the loop.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
        continue;
      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
    }
}
413
414
415/* Releases the structures holding the new ssa names. */
416
417static void
418free_new_names (bitmap definitions)
419{
420 unsigned ver;
421 bitmap_iterator bi;
422
423 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
424 {
425 tree def = ssa_name (ver);
426
427 if (SSA_NAME_AUX (def))
428 {
429 free (SSA_NAME_AUX (def));
430 SSA_NAME_AUX (def) = NULL;
431 }
432 }
433}
434
435
436/* Renames variables in new generated LOOP. */
437
438static void
439rename_variables_in_loop (struct loop *loop)
440{
441 unsigned i;
442 basic_block *bbs;
443
444 bbs = get_loop_body (loop);
445
446 for (i = 0; i < loop->num_nodes; i++)
447 rename_variables_in_bb (bbs[i]);
448
449 free (bbs);
450}
451
452
/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
                                       struct loop *new_loop, bool after)
{
  tree *new_name_ptr, new_ssa_name;
  tree phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = new_loop->exit_edges[0];
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  /* If NEW_LOOP runs after ORIG_LOOP, its entry values come from
     ORIG_LOOP's latch (the values live at ORIG_LOOP's exit); otherwise
     they come from ORIG_LOOP's original preheader.  */
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
            (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
            (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */


  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (phi_new = phi_nodes (new_loop->header),
       phi_orig = phi_nodes (orig_loop->header);
       phi_new && phi_orig;
       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
    {
      /* step 1: set NEW_LOOP's entry argument.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e);

      /* step 2: set NEW_LOOP's latch argument, translated through
         SSA_NAME_AUX to the name used in the duplicated body.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
        continue;

      new_name_ptr = SSA_NAME_AUX (def);
      if (!new_name_ptr)
        /* Something defined outside of the loop.  */
        continue;

      /* An ordinary ssa name defined in the loop.  */
      new_ssa_name = *new_name_ptr;
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* step 3 (case 1): NEW_LOOP's exit feeds ORIG_LOOP's header, so
         ORIG_LOOP's entry value is now NEW_LOOP's exit value.  */
      if (!after)
        {
          gcc_assert (new_loop_exit_e == orig_entry_e);
          SET_PHI_ARG_DEF (phi_orig,
                           phi_arg_from_edge (phi_orig, new_loop_exit_e),
                           new_ssa_name);
        }
    }
}
535
536
/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because it had a single predecessor (the
        LOOP header) before the guard code was added, and now it became a merge
        point of two paths - the path that ends with the LOOP exit-edge, and
        the path that ends with GUARD_EDGE.

   This function creates and updates the relevant phi nodes to account for
   the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
   1. Create phi nodes at NEW_MERGE_BB.
   2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
      UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
      was added:

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          if (exit_loop) goto update_bb : LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
        LOOP_header_bb:
          if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
        UPDATE_BB are loop entry phis, like the phis in the LOOP header,
        organized in the same order.
        If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
        loop exit phis.

   - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
        "original" loop).  FALSE if LOOP is an original loop (not a newly
        created copy).  The SSA_NAME_AUX fields of the defs in the original
        loop are the corresponding new ssa-names used in the new duplicated
        loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
        nodes in UPDATE_BB takes the original ssa-name, and which takes the
        new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
        the LOOP-exit-edge takes the new-name, and the phi-arg that is
        associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
        FALSE, it's the other way around.
 */

static void
slpeel_update_phi_nodes_for_guard (edge guard_edge,
                                   struct loop *loop,
                                   bool entry_phis,
                                   bool is_new_loop)
{
  tree orig_phi, new_phi, update_phi;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  /* For entry phis the "source" phis live in the loop header; for exit
     phis they are the loop-closed-ssa phis already in UPDATE_BB.  */
  basic_block orig_bb = (entry_phis ? loop->header : update_bb);

  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
       orig_phi && update_phi;
       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
    {
      /* 1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
         of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      if (entry_phis)
        {
          loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
                                            EDGE_SUCC (loop->latch, 0));
          guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
        }
      else /* exit phis */
        {
          tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          tree *new_name_ptr = SSA_NAME_AUX (orig_def);
          tree new_name;

          if (new_name_ptr)
            new_name = *new_name_ptr;
          else
            /* Something defined outside of the loop.  */
            new_name = orig_def;

          /* Which copy of the name reaches NEW_MERGE_BB through the loop
             exit depends on whether LOOP is the duplicated copy.  */
          if (is_new_loop)
            {
              guard_arg = orig_def;
              loop_arg = new_name;
            }
          else
            {
              guard_arg = new_name;
              loop_arg = orig_def;
            }
        }
      add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 3. Update phi in successor block: the old argument (one of the
         two merged values) is replaced by the merge phi's result.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
                       PHI_RESULT (new_phi));
    }

  /* create_phi_node prepends, so reverse to keep the phis in NEW_MERGE_BB
     in the same order as in ORIG_BB/UPDATE_BB.  */
  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
}
652
653
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.  The
   original exit test is replaced by a test of this IV against NITERS,
   with the same branch polarity as the original test.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
  tree orig_cond;
  edge exit_edge = loop->exit_edges[0];
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree begin_label = tree_block_label (loop->latch);
  tree exit_label = tree_block_label (loop->single_exit->dest);
  /* The new IV counts in the type of NITERS: 0, 1, 2, ...  */
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  tree then_label;
  tree else_label;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  create_iv (init, step, NULL_TREE, loop,
             &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
     back to the exit condition statement.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);

  /* Build the new exit test with the same branch polarity as the
     original condition, so the CFG edge flags stay valid.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    {
      cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
      else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
    }
  else /* 'then' edge loops back.  */
    {
      cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
      else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
    }

  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
                      then_label, else_label);
  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_stats (loop) || vect_debug_details (loop))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);

  /* Record the now-known iteration count on the loop.  */
  loop->nb_iterations = niters;
}
708
709
710/* Given LOOP this function generates a new copy of it and puts it
711 on E which is either the entry or exit of LOOP. */
712
713static struct loop *
f88a8cfa
DN
714slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
715 edge e)
a023975e
OG
716{
717 struct loop *new_loop;
718 basic_block *new_bbs, *bbs;
719 bool at_exit;
720 bool was_imm_dom;
721 basic_block exit_dest;
722 tree phi, phi_arg;
723
724 at_exit = (e == loop->exit_edges[0]);
725 if (!at_exit && e != loop_preheader_edge (loop))
726 {
727 if (dump_file && (dump_flags & TDF_DETAILS))
63dfe6ff 728 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
a023975e
OG
729 return NULL;
730 }
731
732 bbs = get_loop_body (loop);
733
734 /* Check whether duplication is possible. */
735 if (!can_copy_bbs_p (bbs, loop->num_nodes))
736 {
737 if (vect_debug_stats (loop) || vect_debug_details (loop))
63dfe6ff 738 fprintf (dump_file, "Cannot copy basic blocks.\n");
a023975e
OG
739 free (bbs);
740 return NULL;
741 }
742
743 /* Generate new loop structure. */
744 new_loop = duplicate_loop (loops, loop, loop->outer);
745 if (!new_loop)
746 {
747 if (vect_debug_stats (loop) || vect_debug_details (loop))
63dfe6ff 748 fprintf (dump_file, "duplicate_loop returns NULL.\n");
a023975e
OG
749 free (bbs);
750 return NULL;
751 }
752
753 exit_dest = loop->exit_edges[0]->dest;
754 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
755 exit_dest) == loop->header ?
756 true : false);
757
758 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
759
760 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
761
762 /* Duplicating phi args at exit bbs as coming
763 also from exit of duplicated loop. */
bb29d951 764 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
a023975e
OG
765 {
766 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
767 if (phi_arg)
768 {
769 edge new_loop_exit_edge;
770
771 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
772 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
773 else
774 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
775
d2e398df 776 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
a023975e
OG
777 }
778 }
779
780 if (at_exit) /* Add the loop copy at exit. */
781 {
782 redirect_edge_and_branch_force (e, new_loop->header);
783 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
784 if (was_imm_dom)
785 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
786 }
787 else /* Add the copy at entry. */
788 {
789 edge new_exit_e;
790 edge entry_e = loop_preheader_edge (loop);
791 basic_block preheader = entry_e->src;
792
793 if (!flow_bb_inside_loop_p (new_loop,
794 EDGE_SUCC (new_loop->header, 0)->dest))
795 new_exit_e = EDGE_SUCC (new_loop->header, 0);
796 else
797 new_exit_e = EDGE_SUCC (new_loop->header, 1);
798
799 redirect_edge_and_branch_force (new_exit_e, loop->header);
800 set_immediate_dominator (CDI_DOMINATORS, loop->header,
801 new_exit_e->src);
802
803 /* We have to add phi args to the loop->header here as coming
804 from new_exit_e edge. */
bb29d951 805 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
a023975e
OG
806 {
807 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
808 if (phi_arg)
d2e398df 809 add_phi_arg (phi, phi_arg, new_exit_e);
a023975e
OG
810 }
811
812 redirect_edge_and_branch_force (entry_e, new_loop->header);
813 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
814 }
815
816 flow_loop_scan (new_loop, LOOP_ALL);
817 flow_loop_scan (loop, LOOP_ALL);
818 free (new_bbs);
819 free (bbs);
820
821 return new_loop;
822}
823
824
/* Given the condition statement COND, put it as the last statement
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
   DOM_BB becomes EXIT_BB's immediate dominator.
   Assumes that this is the single exit of the guarded loop.
   Returns the skip edge (the new GUARD_BB -> EXIT_BB edge, taken when
   COND is true).  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
                       basic_block dom_bb)
{
  block_stmt_iterator bsi;
  edge new_e, enter_e;
  tree cond_stmt, then_label, else_label;

  /* The existing fallthru successor of GUARD_BB becomes the "enter the
     loop" (false) edge of the new conditional.  */
  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  bsi = bsi_last (guard_bb);

  then_label = build1 (GOTO_EXPR, void_type_node,
                       tree_block_label (exit_bb));
  else_label = build1 (GOTO_EXPR, void_type_node,
                       tree_block_label (enter_e->dest));
  cond_stmt = build3 (COND_EXPR, void_type_node, cond,
                      then_label, else_label);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
  /* Add new edge to connect entry block to the second loop.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
855
856
d6901754
DN
857/* This function verifies that the following restrictions apply to LOOP:
858 (1) it is innermost
859 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
860 (3) it is single entry, single exit
861 (4) its exit condition is the last stmt in the header
862 (5) E is the entry/exit edge of LOOP.
863 */
a023975e
OG
864
865static bool
d6901754 866slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
a023975e
OG
867{
868 edge exit_e = loop->exit_edges [0];
869 edge entry_e = loop_preheader_edge (loop);
d6901754
DN
870 tree orig_cond = get_loop_exit_condition (loop);
871 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
a023975e 872
d6901754
DN
873 if (any_marked_for_rewrite_p ())
874 return false;
a023975e 875
d6901754
DN
876 if (loop->inner
877 /* All loops have an outer scope; the only case loop->outer is NULL is for
878 the function itself. */
879 || !loop->outer
880 || loop->num_nodes != 2
881 || !empty_block_p (loop->latch)
882 || loop->num_exits != 1
883 || loop->num_entries != 1
884 /* Verify that new loop exit condition can be trivially modified. */
885 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
886 || (e != exit_e && e != entry_e))
887 return false;
a023975e
OG
888
889 return true;
890}
891
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by slpeel_tree_peel_loop_to_edge:
   FIRST_LOOP and SECOND_LOOP must sit one after the other, with a guard
   in FIRST_LOOP's exit block.  Aborts (via gcc_assert) on violation.  */

static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
  basic_block loop2_entry_bb = second_loop->pre_header;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.
   */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);


  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
a023975e 926
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
			       edge e, tree first_niters,
			       tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  edge exit_e = loop->exit_edges [0];

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks. Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* Give the copied statements fresh SSA names and fix up the phis so the
     two loop bodies are independent.  */
  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard that controls whether the first loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
  bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  /* Note: LE rather than EQ - conservatively also skip the first loop for
     a (signed) non-positive iteration count.  */
  pre_condition =
    build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
                                     first_loop == new_loop);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_between_loops, first_loop->outer);
  bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
                                     second_loop == new_loop);

  /* Flow loop scan does not update loop->single_exit field.  */
  first_loop->single_exit = first_loop->exit_edges[0];
  second_loop->single_exit = second_loop->exit_edges[0];

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  free_new_names (definitions);
  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
1117
a023975e
OG
1118\f
1119/* Here the proper Vectorizer starts. */
79fe1b3b 1120
f88a8cfa
DN
1121/*************************************************************************
1122 Vectorization Utilities.
1123 *************************************************************************/
1124
79fe1b3b
DN
1125/* Function new_stmt_vec_info.
1126
1127 Create and initialize a new stmt_vec_info struct for STMT. */
1128
1129stmt_vec_info
1130new_stmt_vec_info (tree stmt, struct loop *loop)
1131{
1132 stmt_vec_info res;
1133 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1134
1135 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1136 STMT_VINFO_STMT (res) = stmt;
1137 STMT_VINFO_LOOP (res) = loop;
1138 STMT_VINFO_RELEVANT_P (res) = 0;
1139 STMT_VINFO_VECTYPE (res) = NULL;
1140 STMT_VINFO_VEC_STMT (res) = NULL;
1141 STMT_VINFO_DATA_REF (res) = NULL;
1142 STMT_VINFO_MEMTAG (res) = NULL;
6775f1f3 1143 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1de6a873
IR
1144 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1145 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1146 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1147 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
79fe1b3b
DN
1148
1149 return res;
1150}
1151
1152
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));

  bbs = get_loop_body (loop);

  /* Create stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann;

          /* Refresh the operand cache before hanging vectorization info
             off the statement annotation.  */
          get_stmt_operands (stmt);
          ann = stmt_ann (stmt);
          set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
        }
    }

  /* Loop-wide fields; the analysis phases fill most of these in later.  */
  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;  /* Ownership of BBS passes to RES; it is
				  released in destroy_loop_vec_info.  */
  LOOP_VINFO_EXIT_COND (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
			   "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
			   "loop_read_datarefs");
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;

  return res;
}
1200
1201
1202/* Function destroy_loop_vec_info.
1203
1204 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1205 stmts in the loop. */
1206
1207void
1208destroy_loop_vec_info (loop_vec_info loop_vinfo)
1209{
1210 struct loop *loop;
1211 basic_block *bbs;
1212 int nbbs;
1213 block_stmt_iterator si;
1214 int j;
1215
1216 if (!loop_vinfo)
1217 return;
1218
1219 loop = LOOP_VINFO_LOOP (loop_vinfo);
1220
1221 bbs = LOOP_VINFO_BBS (loop_vinfo);
1222 nbbs = loop->num_nodes;
1223
1224 for (j = 0; j < nbbs; j++)
1225 {
1226 basic_block bb = bbs[j];
1227 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1228 {
1229 tree stmt = bsi_stmt (si);
1230 stmt_ann_t ann = stmt_ann (stmt);
1231 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1232 free (stmt_info);
1233 set_stmt_info (ann, NULL);
1234 }
1235 }
1236
1237 free (LOOP_VINFO_BBS (loop_vinfo));
1238 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1239 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1240
1241 free (loop_vinfo);
1242}
1243
1244
1245/* Function debug_loop_stats.
1246
1247 For vectorization statistics dumps. */
1248
1249static bool
1250vect_debug_stats (struct loop *loop)
1251{
1252 basic_block bb;
1253 block_stmt_iterator si;
1254 tree node = NULL_TREE;
1255
1256 if (!dump_file || !(dump_flags & TDF_STATS))
1257 return false;
1258
1259 if (!loop)
1260 {
1261 fprintf (dump_file, "\n");
1262 return true;
1263 }
1264
1265 if (!loop->header)
1266 return false;
1267
1268 bb = loop->header;
1269
1270 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1271 {
1272 node = bsi_stmt (si);
1273 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1274 break;
1275 }
1276
1277 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1278 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1279 {
1280 fprintf (dump_file, "\nloop at %s:%d: ",
1281 EXPR_FILENAME (node), EXPR_LINENO (node));
1282 return true;
1283 }
1284
1285 return false;
1286}
1287
1288
1289/* Function debug_loop_details.
1290
1291 For vectorization debug dumps. */
1292
1293static bool
1294vect_debug_details (struct loop *loop)
1295{
1296 basic_block bb;
1297 block_stmt_iterator si;
1298 tree node = NULL_TREE;
1299
1300 if (!dump_file || !(dump_flags & TDF_DETAILS))
1301 return false;
1302
1303 if (!loop)
1304 {
1305 fprintf (dump_file, "\n");
1306 return true;
1307 }
1308
1309 if (!loop->header)
1310 return false;
1311
1312 bb = loop->header;
1313
1314 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1315 {
1316 node = bsi_stmt (si);
1317 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1318 break;
1319 }
1320
1321 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1322 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1323 {
1324 fprintf (dump_file, "\nloop at %s:%d: ",
1325 EXPR_FILENAME (node), EXPR_LINENO (node));
1326 return true;
1327 }
1328
1329 return false;
1330}
1331
6775f1f3
IR
1332
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.

   FORNOW: unimplemented - always returns NULL_TREE, so callers must treat
   the alignment of pointer-based accesses as unknown.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
		     tree vectype ATTRIBUTE_UNUSED,
		     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
1345
1346
1de6a873
IR
/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with initial_condition of the corresponding access_fn in the loop.
   E.g.,
      for i
         for (j = 3; j < N; j++)
            a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
   substituted, since its access_fn in the inner loop is i. 'j' will be
   substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible. Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
   VECTYPE_ALIGNMENT computation in the caller of this function).

   STEP is an evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE, if the analysis fails, e.g., there is no access_fn for a
   variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs. Otherwise, return TRUE.

*/

static bool
vect_analyze_offset_expr (tree expr,
			  struct loop *loop,
			  tree vectype_alignment,
			  tree *initial_offset,
			  tree *misalign,
			  tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = size_zero_node;
  tree right_offset = size_zero_node;
  tree left_misalign = size_zero_node;
  tree right_misalign = size_zero_node;
  tree left_step = size_zero_node;
  tree right_step = size_zero_node;
  enum tree_code code;
  tree init, evolution, def_stmt;

  STRIP_NOPS (expr);

  /* Default all outputs to "unknown" so early FALSE returns leave them
     NULL_TREE as documented above.  */
  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CONSTANT (expr))
    {
      *initial_offset = fold_convert (sizetype, expr);
      *misalign = fold_convert (sizetype, expr);
      *step = size_zero_node;
      return true;
    }

  /* 2. Variable. Try to substitute with initial_condition of the corresponding
     access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
	/* No access_fn.  */
	return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr)
	{
	  def_stmt = SSA_NAME_DEF_STMT (init);
	  if (def_stmt
	      && !IS_EMPTY_STMT (def_stmt)
	      && flow_bb_inside_loop_p (loop, bb_for_stmt (def_stmt)))
	    /* Not enough information: may be not loop invariant.
	       E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
	       initial_condition is D, but it depends on i - loop's induction
	       variable.  */
	    return false;
	}

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
	/* Evolution is not constant.  */
	return false;

      if (TREE_CONSTANT (init))
	*misalign = fold_convert (sizetype, init);
      else
	/* Not constant, misalignment cannot be calculated.  */
	*misalign = NULL_TREE;

      *initial_offset = fold_convert (sizetype, init);

      /* A loop-invariant variable contributes zero to the step.  */
      *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
      return true;
    }

  /* Recursive computation.  */
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
				&left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
				   &right_offset, &right_misalign, &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (!TREE_CONSTANT (right_offset))
	/* RIGHT_OFFSET can be not constant. For example, for arrays of variable
	   sized types.
	   FORNOW: We don't support such cases.  */
	return false;

      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
	{
	  /* If the left side contains variable that cannot be substituted with
	     constant, we check if the right side is a multiple of ALIGNMENT.  */
	  if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
					 vectype_alignment)))
	    *misalign = size_zero_node;
	  else
	    /* If the remainder is not zero or the right side isn't constant, we
	       can't compute misalignment.  */
	    *misalign = NULL_TREE;
	}
      else
	{
	  /* The left operand was successfully substituted with constant.  */
	  if (left_misalign)
	    /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
	       NULL_TREE.  */
	    *misalign = size_binop (code, left_misalign, right_misalign);
	  else
	    *misalign = NULL_TREE;
	}

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
	*misalign = size_binop (code, left_misalign, right_misalign);
      else
	*misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (sizetype,
				  fold (build2 (code, TREE_TYPE (left_offset),
						left_offset,
						right_offset)));
  return true;
}
1528
1529
6775f1f3
IR
/* Function vect_get_base_and_bit_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the OFFSET from BASE in bits.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
   bits of 'a.b[i] + 4B' from a.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   OFFSET - offset of EXPR from BASE in bits
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   or if VECTYPE is given but OFFSET cannot be determined:
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_bit_offset (struct data_reference *dr,
			      tree expr,
			      tree vectype,
			      loop_vec_info loop_vinfo,
			      tree *offset,
			      bool *base_aligned_p)
{
  tree this_offset = size_zero_node;
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  struct data_reference *array_dr;
  enum tree_code code = TREE_CODE (expr);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
      *offset = size_zero_node;
      if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
	*base_aligned_p = true;
      return expr;

    case SSA_NAME:
      if (!vectype)
	return expr;

      /* An SSA_NAME base must be a pointer when alignment is requested.  */
      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
	return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
	{
	  base = vect_get_ptr_offset (expr, vectype, offset);
	  if (base)
	    *base_aligned_p = true;
	}
      else
	{
	  *base_aligned_p = true;
	  *offset = size_zero_node;
	  base = expr;
	}
      return base;

    case INTEGER_CST:
      /* Constant byte offset; scale to bits.  */
      *offset = int_const_binop (MULT_EXPR, expr,
				 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
      return expr;

    /* These cases continue the recursion:  */
    case COMPONENT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      this_offset = bit_position (oprnd1);
      if (vectype && !host_integerp (this_offset, 1))
	return NULL_TREE;
      next_ref = oprnd0;
      break;

    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case ARRAY_REF:
      if (DR_REF (dr) != expr)
	/* Build array data_reference struct if the existing DR_REF
	   doesn't match EXPR. This happens, for example, when the
	   EXPR is *T and T is initialized to &arr[indx]. The DR struct
	   contains information on the access of T, not of arr. In order
	   to continue the analysis, we create a new DR struct that
	   describes the access of arr.
	*/
	array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
      else
	array_dr = dr;

      next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
						   vectype, &this_offset);
      if (!next_ref)
	return NULL_TREE;

      if (vectype &&
	  TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
	{
	  *offset = this_offset;
	  *base_aligned_p = true;
	  return next_ref;
	}
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* In case we have a PLUS_EXPR of the form
	 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
	 This is verified in vect_get_symbl_and_dr.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      base = vect_get_base_and_bit_offset
	(dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
      if (vectype && !base)
	return NULL_TREE;

      next_ref = oprnd0;
      break;

    default:
      return NULL_TREE;
    }

  /* Recurse towards the base, accumulating this level's contribution.  */
  base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
				       loop_vinfo, offset, base_aligned_p);

  if (vectype && base)
    {
      *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
      if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
	return NULL_TREE;

      if (vect_debug_details (NULL))
	{
	  print_generic_expr (dump_file, expr, TDF_SLIM);
	  fprintf (dump_file, " --> total offset for ref: ");
	  print_generic_expr (dump_file, *offset, TDF_SLIM);
	}
    }
  return base;
}
1693
1694
1695/* Function vect_force_dr_alignment_p.
1696
1697 Returns whether the alignment of a DECL can be forced to be aligned
1698 on ALIGNMENT bit boundary. */
1699
1700static bool
1701vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1702{
1703 if (TREE_CODE (decl) != VAR_DECL)
1704 return false;
1705
1706 if (DECL_EXTERNAL (decl))
1707 return false;
1708
d75bf0ca
DN
1709 if (TREE_ASM_WRITTEN (decl))
1710 return false;
1711
79fe1b3b
DN
1712 if (TREE_STATIC (decl))
1713 return (alignment <= MAX_OFILE_ALIGNMENT);
1714 else
7a8554ce
DN
1715 /* This is not 100% correct. The absolute correct stack alignment
1716 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1717 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1718 However, until someone implements forced stack alignment, SSE
1719 isn't really usable without this. */
1720 return (alignment <= PREFERRED_STACK_BOUNDARY);
79fe1b3b
DN
1721}
1722
1723
1724/* Function vect_get_new_vect_var.
1725
1726 Returns a name for a new variable. The current naming scheme appends the
1727 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1728 the name of vectorizer generated variables, and appends that to NAME if
1729 provided. */
1730
1731static tree
1732vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1733{
1734 const char *prefix;
1735 int prefix_len;
1736 tree new_vect_var;
1737
1738 if (var_kind == vect_simple_var)
1739 prefix = "vect_";
1740 else
1741 prefix = "vect_p";
1742
1743 prefix_len = strlen (prefix);
1744
1745 if (name)
1746 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1747 else
1748 new_vect_var = create_tmp_var (type, prefix);
1749
1750 return new_vect_var;
1751}
1752
1753
/* Function vect_create_index_for_vector_ref.

   Create (and return) an index variable, along with it's update chain in the
   loop. This variable will be used to access a memory location in a vector
   operation.

   Input:
   LOOP: The loop being vectorized.
   BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
	function can be added here, or in the loop pre-header.

   Output:
   Return an index that will be used to index a vector array. It is expected
   that a pointer to the first vector will be used as the base address for the
   indexed reference.

   FORNOW: we are not trying to be efficient, just creating a new index each
   time from scratch. At this time all vector references could use the same
   index.

   TODO: create only one index to be used by all vector references. Record
   the index in the LOOP_VINFO the first time this procedure is called and
   return it on subsequent calls. The increment of this index must be placed
   just before the conditional expression that ends the single block loop.  */

static tree
vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
{
  tree init, step;
  tree indx_before_incr, indx_after_incr;

  /* It is assumed that the base pointer used for vectorized access contains
     the address of the first vector. Therefore the index used for vectorized
     access must be initialized to zero and incremented by 1.  */

  init = integer_zero_node;
  step = integer_one_node;

  /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
  create_iv (init, step, NULL_TREE, loop, bsi, false,
	     &indx_before_incr, &indx_after_incr);

  /* Callers index with the pre-increment value; the post-increment name
     only feeds the IV's update chain.  */
  return indx_before_incr;
}
79fe1b3b 1798
79fe1b3b 1799
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
				      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  /* Unshared copies: these expressions are re-used inside new stmts.  */
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree data_ref_base_type = TREE_TYPE (data_ref_base);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree access_fn;
  tree init_val, step, init_oval;
  bool ok;
  bool is_ptr_ref, is_array_ref, is_addr_expr;
  tree array_base;
  tree vec_stmt;
  tree new_temp;
  tree array_ref;
  tree addr_base, addr_expr;
  tree dest, new_stmt;

  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
				    true);
  if (!ok)
    /* No simple {init, +, step} evolution found; start from index zero.  */
    init_oval = integer_zero_node;

  /* Classify the form of the base; exactly one of these is expected.  */
  is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
	       && TREE_CODE (data_ref_base) == SSA_NAME;
  is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
  is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
		 || TREE_CODE (data_ref_base) == PLUS_EXPR
		 || TREE_CODE (data_ref_base) == MINUS_EXPR;
  gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);

  /** Create: &(base[init_val])

      if data_ref_base is an ARRAY_TYPE:
	 base = data_ref_base

      if data_ref_base is the SSA_NAME of a POINTER_TYPE:
	 base = *((scalar_array *) data_ref_base)
   **/

  if (is_array_ref)
    array_base = data_ref_base;
  else /* is_ptr_ref or is_addr_expr */
    {
      /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      /* Gimplify the base; any stmts produced go on NEW_STMT_LIST for the
	 caller to insert.  */
      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      data_ref_base =
	force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);

      /* (*array_ptr) */
      array_base = build_fold_indirect_ref (new_temp);
    }

  /* Gimplify the initial index value.  */
  dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
  add_referenced_tmp_var (dest);
  init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* init_val = init_val + OFFSET.  */
      tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
      add_referenced_tmp_var (tmp);
      vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
      vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
      init_val = make_ssa_name (tmp, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = init_val;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
    }

  /* &base[init_val]: address of the first accessed element.  */
  array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
		      NULL_TREE, NULL_TREE);
  addr_base = build_fold_addr_expr (array_ref);

  /* addr_expr = addr_base */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
				     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  return new_temp;
}
1925
1926
1927/* Function get_vectype_for_scalar_type.
1928
1929 Returns the vector type corresponding to SCALAR_TYPE as supported
1930 by the target. */
1931
1932static tree
1933get_vectype_for_scalar_type (tree scalar_type)
1934{
1935 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1936 int nbytes = GET_MODE_SIZE (inner_mode);
1937 int nunits;
6775f1f3 1938 tree vectype;
79fe1b3b
DN
1939
1940 if (nbytes == 0)
1941 return NULL_TREE;
1942
1943 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1944 is expected. */
1945 nunits = UNITS_PER_SIMD_WORD / nbytes;
1946
6775f1f3 1947 vectype = build_vector_type (scalar_type, nunits);
f0923257
DN
1948 if (vect_debug_details (NULL))
1949 {
1950 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1951 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1952 }
1953
1954 if (!vectype)
6775f1f3 1955 return NULL_TREE;
f0923257
DN
1956
1957 if (vect_debug_details (NULL))
1958 {
1959 fprintf (dump_file, "vectype: ");
1960 print_generic_expr (dump_file, vectype, TDF_SLIM);
1961 }
1962
1963 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1964 {
1965 /* TODO: tree-complex.c sometimes can parallelize operations
1966 on generic vectors. We can vectorize the loop in that case,
1967 but then we should re-run the lowering pass. */
1968 if (vect_debug_details (NULL))
1969 fprintf (dump_file, "mode not supported by target.");
1970 return NULL_TREE;
1971 }
1972
6775f1f3 1973 return vectype;
79fe1b3b
DN
1974}
1975
1976
1977/* Function vect_align_data_ref.
1978
1979 Handle mislignment of a memory accesses.
1980
1981 FORNOW: Can't handle misaligned accesses.
1982 Make sure that the dataref is aligned. */
1983
1984static void
1985vect_align_data_ref (tree stmt)
1986{
1987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1988 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1989
1990 /* FORNOW: can't handle misaligned accesses;
1991 all accesses expected to be aligned. */
1e128c5f 1992 gcc_assert (aligned_access_p (dr));
79fe1b3b
DN
1993}
1994
1995
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt. The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
	 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
	by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
	pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
	 initial_address = &a[init];
      if OFFSET is supplied:
	 initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. Create a data-reference in the loop based on the new vector pointer vp,
      and using a new index variable 'idx' as follows:

      vp' = vp + update

      where if ONLY_INIT is true:
	 update = zero
      and otherwise
	 update = idx * vector_type_size

      Return the pointer vp'.


   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
			  tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;
  tree type, tmp, size;

  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
	fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
	fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
	fprintf (dump_file, "\nvectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
	fprintf (dump_file, "\nvectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable: **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
				    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer: **/

  /* The new pointer accesses the same memory as the original ref, so it
     inherits the stmt's memory tag.  */
  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_RESULT (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address of the vector-pointer, and set
	  the vector-pointer to point to it before the loop: **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
						   offset);
  pe = loop_preheader_edge (loop);
  /* The insertion must not split the preheader edge (no new bb).  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  tmp = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (tmp);
  /* Pointer arithmetic is done in an unsigned type as wide as the
     pointer itself.  */
  size = TYPE_SIZE (vect_ptr_type);
  type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
  ptr_update = create_tmp_var (type, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
				GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
  new_temp = make_ssa_name (tmp, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  vec_stmt = fold_convert (type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
2184
2185
2186/* Function vect_create_destination_var.
2187
2188 Create a new temporary of type VECTYPE. */
2189
2190static tree
2191vect_create_destination_var (tree scalar_dest, tree vectype)
2192{
2193 tree vec_dest;
2194 const char *new_name;
2195
1e128c5f 2196 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
79fe1b3b
DN
2197
2198 new_name = get_name (scalar_dest);
2199 if (!new_name)
2200 new_name = "var_";
2201 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2202 add_referenced_tmp_var (vec_dest);
2203
2204 return vec_dest;
2205}
2206
2207
2208/* Function vect_init_vector.
2209
2210 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2211 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2212 used in the vectorization of STMT. */
2213
2214static tree
2215vect_init_vector (tree stmt, tree vector_var)
2216{
2217 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2218 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2219 tree new_var;
2220 tree init_stmt;
2221 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2222 tree vec_oprnd;
2223 edge pe;
2224 tree new_temp;
6775f1f3 2225 basic_block new_bb;
79fe1b3b
DN
2226
2227 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2228 add_referenced_tmp_var (new_var);
2229
2230 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2231 new_temp = make_ssa_name (new_var, init_stmt);
2232 TREE_OPERAND (init_stmt, 0) = new_temp;
2233
2234 pe = loop_preheader_edge (loop);
6775f1f3
IR
2235 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2236 gcc_assert (!new_bb);
79fe1b3b
DN
2237
2238 if (vect_debug_details (NULL))
2239 {
2240 fprintf (dump_file, "created new init_stmt: ");
2241 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2242 }
2243
2244 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2245 return vec_oprnd;
2246}
2247
2248
2249/* Function vect_get_vec_def_for_operand.
2250
2251 OP is an operand in STMT. This function returns a (vector) def that will be
2252 used in the vectorized stmt for STMT.
2253
2254 In the case that OP is an SSA_NAME which is defined in the loop, then
2255 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2256
2257 In case OP is an invariant or constant, a new stmt that creates a vector def
2258 needs to be introduced. */
2259
2260static tree
2261vect_get_vec_def_for_operand (tree op, tree stmt)
2262{
2263 tree vec_oprnd;
2264 tree vec_stmt;
2265 tree def_stmt;
2266 stmt_vec_info def_stmt_info = NULL;
2267 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2268 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2269 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2270 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2271 basic_block bb;
2272 tree vec_inv;
2273 tree t = NULL_TREE;
2274 tree def;
2275 int i;
2276
2277 if (vect_debug_details (NULL))
2278 {
2279 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2280 print_generic_expr (dump_file, op, TDF_SLIM);
2281 }
2282
2283 /** ===> Case 1: operand is a constant. **/
2284
2285 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2286 {
2287 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2288
2289 tree vec_cst;
79fe1b3b
DN
2290
2291 /* Build a tree with vector elements. */
2292 if (vect_debug_details (NULL))
2293 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2294
2295 for (i = nunits - 1; i >= 0; --i)
2296 {
2297 t = tree_cons (NULL_TREE, op, t);
2298 }
2299 vec_cst = build_vector (vectype, t);
2300 return vect_init_vector (stmt, vec_cst);
2301 }
2302
1e128c5f 2303 gcc_assert (TREE_CODE (op) == SSA_NAME);
79fe1b3b
DN
2304
2305 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2306
2307 def_stmt = SSA_NAME_DEF_STMT (op);
2308 def_stmt_info = vinfo_for_stmt (def_stmt);
2309
2310 if (vect_debug_details (NULL))
2311 {
2312 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2313 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2314 }
2315
2316
2317 /** ==> Case 2.1: operand is defined inside the loop. **/
2318
2319 if (def_stmt_info)
2320 {
2321 /* Get the def from the vectorized stmt. */
2322
2323 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1e128c5f 2324 gcc_assert (vec_stmt);
79fe1b3b
DN
2325 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2326 return vec_oprnd;
2327 }
2328
2329
2330 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2331 it is a reduction/induction. **/
2332
2333 bb = bb_for_stmt (def_stmt);
2334 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2335 {
2336 if (vect_debug_details (NULL))
2337 fprintf (dump_file, "reduction/induction - unsupported.");
1e128c5f 2338 internal_error ("no support for reduction/induction"); /* FORNOW */
79fe1b3b
DN
2339 }
2340
2341
2342 /** ==> Case 2.3: operand is defined outside the loop -
2343 it is a loop invariant. */
2344
2345 switch (TREE_CODE (def_stmt))
2346 {
2347 case PHI_NODE:
2348 def = PHI_RESULT (def_stmt);
2349 break;
2350 case MODIFY_EXPR:
2351 def = TREE_OPERAND (def_stmt, 0);
2352 break;
2353 case NOP_EXPR:
2354 def = TREE_OPERAND (def_stmt, 0);
1e128c5f 2355 gcc_assert (IS_EMPTY_STMT (def_stmt));
79fe1b3b
DN
2356 def = op;
2357 break;
2358 default:
2359 if (vect_debug_details (NULL))
2360 {
2361 fprintf (dump_file, "unsupported defining stmt: ");
2362 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2363 }
1e128c5f 2364 internal_error ("unsupported defining stmt");
79fe1b3b
DN
2365 }
2366
2367 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2368
2369 if (vect_debug_details (NULL))
2370 fprintf (dump_file, "Create vector_inv.");
2371
2372 for (i = nunits - 1; i >= 0; --i)
2373 {
2374 t = tree_cons (NULL_TREE, def, t);
2375 }
2376
2377 vec_inv = build_constructor (vectype, t);
2378 return vect_init_vector (stmt, vec_inv);
2379}
2380
2381
2382/* Function vect_finish_stmt_generation.
2383
2384 Insert a new stmt. */
2385
2386static void
2387vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2388{
2389 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2390
2391 if (vect_debug_details (NULL))
2392 {
2393 fprintf (dump_file, "add new stmt: ");
2394 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2395 }
2396
2397 /* Make sure bsi points to the stmt that is being vectorized. */
2398
7ccf35ed 2399 /* Assumption: any stmts created for the vectorization of stmt S were
63dfe6ff
DN
2400 inserted before S. BSI is expected to point to S or some new stmt before S.
2401 */
79fe1b3b
DN
2402
2403 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2404 bsi_next (bsi);
1e128c5f 2405 gcc_assert (stmt == bsi_stmt (*bsi));
79fe1b3b
DN
2406}
2407
2408
2409/* Function vectorizable_assignment.
2410
2411 Check if STMT performs an assignment (copy) that can be vectorized.
2412 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2413 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2414 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2415
2416static bool
2417vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2418{
2419 tree vec_dest;
2420 tree scalar_dest;
2421 tree op;
2422 tree vec_oprnd;
2423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2424 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2425 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2426 tree new_temp;
2427
2428 /* Is vectorizable assignment? */
2429
2430 if (TREE_CODE (stmt) != MODIFY_EXPR)
2431 return false;
2432
2433 scalar_dest = TREE_OPERAND (stmt, 0);
2434 if (TREE_CODE (scalar_dest) != SSA_NAME)
2435 return false;
2436
2437 op = TREE_OPERAND (stmt, 1);
2438 if (!vect_is_simple_use (op, loop, NULL))
2439 {
2440 if (vect_debug_details (NULL))
2441 fprintf (dump_file, "use not simple.");
2442 return false;
2443 }
2444
2445 if (!vec_stmt) /* transformation not required. */
2446 {
2447 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2448 return true;
2449 }
2450
2451 /** Trasform. **/
2452 if (vect_debug_details (NULL))
2453 fprintf (dump_file, "transform assignment.");
2454
2455 /* Handle def. */
2456 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2457
2458 /* Handle use. */
2459 op = TREE_OPERAND (stmt, 1);
2460 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2461
2462 /* Arguments are ready. create the new vector stmt. */
2463 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2464 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2465 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2466 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2467
2468 return true;
2469}
2470
2471
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* Every operand must be a def the vectorizer can handle (constant,
     invariant, or defined by a vectorizable stmt in the loop).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop, NULL))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "op not supported by target.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      /* Analysis phase: only record that STMT is a vectorizable
	 operation.  */
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
2583
2584
2585/* Function vectorizable_store.
2586
2587 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2588 can be vectorized.
2589 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2590 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2592
2593static bool
2594vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2595{
2596 tree scalar_dest;
2597 tree data_ref;
2598 tree op;
2599 tree vec_oprnd1;
2600 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
0dc0a70b 2601 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
79fe1b3b
DN
2602 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2603 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2604 enum machine_mode vec_mode;
7ccf35ed 2605 tree dummy;
0dc0a70b 2606 enum dr_alignment_support alignment_support_cheme;
79fe1b3b
DN
2607
2608 /* Is vectorizable store? */
2609
2610 if (TREE_CODE (stmt) != MODIFY_EXPR)
2611 return false;
2612
2613 scalar_dest = TREE_OPERAND (stmt, 0);
2614 if (TREE_CODE (scalar_dest) != ARRAY_REF
2615 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2616 return false;
2617
2618 op = TREE_OPERAND (stmt, 1);
2619 if (!vect_is_simple_use (op, loop, NULL))
2620 {
2621 if (vect_debug_details (NULL))
2622 fprintf (dump_file, "use not simple.");
2623 return false;
2624 }
2625
2626 vec_mode = TYPE_MODE (vectype);
2627 /* FORNOW. In some cases can vectorize even if data-type not supported
2628 (e.g. - array initialization with 0). */
2629 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2630 return false;
2631
2632 if (!STMT_VINFO_DATA_REF (stmt_info))
2633 return false;
2634
7ccf35ed 2635
79fe1b3b
DN
2636 if (!vec_stmt) /* transformation not required. */
2637 {
2638 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2639 return true;
2640 }
2641
2642 /** Trasform. **/
2643
2644 if (vect_debug_details (NULL))
2645 fprintf (dump_file, "transform store");
2646
0dc0a70b
DN
2647 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2648 gcc_assert (alignment_support_cheme);
2649 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2650
79fe1b3b
DN
2651 /* Handle use - get the vectorized def from the defining stmt. */
2652 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2653
2654 /* Handle def. */
7ccf35ed
DN
2655 /* FORNOW: make sure the data reference is aligned. */
2656 vect_align_data_ref (stmt);
2657 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2658 data_ref = build_fold_indirect_ref (data_ref);
79fe1b3b
DN
2659
2660 /* Arguments are ready. create the new vector stmt. */
2661 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2662 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2663
2664 return true;
2665}
2666
2667
2668/* vectorizable_load.
2669
2670 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2671 can be vectorized.
2672 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2673 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2674 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2675
2676static bool
2677vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2678{
2679 tree scalar_dest;
2680 tree vec_dest = NULL;
2681 tree data_ref = NULL;
2682 tree op;
2683 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7ccf35ed 2684 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
79fe1b3b
DN
2685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2686 tree new_temp;
7ccf35ed
DN
2687 int mode;
2688 tree init_addr;
2689 tree new_stmt;
2690 tree dummy;
2691 basic_block new_bb;
2692 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2693 edge pe = loop_preheader_edge (loop);
0dc0a70b 2694 enum dr_alignment_support alignment_support_cheme;
79fe1b3b
DN
2695
2696 /* Is vectorizable load? */
2697
2698 if (TREE_CODE (stmt) != MODIFY_EXPR)
2699 return false;
2700
2701 scalar_dest = TREE_OPERAND (stmt, 0);
2702 if (TREE_CODE (scalar_dest) != SSA_NAME)
2703 return false;
2704
2705 op = TREE_OPERAND (stmt, 1);
2706 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2707 return false;
2708
2709 if (!STMT_VINFO_DATA_REF (stmt_info))
2710 return false;
2711
7ccf35ed
DN
2712 mode = (int) TYPE_MODE (vectype);
2713
79fe1b3b 2714 /* FORNOW. In some cases can vectorize even if data-type not supported
7ccf35ed
DN
2715 (e.g. - data copies). */
2716 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2717 {
2718 if (vect_debug_details (loop))
2719 fprintf (dump_file, "Aligned load, but unsupported type.");
2720 return false;
2721 }
2722
79fe1b3b
DN
2723 if (!vec_stmt) /* transformation not required. */
2724 {
2725 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2726 return true;
2727 }
2728
2729 /** Trasform. **/
2730
2731 if (vect_debug_details (NULL))
2732 fprintf (dump_file, "transform load.");
2733
0dc0a70b
DN
2734 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2735 gcc_assert (alignment_support_cheme);
2736
2737 if (alignment_support_cheme == dr_aligned
2738 || alignment_support_cheme == dr_unaligned_supported)
7ccf35ed
DN
2739 {
2740 /* Create:
2741 p = initial_addr;
2742 indx = 0;
2743 loop {
2744 vec_dest = *(p);
2745 indx = indx + 1;
2746 }
2747 */
2748
2749 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2750 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2751 if (aligned_access_p (dr))
2752 data_ref = build_fold_indirect_ref (data_ref);
2753 else
2754 {
2755 int mis = DR_MISALIGNMENT (dr);
2756 tree tmis = (mis == -1 ?
2757 integer_zero_node :
2758 build_int_cst (integer_type_node, mis));
2759 tmis = int_const_binop (MULT_EXPR, tmis,
2760 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2761 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2762 }
2763 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2764 new_temp = make_ssa_name (vec_dest, new_stmt);
2765 TREE_OPERAND (new_stmt, 0) = new_temp;
2766 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2767 }
0dc0a70b 2768 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
7ccf35ed
DN
2769 {
2770 /* Create:
2771 p1 = initial_addr;
2772 msq_init = *(floor(p1))
2773 p2 = initial_addr + VS - 1;
2774 magic = have_builtin ? builtin_result : initial_address;
2775 indx = 0;
2776 loop {
2777 p2' = p2 + indx * vectype_size
2778 lsq = *(floor(p2'))
2779 vec_dest = realign_load (msq, lsq, magic)
2780 indx = indx + 1;
2781 msq = lsq;
2782 }
2783 */
2784
2785 tree offset;
2786 tree magic;
2787 tree phi_stmt;
2788 tree msq_init;
2789 tree msq, lsq;
2790 tree dataref_ptr;
2791 tree params;
2792
2793 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2794 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2795 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2796 &init_addr, true);
2797 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2798 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2799 new_temp = make_ssa_name (vec_dest, new_stmt);
2800 TREE_OPERAND (new_stmt, 0) = new_temp;
2801 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2802 gcc_assert (!new_bb);
2803 msq_init = TREE_OPERAND (new_stmt, 0);
2804
2805
2806 /* <2> Create lsq = *(floor(p2')) in the loop */
2807 offset = build_int_cst (integer_type_node,
2808 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2809 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2810 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2811 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2812 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2813 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2814 new_temp = make_ssa_name (vec_dest, new_stmt);
2815 TREE_OPERAND (new_stmt, 0) = new_temp;
2816 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2817 lsq = TREE_OPERAND (new_stmt, 0);
2818
2819
2820 /* <3> */
2821 if (targetm.vectorize.builtin_mask_for_load)
2822 {
2823 /* Create permutation mask, if required, in loop preheader. */
2824 tree builtin_decl;
2825 params = build_tree_list (NULL_TREE, init_addr);
2826 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2827 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2828 new_stmt = build_function_call_expr (builtin_decl, params);
2829 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2830 new_temp = make_ssa_name (vec_dest, new_stmt);
2831 TREE_OPERAND (new_stmt, 0) = new_temp;
2832 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2833 gcc_assert (!new_bb);
2834 magic = TREE_OPERAND (new_stmt, 0);
7d5f9cc6
DN
2835
2836 /* Since we have just created a CALL_EXPR, we may need to
2837 rename call-clobbered variables. */
2838 mark_call_clobbered_vars_to_rename ();
7ccf35ed
DN
2839 }
2840 else
2841 {
a023975e
OG
2842 /* Use current address instead of init_addr for reduced reg pressure.
2843 */
7ccf35ed
DN
2844 magic = dataref_ptr;
2845 }
79fe1b3b 2846
79fe1b3b 2847
7ccf35ed
DN
2848 /* <4> Create msq = phi <msq_init, lsq> in loop */
2849 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2850 msq = make_ssa_name (vec_dest, NULL_TREE);
2851 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2852 SSA_NAME_DEF_STMT (msq) = phi_stmt;
d2e398df
KH
2853 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2854 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
7ccf35ed 2855
79fe1b3b 2856
a023975e
OG
2857 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2858 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2859 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2860 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2861 new_temp = make_ssa_name (vec_dest, new_stmt);
2862 TREE_OPERAND (new_stmt, 0) = new_temp;
2863 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2864 }
0dc0a70b
DN
2865 else
2866 gcc_unreachable ();
a023975e
OG
2867
2868 *vec_stmt = new_stmt;
2869 return true;
2870}
2871
2872
0dc0a70b
DN
2873/* Function vect_supportable_dr_alignment
2874
2875 Return whether the data reference DR is supported with respect to its
2876 alignment. */
2877
2878static enum dr_alignment_support
2879vect_supportable_dr_alignment (struct data_reference *dr)
2880{
2881 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2882 enum machine_mode mode = (int) TYPE_MODE (vectype);
2883
2884 if (aligned_access_p (dr))
2885 return dr_aligned;
2886
2887 /* Possibly unaligned access. */
2888
2889 if (DR_IS_READ (dr))
2890 {
2891 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2892 && (!targetm.vectorize.builtin_mask_for_load
2893 || targetm.vectorize.builtin_mask_for_load ()))
2894 return dr_unaligned_software_pipeline;
2895
1e0598e2
RH
2896 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2897 /* Can't software pipeline the loads, but can at least do them. */
0dc0a70b
DN
2898 return dr_unaligned_supported;
2899 }
2900
2901 /* Unsupported. */
2902 return dr_unaligned_unsupported;
2903}
2904
2905
a023975e
OG
2906/* Function vect_transform_stmt.
2907
2908 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2909
2910static bool
2911vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2912{
2913 bool is_store = false;
2914 tree vec_stmt = NULL_TREE;
2915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2916 bool done;
2917
2918 switch (STMT_VINFO_TYPE (stmt_info))
2919 {
2920 case op_vec_info_type:
2921 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2922 gcc_assert (done);
2923 break;
2924
2925 case assignment_vec_info_type:
2926 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2927 gcc_assert (done);
2928 break;
2929
2930 case load_vec_info_type:
2931 done = vectorizable_load (stmt, bsi, &vec_stmt);
2932 gcc_assert (done);
2933 break;
2934
2935 case store_vec_info_type:
2936 done = vectorizable_store (stmt, bsi, &vec_stmt);
2937 gcc_assert (done);
2938 is_store = true;
2939 break;
2940 default:
2941 if (vect_debug_details (NULL))
2942 fprintf (dump_file, "stmt not supported.");
2943 gcc_unreachable ();
2944 }
2945
2946 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2947
2948 return is_store;
2949}
2950
2951
2952/* This function builds ni_name = number of iterations loop executes
2953 on the loop preheader. */
2954
2955static tree
2956vect_build_loop_niters (loop_vec_info loop_vinfo)
2957{
2958 tree ni_name, stmt, var;
2959 edge pe;
a023975e 2960 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
618bb89c 2961 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
a023975e
OG
2962
2963 var = create_tmp_var (TREE_TYPE (ni), "niters");
2964 add_referenced_tmp_var (var);
618bb89c 2965 ni_name = force_gimple_operand (ni, &stmt, false, var);
a023975e
OG
2966
2967 pe = loop_preheader_edge (loop);
4951c3fd 2968 if (stmt)
e9c00ceb
DN
2969 {
2970 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2971 gcc_assert (!new_bb);
2972 }
a023975e
OG
2973
2974 return ni_name;
2975}
2976
2977
2978/* This function generates the following statements:
2979
2980 ni_name = number of iterations loop executes
2981 ratio = ni_name / vf
2982 ratio_mult_vf_name = ratio * vf
2983
2984 and places them at the loop preheader edge. */
2985
2986static void
e9c00ceb
DN
2987vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2988 tree *ni_name_ptr,
2989 tree *ratio_mult_vf_name_ptr,
2990 tree *ratio_name_ptr)
a023975e
OG
2991{
2992
2993 edge pe;
2994 basic_block new_bb;
2995 tree stmt, ni_name;
e9c00ceb
DN
2996 tree var;
2997 tree ratio_name;
2998 tree ratio_mult_vf_name;
a023975e 2999 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
e9c00ceb
DN
3000 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3001 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3002 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3003
3004 pe = loop_preheader_edge (loop);
a023975e
OG
3005
3006 /* Generate temporary variable that contains
3007 number of iterations loop executes. */
3008
3009 ni_name = vect_build_loop_niters (loop_vinfo);
3010
e9c00ceb 3011 /* Create: ratio = ni >> log2(vf) */
a023975e 3012
e9c00ceb
DN
3013 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3014 add_referenced_tmp_var (var);
3015 ratio_name = make_ssa_name (var, NULL_TREE);
3016 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3017 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3018 SSA_NAME_DEF_STMT (ratio_name) = stmt;
a023975e 3019
e9c00ceb
DN
3020 pe = loop_preheader_edge (loop);
3021 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3022 gcc_assert (!new_bb);
3023
3024 /* Create: ratio_mult_vf = ratio << log2 (vf). */
a023975e 3025
e9c00ceb
DN
3026 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3027 add_referenced_tmp_var (var);
3028 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
a023975e 3029 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
e9c00ceb 3030 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
a023975e
OG
3031 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3032
3033 pe = loop_preheader_edge (loop);
3034 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
e9c00ceb 3035 gcc_assert (!new_bb);
a023975e 3036
e9c00ceb
DN
3037 *ni_name_ptr = ni_name;
3038 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3039 *ratio_name_ptr = ratio_name;
a023975e
OG
3040
3041 return;
3042}
3043
3044
00803cd5
DN
3045/* Function vect_update_ivs_after_vectorizer.
3046
3047 "Advance" the induction variables of LOOP to the value they should take
3048 after the execution of LOOP. This is currently necessary because the
3049 vectorizer does not handle induction variables that are used after the
3050 loop. Such a situation occurs when the last iterations of LOOP are
3051 peeled, because:
3052 1. We introduced new uses after LOOP for IVs that were not originally used
3053 after LOOP: the IVs of LOOP are now used by an epilog loop.
3054 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3055 times, whereas the loop IVs should be bumped N times.
3056
3057 Input:
3058 - LOOP - a loop that is going to be vectorized. The last few iterations
3059 of LOOP were peeled.
3060 - NITERS - the number of iterations that LOOP executes (before it is
3061 vectorized). i.e, the number of times the ivs should be bumped.
63dfe6ff
DN
3062 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3063 coming out from LOOP on which there are uses of the LOOP ivs
3064 (this is the path from LOOP->exit to epilog_loop->preheader).
00803cd5 3065
63dfe6ff
DN
3066 The new definitions of the ivs are placed in LOOP->exit.
3067 The phi args associated with the edge UPDATE_E in the bb
3068 UPDATE_E->dest are updated accordingly.
00803cd5
DN
3069
3070 Assumption 1: Like the rest of the vectorizer, this function assumes
3071 a single loop exit that has a single predecessor.
3072
3073 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3074 organized in the same order.
3075
3076 Assumption 3: The access function of the ivs is simple enough (see
3077 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
63dfe6ff
DN
3078
3079 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3080 coming out of LOOP on which the ivs of LOOP are used (this is the path
3081 that leads to the epilog loop; other paths skip the epilog loop). This
3082 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3083 needs to have its phis updated.
00803cd5 3084 */
a023975e
OG
3085
3086static void
63dfe6ff 3087vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
a023975e 3088{
63dfe6ff 3089 basic_block exit_bb = loop->exit_edges[0]->dest;
00803cd5 3090 tree phi, phi1;
63dfe6ff 3091 basic_block update_bb = update_e->dest;
a023975e 3092
63dfe6ff
DN
3093 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3094
3095 /* Make sure there exists a single-predecessor exit bb: */
3096 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
00803cd5 3097
00803cd5
DN
3098 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3099 phi && phi1;
3100 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
a023975e
OG
3101 {
3102 tree access_fn = NULL;
3103 tree evolution_part;
3104 tree init_expr;
3105 tree step_expr;
3106 tree var, stmt, ni, ni_name;
a023975e
OG
3107 block_stmt_iterator last_bsi;
3108
63dfe6ff 3109 /* Skip virtual phi's. */
a023975e
OG
3110 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3111 {
3112 if (vect_debug_details (NULL))
3113 fprintf (dump_file, "virtual phi. skip.");
3114 continue;
3115 }
3116
3117 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
00803cd5
DN
3118 gcc_assert (access_fn);
3119 evolution_part =
3120 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
63dfe6ff 3121 gcc_assert (evolution_part != NULL_TREE);
a023975e 3122
63dfe6ff
DN
3123 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3124 of degree >= 2 or exponential. */
00803cd5 3125 gcc_assert (!tree_is_chrec (evolution_part));
a023975e
OG
3126
3127 step_expr = evolution_part;
00803cd5 3128 init_expr = unshare_expr (initial_condition (access_fn));
a023975e
OG
3129
3130 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3131 build2 (MULT_EXPR, TREE_TYPE (niters),
3132 niters, step_expr), init_expr);
3133
3134 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3135 add_referenced_tmp_var (var);
3136
3137 ni_name = force_gimple_operand (ni, &stmt, false, var);
3138
63dfe6ff
DN
3139 /* Insert stmt into exit_bb. */
3140 last_bsi = bsi_last (exit_bb);
00803cd5 3141 if (stmt)
63dfe6ff 3142 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
a023975e 3143
63dfe6ff 3144 /* Fix phi expressions in the successor bb. */
00803cd5
DN
3145 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3146 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3147 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
a023975e 3148 }
a023975e
OG
3149}
3150
3151
63dfe6ff
DN
3152/* Function vect_do_peeling_for_loop_bound
3153
3154 Peel the last iterations of the loop represented by LOOP_VINFO.
3155 The peeled iterations form a new epilog loop. Given that the loop now
3156 iterates NITERS times, the new epilog loop iterates
3157 NITERS % VECTORIZATION_FACTOR times.
3158
3159 The original loop will later be made to iterate
3160 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
a023975e
OG
3161
3162static void
63dfe6ff 3163vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
335d3d54 3164 struct loops *loops)
a023975e
OG
3165{
3166
3167 tree ni_name, ratio_mult_vf_name;
63dfe6ff
DN
3168 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3169 struct loop *new_loop;
3170 edge update_e;
a023975e
OG
3171#ifdef ENABLE_CHECKING
3172 int loop_num;
3173#endif
a023975e
OG
3174
3175 if (vect_debug_details (NULL))
3176 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3177
3178 /* Generate the following variables on the preheader of original loop:
3179
3180 ni_name = number of iteration the original loop executes
3181 ratio = ni_name / vf
3182 ratio_mult_vf_name = ratio * vf */
3183 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3184 &ratio_mult_vf_name, ratio);
3185
3186 /* Update loop info. */
3187 loop->pre_header = loop_preheader_edge (loop)->src;
3188 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3189
3190#ifdef ENABLE_CHECKING
3191 loop_num = loop->num;
3192#endif
f88a8cfa 3193 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
63dfe6ff 3194 ratio_mult_vf_name, ni_name, false);
a023975e
OG
3195#ifdef ENABLE_CHECKING
3196 gcc_assert (new_loop);
3197 gcc_assert (loop_num == loop->num);
63dfe6ff 3198 slpeel_verify_cfg_after_peeling (loop, new_loop);
a023975e
OG
3199#endif
3200
63dfe6ff
DN
3201 /* A guard that controls whether the new_loop is to be executed or skipped
3202 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3203 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3204 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3205 is on the path where the LOOP IVs are used and need to be updated. */
3206
3207 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3208 update_e = EDGE_PRED (new_loop->pre_header, 0);
3209 else
3210 update_e = EDGE_PRED (new_loop->pre_header, 1);
3211
a023975e
OG
3212 /* Update IVs of original loop as if they were advanced
3213 by ratio_mult_vf_name steps. */
63dfe6ff 3214 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
a023975e 3215
63dfe6ff
DN
3216 /* After peeling we have to reset scalar evolution analyzer. */
3217 scev_reset ();
a023975e
OG
3218
3219 return;
79fe1b3b
DN
3220}
3221
3222
a023975e 3223/* Function vect_gen_niters_for_prolog_loop
79fe1b3b 3224
a023975e 3225 Set the number of iterations for the loop represented by LOOP_VINFO
618bb89c 3226 to the minimum between LOOP_NITERS (the original iteration count of the loop)
0dc0a70b
DN
3227 and the misalignment of DR - the first data reference recorded in
3228 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
618bb89c
DN
3229 this loop, the data reference DR will refer to an aligned location.
3230
3231 The following computation is generated:
3232
3233 compute address misalignment in bytes:
3234 addr_mis = addr & (vectype_size - 1)
3235
3236 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3237
3238 (elem_size = element type size; an element is the scalar element
3239 whose type is the inner type of the vectype) */
79fe1b3b 3240
a023975e 3241static tree
618bb89c 3242vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
79fe1b3b 3243{
0dc0a70b 3244 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
a023975e
OG
3245 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3246 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3247 tree var, stmt;
3248 tree iters, iters_name;
3249 edge pe;
3250 basic_block new_bb;
3251 tree dr_stmt = DR_STMT (dr);
3252 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
618bb89c
DN
3253 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3254 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3255 tree elem_misalign;
3256 tree byte_misalign;
3257 tree new_stmts = NULL_TREE;
3258 tree start_addr =
3259 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3260 tree ptr_type = TREE_TYPE (start_addr);
3261 tree size = TYPE_SIZE (ptr_type);
37ea4e67 3262 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
618bb89c
DN
3263 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3264 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3265 tree niters_type = TREE_TYPE (loop_niters);
3266 tree elem_size_log =
3267 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3268 tree vf_tree = build_int_cst (unsigned_type_node, vf);
a023975e
OG
3269
3270 pe = loop_preheader_edge (loop);
618bb89c 3271 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
e9c00ceb 3272 gcc_assert (!new_bb);
a023975e 3273
618bb89c
DN
3274 /* Create: byte_misalign = addr & (vectype_size - 1) */
3275 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3276
3277 /* Create: elem_misalign = byte_misalign / element_size */
3278 elem_misalign =
3279 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
a023975e 3280
618bb89c
DN
3281 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3282 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3283 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3284 iters = fold_convert (niters_type, iters);
e9c00ceb 3285
618bb89c 3286 /* Create: prolog_loop_niters = min (iters, loop_niters) */
e9c00ceb
DN
3287 /* If the loop bound is known at compile time we already verified that it is
3288 greater than vf; since the misalignment ('iters') is at most vf, there's
3289 no need to generate the MIN_EXPR in this case. */
3290 if (!host_integerp (loop_niters, 0))
3291 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3292
618bb89c 3293 var = create_tmp_var (niters_type, "prolog_loop_niters");
a023975e
OG
3294 add_referenced_tmp_var (var);
3295 iters_name = force_gimple_operand (iters, &stmt, false, var);
3296
3297 /* Insert stmt on loop preheader edge. */
3298 pe = loop_preheader_edge (loop);
4951c3fd 3299 if (stmt)
e9c00ceb
DN
3300 {
3301 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3302 gcc_assert (!new_bb);
3303 }
79fe1b3b 3304
a023975e
OG
3305 return iters_name;
3306}
79fe1b3b 3307
79fe1b3b 3308
a023975e 3309/* Function vect_update_inits_of_dr
79fe1b3b 3310
a023975e
OG
3311 NITERS iterations were peeled from LOOP. DR represents a data reference
3312 in LOOP. This function updates the information recorded in DR to
3313 account for the fact that the first NITERS iterations had already been
3314 executed. Specifically, it updates the initial_condition of the
3315 access_function of DR. */
79fe1b3b
DN
3316
3317static void
a023975e
OG
3318vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3319 tree niters)
79fe1b3b 3320{
a023975e
OG
3321 tree access_fn = DR_ACCESS_FN (dr, 0);
3322 tree init, init_new, step;
3323
3324 step = evolution_part_in_loop_num (access_fn, loop->num);
3325 init = initial_condition (access_fn);
3326
e9c00ceb
DN
3327 init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3328 build2 (MULT_EXPR, TREE_TYPE (niters),
a023975e
OG
3329 niters, step), init);
3330 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3331
3332 return;
3333}
79fe1b3b 3334
79fe1b3b 3335
a023975e 3336/* Function vect_update_inits_of_drs
79fe1b3b 3337
a023975e
OG
3338 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3339 This function updates the information recorded for the data references in
3340 the loop to account for the fact that the first NITERS iterations had
3341 already been executed. Specifically, it updates the initial_condition of the
3342 access_function of all the data_references in the loop. */
79fe1b3b 3343
a023975e
OG
3344static void
3345vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3346{
3347 unsigned int i;
3348 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3349 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3350 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
79fe1b3b 3351
a023975e
OG
3352 if (dump_file && (dump_flags & TDF_DETAILS))
3353 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
79fe1b3b 3354
a023975e
OG
3355 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3356 {
3357 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3358 vect_update_inits_of_dr (dr, loop, niters);
3359 }
79fe1b3b 3360
a023975e
OG
3361 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3362 {
3363 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3364 vect_update_inits_of_dr (dr, loop, niters);
a023975e
OG
3365 }
3366}
79fe1b3b 3367
79fe1b3b 3368
a023975e 3369/* Function vect_do_peeling_for_alignment
79fe1b3b 3370
a023975e
OG
3371 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3372 'niters' is set to the misalignment of one of the data references in the
3373 loop, thereby forcing it to refer to an aligned location at the beginning
3374 of the execution of this loop. The data reference for which we are
0dc0a70b 3375 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
a023975e
OG
3376
3377static void
3378vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3379{
3380 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3381 tree niters_of_prolog_loop, ni_name;
335d3d54 3382 tree n_iters;
63dfe6ff 3383 struct loop *new_loop;
79fe1b3b
DN
3384
3385 if (vect_debug_details (NULL))
a023975e
OG
3386 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3387
3388 ni_name = vect_build_loop_niters (loop_vinfo);
3389 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3390
a023975e 3391 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
63dfe6ff
DN
3392 new_loop =
3393 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3394 niters_of_prolog_loop, ni_name, true);
3395#ifdef ENABLE_CHECKING
3396 gcc_assert (new_loop);
3397 slpeel_verify_cfg_after_peeling (new_loop, loop);
3398#endif
a023975e 3399
a023975e 3400 /* Update number of times loop executes. */
335d3d54
DN
3401 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3402 LOOP_VINFO_NITERS (loop_vinfo) =
618bb89c 3403 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
a023975e 3404
63dfe6ff 3405 /* Update the init conditions of the access functions of all data refs. */
a023975e
OG
3406 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3407
3408 /* After peeling we have to reset scalar evolution analyzer. */
3409 scev_reset ();
3410
3411 return;
79fe1b3b
DN
3412}
3413
3414
3415/* Function vect_transform_loop.
3416
3417 The analysis phase has determined that the loop is vectorizable.
3418 Vectorize the loop - created vectorized stmts to replace the scalar
3419 stmts in the loop, and update the loop exit condition. */
3420
3421static void
3422vect_transform_loop (loop_vec_info loop_vinfo,
3423 struct loops *loops ATTRIBUTE_UNUSED)
3424{
3425 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3426 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3427 int nbbs = loop->num_nodes;
3428 block_stmt_iterator si;
3429 int i;
a023975e 3430 tree ratio = NULL;
79fe1b3b 3431 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
79fe1b3b
DN
3432
3433 if (vect_debug_details (NULL))
3434 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3435
a023975e
OG
3436
3437 /* Peel the loop if there are data refs with unknown alignment.
3438 Only one data ref with unknown store is allowed. */
3439
a023975e
OG
3440 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3441 vect_do_peeling_for_alignment (loop_vinfo, loops);
3442
335d3d54
DN
3443 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3444 compile time constant), or it is a constant that doesn't divide by the
3445 vectorization factor, then an epilog loop needs to be created.
3446 We therefore duplicate the loop: the original loop will be vectorized,
3447 and will compute the first (n/VF) iterations. The second copy of the loop
3448 will remain scalar and will compute the remaining (n%VF) iterations.
a023975e
OG
3449 (VF is the vectorization factor). */
3450
335d3d54
DN
3451 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3452 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3453 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3454 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
618bb89c
DN
3455 else
3456 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3457 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
a023975e 3458
79fe1b3b
DN
3459 /* 1) Make sure the loop header has exactly two entries
3460 2) Make sure we have a preheader basic block. */
3461
628f6a4e 3462 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
79fe1b3b
DN
3463
3464 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3465
3466
3467 /* FORNOW: the vectorizer supports only loops which body consist
3468 of one basic block (header + empty latch). When the vectorizer will
3469 support more involved loop forms, the order by which the BBs are
3470 traversed need to be reconsidered. */
3471
3472 for (i = 0; i < nbbs; i++)
3473 {
3474 basic_block bb = bbs[i];
3475
3476 for (si = bsi_start (bb); !bsi_end_p (si);)
3477 {
3478 tree stmt = bsi_stmt (si);
3479 stmt_vec_info stmt_info;
3480 bool is_store;
79fe1b3b
DN
3481
3482 if (vect_debug_details (NULL))
3483 {
3484 fprintf (dump_file, "------>vectorizing statement: ");
3485 print_generic_expr (dump_file, stmt, TDF_SLIM);
3486 }
3487 stmt_info = vinfo_for_stmt (stmt);
1e128c5f 3488 gcc_assert (stmt_info);
79fe1b3b
DN
3489 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3490 {
3491 bsi_next (&si);
3492 continue;
3493 }
3494#ifdef ENABLE_CHECKING
3495 /* FORNOW: Verify that all stmts operate on the same number of
3496 units and no inner unrolling is necessary. */
0dc0a70b
DN
3497 gcc_assert
3498 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3499 == vectorization_factor);
79fe1b3b
DN
3500#endif
3501 /* -------- vectorize statement ------------ */
3502 if (vect_debug_details (NULL))
3503 fprintf (dump_file, "transform statement.");
3504
3505 is_store = vect_transform_stmt (stmt, &si);
3506 if (is_store)
3507 {
3508 /* free the attached stmt_vec_info and remove the stmt. */
3509 stmt_ann_t ann = stmt_ann (stmt);
3510 free (stmt_info);
3511 set_stmt_info (ann, NULL);
3512 bsi_remove (&si);
3513 continue;
3514 }
3515
3516 bsi_next (&si);
3517 } /* stmts in BB */
3518 } /* BBs in loop */
3519
618bb89c 3520 slpeel_make_loop_iterate_ntimes (loop, ratio);
79fe1b3b
DN
3521
3522 if (vect_debug_details (loop))
3523 fprintf (dump_file,"Success! loop vectorized.");
3524 if (vect_debug_stats (loop))
3525 fprintf (dump_file, "LOOP VECTORIZED.");
3526}
3527
3528
3529/* Function vect_is_simple_use.
3530
3531 Input:
3532 LOOP - the loop that is being vectorized.
3533 OPERAND - operand of a stmt in LOOP.
3534 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3535
3536 Returns whether a stmt with OPERAND can be vectorized.
3537 Supportable operands are constants, loop invariants, and operands that are
6cb38cd4 3538 defined by the current iteration of the loop. Unsupportable operands are
79fe1b3b
DN
3539 those that are defined by a previous iteration of the loop (as is the case
3540 in reduction/induction computations). */
3541
3542static bool
3543vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3544{
3545 tree def_stmt;
3546 basic_block bb;
3547
3548 if (def)
3549 *def = NULL_TREE;
3550
3551 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3552 return true;
3553
3554 if (TREE_CODE (operand) != SSA_NAME)
3555 return false;
3556
3557 def_stmt = SSA_NAME_DEF_STMT (operand);
3558 if (def_stmt == NULL_TREE )
3559 {
3560 if (vect_debug_details (NULL))
3561 fprintf (dump_file, "no def_stmt.");
3562 return false;
3563 }
3564
3565 /* empty stmt is expected only in case of a function argument.
3566 (Otherwise - we expect a phi_node or a modify_expr). */
3567 if (IS_EMPTY_STMT (def_stmt))
3568 {
3569 tree arg = TREE_OPERAND (def_stmt, 0);
3570 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3571 return true;
3572 if (vect_debug_details (NULL))
3573 {
3574 fprintf (dump_file, "Unexpected empty stmt: ");
3575 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3576 }
3577 return false;
3578 }
3579
3580 /* phi_node inside the loop indicates an induction/reduction pattern.
3581 This is not supported yet. */
3582 bb = bb_for_stmt (def_stmt);
3583 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3584 {
3585 if (vect_debug_details (NULL))
3586 fprintf (dump_file, "reduction/induction - unsupported.");
3587 return false; /* FORNOW: not supported yet. */
3588 }
3589
3590 /* Expecting a modify_expr or a phi_node. */
3591 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3592 || TREE_CODE (def_stmt) == PHI_NODE)
3593 {
3594 if (def)
3595 *def = def_stmt;
3596 return true;
3597 }
3598
3599 return false;
3600}
3601
3602
3603/* Function vect_analyze_operations.
3604
3605 Scan the loop stmts and make sure they are all vectorizable. */
3606
3607static bool
3608vect_analyze_operations (loop_vec_info loop_vinfo)
3609{
3610 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3612 int nbbs = loop->num_nodes;
3613 block_stmt_iterator si;
e9c00ceb 3614 unsigned int vectorization_factor = 0;
79fe1b3b
DN
3615 int i;
3616 bool ok;
3617 tree scalar_type;
3618
3619 if (vect_debug_details (NULL))
3620 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3621
3622 for (i = 0; i < nbbs; i++)
3623 {
3624 basic_block bb = bbs[i];
3625
3626 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3627 {
3628 tree stmt = bsi_stmt (si);
e9c00ceb 3629 unsigned int nunits;
79fe1b3b
DN
3630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3631 tree vectype;
3632
3633 if (vect_debug_details (NULL))
3634 {
3635 fprintf (dump_file, "==> examining statement: ");
3636 print_generic_expr (dump_file, stmt, TDF_SLIM);
3637 }
1e128c5f
GB
3638
3639 gcc_assert (stmt_info);
3640
79fe1b3b
DN
3641 /* skip stmts which do not need to be vectorized.
3642 this is expected to include:
3643 - the COND_EXPR which is the loop exit condition
3644 - any LABEL_EXPRs in the loop
3645 - computations that are used only for array indexing or loop
3646 control */
3647
3648 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3649 {
3650 if (vect_debug_details (NULL))
3651 fprintf (dump_file, "irrelevant.");
3652 continue;
3653 }
3654
3655 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3656 {
3657 if (vect_debug_stats (loop) || vect_debug_details (loop))
3658 {
3659 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3660 print_generic_expr (dump_file, stmt, TDF_SLIM);
3661 }
3662 return false;
3663 }
3664
3665 if (STMT_VINFO_DATA_REF (stmt_info))
3666 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3667 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3668 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3669 else
3670 scalar_type = TREE_TYPE (stmt);
3671
3672 if (vect_debug_details (NULL))
3673 {
3674 fprintf (dump_file, "get vectype for scalar type: ");
3675 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3676 }
3677
3678 vectype = get_vectype_for_scalar_type (scalar_type);
3679 if (!vectype)
3680 {
3681 if (vect_debug_stats (loop) || vect_debug_details (loop))
3682 {
3683 fprintf (dump_file, "not vectorized: unsupported data-type ");
3684 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3685 }
3686 return false;
3687 }
3688
3689 if (vect_debug_details (NULL))
3690 {
3691 fprintf (dump_file, "vectype: ");
3692 print_generic_expr (dump_file, vectype, TDF_SLIM);
3693 }
3694 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3695
3696 ok = (vectorizable_operation (stmt, NULL, NULL)
3697 || vectorizable_assignment (stmt, NULL, NULL)
3698 || vectorizable_load (stmt, NULL, NULL)
3699 || vectorizable_store (stmt, NULL, NULL));
3700
3701 if (!ok)
3702 {
3703 if (vect_debug_stats (loop) || vect_debug_details (loop))
3704 {
3705 fprintf (dump_file, "not vectorized: stmt not supported: ");
3706 print_generic_expr (dump_file, stmt, TDF_SLIM);
3707 }
3708 return false;
3709 }
3710
3711 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3712 if (vect_debug_details (NULL))
3713 fprintf (dump_file, "nunits = %d", nunits);
3714
3715 if (vectorization_factor)
3716 {
3717 /* FORNOW: don't allow mixed units.
3718 This restriction will be relaxed in the future. */
3719 if (nunits != vectorization_factor)
3720 {
3721 if (vect_debug_stats (loop) || vect_debug_details (loop))
3722 fprintf (dump_file, "not vectorized: mixed data-types");
3723 return false;
3724 }
3725 }
3726 else
3727 vectorization_factor = nunits;
f0923257
DN
3728
3729#ifdef ENABLE_CHECKING
3730 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3731 * vectorization_factor == UNITS_PER_SIMD_WORD);
3732#endif
79fe1b3b
DN
3733 }
3734 }
3735
3736 /* TODO: Analyze cost. Decide if worth while to vectorize. */
f0923257
DN
3737
3738 if (vectorization_factor <= 1)
79fe1b3b
DN
3739 {
3740 if (vect_debug_stats (loop) || vect_debug_details (loop))
3741 fprintf (dump_file, "not vectorized: unsupported data-type");
3742 return false;
3743 }
3744 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3745
d6901754
DN
3746 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3747 fprintf (dump_file,
3748 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3749 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3750
e9c00ceb
DN
3751 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3752 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3753 {
3754 if (vect_debug_stats (loop) || vect_debug_details (loop))
3755 fprintf (dump_file, "not vectorized: iteration count too small.");
3756 return false;
3757 }
3758
d6901754
DN
3759 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3760 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3761 {
3762 if (vect_debug_stats (loop) || vect_debug_details (loop))
3763 fprintf (dump_file, "epilog loop required.");
3764 if (!vect_can_advance_ivs_p (loop))
3765 {
3766 if (vect_debug_stats (loop) || vect_debug_details (loop))
3767 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3768 return false;
3769 }
3770 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3771 {
3772 if (vect_debug_stats (loop) || vect_debug_details (loop))
3773 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3774 return false;
3775 }
79fe1b3b 3776 }
d6901754 3777
79fe1b3b
DN
3778 return true;
3779}
3780
3781
3782/* Function exist_non_indexing_operands_for_use_p
3783
3784 USE is one of the uses attached to STMT. Check if USE is
3785 used in STMT for anything other than indexing an array. */
3786
3787static bool
3788exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3789{
3790 tree operand;
3791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3792
3793 /* USE corresponds to some operand in STMT. If there is no data
3794 reference in STMT, then any operand that corresponds to USE
3795 is not indexing an array. */
3796 if (!STMT_VINFO_DATA_REF (stmt_info))
3797 return true;
3798
3799 /* STMT has a data_ref. FORNOW this means that its of one of
3800 the following forms:
3801 -1- ARRAY_REF = var
3802 -2- var = ARRAY_REF
3803 (This should have been verified in analyze_data_refs).
3804
3805 'var' in the second case corresponds to a def, not a use,
3806 so USE cannot correspond to any operands that are not used
3807 for array indexing.
3808
3809 Therefore, all we need to check is if STMT falls into the
3810 first case, and whether var corresponds to USE. */
3811
3812 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3813 return false;
3814
3815 operand = TREE_OPERAND (stmt, 1);
3816
3817 if (TREE_CODE (operand) != SSA_NAME)
3818 return false;
3819
3820 if (operand == use)
3821 return true;
3822
3823 return false;
3824}
3825
3826
3827/* Function vect_is_simple_iv_evolution.
3828
3829 FORNOW: A simple evolution of an induction variables in the loop is
3830 considered a polynomial evolution with constant step. */
3831
3832static bool
3833vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3834 tree * step, bool strict)
3835{
3836 tree init_expr;
3837 tree step_expr;
3838
3839 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3840
3841 /* When there is no evolution in this loop, the evolution function
3842 is not "simple". */
3843 if (evolution_part == NULL_TREE)
3844 return false;
3845
3846 /* When the evolution is a polynomial of degree >= 2
3847 the evolution function is not "simple". */
3848 if (tree_is_chrec (evolution_part))
3849 return false;
3850
3851 step_expr = evolution_part;
a023975e 3852 init_expr = unshare_expr (initial_condition (access_fn));
79fe1b3b
DN
3853
3854 if (vect_debug_details (NULL))
3855 {
3856 fprintf (dump_file, "step: ");
3857 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3858 fprintf (dump_file, ", init: ");
3859 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3860 }
3861
3862 *init = init_expr;
3863 *step = step_expr;
3864
3865 if (TREE_CODE (step_expr) != INTEGER_CST)
3866 {
3867 if (vect_debug_details (NULL))
3868 fprintf (dump_file, "step unknown.");
3869 return false;
3870 }
3871
3872 if (strict)
3873 if (!integer_onep (step_expr))
3874 {
3875 if (vect_debug_details (NULL))
3876 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3877 return false;
3878 }
3879
3880 return true;
3881}
3882
3883
3884/* Function vect_analyze_scalar_cycles.
3885
3886 Examine the cross iteration def-use cycles of scalar variables, by
3887 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3888 cycles that they represent do not impede vectorization.
3889
3890 FORNOW: Reduction as in the following loop, is not supported yet:
3891 loop1:
3892 for (i=0; i<N; i++)
3893 sum += a[i];
3894 The cross-iteration cycle corresponding to variable 'sum' will be
3895 considered too complicated and will impede vectorization.
3896
3897 FORNOW: Induction as in the following loop, is not supported yet:
3898 loop2:
3899 for (i=0; i<N; i++)
3900 a[i] = i;
3901
3902 However, the following loop *is* vectorizable:
3903 loop3:
3904 for (i=0; i<N; i++)
3905 a[i] = b[i];
3906
3907 In both loops there exists a def-use cycle for the variable i:
3908 loop: i_2 = PHI (i_0, i_1)
3909 a[i_2] = ...;
3910 i_1 = i_2 + 1;
3911 GOTO loop;
3912
3913 The evolution of the above cycle is considered simple enough,
3914 however, we also check that the cycle does not need to be
3915 vectorized, i.e - we check that the variable that this cycle
3916 defines is only used for array indexing or in stmts that do not
3917 need to be vectorized. This is not the case in loop2, but it
3918 *is* the case in loop3. */
3919
3920static bool
3921vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3922{
3923 tree phi;
3924 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3925 basic_block bb = loop->header;
3926 tree dummy;
3927
3928 if (vect_debug_details (NULL))
3929 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3930
bb29d951 3931 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
79fe1b3b
DN
3932 {
3933 tree access_fn = NULL;
3934
3935 if (vect_debug_details (NULL))
3936 {
3937 fprintf (dump_file, "Analyze phi: ");
3938 print_generic_expr (dump_file, phi, TDF_SLIM);
3939 }
3940
3941 /* Skip virtual phi's. The data dependences that are associated with
3942 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3943
3944 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3945 {
3946 if (vect_debug_details (NULL))
3947 fprintf (dump_file, "virtual phi. skip.");
3948 continue;
3949 }
3950
3951 /* Analyze the evolution function. */
3952
3953 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3954 those of loop induction variables; This property is verified here.
3955
3956 Furthermore, if that induction variable is used in an operation
3957 that needs to be vectorized (i.e, is not solely used to index
3958 arrays and check the exit condition) - we do not support its
3959 vectorization yet. This property is verified in vect_is_simple_use,
3960 during vect_analyze_operations. */
3961
6775f1f3
IR
3962 access_fn = /* instantiate_parameters
3963 (loop,*/
3964 analyze_scalar_evolution (loop, PHI_RESULT (phi));
79fe1b3b
DN
3965
3966 if (!access_fn)
3967 {
3968 if (vect_debug_stats (loop) || vect_debug_details (loop))
3969 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3970 return false;
3971 }
3972
3973 if (vect_debug_details (NULL))
3974 {
3975 fprintf (dump_file, "Access function of PHI: ");
3976 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3977 }
3978
3979 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3980 &dummy, false))
3981 {
3982 if (vect_debug_stats (loop) || vect_debug_details (loop))
3983 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3984 return false;
3985 }
3986 }
3987
3988 return true;
3989}
3990
3991
3992/* Function vect_analyze_data_ref_dependence.
3993
3994 Return TRUE if there (might) exist a dependence between a memory-reference
3995 DRA and a memory-reference DRB. */
3996
3997static bool
3998vect_analyze_data_ref_dependence (struct data_reference *dra,
3999 struct data_reference *drb,
4000 struct loop *loop)
4001{
6775f1f3 4002 bool differ_p;
79fe1b3b 4003 struct data_dependence_relation *ddr;
6775f1f3 4004
79fe1b3b
DN
4005 if (!array_base_name_differ_p (dra, drb, &differ_p))
4006 {
6775f1f3 4007 if (vect_debug_stats (loop) || vect_debug_details (loop))
79fe1b3b 4008 {
6775f1f3
IR
4009 fprintf (dump_file,
4010 "not vectorized: can't determine dependence between: ");
79fe1b3b
DN
4011 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4012 fprintf (dump_file, " and ");
4013 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4014 }
4015 return true;
4016 }
4017
4018 if (differ_p)
4019 return false;
4020
4021 ddr = initialize_data_dependence_relation (dra, drb);
4022 compute_affine_dependence (ddr);
4023
4024 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4025 return false;
4026
4027 if (vect_debug_stats (loop) || vect_debug_details (loop))
4028 {
4029 fprintf (dump_file,
4030 "not vectorized: possible dependence between data-refs ");
4031 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4032 fprintf (dump_file, " and ");
4033 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4034 }
4035
4036 return true;
4037}
4038
4039
4040/* Function vect_analyze_data_ref_dependences.
4041
4042 Examine all the data references in the loop, and make sure there do not
4043 exist any data dependences between them.
4044
4045 TODO: dependences which distance is greater than the vectorization factor
471854f8 4046 can be ignored. */
79fe1b3b
DN
4047
4048static bool
4049vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4050{
4051 unsigned int i, j;
4052 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4053 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4054 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4055
4056 /* Examine store-store (output) dependences. */
4057
4058 if (vect_debug_details (NULL))
4059 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4060
4061 if (vect_debug_details (NULL))
4062 fprintf (dump_file, "compare all store-store pairs.");
4063
4064 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4065 {
4066 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4067 {
4068 struct data_reference *dra =
4069 VARRAY_GENERIC_PTR (loop_write_refs, i);
4070 struct data_reference *drb =
4071 VARRAY_GENERIC_PTR (loop_write_refs, j);
4072 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4073 return false;
4074 }
4075 }
4076
4077 /* Examine load-store (true/anti) dependences. */
4078
4079 if (vect_debug_details (NULL))
4080 fprintf (dump_file, "compare all load-store pairs.");
4081
4082 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4083 {
4084 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4085 {
4086 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4087 struct data_reference *drb =
4088 VARRAY_GENERIC_PTR (loop_write_refs, j);
4089 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4090 return false;
4091 }
4092 }
4093
4094 return true;
4095}
4096
4097
4098/* Function vect_get_first_index.
4099
4100 REF is a data reference.
4101 If it is an ARRAY_REF: if its lower bound is simple enough,
4102 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
4103 If it is not an ARRAY_REF: REF has no "first index";
4104 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
4105
4106static bool
4107vect_get_first_index (tree ref, tree *array_first_index)
4108{
4109 tree array_start;
4110
4111 if (TREE_CODE (ref) != ARRAY_REF)
4112 *array_first_index = size_zero_node;
4113 else
4114 {
4115 array_start = array_ref_low_bound (ref);
e9c00ceb 4116 if (!host_integerp (array_start, 0))
79fe1b3b
DN
4117 {
4118 if (vect_debug_details (NULL))
4119 {
4120 fprintf (dump_file, "array min val not simple integer cst.");
4121 print_generic_expr (dump_file, array_start, TDF_DETAILS);
4122 }
4123 return false;
4124 }
4125 *array_first_index = array_start;
4126 }
4127
4128 return true;
4129}
4130
4131
6775f1f3
IR
4132/* Function vect_compute_array_base_alignment.
4133 A utility function of vect_compute_array_ref_alignment.
4134
4135 Compute the misalignment of ARRAY in bits.
4136
4137 Input:
4138 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
d4a9b3a3 4139 VECTYPE - we are interested in the misalignment modulo the size of vectype.
6775f1f3
IR
4140 if NULL: don't compute misalignment, just return the base of ARRAY.
4141 PREV_DIMENSIONS - initialized to one.
4142 MISALIGNMENT - the computed misalignment in bits.
4143
4144 Output:
4145 If VECTYPE is not NULL:
4146 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
4147 the base of the array, and put the computed misalignment in MISALIGNMENT.
4148 If VECTYPE is NULL:
4149 Return the base of the array.
4150
4151 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
4152 a[idx_N]...[idx_2][idx_1] is
4153 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
4154 ... + idx_N * dim_0 * ... * dim_N-1}.
4155 (The misalignment of &a is not checked here).
4156 Note, that every term contains dim_0, therefore, if dim_0 is a
4157 multiple of NUNITS, the whole sum is a multiple of NUNITS.
4158 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
4159 NUINTS, we can say that the misalignment of the sum is equal to
4160 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
4161 we can't determine this array misalignment, and we return
4162 false.
4163 We proceed recursively in this manner, accumulating total misalignment
4164 and the multiplication of previous dimensions for correct misalignment
4165 calculation. */
4166
4167static tree
4168vect_compute_array_base_alignment (tree array,
4169 tree vectype,
4170 tree *prev_dimensions,
4171 tree *misalignment)
4172{
4173 tree index;
4174 tree domain;
4175 tree dimension_size;
4176 tree mis;
4177 tree bits_per_vectype;
4178 tree bits_per_vectype_unit;
4179
4180 /* The 'stop condition' of the recursion. */
4181 if (TREE_CODE (array) != ARRAY_REF)
4182 return array;
4183
4184 if (!vectype)
4185 /* Just get the base decl. */
4186 return vect_compute_array_base_alignment
4187 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4188
4189 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4190 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4191 return NULL_TREE;
4192
4193 domain = TYPE_DOMAIN (TREE_TYPE (array));
4194 dimension_size =
4195 int_const_binop (PLUS_EXPR,
4196 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4197 TYPE_MIN_VALUE (domain), 1),
4198 size_one_node, 1);
4199
4200 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4201 is a multiple of NUNITS:
4202
4203 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4204 */
4205 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4206 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4207 if (integer_zerop (mis))
4208 /* This array is aligned. Continue just in order to get the base decl. */
4209 return vect_compute_array_base_alignment
4210 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4211
4212 index = TREE_OPERAND (array, 1);
4213 if (!host_integerp (index, 1))
4214 /* The current index is not constant. */
4215 return NULL_TREE;
4216
4217 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4218
4219 bits_per_vectype = fold_convert (unsigned_type_node,
4220 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4221 GET_MODE_SIZE (TYPE_MODE (vectype))));
4222 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4223 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4224 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4225
4226 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4227 earlier:
4228
4229 *misalignment =
4230 (*misalignment + index_val * dimension_size * *prev_dimensions)
4231 % vectype_nunits;
4232 */
4233
4234 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4235 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4236 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4237 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4238 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4239
4240
4241 *prev_dimensions = int_const_binop (MULT_EXPR,
4242 *prev_dimensions, dimension_size, 1);
4243
4244 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4245 prev_dimensions,
4246 misalignment);
4247}
4248
4249
79fe1b3b
DN
4250/* Function vect_compute_data_ref_alignment
4251
4252 Compute the misalignment of the data reference DR.
4253
6775f1f3
IR
4254 Output:
4255 1. If during the misalignment computation it is found that the data reference
4256 cannot be vectorized then false is returned.
4257 2. DR_MISALIGNMENT (DR) is defined.
4258
79fe1b3b
DN
4259 FOR NOW: No analysis is actually performed. Misalignment is calculated
4260 only for trivial cases. TODO. */
4261
6775f1f3 4262static bool
79fe1b3b 4263vect_compute_data_ref_alignment (struct data_reference *dr,
6775f1f3 4264 loop_vec_info loop_vinfo)
79fe1b3b
DN
4265{
4266 tree stmt = DR_STMT (dr);
6775f1f3 4267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
79fe1b3b
DN
4268 tree ref = DR_REF (dr);
4269 tree vectype;
79fe1b3b 4270 tree scalar_type;
79fe1b3b 4271 tree offset = size_zero_node;
6775f1f3
IR
4272 tree base, bit_offset, alignment;
4273 tree unit_bits = fold_convert (unsigned_type_node,
4274 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4275 tree dr_base;
4276 bool base_aligned_p;
4277
79fe1b3b
DN
4278 if (vect_debug_details (NULL))
4279 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4280
4281 /* Initialize misalignment to unknown. */
4282 DR_MISALIGNMENT (dr) = -1;
4283
4284 scalar_type = TREE_TYPE (ref);
4285 vectype = get_vectype_for_scalar_type (scalar_type);
4286 if (!vectype)
4287 {
4288 if (vect_debug_details (NULL))
4289 {
4290 fprintf (dump_file, "no vectype for stmt: ");
4291 print_generic_expr (dump_file, stmt, TDF_SLIM);
6775f1f3 4292 fprintf (dump_file, " scalar_type: ");
79fe1b3b
DN
4293 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4294 }
471854f8 4295 /* It is not possible to vectorize this data reference. */
6775f1f3 4296 return false;
79fe1b3b 4297 }
0dc0a70b 4298 STMT_VINFO_VECTYPE (stmt_info) = vectype;
6775f1f3
IR
4299 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4300
4301 if (TREE_CODE (ref) == ARRAY_REF)
4302 dr_base = ref;
4303 else
4304 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
79fe1b3b 4305
6775f1f3
IR
4306 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4307 loop_vinfo, &bit_offset, &base_aligned_p);
4308 if (!base)
79fe1b3b 4309 {
6775f1f3 4310 if (vect_debug_details (NULL))
79fe1b3b 4311 {
6775f1f3
IR
4312 fprintf (dump_file, "Unknown alignment for access: ");
4313 print_generic_expr (dump_file,
4314 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
79fe1b3b 4315 }
6775f1f3
IR
4316 return true;
4317 }
79fe1b3b 4318
6775f1f3
IR
4319 if (!base_aligned_p)
4320 {
4321 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
79fe1b3b
DN
4322 {
4323 if (vect_debug_details (NULL))
6775f1f3
IR
4324 {
4325 fprintf (dump_file, "can't force alignment of ref: ");
4326 print_generic_expr (dump_file, ref, TDF_SLIM);
4327 }
4328 return true;
79fe1b3b 4329 }
6775f1f3
IR
4330
4331 /* Force the alignment of the decl.
4332 NOTE: This is the only change to the code we make during
4333 the analysis phase, before deciding to vectorize the loop. */
4334 if (vect_debug_details (NULL))
4335 fprintf (dump_file, "force alignment");
4336 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
d75bf0ca 4337 DECL_USER_ALIGN (base) = 1;
6775f1f3 4338 }
79fe1b3b 4339
6775f1f3
IR
4340 /* At this point we assume that the base is aligned, and the offset from it
4341 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4342 gcc_assert (base_aligned_p
4343 || (TREE_CODE (base) == VAR_DECL
4344 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4345
4346 /* Convert into bytes. */
4347 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4348 /* Check that there is no remainder in bits. */
4349 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4350 if (!integer_zerop (bit_offset))
4351 {
4352 if (vect_debug_details (NULL))
79fe1b3b 4353 {
6775f1f3
IR
4354 fprintf (dump_file, "bit offset alignment: ");
4355 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
79fe1b3b 4356 }
6775f1f3
IR
4357 return false;
4358 }
4359
4360 /* Alignment required, in bytes: */
4361 alignment = fold_convert (unsigned_type_node,
4362 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
79fe1b3b 4363
6775f1f3
IR
4364 /* Modulo alignment. */
4365 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4366 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4367 {
4368 if (vect_debug_details (NULL))
4369 fprintf (dump_file, "unexpected misalign value");
4370 return false;
79fe1b3b
DN
4371 }
4372
6775f1f3 4373 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
79fe1b3b 4374
6775f1f3
IR
4375 if (vect_debug_details (NULL))
4376 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4377
4378 return true;
4379}
4380
4381
4382/* Function vect_compute_array_ref_alignment
4383
4384 Compute the alignment of an array-ref.
4385 The alignment we compute here is relative to
4386 TYPE_ALIGN(VECTYPE) boundary.
4387
4388 Output:
4389 OFFSET - the alignment in bits
4390 Return value - the base of the array-ref. E.g,
4391 if the array-ref is a.b[k].c[i][j] the returned
4392 base is a.b[k].c
4393*/
4394
4395static tree
4396vect_compute_array_ref_alignment (struct data_reference *dr,
4397 loop_vec_info loop_vinfo,
4398 tree vectype,
4399 tree *offset)
4400{
4401 tree array_first_index = size_zero_node;
4402 tree init;
4403 tree ref = DR_REF (dr);
4404 tree scalar_type = TREE_TYPE (ref);
4405 tree oprnd0 = TREE_OPERAND (ref, 0);
4406 tree dims = size_one_node;
4407 tree misalign = size_zero_node;
4408 tree next_ref, this_offset = size_zero_node;
4409 tree nunits;
4410 tree nbits;
4411
4412 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
471854f8 4413 /* The reference is an array without its last index. */
a023975e
OG
4414 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4415 &misalign);
6775f1f3 4416 else
a023975e
OG
4417 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4418 &misalign);
6775f1f3
IR
4419 if (!vectype)
4420 /* Alignment is not requested. Just return the base. */
4421 return next_ref;
4422
4423 /* Compute alignment. */
4424 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4425 return NULL_TREE;
4426 this_offset = misalign;
4427
4428 /* Check the first index accessed. */
79fe1b3b
DN
4429 if (!vect_get_first_index (ref, &array_first_index))
4430 {
4431 if (vect_debug_details (NULL))
4432 fprintf (dump_file, "no first_index for array.");
6775f1f3 4433 return NULL_TREE;
79fe1b3b 4434 }
79fe1b3b 4435
6775f1f3
IR
4436 /* Check the index of the array_ref. */
4437 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4438 LOOP_VINFO_LOOP (loop_vinfo)->num);
79fe1b3b 4439
6775f1f3
IR
4440 /* FORNOW: In order to simplify the handling of alignment, we make sure
4441 that the first location at which the array is accessed ('init') is on an
79fe1b3b 4442 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
6775f1f3
IR
4443 This is too conservative, since we require that
4444 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
79fe1b3b
DN
4445 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4446 This should be relaxed in the future. */
4447
6775f1f3 4448 if (!init || !host_integerp (init, 0))
79fe1b3b
DN
4449 {
4450 if (vect_debug_details (NULL))
6775f1f3
IR
4451 fprintf (dump_file, "non constant init. ");
4452 return NULL_TREE;
79fe1b3b
DN
4453 }
4454
79fe1b3b 4455 /* bytes per scalar element: */
6775f1f3
IR
4456 nunits = fold_convert (unsigned_type_node,
4457 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4458 nbits = int_const_binop (MULT_EXPR, nunits,
4459 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
79fe1b3b 4460
6775f1f3 4461 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
79fe1b3b 4462 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
6775f1f3
IR
4463 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4464 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
79fe1b3b 4465
6775f1f3
IR
4466 /* TODO: allow negative misalign values. */
4467 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
79fe1b3b
DN
4468 {
4469 if (vect_debug_details (NULL))
6775f1f3
IR
4470 fprintf (dump_file, "unexpected misalign value");
4471 return NULL_TREE;
79fe1b3b 4472 }
6775f1f3
IR
4473 *offset = misalign;
4474 return next_ref;
79fe1b3b
DN
4475}
4476
4477
4478/* Function vect_compute_data_refs_alignment
4479
4480 Compute the misalignment of data references in the loop.
4481 This pass may take place at function granularity instead of at loop
4482 granularity.
4483
4484 FOR NOW: No analysis is actually performed. Misalignment is calculated
4485 only for trivial cases. TODO. */
4486
0dc0a70b 4487static bool
79fe1b3b
DN
4488vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4489{
4490 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4491 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4492 unsigned int i;
6775f1f3 4493
79fe1b3b
DN
4494 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4495 {
4496 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
0dc0a70b
DN
4497 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4498 return false;
79fe1b3b
DN
4499 }
4500
4501 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4502 {
4503 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
0dc0a70b
DN
4504 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4505 return false;
79fe1b3b 4506 }
0dc0a70b
DN
4507
4508 return true;
79fe1b3b
DN
4509}
4510
4511
4512/* Function vect_enhance_data_refs_alignment
4513
4514 This pass will use loop versioning and loop peeling in order to enhance
4515 the alignment of data references in the loop.
4516
4517 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4518 original loop is to be vectorized; Any other loops that are created by
4519 the transformations performed in this pass - are not supposed to be
d6901754 4520 vectorized. This restriction will be relaxed. */
79fe1b3b
DN
4521
4522static void
0dc0a70b 4523vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
79fe1b3b 4524{
0dc0a70b
DN
4525 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4526 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4527 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4528 unsigned int i;
4529
79fe1b3b
DN
4530 /*
4531 This pass will require a cost model to guide it whether to apply peeling
4532 or versioning or a combination of the two. For example, the scheme that
4533 intel uses when given a loop with several memory accesses, is as follows:
4534 choose one memory access ('p') which alignment you want to force by doing
4535 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4536 other accesses are not necessarily aligned, or (2) use loop versioning to
4537 generate one loop in which all accesses are aligned, and another loop in
4538 which only 'p' is necessarily aligned.
4539
4540 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4541 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4542 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4543
4544 Devising a cost model is the most critical aspect of this work. It will
4545 guide us on which access to peel for, whether to use loop versioning, how
4546 many versions to create, etc. The cost model will probably consist of
4547 generic considerations as well as target specific considerations (on
4548 powerpc for example, misaligned stores are more painful than misaligned
4549 loads).
4550
4551 Here is the general steps involved in alignment enhancements:
4552
4553 -- original loop, before alignment analysis:
4554 for (i=0; i<N; i++){
4555 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4556 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4557 }
4558
4559 -- After vect_compute_data_refs_alignment:
4560 for (i=0; i<N; i++){
4561 x = q[i]; # DR_MISALIGNMENT(q) = 3
4562 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4563 }
4564
4565 -- Possibility 1: we do loop versioning:
4566 if (p is aligned) {
4567 for (i=0; i<N; i++){ # loop 1A
4568 x = q[i]; # DR_MISALIGNMENT(q) = 3
4569 p[i] = y; # DR_MISALIGNMENT(p) = 0
4570 }
4571 }
4572 else {
4573 for (i=0; i<N; i++){ # loop 1B
4574 x = q[i]; # DR_MISALIGNMENT(q) = 3
4575 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4576 }
4577 }
4578
4579 -- Possibility 2: we do loop peeling:
4580 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4581 x = q[i];
4582 p[i] = y;
4583 }
4584 for (i = 3; i < N; i++){ # loop 2A
4585 x = q[i]; # DR_MISALIGNMENT(q) = 0
4586 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4587 }
4588
4589 -- Possibility 3: combination of loop peeling and versioning:
4590 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4591 x = q[i];
4592 p[i] = y;
4593 }
4594 if (p is aligned) {
4595 for (i = 3; i<N; i++){ # loop 3A
4596 x = q[i]; # DR_MISALIGNMENT(q) = 0
4597 p[i] = y; # DR_MISALIGNMENT(p) = 0
4598 }
4599 }
4600 else {
4601 for (i = 3; i<N; i++){ # loop 3B
4602 x = q[i]; # DR_MISALIGNMENT(q) = 0
4603 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4604 }
4605 }
4606
4607 These loops are later passed to loop_transform to be vectorized. The
4608 vectorizer will use the alignment information to guide the transformation
4609 (whether to generate regular loads/stores, or with special handling for
4610 misalignment).
4611 */
0dc0a70b
DN
4612
4613 /* (1) Peeling to force alignment. */
4614
4615 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4616 Considerations:
4617 + How many accesses will become aligned due to the peeling
4618 - How many accesses will become unaligned due to the peeling,
4619 and the cost of misaligned accesses.
4620 - The cost of peeling (the extra runtime checks, the increase
4621 in code size).
4622
4623 The scheme we use FORNOW: peel to force the alignment of the first
896b242c 4624 misaligned store in the loop.
00803cd5 4625 Rationale: misaligned stores are not yet supported.
0dc0a70b
DN
4626
4627 TODO: Use a better cost model. */
4628
4629 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4630 {
4631 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4632 if (!aligned_access_p (dr))
4633 {
4634 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4635 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4636 break;
4637 }
4638 }
4639
4640 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4641 {
4642 if (vect_debug_details (loop))
4643 fprintf (dump_file, "Peeling for alignment will not be applied.");
4644 return;
4645 }
4646 else
4647 if (vect_debug_details (loop))
4648 fprintf (dump_file, "Peeling for alignment will be applied.");
4649
4650
4651 /* (1.2) Update the alignment info according to the peeling factor.
4652 If the misalignment of the DR we peel for is M, then the
4653 peeling factor is VF - M, and the misalignment of each access DR_i
4654 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4655 If the misalignment of the DR we peel for is unknown, then the
4656 misalignment of each access DR_i in the loop is also unknown.
4657
4658 FORNOW: set the misalignment of the accesses to unknown even
4659 if the peeling factor is known at compile time.
4660
4661 TODO: - if the peeling factor is known at compile time, use that
4662 when updating the misalignment info of the loop DRs.
4663 - consider accesses that are known to have the same
4664 alignment, even if that alignment is unknown. */
4665
4666 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4667 {
4668 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4669 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4670 DR_MISALIGNMENT (dr) = 0;
4671 else
4672 DR_MISALIGNMENT (dr) = -1;
4673 }
4674 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4675 {
4676 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4677 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4678 DR_MISALIGNMENT (dr) = 0;
4679 else
4680 DR_MISALIGNMENT (dr) = -1;
4681 }
79fe1b3b
DN
4682}
4683
4684
4685/* Function vect_analyze_data_refs_alignment
4686
4687 Analyze the alignment of the data-references in the loop.
4688 FOR NOW: Until support for misliagned accesses is in place, only if all
4689 accesses are aligned can the loop be vectorized. This restriction will be
4690 relaxed. */
4691
4692static bool
4693vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4694{
0dc0a70b 4695 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
79fe1b3b 4696 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
a023975e 4697 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
0dc0a70b 4698 enum dr_alignment_support supportable_dr_alignment;
79fe1b3b
DN
4699 unsigned int i;
4700
4701 if (vect_debug_details (NULL))
4702 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4703
4704
4705 /* This pass may take place at function granularity instead of at loop
4706 granularity. */
4707
0dc0a70b
DN
4708 if (!vect_compute_data_refs_alignment (loop_vinfo))
4709 {
4710 if (vect_debug_details (loop) || vect_debug_stats (loop))
4711 fprintf (dump_file,
4712 "not vectorized: can't calculate alignment for data ref.");
4713 return false;
4714 }
79fe1b3b
DN
4715
4716
0dc0a70b
DN
4717 /* This pass will decide on using loop versioning and/or loop peeling in
4718 order to enhance the alignment of data references in the loop. */
79fe1b3b
DN
4719
4720 vect_enhance_data_refs_alignment (loop_vinfo);
4721
4722
0dc0a70b
DN
4723 /* Finally, check that all the data references in the loop can be
4724 handled with respect to their alignment. */
79fe1b3b 4725
0dc0a70b 4726 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
79fe1b3b 4727 {
0dc0a70b
DN
4728 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4729 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4730 if (!supportable_dr_alignment)
79fe1b3b 4731 {
0dc0a70b
DN
4732 if (vect_debug_details (loop) || vect_debug_stats (loop))
4733 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4734 return false;
79fe1b3b
DN
4735 }
4736 }
0dc0a70b 4737 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
79fe1b3b 4738 {
0dc0a70b
DN
4739 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4740 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4741 if (!supportable_dr_alignment)
79fe1b3b 4742 {
0dc0a70b
DN
4743 if (vect_debug_details (loop) || vect_debug_stats (loop))
4744 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
79fe1b3b
DN
4745 return false;
4746 }
4747 }
4748
4749 return true;
4750}
4751
4752
4753/* Function vect_analyze_data_ref_access.
4754
4755 Analyze the access pattern of the data-reference DR. For now, a data access
4756 has to consecutive and aligned to be considered vectorizable. */
4757
4758static bool
4759vect_analyze_data_ref_access (struct data_reference *dr)
4760{
4761 varray_type access_fns = DR_ACCESS_FNS (dr);
4762 tree access_fn;
4763 tree init, step;
6775f1f3 4764 unsigned int dimensions, i;
79fe1b3b 4765
6775f1f3
IR
4766 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4767 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4768 access is contiguous). */
4769 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4770
4771 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
79fe1b3b 4772 {
6775f1f3 4773 access_fn = DR_ACCESS_FN (dr, i);
79fe1b3b 4774
6775f1f3
IR
4775 if (evolution_part_in_loop_num (access_fn,
4776 loop_containing_stmt (DR_STMT (dr))->num))
4777 {
a023975e 4778 /* Evolution part is not NULL in this loop (it is neither constant
471854f8 4779 nor invariant). */
6775f1f3
IR
4780 if (vect_debug_details (NULL))
4781 {
4782 fprintf (dump_file,
a023975e 4783 "not vectorized: complicated multidim. array access.");
6775f1f3
IR
4784 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4785 }
4786 return false;
4787 }
4788 }
4789
4790 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4791 if (!evolution_function_is_constant_p (access_fn)
4792 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4793 access_fn, &init, &step, true))
79fe1b3b
DN
4794 {
4795 if (vect_debug_details (NULL))
4796 {
a023975e 4797 fprintf (dump_file, "not vectorized: complicated access function.");
79fe1b3b
DN
4798 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4799 }
4800 return false;
4801 }
6775f1f3 4802
79fe1b3b
DN
4803 return true;
4804}
4805
4806
4807/* Function vect_analyze_data_ref_accesses.
4808
4809 Analyze the access pattern of all the data references in the loop.
4810
4811 FORNOW: the only access pattern that is considered vectorizable is a
4812 simple step 1 (consecutive) access.
4813
6775f1f3 4814 FORNOW: handle only arrays and pointer accesses. */
79fe1b3b
DN
4815
4816static bool
4817vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4818{
4819 unsigned int i;
4820 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4821 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4822
4823 if (vect_debug_details (NULL))
4824 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4825
4826 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4827 {
4828 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4829 bool ok = vect_analyze_data_ref_access (dr);
4830 if (!ok)
4831 {
4832 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4833 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4834 fprintf (dump_file, "not vectorized: complicated access pattern.");
4835 return false;
4836 }
4837 }
4838
4839 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4840 {
4841 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4842 bool ok = vect_analyze_data_ref_access (dr);
4843 if (!ok)
4844 {
4845 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4846 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4847 fprintf (dump_file, "not vectorized: complicated access pattern.");
4848 return false;
4849 }
4850 }
4851
4852 return true;
4853}
4854
4855
4856/* Function vect_analyze_pointer_ref_access.
4857
4858 Input:
4859 STMT - a stmt that contains a data-ref
4860 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4861
4862 If the data-ref access is vectorizable, return a data_reference structure
471854f8 4863 that represents it (DR). Otherwise - return NULL. */
79fe1b3b
DN
4864
4865static struct data_reference *
4866vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4867{
4868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4869 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4870 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4871 tree init, step;
4872 int step_val;
4873 tree reftype, innertype;
4874 enum machine_mode innermode;
4875 tree indx_access_fn;
4876 int loopnum = loop->num;
4877 struct data_reference *dr;
4878
4879 if (!access_fn)
4880 {
4881 if (vect_debug_stats (loop) || vect_debug_details (loop))
4882 fprintf (dump_file, "not vectorized: complicated pointer access.");
4883 return NULL;
4884 }
4885
4886 if (vect_debug_details (NULL))
4887 {
4888 fprintf (dump_file, "Access function of ptr: ");
4889 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4890 }
4891
4892 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4893 {
4894 if (vect_debug_stats (loop) || vect_debug_details (loop))
4895 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4896 return NULL;
4897 }
4898
6775f1f3
IR
4899 STRIP_NOPS (init);
4900
4901 if (!host_integerp (step,0))
79fe1b3b
DN
4902 {
4903 if (vect_debug_stats (loop) || vect_debug_details (loop))
4904 fprintf (dump_file,
6775f1f3 4905 "not vectorized: non constant step for pointer access.");
79fe1b3b
DN
4906 return NULL;
4907 }
4908
4909 step_val = TREE_INT_CST_LOW (step);
4910
4911 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4912 if (TREE_CODE (reftype) != POINTER_TYPE)
4913 {
4914 if (vect_debug_stats (loop) || vect_debug_details (loop))
4915 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4916 return NULL;
4917 }
4918
4919 reftype = TREE_TYPE (init);
4920 if (TREE_CODE (reftype) != POINTER_TYPE)
4921 {
4922 if (vect_debug_stats (loop) || vect_debug_details (loop))
4923 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4924 return NULL;
4925 }
4926
4927 innertype = TREE_TYPE (reftype);
4928 innermode = TYPE_MODE (innertype);
4929 if (GET_MODE_SIZE (innermode) != step_val)
4930 {
4931 /* FORNOW: support only consecutive access */
4932 if (vect_debug_stats (loop) || vect_debug_details (loop))
4933 fprintf (dump_file, "not vectorized: non consecutive access.");
4934 return NULL;
4935 }
4936
4937 indx_access_fn =
4938 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4939 if (vect_debug_details (NULL))
4940 {
4941 fprintf (dump_file, "Access function of ptr indx: ");
4942 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4943 }
4944 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4945 return dr;
4946}
4947
4948
6775f1f3
IR
4949/* Function vect_get_symbl_and_dr.
4950
4951 The function returns SYMBL - the relevant variable for
4952 memory tag (for aliasing purposes).
4953 Also data reference structure DR is created.
4954
6e611d92
IR
4955 This function handles three kinds of MEMREF:
4956
4957 It is called from vect_analyze_data_refs with a MEMREF that is either an
4958 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4959 It builds a DR for them using vect_get_base_and_bit_offset, and calls itself
4960 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4961 MEMREF along the way. During the recursive calls, the function may be called
4962 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4963 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4964 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4965 and SSA_NAME (this is category 3 - "recursion stop condition").
4966
4967 When the MEMREF falls into category 1 there is still no data reference struct
4968 (DR) available. It is created by this function, and then, along the recursion,
4969 MEMREF will fall into category 2 or 3, in which case a DR will have already
4970 been created, but the analysis continues to retrieve the MEMTAG.
4971
6775f1f3
IR
4972 Input:
4973 MEMREF - data reference in STMT
4974 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4975
4976 Output:
4977 DR - data_reference struct for MEMREF
4978 return value - the relevant variable for memory tag (for aliasing purposes).
4979
4980*/
4981
4982static tree
4983vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4984 loop_vec_info loop_vinfo, struct data_reference **dr)
4985{
4986 tree symbl, oprnd0, oprnd1;
4987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4988 tree offset;
6e611d92 4989 tree tag;
6775f1f3
IR
4990 struct data_reference *new_dr;
4991 bool base_aligned_p;
4992
6e611d92 4993 if (*dr)
6775f1f3 4994 {
6e611d92
IR
4995 /* Category 3: recursion stop condition. */
4996 /* (1) A DR already exists. We only need to get the relevant memtag for
4997 MEMREF, the rest of the data was already initialized. */
6775f1f3 4998
6e611d92
IR
4999 switch (TREE_CODE (memref))
5000 {
5001 /* (1.1) Stop condition: find the relevant memtag and return. */
6775f1f3 5002 case SSA_NAME:
6e611d92
IR
5003 symbl = SSA_NAME_VAR (memref);
5004 tag = get_var_ann (symbl)->type_mem_tag;
5005 if (!tag)
6775f1f3 5006 {
6e611d92
IR
5007 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
5008 if (TREE_CODE (ptr) == SSA_NAME)
5009 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
6775f1f3 5010 }
6e611d92
IR
5011 if (!tag)
5012 {
5013 if (vect_debug_details (NULL))
5014 fprintf (dump_file, "not vectorized: no memtag for ref.");
5015 return NULL_TREE;
5016 }
5017 return tag;
6775f1f3 5018
6775f1f3 5019 case VAR_DECL:
6e611d92
IR
5020 case PARM_DECL:
5021 return memref;
6775f1f3 5022
6e611d92
IR
5023 /* Category 2: recursion continues. */
5024 /* (1.2) A recursive call to find the relevant memtag is required. */
6775f1f3 5025 case INDIRECT_REF:
6e611d92
IR
5026 symbl = TREE_OPERAND (memref, 0);
5027 break; /* For recursive call. */
6775f1f3
IR
5028
5029 case COMPONENT_REF:
5030 /* Could have recorded more accurate information -
5031 i.e, the actual FIELD_DECL that is being referenced -
6e611d92
IR
5032 but later passes expect VAR_DECL as the nmt. */
5033 /* Fall through. */
5034
5035 case ADDR_EXPR:
5036 symbl = vect_get_base_and_bit_offset ((*dr), memref, NULL_TREE,
6775f1f3 5037 loop_vinfo, &offset, &base_aligned_p);
6e611d92
IR
5038 break; /* For recursive call. */
5039
5040 case PLUS_EXPR:
5041 case MINUS_EXPR:
5042 /* Although DR exists, we have to call the function recursively to
5043 build MEMTAG for such expression. This is handled below. */
5044 oprnd0 = TREE_OPERAND (memref, 0);
5045 oprnd1 = TREE_OPERAND (memref, 1);
5046
5047 STRIP_NOPS (oprnd1);
5048 /* Supported plus/minus expressions are of the form
5049 {address_base + offset}, such that address_base is of type
5050 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
5051 or it's not of type POINTER/ARRAY.
5052 TODO: swap operands if {offset + address_base}. */
5053 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
5054 && TREE_CODE (oprnd1) != INTEGER_CST)
5055 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
5056 return NULL_TREE;
5057
5058 symbl = oprnd0;
5059 break; /* For recursive call. */
5060
6775f1f3 5061 default:
6775f1f3
IR
5062 return NULL_TREE;
5063 }
6e611d92
IR
5064 }
5065 else
5066 {
5067 /* Category 1: recursion begins. */
5068 /* (2) A DR does not exist yet and must be built, followed by a
5069 recursive call to get the relevant memtag for MEMREF. */
6775f1f3 5070
6e611d92
IR
5071 switch (TREE_CODE (memref))
5072 {
5073 case INDIRECT_REF:
5074 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
5075 if (!new_dr)
5076 return NULL_TREE;
5077 *dr = new_dr;
5078 symbl = DR_BASE_NAME (new_dr);
5079 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
5080 break;
5081
5082 case ARRAY_REF:
5083 new_dr = analyze_array (stmt, memref, is_read);
5084 *dr = new_dr;
5085 symbl = DR_BASE_NAME (new_dr);
5086 STMT_VINFO_VECT_DR_BASE (stmt_info) = TREE_OPERAND (memref, 0);
5087 break;
5088
5089 default:
5090 /* TODO: Support data-refs of form a[i].p for unions and single
5091 field structures. */
5092 return NULL_TREE;
5093 }
6775f1f3 5094 }
6e611d92
IR
5095
5096 if (!symbl)
5097 return NULL_TREE;
5098 /* Recursive call to retrieve the relevant memtag. */
5099 tag = vect_get_symbl_and_dr (symbl, stmt, is_read, loop_vinfo, dr);
5100 return tag;
6775f1f3
IR
5101}
5102
5103
79fe1b3b
DN
5104/* Function vect_analyze_data_refs.
5105
5106 Find all the data references in the loop.
5107
6775f1f3 5108 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
79fe1b3b 5109 which base is really an array (not a pointer) and which alignment
471854f8 5110 can be forced. This restriction will be relaxed. */
79fe1b3b
DN
5111
5112static bool
5113vect_analyze_data_refs (loop_vec_info loop_vinfo)
5114{
5115 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5116 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5117 int nbbs = loop->num_nodes;
5118 block_stmt_iterator si;
5119 int j;
c21accc5 5120 struct data_reference *dr;
79fe1b3b
DN
5121
5122 if (vect_debug_details (NULL))
5123 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
5124
5125 for (j = 0; j < nbbs; j++)
5126 {
5127 basic_block bb = bbs[j];
5128 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5129 {
5130 bool is_read = false;
5131 tree stmt = bsi_stmt (si);
5132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5133 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5134 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5135 vuse_optype vuses = STMT_VUSE_OPS (stmt);
5136 varray_type *datarefs = NULL;
5137 int nvuses, nv_may_defs, nv_must_defs;
5138 tree memref = NULL;
79fe1b3b
DN
5139 tree symbl;
5140
5141 /* Assumption: there exists a data-ref in stmt, if and only if
5142 it has vuses/vdefs. */
5143
5144 if (!vuses && !v_may_defs && !v_must_defs)
5145 continue;
5146
5147 nvuses = NUM_VUSES (vuses);
5148 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
5149 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
5150
5151 if (nvuses && (nv_may_defs || nv_must_defs))
5152 {
5153 if (vect_debug_details (NULL))
5154 {
5155 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
5156 print_generic_expr (dump_file, stmt, TDF_SLIM);
5157 }
5158 return false;
5159 }
5160
5161 if (TREE_CODE (stmt) != MODIFY_EXPR)
5162 {
5163 if (vect_debug_details (NULL))
5164 {
5165 fprintf (dump_file, "unexpected vops in stmt: ");
5166 print_generic_expr (dump_file, stmt, TDF_SLIM);
5167 }
5168 return false;
5169 }
5170
5171 if (vuses)
5172 {
5173 memref = TREE_OPERAND (stmt, 1);
5174 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5175 is_read = true;
5176 }
5177 else /* vdefs */
5178 {
5179 memref = TREE_OPERAND (stmt, 0);
5180 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5181 is_read = false;
5182 }
5183
6775f1f3
IR
5184 /* Analyze MEMREF. If it is of a supported form, build data_reference
5185 struct for it (DR) and find the relevant symbol for aliasing
5186 purposes. */
6e611d92 5187 dr = NULL;
a023975e
OG
5188 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
5189 &dr);
6775f1f3 5190 if (!symbl)
79fe1b3b
DN
5191 {
5192 if (vect_debug_stats (loop) || vect_debug_details (loop))
5193 {
6775f1f3 5194 fprintf (dump_file, "not vectorized: unhandled data ref: ");
79fe1b3b
DN
5195 print_generic_expr (dump_file, stmt, TDF_SLIM);
5196 }
5197 return false;
5198 }
6e611d92 5199 STMT_VINFO_MEMTAG (stmt_info) = symbl;
79fe1b3b
DN
5200 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5201 STMT_VINFO_DATA_REF (stmt_info) = dr;
5202 }
5203 }
5204
5205 return true;
5206}
5207
5208
8c27b7d4 5209/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
79fe1b3b
DN
5210
5211/* Function vect_mark_relevant.
5212
5213 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5214
5215static void
2b0729ba 5216vect_mark_relevant (varray_type *worklist, tree stmt)
79fe1b3b
DN
5217{
5218 stmt_vec_info stmt_info;
5219
5220 if (vect_debug_details (NULL))
5221 fprintf (dump_file, "mark relevant.");
5222
5223 if (TREE_CODE (stmt) == PHI_NODE)
5224 {
2b0729ba 5225 VARRAY_PUSH_TREE (*worklist, stmt);
79fe1b3b
DN
5226 return;
5227 }
5228
5229 stmt_info = vinfo_for_stmt (stmt);
5230
5231 if (!stmt_info)
5232 {
5233 if (vect_debug_details (NULL))
5234 {
5235 fprintf (dump_file, "mark relevant: no stmt info!!.");
5236 print_generic_expr (dump_file, stmt, TDF_SLIM);
5237 }
5238 return;
5239 }
5240
5241 if (STMT_VINFO_RELEVANT_P (stmt_info))
5242 {
5243 if (vect_debug_details (NULL))
5244 fprintf (dump_file, "already marked relevant.");
5245 return;
5246 }
5247
5248 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2b0729ba 5249 VARRAY_PUSH_TREE (*worklist, stmt);
79fe1b3b
DN
5250}
5251
5252
5253/* Function vect_stmt_relevant_p.
5254
5255 Return true if STMT in loop that is represented by LOOP_VINFO is
5256 "relevant for vectorization".
5257
5258 A stmt is considered "relevant for vectorization" if:
5259 - it has uses outside the loop.
5260 - it has vdefs (it alters memory).
5261 - control stmts in the loop (except for the exit condition).
5262
5263 CHECKME: what other side effects would the vectorizer allow? */
5264
5265static bool
5266vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5267{
5268 v_may_def_optype v_may_defs;
5269 v_must_def_optype v_must_defs;
5270 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5271 int i;
5272 dataflow_t df;
5273 int num_uses;
5274
5275 /* cond stmt other than loop exit cond. */
5276 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5277 return true;
5278
5279 /* changing memory. */
5280 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5281 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5282 if (v_may_defs || v_must_defs)
5283 {
5284 if (vect_debug_details (NULL))
5285 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5286 return true;
5287 }
5288
5289 /* uses outside the loop. */
5290 df = get_immediate_uses (stmt);
5291 num_uses = num_immediate_uses (df);
5292 for (i = 0; i < num_uses; i++)
5293 {
5294 tree use = immediate_use (df, i);
5295 basic_block bb = bb_for_stmt (use);
5296 if (!flow_bb_inside_loop_p (loop, bb))
5297 {
5298 if (vect_debug_details (NULL))
5299 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5300 return true;
5301 }
5302 }
5303
5304 return false;
5305}
5306
5307
5308/* Function vect_mark_stmts_to_be_vectorized.
5309
5310 Not all stmts in the loop need to be vectorized. For example:
5311
5312 for i...
5313 for j...
5314 1. T0 = i + j
5315 2. T1 = a[T0]
5316
5317 3. j = j + 1
5318
5319 Stmt 1 and 3 do not need to be vectorized, because loop control and
5320 addressing of vectorized data-refs are handled differently.
5321
5322 This pass detects such stmts. */
5323
5324static bool
5325vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5326{
5327 varray_type worklist;
5328 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5329 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5330 unsigned int nbbs = loop->num_nodes;
5331 block_stmt_iterator si;
5332 tree stmt;
5333 stmt_ann_t ann;
5334 unsigned int i;
5335 int j;
5336 use_optype use_ops;
5337 stmt_vec_info stmt_info;
5338
5339 if (vect_debug_details (NULL))
5340 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5341
5342 VARRAY_TREE_INIT (worklist, 64, "work list");
5343
5344 /* 1. Init worklist. */
5345
5346 for (i = 0; i < nbbs; i++)
5347 {
5348 basic_block bb = bbs[i];
5349 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5350 {
5351 stmt = bsi_stmt (si);
5352
5353 if (vect_debug_details (NULL))
5354 {
5355 fprintf (dump_file, "init: stmt relevant? ");
5356 print_generic_expr (dump_file, stmt, TDF_SLIM);
5357 }
5358
5359 stmt_info = vinfo_for_stmt (stmt);
5360 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5361
5362 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2b0729ba 5363 vect_mark_relevant (&worklist, stmt);
79fe1b3b
DN
5364 }
5365 }
5366
5367
5368 /* 2. Process_worklist */
5369
5370 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5371 {
5372 stmt = VARRAY_TOP_TREE (worklist);
5373 VARRAY_POP (worklist);
5374
5375 if (vect_debug_details (NULL))
5376 {
5377 fprintf (dump_file, "worklist: examine stmt: ");
5378 print_generic_expr (dump_file, stmt, TDF_SLIM);
5379 }
5380
5381 /* Examine the USES in this statement. Mark all the statements which
5382 feed this statement's uses as "relevant", unless the USE is used as
5383 an array index. */
5384
5385 if (TREE_CODE (stmt) == PHI_NODE)
5386 {
5387 /* follow the def-use chain inside the loop. */
5388 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5389 {
5390 tree arg = PHI_ARG_DEF (stmt, j);
5391 tree def_stmt = NULL_TREE;
5392 basic_block bb;
5393 if (!vect_is_simple_use (arg, loop, &def_stmt))
5394 {
5395 if (vect_debug_details (NULL))
5396 fprintf (dump_file, "worklist: unsupported use.");
5397 varray_clear (worklist);
5398 return false;
5399 }
5400 if (!def_stmt)
5401 continue;
5402
5403 if (vect_debug_details (NULL))
5404 {
5405 fprintf (dump_file, "worklist: def_stmt: ");
5406 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5407 }
5408
5409 bb = bb_for_stmt (def_stmt);
5410 if (flow_bb_inside_loop_p (loop, bb))
2b0729ba 5411 vect_mark_relevant (&worklist, def_stmt);
79fe1b3b
DN
5412 }
5413 }
5414
5415 ann = stmt_ann (stmt);
5416 use_ops = USE_OPS (ann);
5417
5418 for (i = 0; i < NUM_USES (use_ops); i++)
5419 {
5420 tree use = USE_OP (use_ops, i);
5421
5422 /* We are only interested in uses that need to be vectorized. Uses
5423 that are used for address computation are not considered relevant.
5424 */
5425 if (exist_non_indexing_operands_for_use_p (use, stmt))
5426 {
5427 tree def_stmt = NULL_TREE;
5428 basic_block bb;
5429 if (!vect_is_simple_use (use, loop, &def_stmt))
5430 {
5431 if (vect_debug_details (NULL))
5432 fprintf (dump_file, "worklist: unsupported use.");
5433 varray_clear (worklist);
5434 return false;
5435 }
5436
5437 if (!def_stmt)
5438 continue;
5439
5440 if (vect_debug_details (NULL))
5441 {
5442 fprintf (dump_file, "worklist: examine use %d: ", i);
5443 print_generic_expr (dump_file, use, TDF_SLIM);
5444 }
5445
5446 bb = bb_for_stmt (def_stmt);
5447 if (flow_bb_inside_loop_p (loop, bb))
2b0729ba 5448 vect_mark_relevant (&worklist, def_stmt);
79fe1b3b
DN
5449 }
5450 }
5451 } /* while worklist */
5452
5453 varray_clear (worklist);
5454 return true;
5455}
5456
5457
d6901754 5458/* Function vect_can_advance_ivs_p
a023975e
OG
5459
5460 In case the number of iterations that LOOP iterates in unknown at compile
5461 time, an epilog loop will be generated, and the loop induction variables
5462 (IVs) will be "advanced" to the value they are supposed to take just before
d6901754 5463 the epilog loop. Here we check that the access function of the loop IVs
a023975e
OG
5464 and the expression that represents the loop bound are simple enough.
5465 These restrictions will be relaxed in the future. */
5466
5467static bool
d6901754 5468vect_can_advance_ivs_p (struct loop *loop)
a023975e
OG
5469{
5470 basic_block bb = loop->header;
5471 tree phi;
5472
a023975e
OG
5473 /* Analyze phi functions of the loop header. */
5474
bb29d951 5475 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
a023975e
OG
5476 {
5477 tree access_fn = NULL;
5478 tree evolution_part;
5479
5480 if (vect_debug_details (NULL))
5481 {
5482 fprintf (dump_file, "Analyze phi: ");
5483 print_generic_expr (dump_file, phi, TDF_SLIM);
5484 }
5485
5486 /* Skip virtual phi's. The data dependences that are associated with
5487 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5488
5489 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5490 {
5491 if (vect_debug_details (NULL))
5492 fprintf (dump_file, "virtual phi. skip.");
5493 continue;
5494 }
5495
5496 /* Analyze the evolution function. */
5497
5498 access_fn = instantiate_parameters
5499 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5500
5501 if (!access_fn)
5502 {
5503 if (vect_debug_details (NULL))
5504 fprintf (dump_file, "No Access function.");
5505 return false;
5506 }
5507
5508 if (vect_debug_details (NULL))
5509 {
5510 fprintf (dump_file, "Access function of PHI: ");
5511 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5512 }
5513
5514 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5515
5516 if (evolution_part == NULL_TREE)
5517 return false;
5518
5519 /* FORNOW: We do not transform initial conditions of IVs
5520 which evolution functions are a polynomial of degree >= 2. */
5521
5522 if (tree_is_chrec (evolution_part))
5523 return false;
5524 }
5525
d6901754 5526 return true;
a023975e
OG
5527}
5528
5529
79fe1b3b
DN
5530/* Function vect_get_loop_niters.
5531
d6901754
DN
5532 Determine how many iterations the loop is executed.
5533 If an expression that represents the number of iterations
5534 can be constructed, place it in NUMBER_OF_ITERATIONS.
5535 Return the loop exit condition. */
79fe1b3b
DN
5536
5537static tree
a023975e 5538vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
79fe1b3b
DN
5539{
5540 tree niters;
5541
5542 if (vect_debug_details (NULL))
5543 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5544
5545 niters = number_of_iterations_in_loop (loop);
5546
5547 if (niters != NULL_TREE
a023975e 5548 && niters != chrec_dont_know)
79fe1b3b 5549 {
a023975e 5550 *number_of_iterations = niters;
79fe1b3b
DN
5551
5552 if (vect_debug_details (NULL))
a023975e
OG
5553 {
5554 fprintf (dump_file, "==> get_loop_niters:" );
5555 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5556 }
79fe1b3b
DN
5557 }
5558
5559 return get_loop_exit_condition (loop);
5560}
5561
5562
5563/* Function vect_analyze_loop_form.
5564
5565 Verify the following restrictions (some may be relaxed in the future):
5566 - it's an inner-most loop
5567 - number of BBs = 2 (which are the loop header and the latch)
5568 - the loop has a pre-header
5569 - the loop has a single entry and exit
5570 - the loop exit condition is simple enough, and the number of iterations
5571 can be analyzed (a countable loop). */
5572
5573static loop_vec_info
5574vect_analyze_loop_form (struct loop *loop)
5575{
5576 loop_vec_info loop_vinfo;
5577 tree loop_cond;
a023975e 5578 tree number_of_iterations = NULL;
d6901754 5579 bool rescan = false;
79fe1b3b
DN
5580
5581 if (vect_debug_details (loop))
5582 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5583
82b85a85
ZD
5584 if (loop->inner
5585 || !loop->single_exit
d6901754
DN
5586 || loop->num_nodes != 2
5587 || EDGE_COUNT (loop->header->preds) != 2
5588 || loop->num_entries != 1)
79fe1b3b
DN
5589 {
5590 if (vect_debug_stats (loop) || vect_debug_details (loop))
5591 {
5592 fprintf (dump_file, "not vectorized: bad loop form. ");
82b85a85 5593 if (loop->inner)
79fe1b3b 5594 fprintf (dump_file, "nested loop.");
82b85a85
ZD
5595 else if (!loop->single_exit)
5596 fprintf (dump_file, "multiple exits.");
5597 else if (loop->num_nodes != 2)
79fe1b3b 5598 fprintf (dump_file, "too many BBs in loop.");
d6901754
DN
5599 else if (EDGE_COUNT (loop->header->preds) != 2)
5600 fprintf (dump_file, "too many incoming edges.");
5601 else if (loop->num_entries != 1)
5602 fprintf (dump_file, "too many entries.");
79fe1b3b
DN
5603 }
5604
5605 return NULL;
5606 }
5607
5608 /* We assume that the loop exit condition is at the end of the loop. i.e,
5609 that the loop is represented as a do-while (with a proper if-guard
5610 before the loop if needed), where the loop header contains all the
5611 executable statements, and the latch is empty. */
5612 if (!empty_block_p (loop->latch))
5613 {
5614 if (vect_debug_stats (loop) || vect_debug_details (loop))
5615 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5616 return NULL;
5617 }
5618
d6901754
DN
5619 /* Make sure we have a preheader basic block. */
5620 if (!loop->pre_header)
5621 {
5622 rescan = true;
5623 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5624 }
5625
5626 /* Make sure there exists a single-predecessor exit bb: */
5627 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5628 {
5629 rescan = true;
5630 loop_split_edge_with (loop->exit_edges[0], NULL);
5631 }
5632
5633 if (rescan)
5634 {
5635 flow_loop_scan (loop, LOOP_ALL);
5636 /* Flow loop scan does not update loop->single_exit field. */
5637 loop->single_exit = loop->exit_edges[0];
5638 }
5639
79fe1b3b
DN
5640 if (empty_block_p (loop->header))
5641 {
5642 if (vect_debug_stats (loop) || vect_debug_details (loop))
5643 fprintf (dump_file, "not vectorized: empty loop.");
5644 return NULL;
5645 }
5646
5647 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5648 if (!loop_cond)
5649 {
5650 if (vect_debug_stats (loop) || vect_debug_details (loop))
5651 fprintf (dump_file, "not vectorized: complicated exit condition.");
5652 return NULL;
5653 }
a023975e
OG
5654
5655 if (!number_of_iterations)
79fe1b3b
DN
5656 {
5657 if (vect_debug_stats (loop) || vect_debug_details (loop))
a023975e
OG
5658 fprintf (dump_file,
5659 "not vectorized: number of iterations cannot be computed.");
79fe1b3b
DN
5660 return NULL;
5661 }
5662
d6901754
DN
5663 if (chrec_contains_undetermined (number_of_iterations))
5664 {
5665 if (vect_debug_details (NULL))
5666 fprintf (dump_file, "Infinite number of iterations.");
5667 return false;
5668 }
5669
a023975e
OG
5670 loop_vinfo = new_loop_vec_info (loop);
5671 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
d6901754 5672
a023975e
OG
5673 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5674 {
d6901754
DN
5675 if (vect_debug_details (loop))
5676 {
5677 fprintf (dump_file, "loop bound unknown.\n");
5678 fprintf (dump_file, "Symbolic number of iterations is ");
5679 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5680 }
a023975e
OG
5681 }
5682 else
5683 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
79fe1b3b
DN
5684 {
5685 if (vect_debug_stats (loop) || vect_debug_details (loop))
5686 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5687 return NULL;
5688 }
5689
79fe1b3b 5690 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
79fe1b3b
DN
5691
5692 return loop_vinfo;
5693}
5694
5695
5696/* Function vect_analyze_loop.
5697
5698 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5699 for it. The different analyses will record information in the
5700 loop_vec_info struct. */
5701
5702static loop_vec_info
5703vect_analyze_loop (struct loop *loop)
5704{
5705 bool ok;
5706 loop_vec_info loop_vinfo;
5707
5708 if (vect_debug_details (NULL))
5709 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5710
5711 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5712
5713 loop_vinfo = vect_analyze_loop_form (loop);
5714 if (!loop_vinfo)
5715 {
5716 if (vect_debug_details (loop))
5717 fprintf (dump_file, "bad loop form.");
5718 return NULL;
5719 }
5720
5721 /* Find all data references in the loop (which correspond to vdefs/vuses)
5722 and analyze their evolution in the loop.
5723
6775f1f3 5724 FORNOW: Handle only simple, array references, which
79fe1b3b
DN
5725 alignment can be forced, and aligned pointer-references. */
5726
5727 ok = vect_analyze_data_refs (loop_vinfo);
5728 if (!ok)
5729 {
5730 if (vect_debug_details (loop))
5731 fprintf (dump_file, "bad data references.");
5732 destroy_loop_vec_info (loop_vinfo);
5733 return NULL;
5734 }
5735
79fe1b3b
DN
5736 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5737
5738 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5739 if (!ok)
5740 {
5741 if (vect_debug_details (loop))
5742 fprintf (dump_file, "unexpected pattern.");
5743 if (vect_debug_details (loop))
5744 fprintf (dump_file, "not vectorized: unexpected pattern.");
5745 destroy_loop_vec_info (loop_vinfo);
5746 return NULL;
5747 }
5748
79fe1b3b
DN
5749 /* Check that all cross-iteration scalar data-flow cycles are OK.
5750 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5751
5752 ok = vect_analyze_scalar_cycles (loop_vinfo);
5753 if (!ok)
5754 {
5755 if (vect_debug_details (loop))
5756 fprintf (dump_file, "bad scalar cycle.");
5757 destroy_loop_vec_info (loop_vinfo);
5758 return NULL;
5759 }
5760
79fe1b3b
DN
5761 /* Analyze data dependences between the data-refs in the loop.
5762 FORNOW: fail at the first data dependence that we encounter. */
5763
5764 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5765 if (!ok)
5766 {
5767 if (vect_debug_details (loop))
5768 fprintf (dump_file, "bad data dependence.");
5769 destroy_loop_vec_info (loop_vinfo);
5770 return NULL;
5771 }
5772
79fe1b3b
DN
5773 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5774 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5775
5776 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5777 if (!ok)
5778 {
5779 if (vect_debug_details (loop))
5780 fprintf (dump_file, "bad data access.");
5781 destroy_loop_vec_info (loop_vinfo);
5782 return NULL;
5783 }
5784
79fe1b3b
DN
5785 /* Analyze the alignment of the data-refs in the loop.
5786 FORNOW: Only aligned accesses are handled. */
5787
5788 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5789 if (!ok)
5790 {
5791 if (vect_debug_details (loop))
5792 fprintf (dump_file, "bad data alignment.");
5793 destroy_loop_vec_info (loop_vinfo);
5794 return NULL;
5795 }
5796
79fe1b3b
DN
5797 /* Scan all the operations in the loop and make sure they are
5798 vectorizable. */
5799
5800 ok = vect_analyze_operations (loop_vinfo);
5801 if (!ok)
5802 {
5803 if (vect_debug_details (loop))
5804 fprintf (dump_file, "bad operation or unsupported loop bound.");
5805 destroy_loop_vec_info (loop_vinfo);
5806 return NULL;
5807 }
5808
5809 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5810
5811 return loop_vinfo;
5812}
5813
5814
5815/* Function need_imm_uses_for.
5816
5817 Return whether we ought to include information for 'var'
5818 when calculating immediate uses. For this pass we only want use
5819 information for non-virtual variables. */
5820
5821static bool
5822need_imm_uses_for (tree var)
5823{
5824 return is_gimple_reg (var);
5825}
5826
5827
5828/* Function vectorize_loops.
5829
5830 Entry Point to loop vectorization phase. */
5831
5832void
5833vectorize_loops (struct loops *loops)
5834{
5835 unsigned int i, loops_num;
5836 unsigned int num_vectorized_loops = 0;
5837
5838 /* Does the target support SIMD? */
5839 /* FORNOW: until more sophisticated machine modelling is in place. */
5840 if (!UNITS_PER_SIMD_WORD)
5841 {
5842 if (vect_debug_details (NULL))
5843 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5844 return;
5845 }
5846
d6901754
DN
5847#ifdef ENABLE_CHECKING
5848 verify_loop_closed_ssa ();
5849#endif
5850
79fe1b3b
DN
5851 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5852
5853 /* ----------- Analyze loops. ----------- */
5854
5855 /* If some loop was duplicated, it gets bigger number
5856 than all previously defined loops. This fact allows us to run
5857 only over initial loops skipping newly generated ones. */
5858 loops_num = loops->num;
5859 for (i = 1; i < loops_num; i++)
5860 {
5861 loop_vec_info loop_vinfo;
5862 struct loop *loop = loops->parray[i];
5863
5864 if (!loop)
5865 continue;
5866
79fe1b3b
DN
5867 loop_vinfo = vect_analyze_loop (loop);
5868 loop->aux = loop_vinfo;
5869
5870 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5871 continue;
5872
5873 vect_transform_loop (loop_vinfo, loops);
5874 num_vectorized_loops++;
5875 }
5876
5877 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5878 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5879 num_vectorized_loops);
5880
5881 /* ----------- Finalize. ----------- */
5882
5883 free_df ();
5884 for (i = 1; i < loops_num; i++)
5885 {
5886 struct loop *loop = loops->parray[i];
6775f1f3
IR
5887 loop_vec_info loop_vinfo;
5888
79fe1b3b 5889 if (!loop)
6775f1f3
IR
5890 continue;
5891 loop_vinfo = loop->aux;
79fe1b3b
DN
5892 destroy_loop_vec_info (loop_vinfo);
5893 loop->aux = NULL;
5894 }
5895
79fe1b3b 5896 rewrite_into_ssa (false);
d6901754 5897 rewrite_into_loop_closed_ssa (); /* FORNOW */
79fe1b3b
DN
5898 bitmap_clear (vars_to_rename);
5899}
This page took 0.976776 seconds and 5 git commands to generate.