]> gcc.gnu.org Git - gcc.git/blame - gcc/tree-vectorizer.c
* gnu/java/net/natPlainSocketImplPosix.cc
[gcc.git] / gcc / tree-vectorizer.c
CommitLineData
79fe1b3b
DN
1/* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to the Free
19Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2002111-1307, USA. */
21
22/* Loop Vectorization Pass.
23
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
28
29 For example, the vectorizer transforms the following simple loop:
30
31 short a[N]; short b[N]; short c[N]; int i;
32
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
36
37 as if it was manually vectorized by rewriting the source code into:
38
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
43
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
50
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
55
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
6775f1f3
IR
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
79fe1b3b
DN
63
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
69
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
74
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
85
86 For example, say stmt S1 was vectorized into stmt VS1:
87
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
91
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
96
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
104
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
118
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121*/
122
123#include "config.h"
124#include "system.h"
125#include "coretypes.h"
126#include "tm.h"
127#include "errors.h"
128#include "ggc.h"
129#include "tree.h"
130#include "target.h"
131
132#include "rtl.h"
133#include "basic-block.h"
134#include "diagnostic.h"
135#include "tree-flow.h"
136#include "tree-dump.h"
137#include "timevar.h"
138#include "cfgloop.h"
139#include "cfglayout.h"
140#include "expr.h"
141#include "optabs.h"
a023975e 142#include "toplev.h"
79fe1b3b
DN
143#include "tree-chrec.h"
144#include "tree-data-ref.h"
145#include "tree-scalar-evolution.h"
146#include "tree-vectorizer.h"
147#include "tree-pass.h"
148
149/* Main analysis functions. */
150static loop_vec_info vect_analyze_loop (struct loop *);
151static loop_vec_info vect_analyze_loop_form (struct loop *);
152static bool vect_analyze_data_refs (loop_vec_info);
153static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154static bool vect_analyze_scalar_cycles (loop_vec_info);
155static bool vect_analyze_data_ref_accesses (loop_vec_info);
156static bool vect_analyze_data_refs_alignment (loop_vec_info);
157static void vect_compute_data_refs_alignment (loop_vec_info);
158static bool vect_analyze_operations (loop_vec_info);
159
160/* Main code transformation functions. */
161static void vect_transform_loop (loop_vec_info, struct loops *);
a023975e 162static void vect_transform_loop_bound (loop_vec_info, tree niters);
79fe1b3b
DN
163static bool vect_transform_stmt (tree, block_stmt_iterator *);
164static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168static void vect_align_data_ref (tree);
169static void vect_enhance_data_refs_alignment (loop_vec_info);
170
171/* Utility functions for the analyses. */
172static bool vect_is_simple_use (tree , struct loop *, tree *);
173static bool exist_non_indexing_operands_for_use_p (tree, tree);
174static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
175static void vect_mark_relevant (varray_type, tree);
176static bool vect_stmt_relevant_p (tree, loop_vec_info);
a023975e 177static tree vect_get_loop_niters (struct loop *, tree *);
6775f1f3 178static bool vect_compute_data_ref_alignment
79fe1b3b
DN
179 (struct data_reference *, loop_vec_info);
180static bool vect_analyze_data_ref_access (struct data_reference *);
181static bool vect_get_first_index (tree, tree *);
182static bool vect_can_force_dr_alignment_p (tree, unsigned int);
7ccf35ed
DN
183static struct data_reference * vect_analyze_pointer_ref_access
184 (tree, tree, bool);
a023975e
OG
185static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
186 struct loop *loop);
6775f1f3
IR
187static tree vect_get_base_and_bit_offset
188 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
189static struct data_reference * vect_analyze_pointer_ref_access
190 (tree, tree, bool);
191static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
192static tree vect_compute_array_ref_alignment
193 (struct data_reference *, loop_vec_info, tree, tree *);
194static tree vect_get_ptr_offset (tree, tree, tree *);
195static tree vect_get_symbl_and_dr
196 (tree, tree, bool, loop_vec_info, struct data_reference **);
79fe1b3b
DN
197
198/* Utility functions for the code transformation. */
199static tree vect_create_destination_var (tree, tree);
7ccf35ed
DN
200static tree vect_create_data_ref_ptr
201 (tree, block_stmt_iterator *, tree, tree *, bool);
202static tree vect_create_index_for_vector_ref
203 (struct loop *, block_stmt_iterator *);
204static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
79fe1b3b
DN
205static tree get_vectype_for_scalar_type (tree);
206static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
207static tree vect_get_vec_def_for_operand (tree, tree);
208static tree vect_init_vector (tree, tree);
a023975e 209static tree vect_build_symbol_bound (tree, int, struct loop *);
79fe1b3b
DN
210static void vect_finish_stmt_generation
211 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
212
a023975e
OG
213static void vect_generate_tmps_on_preheader (loop_vec_info,
214 tree *, tree *,
215 tree *);
216static tree vect_build_loop_niters (loop_vec_info);
217static void vect_update_ivs_after_vectorizer (struct loop *, tree);
218
ed8b7558 219/* Loop transformations prior to vectorization. */
a023975e
OG
220
221/* Loop transformations entry point function.
222 It can be used outside of the vectorizer
223 in case the loop to be manipulated answers conditions specified
224 in function documentation. */
225struct loop *tree_duplicate_loop_to_edge (struct loop *,
226 struct loops *, edge,
227 tree, tree, bool);
228
229static void allocate_new_names (bitmap);
230static void rename_use_op (use_operand_p);
231static void rename_def_op (def_operand_p, tree);
232static void rename_variables_in_bb (basic_block);
233static void free_new_names (bitmap);
234static void rename_variables_in_loop (struct loop *);
235static void copy_phi_nodes (struct loop *, struct loop *, bool);
236static void update_phis_for_duplicate_loop (struct loop *,
237 struct loop *,
238 bool after);
239static void update_phi_nodes_for_guard (edge, struct loop *);
240static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
241static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
242 struct loops *,
243 edge);
244static edge add_loop_guard (basic_block, tree, basic_block);
245static bool verify_loop_for_duplication (struct loop *, bool, edge);
246
247/* Utilities dealing with loop peeling (not peeling itself). */
248static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
249static void vect_update_niters_after_peeling (loop_vec_info, tree);
250static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
251 tree niters);
252static void vect_update_inits_of_drs (loop_vec_info, tree);
253static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
254
79fe1b3b
DN
255/* Utilities for creation and deletion of vec_info structs. */
256loop_vec_info new_loop_vec_info (struct loop *loop);
257void destroy_loop_vec_info (loop_vec_info);
258stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
259
a023975e
OG
260static bool vect_debug_stats (struct loop *loop);
261static bool vect_debug_details (struct loop *loop);
262
263\f
264/* Utilities to support loop peeling for vectorization purposes. */
265
266
267/* For each definition in DEFINITIONS this function allocates
268 new ssa name. */
269
270static void
271allocate_new_names (bitmap definitions)
272{
273 unsigned ver;
274 bitmap_iterator bi;
275
276 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
277 {
278 tree def = ssa_name (ver);
279 tree *new_name_ptr = xmalloc (sizeof (tree));
280
281 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
282
283 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
284 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
285
286 SSA_NAME_AUX (def) = new_name_ptr;
287 }
288}
289
290
291/* Renames the use *OP_P. */
292
293static void
294rename_use_op (use_operand_p op_p)
295{
296 tree *new_name_ptr;
297
298 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
299 return;
300
301 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
302
303 /* Something defined outside of the loop. */
304 if (!new_name_ptr)
305 return;
306
307 /* An ordinary ssa name defined in the loop. */
308
309 SET_USE (op_p, *new_name_ptr);
310}
311
312
313/* Renames the def *OP_P in statement STMT. */
314
315static void
316rename_def_op (def_operand_p op_p, tree stmt)
317{
318 tree *new_name_ptr;
319
320 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
321 return;
322
323 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
324
325 /* Something defined outside of the loop. */
326 if (!new_name_ptr)
327 return;
328
329 /* An ordinary ssa name defined in the loop. */
330
331 SET_DEF (op_p, *new_name_ptr);
332 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
333}
334
335
336/* Renames the variables in basic block BB. */
337
338static void
339rename_variables_in_bb (basic_block bb)
340{
341 tree phi;
342 block_stmt_iterator bsi;
343 tree stmt;
344 stmt_ann_t ann;
345 use_optype uses;
346 vuse_optype vuses;
347 def_optype defs;
348 v_may_def_optype v_may_defs;
349 v_must_def_optype v_must_defs;
350 unsigned i;
351 edge e;
352 edge_iterator ei;
353
354 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
355 rename_def_op (PHI_RESULT_PTR (phi), phi);
356
357 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
358 {
359 stmt = bsi_stmt (bsi);
360 get_stmt_operands (stmt);
361 ann = stmt_ann (stmt);
362
363 uses = USE_OPS (ann);
364 for (i = 0; i < NUM_USES (uses); i++)
365 rename_use_op (USE_OP_PTR (uses, i));
366
367 defs = DEF_OPS (ann);
368 for (i = 0; i < NUM_DEFS (defs); i++)
369 rename_def_op (DEF_OP_PTR (defs, i), stmt);
370
371 vuses = VUSE_OPS (ann);
372 for (i = 0; i < NUM_VUSES (vuses); i++)
373 rename_use_op (VUSE_OP_PTR (vuses, i));
374
375 v_may_defs = V_MAY_DEF_OPS (ann);
376 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
377 {
378 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
379 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
380 }
381
382 v_must_defs = V_MUST_DEF_OPS (ann);
383 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
384 rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt);
385 }
386
387 FOR_EACH_EDGE (e, ei, bb->succs)
388 for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
389 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
390}
391
392
393/* Releases the structures holding the new ssa names. */
394
395static void
396free_new_names (bitmap definitions)
397{
398 unsigned ver;
399 bitmap_iterator bi;
400
401 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
402 {
403 tree def = ssa_name (ver);
404
405 if (SSA_NAME_AUX (def))
406 {
407 free (SSA_NAME_AUX (def));
408 SSA_NAME_AUX (def) = NULL;
409 }
410 }
411}
412
413
414/* Renames variables in new generated LOOP. */
415
416static void
417rename_variables_in_loop (struct loop *loop)
418{
419 unsigned i;
420 basic_block *bbs;
421
422 bbs = get_loop_body (loop);
423
424 for (i = 0; i < loop->num_nodes; i++)
425 rename_variables_in_bb (bbs[i]);
426
427 free (bbs);
428}
429
430
431/* This function copies phis from LOOP header to
432 NEW_LOOP header. AFTER is as
433 in update_phis_for_duplicate_loop function. */
434
435static void
436copy_phi_nodes (struct loop *loop, struct loop *new_loop,
437 bool after)
438{
439 tree phi, new_phi, def;
440 edge new_e;
441 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
442
443 /* Second add arguments to newly created phi nodes. */
444 for (phi = phi_nodes (loop->header),
445 new_phi = phi_nodes (new_loop->header);
446 phi;
447 phi = TREE_CHAIN (phi),
448 new_phi = TREE_CHAIN (new_phi))
449 {
450 new_e = loop_preheader_edge (new_loop);
451 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
452 add_phi_arg (&new_phi, def, new_e);
453 }
454}
455
456
457/* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
458 executes after LOOP, and false if it executes before it. */
459
460static void
461update_phis_for_duplicate_loop (struct loop *loop,
462 struct loop *new_loop, bool after)
463{
464 edge old_latch;
465 tree *new_name_ptr, new_ssa_name;
466 tree phi_new, phi_old, def;
467 edge orig_entry_e = loop_preheader_edge (loop);
468
469 /* Copy phis from loop->header to new_loop->header. */
470 copy_phi_nodes (loop, new_loop, after);
471
472 old_latch = loop_latch_edge (loop);
473
474 /* Update PHI args for the new loop latch edge, and
475 the old loop preheader edge, we know that the PHI nodes
476 are ordered appropriately in copy_phi_nodes. */
477 for (phi_new = phi_nodes (new_loop->header),
478 phi_old = phi_nodes (loop->header);
479 phi_new && phi_old;
480 phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
481 {
482 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
483
484 if (TREE_CODE (def) != SSA_NAME)
485 continue;
486
487 new_name_ptr = SSA_NAME_AUX (def);
488
489 /* Something defined outside of the loop. */
490 if (!new_name_ptr)
491 continue;
492
493 /* An ordinary ssa name defined in the loop. */
494 new_ssa_name = *new_name_ptr;
495
496 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
497
498 /* Update PHI args for the original loop pre-header edge. */
499 if (! after)
500 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
501 new_ssa_name);
502 }
503}
504
505
506/* Update PHI nodes for a guard of the LOOP.
507
508 LOOP is supposed to have a preheader bb at which a guard condition is
509 located. The true edge of this condition skips the LOOP and ends
510 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
511 to be an empty bb (created by this transformation) with one successor.
512
513 This function creates phi nodes at the LOOP exit bb. These phis need to be
514 created as a result of adding true edge coming from guard.
515
516 FORNOW: Only phis which have corresponding phi nodes at the header of the
517 LOOP are created. Here we use the assumption that after the LOOP there
518 are no uses of defs generated in LOOP.
519
520 After the phis creation, the function updates the values of phi nodes at
521 the LOOP exit successor bb:
522
523 Original loop:
524
525 bb0: loop preheader
526 goto bb1
527 bb1: loop header
528 if (exit_cond) goto bb3 else goto bb2
529 bb2: loop latch
530 goto bb1
531 bb3:
532
533
534 After guard creation (the loop before this function):
535
536 bb0: loop preheader
537 if (guard_condition) goto bb4 else goto bb1
538 bb1: loop header
539 if (exit_cond) goto bb4 else goto bb2
540 bb2: loop latch
541 goto bb1
542 bb4: loop exit
543 (new empty bb)
544 goto bb3
545 bb3:
546
547 This function updates the phi nodes in bb4 and in bb3, to account for the
548 new edge from bb0 to bb4. */
549
550static void
551update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
552{
553 tree phi, phi1;
554
555 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
556 {
557 tree new_phi;
558 tree phi_arg;
559
560 /* Generate new phi node. */
561 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
562 loop->exit_edges[0]->dest);
563
564 /* Add argument coming from guard true edge. */
565 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
566 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
567
568 /* Add argument coming from loop exit edge. */
569 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
570 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
571
572 /* Update all phi nodes at the loop exit successor. */
573 for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest);
574 phi1;
575 phi1 = TREE_CHAIN (phi1))
576 {
577 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1,
578 EDGE_SUCC (loop->exit_edges[0]->dest, 0));
579 if (old_arg == phi_arg)
580 {
581 edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);
582
583 SET_PHI_ARG_DEF (phi1,
584 phi_arg_from_edge (phi1, e),
585 PHI_RESULT (new_phi));
586 }
587 }
588 }
589}
590
591
592/* Make the LOOP iterate NITERS times. This is done by adding a new IV
593 that starts at zero, increases by one and its limit is NITERS. */
594
595static void
596make_loop_iterate_ntimes (struct loop *loop, tree niters,
597 tree begin_label, tree exit_label)
598{
599 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
600 tree orig_cond;
601 edge exit_edge = loop->exit_edges[0];
602 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
603
604 /* Flow loop scan does not update loop->single_exit field. */
605 loop->single_exit = loop->exit_edges[0];
606 orig_cond = get_loop_exit_condition (loop);
607 gcc_assert (orig_cond);
608 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
609 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
610
611 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
612 back to the exit condition statement. */
613 bsi_next (&loop_exit_bsi);
614 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
615
616
617 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
618 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
619 else /* 'then' edge loops back. */
620 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
621
622 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
623 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
624 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
625 begin_label, exit_label);
626 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
627
628 /* Remove old loop exit test: */
629 bsi_remove (&loop_exit_bsi);
630
631 if (vect_debug_stats (loop) || vect_debug_details (loop))
632 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
633}
634
635
636/* Given LOOP this function generates a new copy of it and puts it
637 on E which is either the entry or exit of LOOP. */
638
639static struct loop *
640tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
641 edge e)
642{
643 struct loop *new_loop;
644 basic_block *new_bbs, *bbs;
645 bool at_exit;
646 bool was_imm_dom;
647 basic_block exit_dest;
648 tree phi, phi_arg;
649
650 at_exit = (e == loop->exit_edges[0]);
651 if (!at_exit && e != loop_preheader_edge (loop))
652 {
653 if (dump_file && (dump_flags & TDF_DETAILS))
654 fprintf (dump_file,
655 "Edge is not an entry nor an exit edge.\n");
656 return NULL;
657 }
658
659 bbs = get_loop_body (loop);
660
661 /* Check whether duplication is possible. */
662 if (!can_copy_bbs_p (bbs, loop->num_nodes))
663 {
664 if (vect_debug_stats (loop) || vect_debug_details (loop))
665 fprintf (dump_file,
666 "Cannot copy basic blocks.\n");
667 free (bbs);
668 return NULL;
669 }
670
671 /* Generate new loop structure. */
672 new_loop = duplicate_loop (loops, loop, loop->outer);
673 if (!new_loop)
674 {
675 if (vect_debug_stats (loop) || vect_debug_details (loop))
676 fprintf (dump_file,
677 "The duplicate_loop returns NULL.\n");
678 free (bbs);
679 return NULL;
680 }
681
682 exit_dest = loop->exit_edges[0]->dest;
683 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
684 exit_dest) == loop->header ?
685 true : false);
686
687 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
688
689 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
690
691 /* Duplicating phi args at exit bbs as coming
692 also from exit of duplicated loop. */
693 for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi))
694 {
695 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
696 if (phi_arg)
697 {
698 edge new_loop_exit_edge;
699
700 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
701 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
702 else
703 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
704
705 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
706 }
707 }
708
709 if (at_exit) /* Add the loop copy at exit. */
710 {
711 redirect_edge_and_branch_force (e, new_loop->header);
712 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
713 if (was_imm_dom)
714 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
715 }
716 else /* Add the copy at entry. */
717 {
718 edge new_exit_e;
719 edge entry_e = loop_preheader_edge (loop);
720 basic_block preheader = entry_e->src;
721
722 if (!flow_bb_inside_loop_p (new_loop,
723 EDGE_SUCC (new_loop->header, 0)->dest))
724 new_exit_e = EDGE_SUCC (new_loop->header, 0);
725 else
726 new_exit_e = EDGE_SUCC (new_loop->header, 1);
727
728 redirect_edge_and_branch_force (new_exit_e, loop->header);
729 set_immediate_dominator (CDI_DOMINATORS, loop->header,
730 new_exit_e->src);
731
732 /* We have to add phi args to the loop->header here as coming
733 from new_exit_e edge. */
734 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
735 {
736 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
737 if (phi_arg)
738 add_phi_arg (&phi, phi_arg, new_exit_e);
739 }
740
741 redirect_edge_and_branch_force (entry_e, new_loop->header);
742 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
743 }
744
745 flow_loop_scan (new_loop, LOOP_ALL);
746 flow_loop_scan (loop, LOOP_ALL);
747 free (new_bbs);
748 free (bbs);
749
750 return new_loop;
751}
752
753
754/* Given the condition statement COND, put it as the last statement
755 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
756 Assumes that this is the single exit of the guarded loop.
757 Returns the skip edge. */
758
759static edge
760add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
761{
762 block_stmt_iterator bsi;
763 edge new_e, enter_e;
764 tree cond_stmt, then_label, else_label;
765
766 enter_e = EDGE_SUCC (guard_bb, 0);
767 enter_e->flags &= ~EDGE_FALLTHRU;
768 enter_e->flags |= EDGE_FALSE_VALUE;
769 bsi = bsi_last (guard_bb);
770
771 then_label = build1 (GOTO_EXPR, void_type_node,
772 tree_block_label (exit_bb));
773 else_label = build1 (GOTO_EXPR, void_type_node,
774 tree_block_label (enter_e->dest));
775 cond_stmt = build (COND_EXPR, void_type_node, cond,
776 then_label, else_label);
777 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
778 /* Add new edge to connect entry block to the second loop. */
779 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
780 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
781 return new_e;
782}
783
784
785/* This function verifies that certain restrictions apply to LOOP. */
786
787static bool
788verify_loop_for_duplication (struct loop *loop,
789 bool update_first_loop_count, edge e)
790{
791 edge exit_e = loop->exit_edges [0];
792 edge entry_e = loop_preheader_edge (loop);
793
794 /* We duplicate only innermost loops. */
795 if (loop->inner)
796 {
797 if (vect_debug_stats (loop) || vect_debug_details (loop))
798 fprintf (dump_file,
799 "Loop duplication failed. Loop is not innermost.\n");
800 return false;
801 }
802
803 /* Only loops with 1 exit. */
804 if (loop->num_exits != 1)
805 {
806 if (vect_debug_stats (loop) || vect_debug_details (loop))
807 fprintf (dump_file,
808 "More than one exit from loop.\n");
809 return false;
810 }
811
812 /* Only loops with 1 entry. */
813 if (loop->num_entries != 1)
814 {
815 if (vect_debug_stats (loop) || vect_debug_details (loop))
816 fprintf (dump_file,
817 "More than one exit from loop.\n");
818 return false;
819 }
820
821 /* All loops has outers, the only case loop->outer is NULL is for
822 the function itself. */
823 if (!loop->outer)
824 {
825 if (vect_debug_stats (loop) || vect_debug_details (loop))
826 fprintf (dump_file,
827 "Loop is outer-most loop.\n");
828 return false;
829 }
830
831 /* Verify that new IV can be created and loop condition
832 can be changed to make first loop iterate first_niters times. */
833 if (!update_first_loop_count)
834 {
835 tree orig_cond = get_loop_exit_condition (loop);
836 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
837
838 if (!orig_cond)
839 {
840 if (vect_debug_stats (loop) || vect_debug_details (loop))
841 fprintf (dump_file,
842 "Loop has no exit condition.\n");
843 return false;
844 }
845 if (orig_cond != bsi_stmt (loop_exit_bsi))
846 {
847 if (vect_debug_stats (loop) || vect_debug_details (loop))
848 fprintf (dump_file,
849 "Loop exit condition is not loop header last stmt.\n");
850 return false;
851 }
852 }
853
854 /* Make sure E is either an entry or an exit edge. */
855 if (e != exit_e && e != entry_e)
856 {
857 if (vect_debug_stats (loop) || vect_debug_details (loop))
858 fprintf (dump_file,
859 "E is not loop entry or exit edge.\n");
860 return false;
861 }
862
863 return true;
864}
865
866
867/* Given LOOP this function duplicates it to the edge E.
868
869 This transformation takes place before the loop is vectorized.
870 For now, there are two main cases when it's used
871 by the vectorizer: to support loops with unknown loop bounds
872 (or loop bounds indivisible by vectorization factor) and to force the
873 alignment of data references in the loop. In the first case, LOOP is
874 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
875 is duplicated to the preheader edge thus generating prolog loop. In both
876 cases, the original loop will be vectorized after the transformation.
877
878 The edge E is supposed to be either preheader edge of the LOOP or
879 its exit edge. If preheader edge is specified, the LOOP copy
880 will precede the original one. Otherwise the copy will be located
881 at the exit of the LOOP.
882
883 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
884 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
885 loop will be iterated FIRST_NITERS times by introducing additional
886 induction variable and replacing loop exit condition. If
887 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
888 the caller to tree_duplicate_loop_to_edge is responsible for updating
889 the first loop count.
890
891 NITERS (also SSA_NAME) parameter defines the number of iteration the
892 original loop iterated. The function generates two if-then guards:
893 one prior to the first loop and the other prior to the second loop.
894 The first guard will be:
895
896 if (FIRST_NITERS == 0) then skip the first loop
897
898 The second guard will be:
899
900 if (FIRST_NITERS == NITERS) then skip the second loop
901
902 Thus the equivalence to the original code is guaranteed by correct values
903 of NITERS and FIRST_NITERS and generation of if-then loop guards.
904
905 For now this function supports only loop forms that are candidate for
906 vectorization. Such types are the following:
907
908 (1) only innermost loops
909 (2) loops built from 2 basic blocks
910 (3) loops with one entry and one exit
911 (4) loops without function calls
912 (5) loops without defs that are used after the loop
913
914 (1), (3) are checked in this function; (2) - in function
915 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
916 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
917 when excluding induction/reduction support.
918
919 The function returns NULL in case one of these checks or
920 transformations failed. */
921
922struct loop*
923tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
924 edge e, tree first_niters,
925 tree niters, bool update_first_loop_count)
926{
927 struct loop *new_loop = NULL, *first_loop, *second_loop;
928 edge skip_e;
929 tree pre_condition;
930 bitmap definitions;
931 basic_block first_exit_bb, second_exit_bb;
932 basic_block pre_header_bb;
933 edge exit_e = loop->exit_edges [0];
934
935 gcc_assert (!any_marked_for_rewrite_p ());
936
937 if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
938 return NULL;
939
940 /* We have to initialize cfg_hooks. Then, when calling
941 cfg_hooks->split_edge, the function tree_split_edge
942 is actually called and, when calling cfg_hooks->duplicate_block,
943 the function tree_duplicate_bb is called. */
944 tree_register_cfg_hooks ();
945
946 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
947 if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
948 {
949 if (vect_debug_stats (loop) || vect_debug_details (loop))
950 fprintf (dump_file,
951 "The tree_duplicate_loop_to_edge_cfg failed.\n");
952 return NULL;
953 }
954
955 definitions = marked_ssa_names ();
956 allocate_new_names (definitions);
957 update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
ed8b7558 958 /* Here, using assumption (5), we do not propagate new names further
a023975e
OG
959 than on phis of the exit from the second loop. */
960 rename_variables_in_loop (new_loop);
961 free_new_names (definitions);
962
963 if (e == exit_e)
964 {
965 first_loop = loop;
966 second_loop = new_loop;
967 }
968 else
969 {
970 first_loop = new_loop;
971 second_loop = loop;
972 }
973
974 /* 2. Generate bb between the loops. */
975 first_exit_bb = split_edge (first_loop->exit_edges[0]);
976 add_bb_to_loop (first_exit_bb, first_loop->outer);
977
978 /* We need to update here first loop exit edge
979 and second loop preheader edge. */
980 flow_loop_scan (first_loop, LOOP_ALL);
981 flow_loop_scan (second_loop, LOOP_ALL);
982
983 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
984 if (!update_first_loop_count)
985 {
986 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
987 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
988
989 make_loop_iterate_ntimes (first_loop, first_niters,
990 first_loop_latch_lbl,
991 first_loop_exit_lbl);
992 }
993
994 /* 4. Add the guard before first loop:
995
996 if FIRST_NITERS == 0
997 skip first loop
998 else
999 enter first loop */
1000
1001 /* 4a. Generate bb before first loop. */
1002 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
1003 add_bb_to_loop (pre_header_bb, first_loop->outer);
1004
1005 /* First loop preheader edge is changed. */
1006 flow_loop_scan (first_loop, LOOP_ALL);
79fe1b3b 1007
a023975e
OG
1008 /* 4b. Generate guard condition. */
1009 pre_condition = build (LE_EXPR, boolean_type_node,
1010 first_niters, integer_zero_node);
1011
1012 /* 4c. Add condition at the end of preheader bb. */
1013 skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
1014
ed8b7558 1015 /* 4d. Update phis at first loop exit and propagate changes
a023975e
OG
1016 to the phis of second loop. */
1017 update_phi_nodes_for_guard (skip_e, first_loop);
1018
1019 /* 5. Add the guard before second loop:
1020
1021 if FIRST_NITERS == NITERS SKIP
1022 skip second loop
1023 else
1024 enter second loop */
1025
1026 /* 5a. Generate empty bb at the exit from the second loop. */
1027 second_exit_bb = split_edge (second_loop->exit_edges[0]);
1028 add_bb_to_loop (second_exit_bb, second_loop->outer);
1029
1030 /* Second loop preheader edge is changed. */
1031 flow_loop_scan (second_loop, LOOP_ALL);
1032
1033 /* 5b. Generate guard condition. */
1034 pre_condition = build (EQ_EXPR, boolean_type_node,
1035 first_niters, niters);
1036
1037 /* 5c. Add condition at the end of preheader bb. */
1038 skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1039 update_phi_nodes_for_guard (skip_e, second_loop);
1040
1041 BITMAP_XFREE (definitions);
1042 unmark_all_for_rewrite ();
1043
1044 return new_loop;
1045}
1046
1047
1048\f
1049/* Here the proper Vectorizer starts. */
79fe1b3b
DN
1050
1051/* Function new_stmt_vec_info.
1052
1053 Create and initialize a new stmt_vec_info struct for STMT. */
1054
1055stmt_vec_info
1056new_stmt_vec_info (tree stmt, struct loop *loop)
1057{
1058 stmt_vec_info res;
1059 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1060
1061 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1062 STMT_VINFO_STMT (res) = stmt;
1063 STMT_VINFO_LOOP (res) = loop;
1064 STMT_VINFO_RELEVANT_P (res) = 0;
1065 STMT_VINFO_VECTYPE (res) = NULL;
1066 STMT_VINFO_VEC_STMT (res) = NULL;
1067 STMT_VINFO_DATA_REF (res) = NULL;
1068 STMT_VINFO_MEMTAG (res) = NULL;
6775f1f3 1069 STMT_VINFO_VECT_DR_BASE (res) = NULL;
79fe1b3b
DN
1070
1071 return res;
1072}
1073
1074
1075/* Function new_loop_vec_info.
1076
1077 Create and initialize a new loop_vec_info struct for LOOP, as well as
1078 stmt_vec_info structs for all the stmts in LOOP. */
1079
1080loop_vec_info
1081new_loop_vec_info (struct loop *loop)
1082{
1083 loop_vec_info res;
1084 basic_block *bbs;
1085 block_stmt_iterator si;
1086 unsigned int i;
1087
1088 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1089
1090 bbs = get_loop_body (loop);
1091
1092 /* Create stmt_info for all stmts in the loop. */
1093 for (i = 0; i < loop->num_nodes; i++)
1094 {
1095 basic_block bb = bbs[i];
1096 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1097 {
1098 tree stmt = bsi_stmt (si);
1099 stmt_ann_t ann;
1100
1101 get_stmt_operands (stmt);
1102 ann = stmt_ann (stmt);
1103 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1104 }
1105 }
1106
1107 LOOP_VINFO_LOOP (res) = loop;
1108 LOOP_VINFO_BBS (res) = bbs;
1109 LOOP_VINFO_EXIT_COND (res) = NULL;
a023975e 1110 LOOP_VINFO_NITERS (res) = NULL;
79fe1b3b 1111 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
a023975e 1112 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
79fe1b3b
DN
1113 LOOP_VINFO_VECT_FACTOR (res) = 0;
1114 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1115 "loop_write_datarefs");
1116 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1117 "loop_read_datarefs");
a023975e
OG
1118
1119 for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++)
1120 LOOP_UNALIGNED_DR (res, i) = NULL;
79fe1b3b
DN
1121 return res;
1122}
1123
1124
1125/* Function destroy_loop_vec_info.
1126
1127 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1128 stmts in the loop. */
1129
1130void
1131destroy_loop_vec_info (loop_vec_info loop_vinfo)
1132{
1133 struct loop *loop;
1134 basic_block *bbs;
1135 int nbbs;
1136 block_stmt_iterator si;
1137 int j;
1138
1139 if (!loop_vinfo)
1140 return;
1141
1142 loop = LOOP_VINFO_LOOP (loop_vinfo);
1143
1144 bbs = LOOP_VINFO_BBS (loop_vinfo);
1145 nbbs = loop->num_nodes;
1146
1147 for (j = 0; j < nbbs; j++)
1148 {
1149 basic_block bb = bbs[j];
1150 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1151 {
1152 tree stmt = bsi_stmt (si);
1153 stmt_ann_t ann = stmt_ann (stmt);
1154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1155 free (stmt_info);
1156 set_stmt_info (ann, NULL);
1157 }
1158 }
1159
1160 free (LOOP_VINFO_BBS (loop_vinfo));
1161 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1162 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1163
1164 free (loop_vinfo);
1165}
1166
1167
1168/* Function debug_loop_stats.
1169
1170 For vectorization statistics dumps. */
1171
1172static bool
1173vect_debug_stats (struct loop *loop)
1174{
1175 basic_block bb;
1176 block_stmt_iterator si;
1177 tree node = NULL_TREE;
1178
1179 if (!dump_file || !(dump_flags & TDF_STATS))
1180 return false;
1181
1182 if (!loop)
1183 {
1184 fprintf (dump_file, "\n");
1185 return true;
1186 }
1187
1188 if (!loop->header)
1189 return false;
1190
1191 bb = loop->header;
1192
1193 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1194 {
1195 node = bsi_stmt (si);
1196 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1197 break;
1198 }
1199
1200 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1201 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1202 {
1203 fprintf (dump_file, "\nloop at %s:%d: ",
1204 EXPR_FILENAME (node), EXPR_LINENO (node));
1205 return true;
1206 }
1207
1208 return false;
1209}
1210
1211
1212/* Function debug_loop_details.
1213
1214 For vectorization debug dumps. */
1215
1216static bool
1217vect_debug_details (struct loop *loop)
1218{
1219 basic_block bb;
1220 block_stmt_iterator si;
1221 tree node = NULL_TREE;
1222
1223 if (!dump_file || !(dump_flags & TDF_DETAILS))
1224 return false;
1225
1226 if (!loop)
1227 {
1228 fprintf (dump_file, "\n");
1229 return true;
1230 }
1231
1232 if (!loop->header)
1233 return false;
1234
1235 bb = loop->header;
1236
1237 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1238 {
1239 node = bsi_stmt (si);
1240 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1241 break;
1242 }
1243
1244 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1245 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1246 {
1247 fprintf (dump_file, "\nloop at %s:%d: ",
1248 EXPR_FILENAME (node), EXPR_LINENO (node));
1249 return true;
1250 }
1251
1252 return false;
1253}
1254
6775f1f3
IR
1255
1256/* Function vect_get_ptr_offset
1257
1258 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1259
1260static tree
1261vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1262 tree vectype ATTRIBUTE_UNUSED,
1263 tree *offset ATTRIBUTE_UNUSED)
1264{
1265 /* TODO: Use alignment information. */
1266 return NULL_TREE;
1267}
1268
1269
1270/* Function vect_get_base_and_bit_offset
1271
1272 Return the BASE of the data reference EXPR.
1273 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1274 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1275 bits of 'a.b[i] + 4B' from a.
1276
1277 Input:
1278 EXPR - the memory reference that is being analyzed
1279 DR - the data_reference struct of the _original_ memory reference
1280 (Note: DR_REF (DR) is not necessarily EXPR)
1281 VECTYPE - the type that defines the alignment (i.e, we compute
1282 alignment relative to TYPE_ALIGN(VECTYPE))
79fe1b3b 1283
6775f1f3
IR
1284 Output:
1285 BASE (returned value) - the base of the data reference EXPR.
1286 E.g, if EXPR is a.b[k].c[i][j] the returned
1287 base is a.
1288 OFFSET - offset of EXPR from BASE in bits
1289 BASE_ALIGNED_P - indicates if BASE is aligned
1290
1291 If something unexpected is encountered (an unsupported form of data-ref),
1292 or if VECTYPE is given but OFFSET cannot be determined:
1293 then NULL_TREE is returned. */
79fe1b3b
DN
1294
1295static tree
6775f1f3
IR
1296vect_get_base_and_bit_offset (struct data_reference *dr,
1297 tree expr,
1298 tree vectype,
1299 loop_vec_info loop_vinfo,
1300 tree *offset,
1301 bool *base_aligned_p)
79fe1b3b 1302{
6775f1f3
IR
1303 tree this_offset = size_zero_node;
1304 tree base = NULL_TREE;
1305 tree next_ref;
1306 tree oprnd0, oprnd1;
1307 struct data_reference *array_dr;
1308 enum tree_code code = TREE_CODE (expr);
1309
1310 *base_aligned_p = false;
79fe1b3b 1311
6775f1f3 1312 switch (code)
79fe1b3b 1313 {
6775f1f3
IR
1314 /* These cases end the recursion: */
1315 case VAR_DECL:
1316 *offset = size_zero_node;
1317 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1318 *base_aligned_p = true;
1319 return expr;
1320
1321 case SSA_NAME:
1322 if (!vectype)
1323 return expr;
1324
1325 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1326 return NULL_TREE;
1327
1328 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1329 {
1330 base = vect_get_ptr_offset (expr, vectype, offset);
1331 if (base)
1332 *base_aligned_p = true;
1333 }
1334 else
1335 {
1336 *base_aligned_p = true;
1337 *offset = size_zero_node;
1338 base = expr;
1339 }
1340 return base;
1341
1342 case INTEGER_CST:
1343 *offset = int_const_binop (MULT_EXPR, expr,
1344 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1345 return expr;
1346
1347 /* These cases continue the recursion: */
1348 case COMPONENT_REF:
1349 oprnd0 = TREE_OPERAND (expr, 0);
1350 oprnd1 = TREE_OPERAND (expr, 1);
79fe1b3b
DN
1351
1352 this_offset = bit_position (oprnd1);
6775f1f3
IR
1353 if (vectype && !host_integerp (this_offset, 1))
1354 return NULL_TREE;
1355 next_ref = oprnd0;
1356 break;
1357
1358 case ADDR_EXPR:
1359 oprnd0 = TREE_OPERAND (expr, 0);
1360 next_ref = oprnd0;
1361 break;
1362
1363 case INDIRECT_REF:
1364 oprnd0 = TREE_OPERAND (expr, 0);
1365 next_ref = oprnd0;
1366 break;
1367
1368 case ARRAY_REF:
1369 if (DR_REF (dr) != expr)
1370 /* Build array data_reference struct if the existing DR_REF
1371 doesn't match EXPR. This happens, for example, when the
1372 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1373 contains information on the access of T, not of arr. In order
1374 to continue the analysis, we create a new DR struct that
1375 describes the access of arr.
1376 */
1377 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1378 else
1379 array_dr = dr;
1380
1381 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1382 vectype, &this_offset);
1383 if (!next_ref)
79fe1b3b 1384 return NULL_TREE;
79fe1b3b 1385
6775f1f3
IR
1386 if (vectype &&
1387 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
79fe1b3b 1388 {
6775f1f3
IR
1389 *offset = this_offset;
1390 *base_aligned_p = true;
1391 return next_ref;
1392 }
1393 break;
79fe1b3b 1394
6775f1f3
IR
1395 case PLUS_EXPR:
1396 case MINUS_EXPR:
1397 /* In case we have a PLUS_EXPR of the form
1398 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1399 This is verified in vect_get_symbl_and_dr. */
1400 oprnd0 = TREE_OPERAND (expr, 0);
1401 oprnd1 = TREE_OPERAND (expr, 1);
1402
1403 base = vect_get_base_and_bit_offset
1404 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1405 if (vectype && !base)
1406 return NULL_TREE;
79fe1b3b 1407
6775f1f3
IR
1408 next_ref = oprnd0;
1409 break;
79fe1b3b 1410
6775f1f3
IR
1411 default:
1412 return NULL_TREE;
79fe1b3b
DN
1413 }
1414
6775f1f3
IR
1415 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1416 loop_vinfo, offset, base_aligned_p);
1417
1418 if (vectype && base)
1419 {
1420 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1421 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1422 return NULL_TREE;
1423
1424 if (vect_debug_details (NULL))
1425 {
1426 print_generic_expr (dump_file, expr, TDF_SLIM);
1427 fprintf (dump_file, " --> total offset for ref: ");
1428 print_generic_expr (dump_file, *offset, TDF_SLIM);
1429 }
1430 }
1431 return base;
79fe1b3b
DN
1432}
1433
1434
1435/* Function vect_force_dr_alignment_p.
1436
1437 Returns whether the alignment of a DECL can be forced to be aligned
1438 on ALIGNMENT bit boundary. */
1439
1440static bool
1441vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1442{
1443 if (TREE_CODE (decl) != VAR_DECL)
1444 return false;
1445
1446 if (DECL_EXTERNAL (decl))
1447 return false;
1448
1449 if (TREE_STATIC (decl))
1450 return (alignment <= MAX_OFILE_ALIGNMENT);
1451 else
7a8554ce
DN
1452 /* This is not 100% correct. The absolute correct stack alignment
1453 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1454 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1455 However, until someone implements forced stack alignment, SSE
1456 isn't really usable without this. */
1457 return (alignment <= PREFERRED_STACK_BOUNDARY);
79fe1b3b
DN
1458}
1459
1460
1461/* Function vect_get_new_vect_var.
1462
1463 Returns a name for a new variable. The current naming scheme appends the
1464 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1465 the name of vectorizer generated variables, and appends that to NAME if
1466 provided. */
1467
1468static tree
1469vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1470{
1471 const char *prefix;
1472 int prefix_len;
1473 tree new_vect_var;
1474
1475 if (var_kind == vect_simple_var)
1476 prefix = "vect_";
1477 else
1478 prefix = "vect_p";
1479
1480 prefix_len = strlen (prefix);
1481
1482 if (name)
1483 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1484 else
1485 new_vect_var = create_tmp_var (type, prefix);
1486
1487 return new_vect_var;
1488}
1489
1490
6775f1f3 1491/* Function vect_create_index_for_vector_ref.
79fe1b3b
DN
1492
1493 Create (and return) an index variable, along with it's update chain in the
1494 loop. This variable will be used to access a memory location in a vector
1495 operation.
1496
1497 Input:
6775f1f3 1498 LOOP: The loop being vectorized.
79fe1b3b
DN
1499 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1500 function can be added here, or in the loop pre-header.
1501
6775f1f3
IR
1502 Output:
1503 Return an index that will be used to index a vector array. It is expected
1504 that a pointer to the first vector will be used as the base address for the
1505 indexed reference.
1506
1507 FORNOW: we are not trying to be efficient, just creating a new index each
1508 time from scratch. At this time all vector references could use the same
1509 index.
1510
1511 TODO: create only one index to be used by all vector references. Record
1512 the index in the LOOP_VINFO the first time this procedure is called and
1513 return it on subsequent calls. The increment of this index must be placed
1514 just before the conditional expression that ends the single block loop. */
79fe1b3b
DN
1515
1516static tree
6775f1f3 1517vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
79fe1b3b 1518{
79fe1b3b 1519 tree init, step;
79fe1b3b 1520 tree indx_before_incr, indx_after_incr;
79fe1b3b 1521
6775f1f3
IR
1522 /* It is assumed that the base pointer used for vectorized access contains
1523 the address of the first vector. Therefore the index used for vectorized
1524 access must be initialized to zero and incremented by 1. */
79fe1b3b 1525
6775f1f3
IR
1526 init = integer_zero_node;
1527 step = integer_one_node;
1528
1529 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1530 create_iv (init, step, NULL_TREE, loop, bsi, false,
1531 &indx_before_incr, &indx_after_incr);
79fe1b3b 1532
6775f1f3
IR
1533 return indx_before_incr;
1534}
79fe1b3b 1535
79fe1b3b 1536
6775f1f3 1537/* Function vect_create_addr_base_for_vector_ref.
79fe1b3b 1538
6775f1f3
IR
1539 Create an expression that computes the address of the first memory location
1540 that will be accessed for a data reference.
79fe1b3b 1541
6775f1f3
IR
1542 Input:
1543 STMT: The statement containing the data reference.
7ccf35ed
DN
1544 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1545 OFFSET: Optional. If supplied, it is be added to the initial address.
79fe1b3b 1546
6775f1f3 1547 Output:
a023975e
OG
1548 1. Return an SSA_NAME whose value is the address of the memory location of
1549 the first vector of the data reference.
6775f1f3
IR
1550 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1551 these statement(s) which define the returned SSA_NAME.
79fe1b3b 1552
6775f1f3 1553 FORNOW: We are only handling array accesses with step 1. */
79fe1b3b 1554
6775f1f3
IR
1555static tree
1556vect_create_addr_base_for_vector_ref (tree stmt,
7ccf35ed
DN
1557 tree *new_stmt_list,
1558 tree offset)
6775f1f3
IR
1559{
1560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1561 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1562 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1563 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1564 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1565 tree ref = DR_REF (dr);
1566 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1567 tree scalar_type = TREE_TYPE (ref);
1568 tree scalar_ptr_type = build_pointer_type (scalar_type);
1569 tree access_fn;
1570 tree init_val, step, init_oval;
1571 bool ok;
1572 bool is_ptr_ref, is_array_ref, is_addr_expr;
1573 tree array_base;
1574 tree vec_stmt;
1575 tree new_temp;
1576 tree array_ref;
1577 tree addr_base, addr_expr;
1578 tree dest, new_stmt;
79fe1b3b 1579
6775f1f3
IR
1580 /* Only the access function of the last index is relevant (i_n in
1581 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1582 access_fn = DR_ACCESS_FN (dr, 0);
a023975e
OG
1583 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1584 true);
6775f1f3
IR
1585 if (!ok)
1586 init_oval = integer_zero_node;
1587
1588 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1589 && TREE_CODE (data_ref_base) == SSA_NAME;
322ae40b 1590 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
6775f1f3
IR
1591 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1592 || TREE_CODE (data_ref_base) == PLUS_EXPR
1593 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1594 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1595
1596 /** Create: &(base[init_val])
1597
1598 if data_ref_base is an ARRAY_TYPE:
1599 base = data_ref_base
1600
1601 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1602 base = *((scalar_array *) data_ref_base)
1603 **/
1604
1605 if (is_array_ref)
1606 array_base = data_ref_base;
1607 else /* is_ptr_ref or is_addr_expr */
1608 {
1609 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1610 tree scalar_array_type = build_array_type (scalar_type, 0);
1611 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1612 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1613 add_referenced_tmp_var (array_ptr);
1614
1615 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1616 add_referenced_tmp_var (dest);
7ccf35ed
DN
1617 data_ref_base =
1618 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
6775f1f3
IR
1619 append_to_statement_list_force (new_stmt, new_stmt_list);
1620
1621 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1622 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1623 new_temp = make_ssa_name (array_ptr, vec_stmt);
1624 TREE_OPERAND (vec_stmt, 0) = new_temp;
1625 append_to_statement_list_force (vec_stmt, new_stmt_list);
1626
1627 /* (*array_ptr) */
1628 array_base = build_fold_indirect_ref (new_temp);
1629 }
1630
1631 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1632 add_referenced_tmp_var (dest);
1633 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1634 append_to_statement_list_force (new_stmt, new_stmt_list);
1635
7ccf35ed
DN
1636 if (offset)
1637 {
1638 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1639 add_referenced_tmp_var (tmp);
1640 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1641 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1642 init_val = make_ssa_name (tmp, vec_stmt);
1643 TREE_OPERAND (vec_stmt, 0) = init_val;
1644 append_to_statement_list_force (vec_stmt, new_stmt_list);
1645 }
1646
6775f1f3
IR
1647 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1648 NULL_TREE, NULL_TREE);
1649 addr_base = build_fold_addr_expr (array_ref);
1650
1651 /* addr_expr = addr_base */
1652 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1653 get_name (base_name));
1654 add_referenced_tmp_var (addr_expr);
1655 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1656 new_temp = make_ssa_name (addr_expr, vec_stmt);
1657 TREE_OPERAND (vec_stmt, 0) = new_temp;
1658 append_to_statement_list_force (vec_stmt, new_stmt_list);
7ccf35ed 1659
6775f1f3 1660 return new_temp;
79fe1b3b
DN
1661}
1662
1663
1664/* Function get_vectype_for_scalar_type.
1665
1666 Returns the vector type corresponding to SCALAR_TYPE as supported
1667 by the target. */
1668
1669static tree
1670get_vectype_for_scalar_type (tree scalar_type)
1671{
1672 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1673 int nbytes = GET_MODE_SIZE (inner_mode);
1674 int nunits;
6775f1f3 1675 tree vectype;
79fe1b3b
DN
1676
1677 if (nbytes == 0)
1678 return NULL_TREE;
1679
1680 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1681 is expected. */
1682 nunits = UNITS_PER_SIMD_WORD / nbytes;
1683
6775f1f3 1684 vectype = build_vector_type (scalar_type, nunits);
f0923257
DN
1685 if (vect_debug_details (NULL))
1686 {
1687 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1688 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1689 }
1690
1691 if (!vectype)
6775f1f3 1692 return NULL_TREE;
f0923257
DN
1693
1694 if (vect_debug_details (NULL))
1695 {
1696 fprintf (dump_file, "vectype: ");
1697 print_generic_expr (dump_file, vectype, TDF_SLIM);
1698 }
1699
1700 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1701 {
1702 /* TODO: tree-complex.c sometimes can parallelize operations
1703 on generic vectors. We can vectorize the loop in that case,
1704 but then we should re-run the lowering pass. */
1705 if (vect_debug_details (NULL))
1706 fprintf (dump_file, "mode not supported by target.");
1707 return NULL_TREE;
1708 }
1709
1710 return vectype;
1711}
1712
1713
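/* A minimal worked example of the computation above, assuming a
   hypothetical target whose UNITS_PER_SIMD_WORD is 16 bytes:

     scalar_type = short   ->  nbytes = 2,  nunits = 16/2 = 8  ->  V8HI
     scalar_type = int     ->  nbytes = 4,  nunits = 16/4 = 4  ->  V4SI
     scalar_type = double  ->  nbytes = 8,  nunits = 16/8 = 2  ->  V2DF

   The resulting vector type is then checked against the target; if its
   mode is not a VECTOR_MODE_P, NULL_TREE is returned as shown above.  */
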
1714/* Function vect_align_data_ref.
1715
1716 Handle misalignment of a memory access.
1717
1718 FORNOW: Can't handle misaligned accesses.
1719 Make sure that the dataref is aligned. */
1720
1721static void
1722vect_align_data_ref (tree stmt)
1723{
1724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1725 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1726
1727 /* FORNOW: can't handle misaligned accesses;
1728 all accesses expected to be aligned. */
1729 gcc_assert (aligned_access_p (dr));
1730}
1731
1732
1733/* Function vect_create_data_ref_ptr.
1734
1735 Create a memory reference expression for vector access, to be used in a
1736 vector load/store stmt. The reference is based on a new pointer to vector
1737 type (vp).
1738
1739 Input:
1740 1. STMT: a stmt that references memory. Expected to be of the form
1741 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1742 2. BSI: block_stmt_iterator where new stmts can be added.
1743 3. OFFSET (optional): an offset to be added to the initial address accessed
1744 by the data-ref in STMT.
1745 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1746 pointing to the initial address.
1747
1748 Output:
1749 1. Declare a new ptr to vector_type, and have it point to the base of the
1750 data reference (initial address accessed by the data reference).
1751 For example, for vector of type V8HI, the following code is generated:
1752
1753 v8hi *vp;
1754 vp = (v8hi *)initial_address;
1755
1756 if OFFSET is not supplied:
1757 initial_address = &a[init];
1758 if OFFSET is supplied:
1759 initial_address = &a[init + OFFSET];
1760
1761 Return the initial_address in INITIAL_ADDRESS.
1762
1763 2. Create a data-reference in the loop based on the new vector pointer vp,
1764 and using a new index variable 'idx' as follows:
1765
1766 vp' = vp + update
1767
1768 where if ONLY_INIT is true:
1769 update = zero
1770 and otherwise
1771 update = idx + vector_type_size
1772
1773 Return the pointer vp'.
1774
1775
1776 FORNOW: handle only aligned and consecutive accesses. */
1777
1778static tree
1779vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1780 tree *initial_address, bool only_init)
1781{
1782 tree base_name;
1783 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1784 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1785 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1786 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1787 tree vect_ptr_type;
1788 tree vect_ptr;
1789 tree tag;
1790 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1791 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1792 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1793 int nvuses, nv_may_defs, nv_must_defs;
1794 int i;
1795 tree new_temp;
1796 tree vec_stmt;
1797 tree new_stmt_list = NULL_TREE;
1798 tree idx;
1799 edge pe = loop_preheader_edge (loop);
1800 basic_block new_bb;
1801 tree vect_ptr_init;
1802 tree vectype_size;
1803 tree ptr_update;
1804 tree data_ref_ptr;
1805
1806 base_name = unshare_expr (DR_BASE_NAME (dr));
1807 if (vect_debug_details (NULL))
1808 {
1809 tree data_ref_base = base_name;
1810 fprintf (dump_file, "create array_ref of type: ");
1811 print_generic_expr (dump_file, vectype, TDF_SLIM);
1812 if (TREE_CODE (data_ref_base) == VAR_DECL)
1813 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1814 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1815 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1816 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1817 fprintf (dump_file, "vectorizing a record based array ref: ");
1818 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1819 fprintf (dump_file, "vectorizing a pointer ref: ");
1820 print_generic_expr (dump_file, base_name, TDF_SLIM);
1821 }
1822
1823 /** (1) Create the new vector-pointer variable: **/
1824
1825 vect_ptr_type = build_pointer_type (vectype);
1826 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1827 get_name (base_name));
1828 add_referenced_tmp_var (vect_ptr);
1829
1830
1831 /** (2) Handle aliasing information of the new vector-pointer: **/
1832
1833 tag = STMT_VINFO_MEMTAG (stmt_info);
1834 gcc_assert (tag);
1835 get_var_ann (vect_ptr)->type_mem_tag = tag;
1836
1837 /* Mark for renaming all aliased variables
1838 (i.e., the may-aliases of the type-mem-tag). */
1839 nvuses = NUM_VUSES (vuses);
1840 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1841 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1842 for (i = 0; i < nvuses; i++)
1843 {
1844 tree use = VUSE_OP (vuses, i);
1845 if (TREE_CODE (use) == SSA_NAME)
1846 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1847 }
1848 for (i = 0; i < nv_may_defs; i++)
1849 {
1850 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1851 if (TREE_CODE (def) == SSA_NAME)
1852 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1853 }
1854 for (i = 0; i < nv_must_defs; i++)
1855 {
1856 tree def = V_MUST_DEF_OP (v_must_defs, i);
1857 if (TREE_CODE (def) == SSA_NAME)
1858 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1859 }
1860
1861
1862 /** (3) Calculate the initial address of the vector-pointer, and set
1863 the vector-pointer to point to it before the loop: **/
1864
1865 /* Create: &(base[init_val+offset]) in the loop preheader. */
1866 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1867 offset);
1868 pe = loop_preheader_edge (loop);
1869 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1870 gcc_assert (!new_bb);
1871 *initial_address = new_temp;
1872
1873 /* Create: p = (vectype *) initial_base */
1874 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1875 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1876 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1877 TREE_OPERAND (vec_stmt, 0) = new_temp;
1878 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1879 gcc_assert (!new_bb);
1880 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1881
1882
1883 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1884
1885 if (only_init) /* No update in loop is required. */
1886 return vect_ptr_init;
1887
1888 idx = vect_create_index_for_vector_ref (loop, bsi);
1889
1890 /* Create: update = idx * vectype_size */
1891 ptr_update = create_tmp_var (integer_type_node, "update");
1892 add_referenced_tmp_var (ptr_update);
1893 vectype_size = build_int_cst (integer_type_node,
1894 GET_MODE_SIZE (TYPE_MODE (vectype)));
1895 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1896 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1897 new_temp = make_ssa_name (ptr_update, vec_stmt);
1898 TREE_OPERAND (vec_stmt, 0) = new_temp;
1899 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1900
1901 /* Create: data_ref_ptr = vect_ptr_init + update */
1902 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1903 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1904 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1905 TREE_OPERAND (vec_stmt, 0) = new_temp;
1906 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1907 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1908
1909 return data_ref_ptr;
1910}
1911
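/* A sketch of the code generated by vect_create_data_ref_ptr for a simple
   aligned access 'a[i]' vectorized with a V8HI vector type (hypothetical
   names; the actual SSA names are created by the compiler):

     preheader:
       vect_pa.1 = (v8hi *) &a[0];        <-- steps (1)-(3) above

     loop:
       update.2 = idx.3 * 16;             <-- GET_MODE_SIZE (V8HImode)
       vect_pa.4 = vect_pa.1 + update.2;  <-- step (4), unless ONLY_INIT

   The returned pointer (vect_pa.4, or vect_pa.1 when ONLY_INIT) is later
   dereferenced by vectorizable_load/vectorizable_store.  */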
1912
1913/* Function vect_create_destination_var.
1914
1915 Create a new temporary of type VECTYPE. */
1916
1917static tree
1918vect_create_destination_var (tree scalar_dest, tree vectype)
1919{
1920 tree vec_dest;
1921 const char *new_name;
1922
1923 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1924
1925 new_name = get_name (scalar_dest);
1926 if (!new_name)
1927 new_name = "var_";
1928 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1929 add_referenced_tmp_var (vec_dest);
1930
1931 return vec_dest;
1932}
1933
1934
1935/* Function vect_init_vector.
1936
1937 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1938 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1939 used in the vectorization of STMT. */
1940
1941static tree
1942vect_init_vector (tree stmt, tree vector_var)
1943{
1944 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1945 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1946 tree new_var;
1947 tree init_stmt;
1948 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1949 tree vec_oprnd;
1950 edge pe;
1951 tree new_temp;
1952 basic_block new_bb;
1953
1954 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1955 add_referenced_tmp_var (new_var);
1956
1957 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1958 new_temp = make_ssa_name (new_var, init_stmt);
1959 TREE_OPERAND (init_stmt, 0) = new_temp;
1960
1961 pe = loop_preheader_edge (loop);
1962 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1963 gcc_assert (!new_bb);
1964
1965 if (vect_debug_details (NULL))
1966 {
1967 fprintf (dump_file, "created new init_stmt: ");
1968 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1969 }
1970
1971 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1972 return vec_oprnd;
1973}
1974
1975
1976/* Function vect_get_vec_def_for_operand.
1977
1978 OP is an operand in STMT. This function returns a (vector) def that will be
1979 used in the vectorized stmt for STMT.
1980
1981 In the case that OP is an SSA_NAME which is defined in the loop, then
1982 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1983
1984 In case OP is an invariant or constant, a new stmt that creates a vector def
1985 needs to be introduced. */
1986
1987static tree
1988vect_get_vec_def_for_operand (tree op, tree stmt)
1989{
1990 tree vec_oprnd;
1991 tree vec_stmt;
1992 tree def_stmt;
1993 stmt_vec_info def_stmt_info = NULL;
1994 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1995 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1996 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1997 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1998 basic_block bb;
1999 tree vec_inv;
2000 tree t = NULL_TREE;
2001 tree def;
2002 int i;
2003
2004 if (vect_debug_details (NULL))
2005 {
2006 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2007 print_generic_expr (dump_file, op, TDF_SLIM);
2008 }
2009
2010 /** ===> Case 1: operand is a constant. **/
2011
2012 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2013 {
2014 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2015
2016 tree vec_cst;
2017
2018 /* Build a tree with vector elements. */
2019 if (vect_debug_details (NULL))
2020 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2021
2022 for (i = nunits - 1; i >= 0; --i)
2023 {
2024 t = tree_cons (NULL_TREE, op, t);
2025 }
2026 vec_cst = build_vector (vectype, t);
2027 return vect_init_vector (stmt, vec_cst);
2028 }
2029
2030 gcc_assert (TREE_CODE (op) == SSA_NAME);
2031
2032 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2033
2034 def_stmt = SSA_NAME_DEF_STMT (op);
2035 def_stmt_info = vinfo_for_stmt (def_stmt);
2036
2037 if (vect_debug_details (NULL))
2038 {
2039 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2040 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2041 }
2042
2043
2044 /** ==> Case 2.1: operand is defined inside the loop. **/
2045
2046 if (def_stmt_info)
2047 {
2048 /* Get the def from the vectorized stmt. */
2049
2050 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2051 gcc_assert (vec_stmt);
2052 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2053 return vec_oprnd;
2054 }
2055
2056
2057 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2058 it is a reduction/induction. **/
2059
2060 bb = bb_for_stmt (def_stmt);
2061 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2062 {
2063 if (vect_debug_details (NULL))
2064 fprintf (dump_file, "reduction/induction - unsupported.");
2065 internal_error ("no support for reduction/induction"); /* FORNOW */
2066 }
2067
2068
2069 /** ==> Case 2.3: operand is defined outside the loop -
2070 it is a loop invariant. */
2071
2072 switch (TREE_CODE (def_stmt))
2073 {
2074 case PHI_NODE:
2075 def = PHI_RESULT (def_stmt);
2076 break;
2077 case MODIFY_EXPR:
2078 def = TREE_OPERAND (def_stmt, 0);
2079 break;
2080 case NOP_EXPR:
2081 def = TREE_OPERAND (def_stmt, 0);
2082 gcc_assert (IS_EMPTY_STMT (def_stmt));
2083 def = op;
2084 break;
2085 default:
2086 if (vect_debug_details (NULL))
2087 {
2088 fprintf (dump_file, "unsupported defining stmt: ");
2089 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2090 }
2091 internal_error ("unsupported defining stmt");
2092 }
2093
2094 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2095
2096 if (vect_debug_details (NULL))
2097 fprintf (dump_file, "Create vector_inv.");
2098
2099 for (i = nunits - 1; i >= 0; --i)
2100 {
2101 t = tree_cons (NULL_TREE, def, t);
2102 }
2103
2104 vec_inv = build_constructor (vectype, t);
2105 return vect_init_vector (stmt, vec_inv);
2106}
2107
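/* A sketch of what vect_get_vec_def_for_operand produces for the two
   out-of-loop cases, assuming a V4SI vector type (hypothetical names):

     scalar stmt:  a[i] = b[i] + 5;
     Case 1 (constant)    ->  vect_cst_.1 = {5,5,5,5};   (loop preheader)

     scalar stmt:  a[i] = b[i] + x;   with x defined before the loop
     Case 2.3 (invariant) ->  vec_inv.2 = {x,x,x,x};     (loop preheader)

   Both are emitted in the preheader through vect_init_vector; defs created
   inside the loop (case 2.1) simply reuse STMT_VINFO_VEC_STMT.  */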
2108
2109/* Function vect_finish_stmt_generation.
2110
2111 Insert a new stmt. */
2112
2113static void
2114vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2115{
2116 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2117
2118 if (vect_debug_details (NULL))
2119 {
2120 fprintf (dump_file, "add new stmt: ");
2121 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2122 }
2123
2124 /* Make sure bsi points to the stmt that is being vectorized. */
2125
2126 /* Assumption: any stmts created for the vectorization of stmt S were
2127 inserted before S. BSI is expected to point to S or some new stmt before S. */
2128
2129 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2130 bsi_next (bsi);
2131 gcc_assert (stmt == bsi_stmt (*bsi));
2132}
2133
2134
2135/* Function vectorizable_assignment.
2136
2137 Check if STMT performs an assignment (copy) that can be vectorized.
2138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2139 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2141
2142static bool
2143vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2144{
2145 tree vec_dest;
2146 tree scalar_dest;
2147 tree op;
2148 tree vec_oprnd;
2149 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2150 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2151 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2152 tree new_temp;
2153
2154 /* Is vectorizable assignment? */
2155
2156 if (TREE_CODE (stmt) != MODIFY_EXPR)
2157 return false;
2158
2159 scalar_dest = TREE_OPERAND (stmt, 0);
2160 if (TREE_CODE (scalar_dest) != SSA_NAME)
2161 return false;
2162
2163 op = TREE_OPERAND (stmt, 1);
2164 if (!vect_is_simple_use (op, loop, NULL))
2165 {
2166 if (vect_debug_details (NULL))
2167 fprintf (dump_file, "use not simple.");
2168 return false;
2169 }
2170
2171 if (!vec_stmt) /* transformation not required. */
2172 {
2173 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2174 return true;
2175 }
2176
2177 /** Transform. **/
2178 if (vect_debug_details (NULL))
2179 fprintf (dump_file, "transform assignment.");
2180
2181 /* Handle def. */
2182 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2183
2184 /* Handle use. */
2185 op = TREE_OPERAND (stmt, 1);
2186 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2187
2188 /* Arguments are ready.  Create the new vector stmt. */
2189 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2190 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2191 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2192 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2193
2194 return true;
2195}
2196
2197
2198/* Function vectorizable_operation.
2199
2200 Check if STMT performs a binary or unary operation that can be vectorized.
2201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2204
2205static bool
2206vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2207{
2208 tree vec_dest;
2209 tree scalar_dest;
2210 tree operation;
2211 tree op0, op1 = NULL;
2212 tree vec_oprnd0, vec_oprnd1 = NULL;
2213 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2214 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2215 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2216 int i;
2217 enum tree_code code;
2218 enum machine_mode vec_mode;
2219 tree new_temp;
2220 int op_type;
2221 tree op;
2222 optab optab;
2223
2224 /* Is STMT a vectorizable binary/unary operation? */
2225 if (TREE_CODE (stmt) != MODIFY_EXPR)
2226 return false;
2227
2228 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2229 return false;
2230
2231 operation = TREE_OPERAND (stmt, 1);
2232 code = TREE_CODE (operation);
2233 optab = optab_for_tree_code (code, vectype);
2234
2235 /* Support only unary or binary operations. */
2236 op_type = TREE_CODE_LENGTH (code);
2237 if (op_type != unary_op && op_type != binary_op)
2238 {
2239 if (vect_debug_details (NULL))
2240 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2241 return false;
2242 }
2243
2244 for (i = 0; i < op_type; i++)
2245 {
2246 op = TREE_OPERAND (operation, i);
2247 if (!vect_is_simple_use (op, loop, NULL))
2248 {
2249 if (vect_debug_details (NULL))
2250 fprintf (dump_file, "use not simple.");
2251 return false;
2252 }
2253 }
2254
2255 /* Supportable by target? */
2256 if (!optab)
2257 {
2258 if (vect_debug_details (NULL))
2259 fprintf (dump_file, "no optab.");
2260 return false;
2261 }
2262 vec_mode = TYPE_MODE (vectype);
2263 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2264 {
2265 if (vect_debug_details (NULL))
2266 fprintf (dump_file, "op not supported by target.");
2267 return false;
2268 }
2269
2270 if (!vec_stmt) /* transformation not required. */
2271 {
2272 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2273 return true;
2274 }
2275
2276 /** Transform. **/
2277
2278 if (vect_debug_details (NULL))
2279 fprintf (dump_file, "transform binary/unary operation.");
2280
2281 /* Handle def. */
2282 scalar_dest = TREE_OPERAND (stmt, 0);
2283 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2284
2285 /* Handle uses. */
2286 op0 = TREE_OPERAND (operation, 0);
2287 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2288
2289 if (op_type == binary_op)
2290 {
2291 op1 = TREE_OPERAND (operation, 1);
2292 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2293 }
2294
2295 /* Arguments are ready.  Create the new vector stmt. */
2296
2297 if (op_type == binary_op)
2298 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2299 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2300 else
2301 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2302 build1 (code, vectype, vec_oprnd0));
2303 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2304 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2305 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2306
2307 return true;
2308}
2309
2310
2311/* Function vectorizable_store.
2312
2313 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
2314 can be vectorized.
2315 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2316 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2317 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2318
2319static bool
2320vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2321{
2322 tree scalar_dest;
2323 tree data_ref;
2324 tree op;
2325 tree vec_oprnd1;
2326 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2327 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2328 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2329 enum machine_mode vec_mode;
2330 tree dummy;
2331
2332 /* Is vectorizable store? */
2333
2334 if (TREE_CODE (stmt) != MODIFY_EXPR)
2335 return false;
2336
2337 scalar_dest = TREE_OPERAND (stmt, 0);
2338 if (TREE_CODE (scalar_dest) != ARRAY_REF
2339 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2340 return false;
2341
2342 op = TREE_OPERAND (stmt, 1);
2343 if (!vect_is_simple_use (op, loop, NULL))
2344 {
2345 if (vect_debug_details (NULL))
2346 fprintf (dump_file, "use not simple.");
2347 return false;
2348 }
2349
2350 vec_mode = TYPE_MODE (vectype);
2351 /* FORNOW. In some cases can vectorize even if data-type not supported
2352 (e.g. - array initialization with 0). */
2353 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2354 return false;
2355
2356 if (!STMT_VINFO_DATA_REF (stmt_info))
2357 return false;
2358
2359
2360 if (!vec_stmt) /* transformation not required. */
2361 {
2362 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2363 return true;
2364 }
2365
2366 /** Transform. **/
2367
2368 if (vect_debug_details (NULL))
2369 fprintf (dump_file, "transform store");
2370
2371 /* Handle use - get the vectorized def from the defining stmt. */
2372 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2373
2374 /* Handle def. */
2375 /* FORNOW: make sure the data reference is aligned. */
2376 vect_align_data_ref (stmt);
2377 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2378 data_ref = build_fold_indirect_ref (data_ref);
2379
2380 /* Arguments are ready.  Create the new vector stmt. */
2381 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2382 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2383
2384 return true;
2385}
2386
2387
2388/* vectorizable_load.
2389
2390 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
2391 can be vectorized.
2392 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2393 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2394 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2395
2396static bool
2397vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2398{
2399 tree scalar_dest;
2400 tree vec_dest = NULL;
2401 tree data_ref = NULL;
2402 tree op;
2403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2406 tree new_temp;
2407 int mode;
2408 tree init_addr;
2409 tree new_stmt;
2410 tree dummy;
2411 basic_block new_bb;
2412 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2413 edge pe = loop_preheader_edge (loop);
2414 bool software_pipeline_loads_p = false;
2415
2416 /* Is vectorizable load? */
2417
2418 if (TREE_CODE (stmt) != MODIFY_EXPR)
2419 return false;
2420
2421 scalar_dest = TREE_OPERAND (stmt, 0);
2422 if (TREE_CODE (scalar_dest) != SSA_NAME)
2423 return false;
2424
2425 op = TREE_OPERAND (stmt, 1);
2426 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2427 return false;
2428
2429 if (!STMT_VINFO_DATA_REF (stmt_info))
2430 return false;
2431
2432 mode = (int) TYPE_MODE (vectype);
2433
2434 /* FORNOW. In some cases can vectorize even if data-type not supported
2435 (e.g. - data copies). */
2436 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2437 {
2438 if (vect_debug_details (loop))
2439 fprintf (dump_file, "Aligned load, but unsupported type.");
2440 return false;
2441 }
2442
2443 if (!aligned_access_p (dr))
2444 {
2445 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2446 && (!targetm.vectorize.builtin_mask_for_load
2447 || targetm.vectorize.builtin_mask_for_load ()))
2448 software_pipeline_loads_p = true;
2449 else if (!targetm.vectorize.misaligned_mem_ok (mode))
2450 {
2451 /* Possibly unaligned access, and can't software pipeline the loads.
2452 */
2453 if (vect_debug_details (loop))
2454 fprintf (dump_file, "Arbitrary load not supported.");
2455 return false;
2456 }
2457 }
2458
2459 if (!vec_stmt) /* transformation not required. */
2460 {
2461 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2462 return true;
2463 }
2464
2465 /** Transform. **/
2466
2467 if (vect_debug_details (NULL))
2468 fprintf (dump_file, "transform load.");
2469
2470 if (!software_pipeline_loads_p)
2471 {
2472 /* Create:
2473 p = initial_addr;
2474 indx = 0;
2475 loop {
2476 vec_dest = *(p);
2477 indx = indx + 1;
2478 }
2479 */
2480
2481 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2482 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2483 if (aligned_access_p (dr))
2484 data_ref = build_fold_indirect_ref (data_ref);
2485 else
2486 {
2487 int mis = DR_MISALIGNMENT (dr);
2488 tree tmis = (mis == -1 ?
2489 integer_zero_node :
2490 build_int_cst (integer_type_node, mis));
2491 tmis = int_const_binop (MULT_EXPR, tmis,
2492 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2493 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2494 }
2495 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2496 new_temp = make_ssa_name (vec_dest, new_stmt);
2497 TREE_OPERAND (new_stmt, 0) = new_temp;
2498 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2499 }
2500 else /* software-pipeline the loads */
2501 {
2502 /* Create:
2503 p1 = initial_addr;
2504 msq_init = *(floor(p1))
2505 p2 = initial_addr + VS - 1;
2506 magic = have_builtin ? builtin_result : initial_address;
2507 indx = 0;
2508 loop {
2509 p2' = p2 + indx * vectype_size
2510 lsq = *(floor(p2'))
2511 vec_dest = realign_load (msq, lsq, magic)
2512 indx = indx + 1;
2513 msq = lsq;
2514 }
2515 */
2516
2517 tree offset;
2518 tree magic;
2519 tree phi_stmt;
2520 tree msq_init;
2521 tree msq, lsq;
2522 tree dataref_ptr;
2523 tree params;
2524
2525 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2526 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2527 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2528 &init_addr, true);
2529 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2530 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2531 new_temp = make_ssa_name (vec_dest, new_stmt);
2532 TREE_OPERAND (new_stmt, 0) = new_temp;
2533 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2534 gcc_assert (!new_bb);
2535 msq_init = TREE_OPERAND (new_stmt, 0);
2536
2537
2538 /* <2> Create lsq = *(floor(p2')) in the loop */
2539 offset = build_int_cst (integer_type_node,
2540 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2541 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2542 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2543 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2544 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2545 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2546 new_temp = make_ssa_name (vec_dest, new_stmt);
2547 TREE_OPERAND (new_stmt, 0) = new_temp;
2548 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2549 lsq = TREE_OPERAND (new_stmt, 0);
2550
2551
2552 /* <3> */
2553 if (targetm.vectorize.builtin_mask_for_load)
2554 {
2555 /* Create permutation mask, if required, in loop preheader. */
2556 tree builtin_decl;
2557 params = build_tree_list (NULL_TREE, init_addr);
2558 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2559 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2560 new_stmt = build_function_call_expr (builtin_decl, params);
2561 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2562 new_temp = make_ssa_name (vec_dest, new_stmt);
2563 TREE_OPERAND (new_stmt, 0) = new_temp;
2564 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2565 gcc_assert (!new_bb);
2566 magic = TREE_OPERAND (new_stmt, 0);
2567 }
2568 else
2569 {
2570 /* Use current address instead of init_addr for reduced reg pressure.
2571 */
2572 magic = dataref_ptr;
2573 }
2574
2575
2576 /* <4> Create msq = phi <msq_init, lsq> in loop */
2577 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2578 msq = make_ssa_name (vec_dest, NULL_TREE);
2579 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2580 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2581 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2582 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2583
2584
2585 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2587 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2588 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2589 new_temp = make_ssa_name (vec_dest, new_stmt);
2590 TREE_OPERAND (new_stmt, 0) = new_temp;
2591 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2592 }
2593
2594 *vec_stmt = new_stmt;
2595 return true;
2596}
2597
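/* A worked sketch of the misaligned-load scheme above, assuming 16-byte
   vectors and an access that starts at a hypothetical address 0x1008
   (i.e. misaligned by 8 bytes):

     msq = *(0x1000)                      aligned load of the "low" part
     lsq = *(0x1010)                      aligned load of the "high" part
     vec_dest = realign_load (msq, lsq, magic)
                                          selects bytes 8..15 of msq and
                                          bytes 0..7 of lsq (using the
                                          target mask when one exists)

   Inside the loop only one new aligned load is issued per iteration,
   because the previous lsq becomes the next msq through the phi created
   in step <4>.  */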
2598
2599/* Function vect_transform_stmt.
2600
2601 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2602
2603static bool
2604vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2605{
2606 bool is_store = false;
2607 tree vec_stmt = NULL_TREE;
2608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2609 bool done;
2610
2611 switch (STMT_VINFO_TYPE (stmt_info))
2612 {
2613 case op_vec_info_type:
2614 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2615 gcc_assert (done);
2616 break;
2617
2618 case assignment_vec_info_type:
2619 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2620 gcc_assert (done);
2621 break;
2622
2623 case load_vec_info_type:
2624 done = vectorizable_load (stmt, bsi, &vec_stmt);
2625 gcc_assert (done);
2626 break;
2627
2628 case store_vec_info_type:
2629 done = vectorizable_store (stmt, bsi, &vec_stmt);
2630 gcc_assert (done);
2631 is_store = true;
2632 break;
2633 default:
2634 if (vect_debug_details (NULL))
2635 fprintf (dump_file, "stmt not supported.");
2636 gcc_unreachable ();
2637 }
2638
2639 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2640
2641 return is_store;
2642}
2643
2644
2645/* This function builds, on the loop preheader, ni_name = the number of
2646 iterations the loop executes. */
2647
2648static tree
2649vect_build_loop_niters (loop_vec_info loop_vinfo)
2650{
2651 tree ni_name, stmt, var;
2652 edge pe;
2653 basic_block new_bb;
2654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2655 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2656
2657 var = create_tmp_var (TREE_TYPE (ni), "niters");
2658 add_referenced_tmp_var (var);
2659 if (TREE_CODE (ni) == INTEGER_CST)
2660 {
2661 /* This case is generated when treating a known loop bound
2662 indivisible by VF. Here we cannot use force_gimple_operand. */
2663 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2664 ni_name = make_ssa_name (var, stmt);
2665 TREE_OPERAND (stmt, 0) = ni_name;
2666 }
2667 else
2668 ni_name = force_gimple_operand (ni, &stmt, false, var);
2669
2670 pe = loop_preheader_edge (loop);
2671 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2672 if (new_bb)
2673 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2674
2675 return ni_name;
2676}
2677
2678
2679/* This function generates the following statements:
2680
2681 ni_name = number of iterations loop executes
2682 ratio = ni_name / vf
2683 ratio_mult_vf_name = ratio * vf
2684
2685 and places them at the loop preheader edge. */
2686
2687static void
2688vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2689 tree *ratio_mult_vf_name_p, tree *ratio_p)
2690{
2691
2692 edge pe;
2693 basic_block new_bb;
2694 tree stmt, ni_name;
2695 tree ratio;
2696 tree ratio_mult_vf_name, ratio_mult_vf;
2697 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2698 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2699
2700 int vf, i;
2701
2702 /* Generate temporary variable that contains
2703 number of iterations loop executes. */
2704
2705 ni_name = vect_build_loop_niters (loop_vinfo);
2706
2707 /* ratio = ni / vf.
2708 vf is a power of 2, so ratio = ni >> log2 (vf). */
2709 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2710 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2711
2712 /* Update initial conditions of loop copy. */
2713
2714 /* ratio_mult_vf = ratio * vf;
2715 since vf is a power of 2, ratio_mult_vf = ratio << log2 (vf). */
2716
2717 i = exact_log2 (vf);
2718 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2719 add_referenced_tmp_var (ratio_mult_vf);
2720
2721 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2722
2723 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2724 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2725 ratio, build_int_cst (unsigned_type_node,
2726 i)));
2727
2728 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2729
2730 pe = loop_preheader_edge (loop);
2731 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2732 if (new_bb)
2733 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2734
2735 *ni_name_p = ni_name;
2736 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2737 *ratio_p = ratio;
2738
2739 return;
2740}
2741
2742
2743/* This function generates stmt
2744
2745 tmp = n / vf;
2746
2747 and attaches it to the preheader of LOOP. */
2748
2749static tree
2750vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2751{
2752 tree var, stmt, var_name;
2753 edge pe;
2754 basic_block new_bb;
2755 int i;
2756
2757 /* create temporary variable */
2758 var = create_tmp_var (TREE_TYPE (n), "bnd");
2759 add_referenced_tmp_var (var);
2760
2761 var_name = make_ssa_name (var, NULL_TREE);
2762
2763 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2764
2765 i = exact_log2 (vf);
2766 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2767 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2768 n, build_int_cst (unsigned_type_node,i)));
2769
2770 SSA_NAME_DEF_STMT (var_name) = stmt;
2771
2772 pe = loop_preheader_edge (loop);
2773 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2774 if (new_bb)
2775 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2776 else
2777 if (vect_debug_details (NULL))
2778 fprintf (dump_file, "New bb on preheader edge was not generated.");
2779
2780 return var_name;
2781}
2782
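/* A small self-contained sketch (not part of the compiler, shown only to
   illustrate the shift trick used by vect_build_symbol_bound and
   vect_generate_tmps_on_preheader), assuming vf is a power of two:  */
#if 0
#include <assert.h>

static void
example_niters_split (void)
{
  unsigned int niters = 1003;            /* symbolic 'n' at runtime */
  unsigned int log2_vf = 2;              /* vf = 4 */
  unsigned int ratio = niters >> log2_vf;          /* n / vf  = 250  */
  unsigned int ratio_mult_vf = ratio << log2_vf;   /* 250 * 4 = 1000 */

  /* The vector loop runs 'ratio' times; the scalar epilog loop picks up
     the remaining niters - ratio_mult_vf = 3 iterations.  */
  assert (ratio == 250 && ratio_mult_vf == 1000);
}
#endif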
2783
2784/* Function vect_transform_loop_bound.
2785
2786 Create a new exit condition for the loop. */
2787
2788static void
2789vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2790{
2791 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2792 edge exit_edge = loop->single_exit;
2793 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
2794 tree indx_before_incr, indx_after_incr;
2795 tree orig_cond_expr;
2796 HOST_WIDE_INT old_N = 0;
2797 int vf;
2798 tree cond_stmt;
2799 tree new_loop_bound;
2800 bool symbol_niters;
2801 tree cond;
2802 tree lb_type;
2803
2804 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2805
2806 if (!symbol_niters)
2807 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2808
2809 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2810
2811 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2812#ifdef ENABLE_CHECKING
2813 gcc_assert (orig_cond_expr);
2814#endif
2815 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
2816
2817 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
2818 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
2819
2820 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
2821 to point to the exit condition. */
2822 bsi_next (&loop_exit_bsi);
2823 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
2824
2825 /* new loop exit test: */
2826 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
2827 if (!symbol_niters)
2828 new_loop_bound = fold_convert (lb_type,
2829 build_int_cst (unsigned_type_node,
2830 old_N/vf));
2831 else
2832 new_loop_bound = niters;
2833
2834 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
2835 cond = build2 (GE_EXPR, boolean_type_node,
2836 indx_after_incr, new_loop_bound);
2837 else /* 'then' edge loops back. */
2838 cond = build2 (LT_EXPR, boolean_type_node,
2839 indx_after_incr, new_loop_bound);
2840
2841 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
2842 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
2843
2844 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
2845
2846 /* remove old loop exit test: */
2847 bsi_remove (&loop_exit_bsi);
2848
2849 if (vect_debug_details (NULL))
2850 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
2851}
2852
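/* A worked example of the exit condition created above, assuming a loop
   with a compile-time iteration count old_N = 1000 and a vectorization
   factor vf = 4:

     new_loop_bound = old_N / vf = 250
     if (indx_after_incr >= 250) exit;    (or '<' when the 'then' edge
                                           loops back)

   With a symbolic iteration count, NITERS (the ratio computed on the
   preheader) is used directly as the new bound.  */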
2853
2854/* Advance the IVs of the loop (which will be vectorized later) to their
2855 correct position.
2856
2857 When a loop is vectorized, its IVs are not always advanced correctly,
2858 since vectorization changes the loop count. This is fine when no epilog
2859 loop was produced from the original loop before the vectorization process
2860 (the vectorizer checks that there are no uses of the IVs after the loop).
2861 However, when an epilog loop has been peeled, the IVs of the original
2862 loop are used in the epilog loop and must be advanced correctly.
2863
2864 Here we use the access functions of the IVs and the number of iterations
2865 the loop executes in order to bring the IVs to their correct position.
2866
2867 This function also updates the phis of the basic block at the exit
2868 from the loop. */
2869
2870static void
2871vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2872{
2873 edge exit = loop->exit_edges[0];
2874 tree phi;
2875 edge latch = loop_latch_edge (loop);
2876
2877 /* Generate basic block at the exit from the loop. */
2878 basic_block new_bb = split_edge (exit);
2879 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2880
2881 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2882
2883 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
2884 {
2885 tree access_fn = NULL;
2886 tree evolution_part;
2887 tree init_expr;
2888 tree step_expr;
2889 tree var, stmt, ni, ni_name;
2890 int i, j, num_elem1, num_elem2;
2891 tree phi1;
2892 block_stmt_iterator last_bsi;
2893
2894 /* Skip virtual phi's. The data dependences that are associated with
2895 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2896
2897 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2898 {
2899 if (vect_debug_details (NULL))
2900 fprintf (dump_file, "virtual phi. skip.");
2901 continue;
2902 }
2903
2904 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2905
2906 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2907
2908 /* FORNOW: We do not transform initial conditions of IVs
2909 whose evolution function is a polynomial of degree >= 2 or
2910 exponential. */
2911
2912 step_expr = evolution_part;
2913 init_expr = initial_condition (access_fn);
2914
2915 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2916 build2 (MULT_EXPR, TREE_TYPE (niters),
2917 niters, step_expr), init_expr);
2918
2919 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2920 add_referenced_tmp_var (var);
2921
2922 ni_name = force_gimple_operand (ni, &stmt, false, var);
2923
2924 /* Insert stmt into new_bb. */
2925 last_bsi = bsi_last (new_bb);
2926 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2927
2928 /* Fix phi expressions in duplicated loop. */
2929 num_elem1 = PHI_NUM_ARGS (phi);
2930 for (i = 0; i < num_elem1; i++)
2931 if (PHI_ARG_EDGE (phi, i) == latch)
2932 {
2933 tree def = PHI_ARG_DEF (phi, i);
2934
2935 for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1;
2936 phi1 = TREE_CHAIN (phi1))
2937 {
2938 num_elem2 = PHI_NUM_ARGS (phi1);
2939 for (j = 0; j < num_elem2; j++)
2940 if (PHI_ARG_DEF (phi1, j) == def)
2941 {
2942 SET_PHI_ARG_DEF (phi1, j, ni_name);
2943 PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);
2944 break;
2945 }
2946 }
2947 break;
2948 }
2949 }
2950
2951}
2952
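/* A worked example of the IV update performed above, for a hypothetical
   IV with access function {init, +, step} = {0, +, 4} (e.g. a byte offset
   advancing by 4 each iteration), after niters = 100 iterations were
   executed by the vectorized loop:

     ni      = init + niters * step = 0 + 100 * 4 = 400
     ni_name = force_gimple_operand (ni, ...)

   ni_name is then used as the value of the IV on entry to the epilog loop,
   by patching the corresponding phi argument as done above.  */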
2953
2954/* This function is the main driver of the transformations to be done
2955 to the loop before vectorizing it, in the case of an unknown
2956 loop bound. */
2957
2958static void
2959vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
2960 struct loops *loops)
2961{
2962
2963 tree ni_name, ratio_mult_vf_name;
2964#ifdef ENABLE_CHECKING
2965 int loop_num;
2966#endif
2967 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2968 struct loop *new_loop;
2969
2970 if (vect_debug_details (NULL))
2971 fprintf (dump_file, "\n<<vect_transform_for_unknown_loop_bound>>\n");
2972
2973 /* Generate the following variables on the preheader of original loop:
2974
2975 ni_name = number of iteration the original loop executes
2976 ratio = ni_name / vf
2977 ratio_mult_vf_name = ratio * vf */
2978 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2979 &ratio_mult_vf_name, ratio);
2980
2981 /* Update loop info. */
2982 loop->pre_header = loop_preheader_edge (loop)->src;
2983 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2984
2985#ifdef ENABLE_CHECKING
2986 loop_num = loop->num;
2987#endif
2988 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
2989 ratio_mult_vf_name, ni_name, true);
2990#ifdef ENABLE_CHECKING
2991 gcc_assert (new_loop);
2992 gcc_assert (loop_num == loop->num);
2993#endif
2994
2995 /* Update IVs of original loop as if they were advanced
2996 by ratio_mult_vf_name steps. */
2997
2998#ifdef ENABLE_CHECKING
2999 /* Check existence of intermediate bb. */
3000 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3001#endif
3002 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3003
3004 return;
3005
3006}
3007
3008
3009/* Function vect_gen_niters_for_prolog_loop
3010
3011 Set the number of iterations for the loop represented by LOOP_VINFO
3012 to the minimum between NITERS (the original iteration count of the loop)
3013 and the misalignment of DR - the first data reference in the list
3014 LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this
3015 loop, the data reference DR will refer to an aligned location. */
3016
3017static tree
3018vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3019{
3020 struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
3021 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3022 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3023 tree var, stmt;
3024 tree iters, iters_name;
3025 edge pe;
3026 basic_block new_bb;
3027 tree dr_stmt = DR_STMT (dr);
3028 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3029 tree start_addr, byte_miss_align, elem_miss_align;
3030 int vec_type_align =
3031 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3032 / BITS_PER_UNIT;
3033 tree tmp1, tmp2;
3034 tree new_stmt_list = NULL_TREE;
3035
3036 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3037 &new_stmt_list, NULL_TREE);
3038
3039 pe = loop_preheader_edge (loop);
3040 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3041 if (new_bb)
3042 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3043
3044 byte_miss_align =
3045 build (BIT_AND_EXPR, integer_type_node, start_addr,
3046 build (MINUS_EXPR, integer_type_node,
3047 build_int_cst (unsigned_type_node,
3048 vec_type_align), integer_one_node));
3049 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3050 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3051 byte_miss_align, tmp1);
3052
3053 tmp2 =
3054 build (BIT_AND_EXPR, integer_type_node,
3055 build (MINUS_EXPR, integer_type_node,
3056 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3057 build (MINUS_EXPR, integer_type_node,
3058 build_int_cst (unsigned_type_node, vf), integer_one_node));
3059
3060 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3061 var = create_tmp_var (TREE_TYPE (iters), "iters");
3062 add_referenced_tmp_var (var);
3063 iters_name = force_gimple_operand (iters, &stmt, false, var);
3064
3065 /* Insert stmt on loop preheader edge. */
3066 pe = loop_preheader_edge (loop);
3067 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3068 if (new_bb)
3069 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3070
3071 return iters_name;
3072}
3073
3074
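/* A worked example of the expression built above, assuming a hypothetical
   data reference whose start address is 0x1008, a 16-byte vector type
   (vec_type_align = 16) and vf = 4 (so each element is 4 bytes):

     byte_miss_align = 0x1008 & (16 - 1)   = 8
     elem_miss_align = 8 / (16 / 4)        = 2
     iters           = (4 - 2) & (4 - 1)   = 2

   i.e. the prolog loop executes 2 scalar iterations (capped by the
   MIN_EXPR with the original loop count), after which the access is
   16-byte aligned.  */
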
3075/* Function vect_update_niters_after_peeling
3076
3077 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3078 The new number of iterations is therefore original_niters - NITERS.
3079 Record the new number of iterations in LOOP_VINFO. */
3080
3081static void
3082vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3083{
3084 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3085 LOOP_VINFO_NITERS (loop_vinfo) =
3086 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3087}
3088
3089
3090/* Function vect_update_inits_of_dr
3091
3092 NITERS iterations were peeled from LOOP. DR represents a data reference
3093 in LOOP. This function updates the information recorded in DR to
3094 account for the fact that the first NITERS iterations had already been
3095 executed. Specifically, it updates the initial_condition of the
3096 access_function of DR. */
3097
3098static void
3099vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3100 tree niters)
3101{
3102 tree access_fn = DR_ACCESS_FN (dr, 0);
3103 tree init, init_new, step;
3104
3105 step = evolution_part_in_loop_num (access_fn, loop->num);
3106 init = initial_condition (access_fn);
3107
3108 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3109 build (MULT_EXPR, TREE_TYPE (niters),
3110 niters, step), init);
3111 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3112
3113 return;
3114}
3115
3116
3117/* Function vect_update_inits_of_drs
3118
3119 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3120 This function updates the information recorded for the data references in
3121 the loop to account for the fact that the first NITERS iterations had
3122 already been executed. Specifically, it updates the initial_condition of the
3123 access_function of all the data_references in the loop. */
3124
3125static void
3126vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3127{
3128 unsigned int i;
3129 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3130 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3132
3133 if (dump_file && (dump_flags & TDF_DETAILS))
3134 fprintf (dump_file, "\n<<vect_update_inits_of_drs>>\n");
3135
3136 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3137 {
3138 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3139 vect_update_inits_of_dr (dr, loop, niters);
3140 }
3141
3142 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3143 {
3144 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3145 vect_update_inits_of_dr (dr, loop, niters);
3146 DR_MISALIGNMENT (dr) = -1;
3147 }
3148}
3149
3150
3151/* Function vect_do_peeling_for_alignment
3152
3153 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3154 'niters' is set to the misalignment of one of the data references in the
3155 loop, thereby forcing it to refer to an aligned location at the beginning
3156 of the execution of this loop. The data reference for which we are
3157 peeling is chosen from LOOP_UNALIGNED_DR. */
3158
3159static void
3160vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3161{
3162 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3163 tree niters_of_prolog_loop, ni_name;
3164 struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
3165
3166 if (vect_debug_details (NULL))
3167 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3168
3169 ni_name = vect_build_loop_niters (loop_vinfo);
3170 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3171
3172
3173 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
3174 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3175 niters_of_prolog_loop, ni_name, false);
3176
3177
3178 /* Update the stmt info of the data reference for which we peeled. */
3179 DR_MISALIGNMENT (dr) = 0;
3180
3181 /* Update number of times loop executes. */
3182 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3183
3184 /* Update all inits of access functions of all data refs. */
3185 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3186
3187 /* After peeling we have to reset scalar evolution analyzer. */
3188 scev_reset ();
3189
3190 return;
3191}
3192
3193
3194/* Function vect_transform_loop.
3195
3196 The analysis phase has determined that the loop is vectorizable.
3197 Vectorize the loop - create vectorized stmts to replace the scalar
3198 stmts in the loop, and update the loop exit condition. */
3199
3200static void
3201vect_transform_loop (loop_vec_info loop_vinfo,
3202 struct loops *loops ATTRIBUTE_UNUSED)
3203{
3204 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3205 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3206 int nbbs = loop->num_nodes;
3207 block_stmt_iterator si;
3208 int i;
3209 tree ratio = NULL;
3210 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3211
3212 if (vect_debug_details (NULL))
3213 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3214
3215
3216 /* Peel the loop if there are data refs with unknown alignment.
3217 Only one store data ref with unknown alignment is allowed. */
3218
3219
3220 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3221 vect_do_peeling_for_alignment (loop_vinfo, loops);
3222
3223 /* If the loop has a symbolic number of iterations 'n'
3224 (i.e. it's not a compile time constant),
3225 then an epilog loop needs to be created. We therefore duplicate
3226 the initial loop. The original loop will be vectorized, and will compute
3227 the first (n/VF) iterations. The second copy of the loop will remain
3228 serial and will compute the remaining (n%VF) iterations.
3229 (VF is the vectorization factor). */
3230
3231 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3232 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3233
3234 /* FORNOW: we'll treat the case where niters is constant and
3235
3236 niters % vf != 0
3237
3238 in a way similar to the case of symbolic niters.
3239 For this we generate a variable whose value is equal to niters. */
3240
3241 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3242 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3243 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3244
3245
3246 /* 1) Make sure the loop header has exactly two entries
3247 2) Make sure we have a preheader basic block. */
3248
3249 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3250
3251 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3252
3253
3254 /* FORNOW: the vectorizer supports only loops whose body consists
3255 of one basic block (header + empty latch). When the vectorizer
3256 supports more involved loop forms, the order by which the BBs are
3257 traversed needs to be reconsidered. */
3258
3259 for (i = 0; i < nbbs; i++)
3260 {
3261 basic_block bb = bbs[i];
3262
3263 for (si = bsi_start (bb); !bsi_end_p (si);)
3264 {
3265 tree stmt = bsi_stmt (si);
3266 stmt_vec_info stmt_info;
3267 bool is_store;
3268
3269 if (vect_debug_details (NULL))
3270 {
3271 fprintf (dump_file, "------>vectorizing statement: ");
3272 print_generic_expr (dump_file, stmt, TDF_SLIM);
3273 }
3274 stmt_info = vinfo_for_stmt (stmt);
3275 gcc_assert (stmt_info);
3276 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3277 {
3278 bsi_next (&si);
3279 continue;
3280 }
3281#ifdef ENABLE_CHECKING
3282 /* FORNOW: Verify that all stmts operate on the same number of
3283 units and no inner unrolling is necessary. */
3284 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3285 == vectorization_factor);
3286#endif
3287 /* -------- vectorize statement ------------ */
3288 if (vect_debug_details (NULL))
3289 fprintf (dump_file, "transform statement.");
3290
3291 is_store = vect_transform_stmt (stmt, &si);
3292 if (is_store)
3293 {
3294 /* free the attached stmt_vec_info and remove the stmt. */
3295 stmt_ann_t ann = stmt_ann (stmt);
3296 free (stmt_info);
3297 set_stmt_info (ann, NULL);
3298 bsi_remove (&si);
3299 continue;
3300 }
3301
3302 bsi_next (&si);
3303 } /* stmts in BB */
3304 } /* BBs in loop */
3305
3306 vect_transform_loop_bound (loop_vinfo, ratio);
3307
3308 if (vect_debug_details (loop))
3309 fprintf (dump_file, "Success! loop vectorized.");
3310 if (vect_debug_stats (loop))
3311 fprintf (dump_file, "LOOP VECTORIZED.");
3312}
3313
3314
3315/* Function vect_is_simple_use.
3316
3317 Input:
3318 LOOP - the loop that is being vectorized.
3319 OPERAND - operand of a stmt in LOOP.
3320 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3321
3322 Returns whether a stmt with OPERAND can be vectorized.
3323 Supportable operands are constants, loop invariants, and operands that are
3324 defined by the current iteration of the loop. Unsupportable operands are
3325 those that are defined by a previous iteration of the loop (as is the case
3326 in reduction/induction computations). */
3327
3328static bool
3329vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3330{
3331 tree def_stmt;
3332 basic_block bb;
3333
3334 if (def)
3335 *def = NULL_TREE;
3336
3337 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3338 return true;
3339
3340 if (TREE_CODE (operand) != SSA_NAME)
3341 return false;
3342
3343 def_stmt = SSA_NAME_DEF_STMT (operand);
3344 if (def_stmt == NULL_TREE)
3345 {
3346 if (vect_debug_details (NULL))
3347 fprintf (dump_file, "no def_stmt.");
3348 return false;
3349 }
3350
3351 /* empty stmt is expected only in case of a function argument.
3352 (Otherwise - we expect a phi_node or a modify_expr). */
3353 if (IS_EMPTY_STMT (def_stmt))
3354 {
3355 tree arg = TREE_OPERAND (def_stmt, 0);
3356 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3357 return true;
3358 if (vect_debug_details (NULL))
3359 {
3360 fprintf (dump_file, "Unexpected empty stmt: ");
3361 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3362 }
3363 return false;
3364 }
3365
3366 /* phi_node inside the loop indicates an induction/reduction pattern.
3367 This is not supported yet. */
3368 bb = bb_for_stmt (def_stmt);
3369 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3370 {
3371 if (vect_debug_details (NULL))
3372 fprintf (dump_file, "reduction/induction - unsupported.");
3373 return false; /* FORNOW: not supported yet. */
3374 }
3375
3376 /* Expecting a modify_expr or a phi_node. */
3377 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3378 || TREE_CODE (def_stmt) == PHI_NODE)
3379 {
3380 if (def)
3381 *def = def_stmt;
3382 return true;
3383 }
3384
3385 return false;
3386}
3387
3388
3389/* Function vect_analyze_operations.
3390
3391 Scan the loop stmts and make sure they are all vectorizable. */
3392
3393static bool
3394vect_analyze_operations (loop_vec_info loop_vinfo)
3395{
3396 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3397 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3398 int nbbs = loop->num_nodes;
3399 block_stmt_iterator si;
3400 int vectorization_factor = 0;
3401 int i;
3402 bool ok;
3403 tree scalar_type;
3404
3405 if (vect_debug_details (NULL))
3406 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3407
3408 for (i = 0; i < nbbs; i++)
3409 {
3410 basic_block bb = bbs[i];
3411
3412 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3413 {
3414 tree stmt = bsi_stmt (si);
3415 int nunits;
3416 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3417 tree vectype;
3418
3419 if (vect_debug_details (NULL))
3420 {
3421 fprintf (dump_file, "==> examining statement: ");
3422 print_generic_expr (dump_file, stmt, TDF_SLIM);
3423 }
1e128c5f
GB
3424
3425 gcc_assert (stmt_info);
3426
79fe1b3b
DN
 3427	  /* Skip stmts which do not need to be vectorized.
 3428	     This is expected to include:
 3429	     - the COND_EXPR which is the loop exit condition
 3430	     - any LABEL_EXPRs in the loop
 3431	     - computations that are used only for array indexing or loop
 3432	       control.  */
3433
3434 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3435 {
3436 if (vect_debug_details (NULL))
3437 fprintf (dump_file, "irrelevant.");
3438 continue;
3439 }
3440
3441 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3442 {
3443 if (vect_debug_stats (loop) || vect_debug_details (loop))
3444 {
3445 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3446 print_generic_expr (dump_file, stmt, TDF_SLIM);
3447 }
3448 return false;
3449 }
3450
3451 if (STMT_VINFO_DATA_REF (stmt_info))
3452 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3453 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3454 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3455 else
3456 scalar_type = TREE_TYPE (stmt);
3457
3458 if (vect_debug_details (NULL))
3459 {
3460 fprintf (dump_file, "get vectype for scalar type: ");
3461 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3462 }
3463
3464 vectype = get_vectype_for_scalar_type (scalar_type);
3465 if (!vectype)
3466 {
3467 if (vect_debug_stats (loop) || vect_debug_details (loop))
3468 {
3469 fprintf (dump_file, "not vectorized: unsupported data-type ");
3470 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3471 }
3472 return false;
3473 }
3474
3475 if (vect_debug_details (NULL))
3476 {
3477 fprintf (dump_file, "vectype: ");
3478 print_generic_expr (dump_file, vectype, TDF_SLIM);
3479 }
3480 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3481
3482 ok = (vectorizable_operation (stmt, NULL, NULL)
3483 || vectorizable_assignment (stmt, NULL, NULL)
3484 || vectorizable_load (stmt, NULL, NULL)
3485 || vectorizable_store (stmt, NULL, NULL));
3486
3487 if (!ok)
3488 {
3489 if (vect_debug_stats (loop) || vect_debug_details (loop))
3490 {
3491 fprintf (dump_file, "not vectorized: stmt not supported: ");
3492 print_generic_expr (dump_file, stmt, TDF_SLIM);
3493 }
3494 return false;
3495 }
3496
3497 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3498 if (vect_debug_details (NULL))
3499 fprintf (dump_file, "nunits = %d", nunits);
3500
3501 if (vectorization_factor)
3502 {
3503 /* FORNOW: don't allow mixed units.
3504 This restriction will be relaxed in the future. */
3505 if (nunits != vectorization_factor)
3506 {
3507 if (vect_debug_stats (loop) || vect_debug_details (loop))
3508 fprintf (dump_file, "not vectorized: mixed data-types");
3509 return false;
3510 }
3511 }
3512 else
3513 vectorization_factor = nunits;
f0923257
DN
3514
3515#ifdef ENABLE_CHECKING
3516 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3517 * vectorization_factor == UNITS_PER_SIMD_WORD);
3518#endif
79fe1b3b
DN
3519 }
3520 }
3521
3522 /* TODO: Analyze cost. Decide if worth while to vectorize. */
f0923257
DN
3523
3524 if (vectorization_factor <= 1)
79fe1b3b
DN
3525 {
3526 if (vect_debug_stats (loop) || vect_debug_details (loop))
3527 fprintf (dump_file, "not vectorized: unsupported data-type");
3528 return false;
3529 }
3530 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3531
a023975e
OG
3532
3533 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3534 && vect_debug_details (NULL))
79fe1b3b
DN
3535 fprintf (dump_file,
3536 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
a023975e 3537 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
79fe1b3b 3538
a023975e
OG
3539 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3540 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
79fe1b3b 3541 {
a023975e
OG
 3542	      /* In this case we have to generate an epilog loop, which
 3543	         can be done only for loops with one entry edge.  */
3544 if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
3545 || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
3546 {
3547 if (vect_debug_stats (loop) || vect_debug_details (loop))
3548 fprintf (dump_file, "not vectorized: more than one entry.");
3549 return false;
3550 }
79fe1b3b 3551 }
a023975e 3552
79fe1b3b
DN
3553 return true;
3554}
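
/* Worked example (a sketch; the numbers assume a 16-byte SIMD word, i.e.
   UNITS_PER_SIMD_WORD == 16, as the assertion above requires):

     scalar type 'short' (2 bytes): vectorization_factor = 16/2 = 8
     scalar type 'int'   (4 bytes): vectorization_factor = 16/4 = 4

   A loop mixing shorts and ints would therefore produce different nunits for
   different stmts and be rejected by the "mixed data-types" check.  With a
   compile-time iteration count, e.g. niters = 100 and vf = 8:

     100 / 8 = 12 vector iterations (covering 96 scalar iterations)
     100 % 8 = 4  remaining iterations, which require the epilog loop
                  mentioned above (hence the single-entry-edge check).
*/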
3555
3556
3557/* Function exist_non_indexing_operands_for_use_p
3558
3559 USE is one of the uses attached to STMT. Check if USE is
3560 used in STMT for anything other than indexing an array. */
3561
3562static bool
3563exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3564{
3565 tree operand;
3566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3567
3568 /* USE corresponds to some operand in STMT. If there is no data
3569 reference in STMT, then any operand that corresponds to USE
3570 is not indexing an array. */
3571 if (!STMT_VINFO_DATA_REF (stmt_info))
3572 return true;
3573
 3574	  /* STMT has a data_ref. FORNOW this means that it is of one of
3575 the following forms:
3576 -1- ARRAY_REF = var
3577 -2- var = ARRAY_REF
3578 (This should have been verified in analyze_data_refs).
3579
3580 'var' in the second case corresponds to a def, not a use,
3581 so USE cannot correspond to any operands that are not used
3582 for array indexing.
3583
3584 Therefore, all we need to check is if STMT falls into the
3585 first case, and whether var corresponds to USE. */
3586
3587 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3588 return false;
3589
3590 operand = TREE_OPERAND (stmt, 1);
3591
3592 if (TREE_CODE (operand) != SSA_NAME)
3593 return false;
3594
3595 if (operand == use)
3596 return true;
3597
3598 return false;
3599}
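
/* Illustrative example (hypothetical SSA names).  For the store

     a[i_3] = x_5;

   the use 'x_5' is TREE_OPERAND (stmt, 1), so the function returns true for
   it (it is not used as an index), while the use 'i_3' appears only inside
   the ARRAY_REF and the function returns false for it.  For the load

     x_7 = a[i_3];

   the lhs is an SSA_NAME, so false is returned for every use: all of them
   (here just 'i_3') only serve array indexing.  */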
3600
3601
3602/* Function vect_is_simple_iv_evolution.
3603
 3604	   FORNOW: A simple evolution of an induction variable in the loop is
 3605	   considered a polynomial evolution with a constant step.  */
3606
3607static bool
3608vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3609 tree * step, bool strict)
3610{
3611 tree init_expr;
3612 tree step_expr;
3613
3614 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3615
3616 /* When there is no evolution in this loop, the evolution function
3617 is not "simple". */
3618 if (evolution_part == NULL_TREE)
3619 return false;
3620
3621 /* When the evolution is a polynomial of degree >= 2
3622 the evolution function is not "simple". */
3623 if (tree_is_chrec (evolution_part))
3624 return false;
3625
3626 step_expr = evolution_part;
a023975e 3627 init_expr = unshare_expr (initial_condition (access_fn));
79fe1b3b
DN
3628
3629 if (vect_debug_details (NULL))
3630 {
3631 fprintf (dump_file, "step: ");
3632 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3633 fprintf (dump_file, ", init: ");
3634 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3635 }
3636
3637 *init = init_expr;
3638 *step = step_expr;
3639
3640 if (TREE_CODE (step_expr) != INTEGER_CST)
3641 {
3642 if (vect_debug_details (NULL))
3643 fprintf (dump_file, "step unknown.");
3644 return false;
3645 }
3646
3647 if (strict)
3648 if (!integer_onep (step_expr))
3649 {
3650 if (vect_debug_details (NULL))
3651 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3652 return false;
3653 }
3654
3655 return true;
3656}
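
/* Illustrative example (hypothetical loop).  For

     for (i = 0; i < N; i++)
       a[i] = b[i];

   the scalar evolution of 'i' is the chrec {0, +, 1}_loop, so init_expr = 0
   and step_expr = 1: the evolution is "simple", and it also passes the
   STRICT variant, which additionally requires a unit step.  An index like
   2*i has evolution {0, +, 2}_loop: simple in the non-strict sense but
   rejected when STRICT is true, and a symbolic step such as {0, +, n}_loop
   is always rejected because the step is not an INTEGER_CST.  */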
3657
3658
3659/* Function vect_analyze_scalar_cycles.
3660
3661 Examine the cross iteration def-use cycles of scalar variables, by
3662 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3663 cycles that they represent do not impede vectorization.
3664
 3665	   FORNOW: Reduction, as in the following loop, is not supported yet:
3666 loop1:
3667 for (i=0; i<N; i++)
3668 sum += a[i];
3669 The cross-iteration cycle corresponding to variable 'sum' will be
3670 considered too complicated and will impede vectorization.
3671
 3672	   FORNOW: Induction, as in the following loop, is not supported yet:
3673 loop2:
3674 for (i=0; i<N; i++)
3675 a[i] = i;
3676
3677 However, the following loop *is* vectorizable:
3678 loop3:
3679 for (i=0; i<N; i++)
3680 a[i] = b[i];
3681
3682 In both loops there exists a def-use cycle for the variable i:
3683 loop: i_2 = PHI (i_0, i_1)
3684 a[i_2] = ...;
3685 i_1 = i_2 + 1;
3686 GOTO loop;
3687
3688 The evolution of the above cycle is considered simple enough,
3689 however, we also check that the cycle does not need to be
3690 vectorized, i.e - we check that the variable that this cycle
3691 defines is only used for array indexing or in stmts that do not
3692 need to be vectorized. This is not the case in loop2, but it
3693 *is* the case in loop3. */
3694
3695static bool
3696vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3697{
3698 tree phi;
3699 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3700 basic_block bb = loop->header;
3701 tree dummy;
3702
3703 if (vect_debug_details (NULL))
3704 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3705
3706 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
3707 {
3708 tree access_fn = NULL;
3709
3710 if (vect_debug_details (NULL))
3711 {
3712 fprintf (dump_file, "Analyze phi: ");
3713 print_generic_expr (dump_file, phi, TDF_SLIM);
3714 }
3715
3716 /* Skip virtual phi's. The data dependences that are associated with
3717 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3718
3719 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3720 {
3721 if (vect_debug_details (NULL))
3722 fprintf (dump_file, "virtual phi. skip.");
3723 continue;
3724 }
3725
3726 /* Analyze the evolution function. */
3727
3728 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3729 those of loop induction variables; This property is verified here.
3730
3731 Furthermore, if that induction variable is used in an operation
3732 that needs to be vectorized (i.e, is not solely used to index
3733 arrays and check the exit condition) - we do not support its
3734 vectorization yet. This property is verified in vect_is_simple_use,
3735 during vect_analyze_operations. */
3736
6775f1f3
IR
3737 access_fn = /* instantiate_parameters
3738 (loop,*/
3739 analyze_scalar_evolution (loop, PHI_RESULT (phi));
79fe1b3b
DN
3740
3741 if (!access_fn)
3742 {
3743 if (vect_debug_stats (loop) || vect_debug_details (loop))
3744 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3745 return false;
3746 }
3747
3748 if (vect_debug_details (NULL))
3749 {
3750 fprintf (dump_file, "Access function of PHI: ");
3751 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3752 }
3753
3754 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3755 &dummy, false))
3756 {
3757 if (vect_debug_stats (loop) || vect_debug_details (loop))
3758 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3759 return false;
3760 }
3761 }
3762
3763 return true;
3764}
3765
3766
3767/* Function vect_analyze_data_ref_dependence.
3768
3769 Return TRUE if there (might) exist a dependence between a memory-reference
3770 DRA and a memory-reference DRB. */
3771
3772static bool
3773vect_analyze_data_ref_dependence (struct data_reference *dra,
3774 struct data_reference *drb,
3775 struct loop *loop)
3776{
6775f1f3 3777 bool differ_p;
79fe1b3b 3778 struct data_dependence_relation *ddr;
6775f1f3 3779
79fe1b3b
DN
3780 if (!array_base_name_differ_p (dra, drb, &differ_p))
3781 {
6775f1f3 3782 if (vect_debug_stats (loop) || vect_debug_details (loop))
79fe1b3b 3783 {
6775f1f3
IR
3784 fprintf (dump_file,
3785 "not vectorized: can't determine dependence between: ");
79fe1b3b
DN
3786 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3787 fprintf (dump_file, " and ");
3788 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3789 }
3790 return true;
3791 }
3792
3793 if (differ_p)
3794 return false;
3795
3796 ddr = initialize_data_dependence_relation (dra, drb);
3797 compute_affine_dependence (ddr);
3798
3799 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3800 return false;
3801
3802 if (vect_debug_stats (loop) || vect_debug_details (loop))
3803 {
3804 fprintf (dump_file,
3805 "not vectorized: possible dependence between data-refs ");
3806 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3807 fprintf (dump_file, " and ");
3808 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3809 }
3810
3811 return true;
3812}
3813
3814
3815/* Function vect_analyze_data_ref_dependences.
3816
3817 Examine all the data references in the loop, and make sure there do not
3818 exist any data dependences between them.
3819
 3820	   TODO: dependences whose distance is greater than the vectorization factor
3821 can be ignored. */
3822
3823static bool
3824vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3825{
3826 unsigned int i, j;
3827 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3828 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3829 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3830
3831 /* Examine store-store (output) dependences. */
3832
3833 if (vect_debug_details (NULL))
3834 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3835
3836 if (vect_debug_details (NULL))
3837 fprintf (dump_file, "compare all store-store pairs.");
3838
3839 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3840 {
3841 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3842 {
3843 struct data_reference *dra =
3844 VARRAY_GENERIC_PTR (loop_write_refs, i);
3845 struct data_reference *drb =
3846 VARRAY_GENERIC_PTR (loop_write_refs, j);
3847 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3848 return false;
3849 }
3850 }
3851
3852 /* Examine load-store (true/anti) dependences. */
3853
3854 if (vect_debug_details (NULL))
3855 fprintf (dump_file, "compare all load-store pairs.");
3856
3857 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3858 {
3859 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3860 {
3861 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3862 struct data_reference *drb =
3863 VARRAY_GENERIC_PTR (loop_write_refs, j);
3864 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3865 return false;
3866 }
3867 }
3868
3869 return true;
3870}
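
/* Illustrative example (hypothetical arrays).  In

     for (i = 1; i < N; i++)
       a[i] = a[i-1] + b[i];

   the load a[i-1] and the store a[i] share the base 'a', so
   array_base_name_differ_p cannot prove independence and
   compute_affine_dependence finds a loop-carried dependence of distance 1;
   the loop is rejected.  In

     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   every load-store pair has differing bases, so all checks succeed.  (As the
   TODO above notes, dependences whose distance is at least the vectorization
   factor could in principle be tolerated.)  */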
3871
3872
3873/* Function vect_get_first_index.
3874
3875 REF is a data reference.
3876 If it is an ARRAY_REF: if its lower bound is simple enough,
3877 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3878 If it is not an ARRAY_REF: REF has no "first index";
 3879	   ARRAY_FIRST_INDEX is zero, and the function returns TRUE.  */
3880
3881static bool
3882vect_get_first_index (tree ref, tree *array_first_index)
3883{
3884 tree array_start;
3885
3886 if (TREE_CODE (ref) != ARRAY_REF)
3887 *array_first_index = size_zero_node;
3888 else
3889 {
3890 array_start = array_ref_low_bound (ref);
3891 if (!host_integerp (array_start,0))
3892 {
3893 if (vect_debug_details (NULL))
3894 {
3895 fprintf (dump_file, "array min val not simple integer cst.");
3896 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3897 }
3898 return false;
3899 }
3900 *array_first_index = array_start;
3901 }
3902
3903 return true;
3904}
3905
3906
6775f1f3
IR
3907/* Function vect_compute_array_base_alignment.
3908 A utility function of vect_compute_array_ref_alignment.
3909
3910 Compute the misalignment of ARRAY in bits.
3911
3912 Input:
3913 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
d4a9b3a3 3914 VECTYPE - we are interested in the misalignment modulo the size of vectype.
6775f1f3
IR
3915 if NULL: don't compute misalignment, just return the base of ARRAY.
3916 PREV_DIMENSIONS - initialized to one.
3917 MISALIGNMENT - the computed misalignment in bits.
3918
3919 Output:
3920 If VECTYPE is not NULL:
3921 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3922 the base of the array, and put the computed misalignment in MISALIGNMENT.
3923 If VECTYPE is NULL:
3924 Return the base of the array.
3925
3926 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3927 a[idx_N]...[idx_2][idx_1] is
3928 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3929 ... + idx_N * dim_0 * ... * dim_N-1}.
3930 (The misalignment of &a is not checked here).
 3931	   Note that every term contains dim_0; therefore, if dim_0 is a
 3932	   multiple of NUNITS, the whole sum is a multiple of NUNITS.
 3933	   Otherwise, if idx_1 is constant, and dim_1 is a multiple of
 3934	   NUNITS, we can say that the misalignment of the sum is equal to
 3935	   the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
 3936	   we can't determine this array misalignment, and we return
 3937	   NULL_TREE.
3938 We proceed recursively in this manner, accumulating total misalignment
3939 and the multiplication of previous dimensions for correct misalignment
3940 calculation. */
3941
3942static tree
3943vect_compute_array_base_alignment (tree array,
3944 tree vectype,
3945 tree *prev_dimensions,
3946 tree *misalignment)
3947{
3948 tree index;
3949 tree domain;
3950 tree dimension_size;
3951 tree mis;
3952 tree bits_per_vectype;
3953 tree bits_per_vectype_unit;
3954
3955 /* The 'stop condition' of the recursion. */
3956 if (TREE_CODE (array) != ARRAY_REF)
3957 return array;
3958
3959 if (!vectype)
3960 /* Just get the base decl. */
3961 return vect_compute_array_base_alignment
3962 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3963
3964 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3965 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3966 return NULL_TREE;
3967
3968 domain = TYPE_DOMAIN (TREE_TYPE (array));
3969 dimension_size =
3970 int_const_binop (PLUS_EXPR,
3971 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3972 TYPE_MIN_VALUE (domain), 1),
3973 size_one_node, 1);
3974
3975 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
3976 is a multiple of NUNITS:
3977
3978 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
3979 */
3980 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
3981 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
3982 if (integer_zerop (mis))
3983 /* This array is aligned. Continue just in order to get the base decl. */
3984 return vect_compute_array_base_alignment
3985 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3986
3987 index = TREE_OPERAND (array, 1);
3988 if (!host_integerp (index, 1))
3989 /* The current index is not constant. */
3990 return NULL_TREE;
3991
3992 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
3993
3994 bits_per_vectype = fold_convert (unsigned_type_node,
3995 build_int_cst (NULL_TREE, BITS_PER_UNIT *
3996 GET_MODE_SIZE (TYPE_MODE (vectype))));
3997 bits_per_vectype_unit = fold_convert (unsigned_type_node,
3998 build_int_cst (NULL_TREE, BITS_PER_UNIT *
3999 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4000
4001 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4002 earlier:
4003
4004 *misalignment =
4005 (*misalignment + index_val * dimension_size * *prev_dimensions)
4006 % vectype_nunits;
4007 */
4008
4009 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4010 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4011 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4012 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4013 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4014
4015
4016 *prev_dimensions = int_const_binop (MULT_EXPR,
4017 *prev_dimensions, dimension_size, 1);
4018
4019 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4020 prev_dimensions,
4021 misalignment);
4022}
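
/* Worked example (a sketch; assumes vectype V8HI, i.e. NUNITS = 8 units of
   'short', 128 bits per vector, and that '&a' itself is vector aligned):

     short a[10][12];          # row length 12 is NOT a multiple of NUNITS
     ... = a[3][i];            # the recursion is applied to the sub-ref a[3]

   Here dimension_size = 12, index = 3, prev_dimensions = 1, so

     misalignment = 3 * 12 * 1 * 16 bits = 576 bits
     576 mod 128 = 64 bits     # a[3] starts 4 shorts past a vector boundary

   and the returned base is the VAR_DECL 'a'.  Had the declaration been
   short a[10][16], dimension_size % NUNITS would be 0, every row would start
   on a vector boundary, and this dimension would contribute no misalignment.
*/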
4023
4024
79fe1b3b
DN
4025/* Function vect_compute_data_ref_alignment
4026
4027 Compute the misalignment of the data reference DR.
4028
6775f1f3
IR
4029 Output:
4030 1. If during the misalignment computation it is found that the data reference
 4031	      cannot be vectorized, then false is returned.
4032 2. DR_MISALIGNMENT (DR) is defined.
4033
79fe1b3b
DN
4034 FOR NOW: No analysis is actually performed. Misalignment is calculated
4035 only for trivial cases. TODO. */
4036
6775f1f3 4037static bool
79fe1b3b 4038vect_compute_data_ref_alignment (struct data_reference *dr,
6775f1f3 4039 loop_vec_info loop_vinfo)
79fe1b3b
DN
4040{
4041 tree stmt = DR_STMT (dr);
6775f1f3 4042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
79fe1b3b
DN
4043 tree ref = DR_REF (dr);
4044 tree vectype;
79fe1b3b 4045 tree scalar_type;
79fe1b3b 4046 tree offset = size_zero_node;
6775f1f3
IR
4047 tree base, bit_offset, alignment;
4048 tree unit_bits = fold_convert (unsigned_type_node,
4049 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4050 tree dr_base;
4051 bool base_aligned_p;
4052
79fe1b3b
DN
4053 if (vect_debug_details (NULL))
4054 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4055
4056 /* Initialize misalignment to unknown. */
4057 DR_MISALIGNMENT (dr) = -1;
4058
4059 scalar_type = TREE_TYPE (ref);
4060 vectype = get_vectype_for_scalar_type (scalar_type);
4061 if (!vectype)
4062 {
4063 if (vect_debug_details (NULL))
4064 {
4065 fprintf (dump_file, "no vectype for stmt: ");
4066 print_generic_expr (dump_file, stmt, TDF_SLIM);
6775f1f3 4067 fprintf (dump_file, " scalar_type: ");
79fe1b3b
DN
4068 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4069 }
6775f1f3
IR
4070 /* It is not possible to vectorize this data reference. */
4071 return false;
79fe1b3b 4072 }
6775f1f3
IR
4073 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4074
4075 if (TREE_CODE (ref) == ARRAY_REF)
4076 dr_base = ref;
4077 else
4078 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
79fe1b3b 4079
6775f1f3
IR
4080 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4081 loop_vinfo, &bit_offset, &base_aligned_p);
4082 if (!base)
79fe1b3b 4083 {
6775f1f3 4084 if (vect_debug_details (NULL))
79fe1b3b 4085 {
6775f1f3
IR
4086 fprintf (dump_file, "Unknown alignment for access: ");
4087 print_generic_expr (dump_file,
4088 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
79fe1b3b 4089 }
6775f1f3
IR
4090 return true;
4091 }
79fe1b3b 4092
6775f1f3
IR
4093 if (!base_aligned_p)
4094 {
4095 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
79fe1b3b
DN
4096 {
4097 if (vect_debug_details (NULL))
6775f1f3
IR
4098 {
4099 fprintf (dump_file, "can't force alignment of ref: ");
4100 print_generic_expr (dump_file, ref, TDF_SLIM);
4101 }
4102 return true;
79fe1b3b 4103 }
6775f1f3
IR
4104
4105 /* Force the alignment of the decl.
4106 NOTE: This is the only change to the code we make during
4107 the analysis phase, before deciding to vectorize the loop. */
4108 if (vect_debug_details (NULL))
4109 fprintf (dump_file, "force alignment");
4110 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4111 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4112 }
79fe1b3b 4113
6775f1f3
IR
4114 /* At this point we assume that the base is aligned, and the offset from it
4115 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4116 gcc_assert (base_aligned_p
4117 || (TREE_CODE (base) == VAR_DECL
4118 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4119
4120 /* Convert into bytes. */
4121 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4122 /* Check that there is no remainder in bits. */
4123 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4124 if (!integer_zerop (bit_offset))
4125 {
4126 if (vect_debug_details (NULL))
79fe1b3b 4127 {
6775f1f3
IR
4128 fprintf (dump_file, "bit offset alignment: ");
4129 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
79fe1b3b 4130 }
6775f1f3
IR
4131 return false;
4132 }
4133
4134 /* Alignment required, in bytes: */
4135 alignment = fold_convert (unsigned_type_node,
4136 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
79fe1b3b 4137
6775f1f3
IR
4138 /* Modulo alignment. */
4139 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4140 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4141 {
4142 if (vect_debug_details (NULL))
4143 fprintf (dump_file, "unexpected misalign value");
4144 return false;
79fe1b3b
DN
4145 }
4146
6775f1f3 4147 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
79fe1b3b 4148
6775f1f3
IR
4149 if (vect_debug_details (NULL))
4150 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4151
4152 return true;
4153}
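
/* Illustrative example (hypothetical declaration; assumes vectype V8HI with
   TYPE_ALIGN of 128 bits).  For

     static short a[256];
     ... a[i] ...              # bit offset from the base is 0

   the base 'a' is a VAR_DECL whose alignment can be forced, so the code
   above raises DECL_ALIGN (a) to 128 - roughly what writing
   "short a[256] __attribute__((aligned(16)))" would do by hand - and
   DR_MISALIGNMENT (dr) is set to 0.  An access whose constant start offset
   is 3 elements (e.g. a[i+3]) yields a byte offset of 6, so
   DR_MISALIGNMENT (dr) = 6 mod 16 = 6.  */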
4154
4155
4156/* Function vect_compute_array_ref_alignment
4157
4158 Compute the alignment of an array-ref.
4159 The alignment we compute here is relative to
4160 TYPE_ALIGN(VECTYPE) boundary.
4161
4162 Output:
4163 OFFSET - the alignment in bits
 4164	   Return value - the base of the array-ref.  E.g.,
 4165	   if the array-ref is a.b[k].c[i][j], the returned
4166 base is a.b[k].c
4167*/
4168
4169static tree
4170vect_compute_array_ref_alignment (struct data_reference *dr,
4171 loop_vec_info loop_vinfo,
4172 tree vectype,
4173 tree *offset)
4174{
4175 tree array_first_index = size_zero_node;
4176 tree init;
4177 tree ref = DR_REF (dr);
4178 tree scalar_type = TREE_TYPE (ref);
4179 tree oprnd0 = TREE_OPERAND (ref, 0);
4180 tree dims = size_one_node;
4181 tree misalign = size_zero_node;
4182 tree next_ref, this_offset = size_zero_node;
4183 tree nunits;
4184 tree nbits;
4185
4186 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
a023975e
OG
4187 /* The reference is an array without its last index. */
4188 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4189 &misalign);
6775f1f3 4190 else
a023975e
OG
4191 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4192 &misalign);
6775f1f3
IR
4193 if (!vectype)
4194 /* Alignment is not requested. Just return the base. */
4195 return next_ref;
4196
4197 /* Compute alignment. */
4198 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4199 return NULL_TREE;
4200 this_offset = misalign;
4201
4202 /* Check the first index accessed. */
79fe1b3b
DN
4203 if (!vect_get_first_index (ref, &array_first_index))
4204 {
4205 if (vect_debug_details (NULL))
4206 fprintf (dump_file, "no first_index for array.");
6775f1f3 4207 return NULL_TREE;
79fe1b3b 4208 }
79fe1b3b 4209
6775f1f3
IR
4210 /* Check the index of the array_ref. */
4211 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4212 LOOP_VINFO_LOOP (loop_vinfo)->num);
79fe1b3b 4213
6775f1f3
IR
4214 /* FORNOW: In order to simplify the handling of alignment, we make sure
4215 that the first location at which the array is accessed ('init') is on an
79fe1b3b 4216 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
6775f1f3
IR
4217 This is too conservative, since we require that
4218 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
79fe1b3b
DN
4219 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4220 This should be relaxed in the future. */
4221
6775f1f3 4222 if (!init || !host_integerp (init, 0))
79fe1b3b
DN
4223 {
4224 if (vect_debug_details (NULL))
6775f1f3
IR
4225 fprintf (dump_file, "non constant init. ");
4226 return NULL_TREE;
79fe1b3b
DN
4227 }
4228
79fe1b3b 4229 /* bytes per scalar element: */
6775f1f3
IR
4230 nunits = fold_convert (unsigned_type_node,
4231 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4232 nbits = int_const_binop (MULT_EXPR, nunits,
4233 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
79fe1b3b 4234
6775f1f3 4235 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
79fe1b3b 4236 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
6775f1f3
IR
4237 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4238 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
79fe1b3b 4239
6775f1f3
IR
4240 /* TODO: allow negative misalign values. */
4241 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
79fe1b3b
DN
4242 {
4243 if (vect_debug_details (NULL))
6775f1f3
IR
4244 fprintf (dump_file, "unexpected misalign value");
4245 return NULL_TREE;
79fe1b3b 4246 }
6775f1f3
IR
4247 *offset = misalign;
4248 return next_ref;
79fe1b3b
DN
4249}
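
/* Worked example (a sketch; assumes 'short a[N]' whose base is vector
   aligned, and vectype V8HI, so nbits = 16 bits per element):

     a[i]     : init = 0  ->  *offset = 0 bits
     a[i + 2] : init = 2  ->  *offset = (2 - 0) * 16 = 32 bits

   As the FORNOW note above explains, requiring 'init' itself to land on a
   NUNITS boundary is conservative: only the sum of the base offset and
   'init' actually needs to be vector aligned.  */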
4250
4251
4252/* Function vect_compute_data_refs_alignment
4253
4254 Compute the misalignment of data references in the loop.
4255 This pass may take place at function granularity instead of at loop
4256 granularity.
4257
4258 FOR NOW: No analysis is actually performed. Misalignment is calculated
4259 only for trivial cases. TODO. */
4260
4261static void
4262vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4263{
4264 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4265 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4266 unsigned int i;
6775f1f3 4267
79fe1b3b
DN
4268 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4269 {
4270 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4271 vect_compute_data_ref_alignment (dr, loop_vinfo);
4272 }
4273
4274 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4275 {
4276 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4277 vect_compute_data_ref_alignment (dr, loop_vinfo);
4278 }
4279}
4280
4281
4282/* Function vect_enhance_data_refs_alignment
4283
4284 This pass will use loop versioning and loop peeling in order to enhance
4285 the alignment of data references in the loop.
4286
4287 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4288 original loop is to be vectorized; Any other loops that are created by
4289 the transformations performed in this pass - are not supposed to be
4290 vectorized. This restriction will be relaxed.
4291
4292 FOR NOW: No transformation is actually performed. TODO. */
4293
4294static void
7ccf35ed 4295vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
79fe1b3b
DN
4296{
4297 /*
4298 This pass will require a cost model to guide it whether to apply peeling
4299 or versioning or a combination of the two. For example, the scheme that
 4300	   Intel uses when given a loop with several memory accesses, is as follows:
 4301	   choose one memory access ('p') whose alignment you want to force by doing
4302 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4303 other accesses are not necessarily aligned, or (2) use loop versioning to
4304 generate one loop in which all accesses are aligned, and another loop in
4305 which only 'p' is necessarily aligned.
4306
4307 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4308 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4309 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4310
4311 Devising a cost model is the most critical aspect of this work. It will
4312 guide us on which access to peel for, whether to use loop versioning, how
4313 many versions to create, etc. The cost model will probably consist of
4314 generic considerations as well as target specific considerations (on
4315 powerpc for example, misaligned stores are more painful than misaligned
4316 loads).
4317
4318 Here is the general steps involved in alignment enhancements:
4319
4320 -- original loop, before alignment analysis:
4321 for (i=0; i<N; i++){
4322 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4323 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4324 }
4325
4326 -- After vect_compute_data_refs_alignment:
4327 for (i=0; i<N; i++){
4328 x = q[i]; # DR_MISALIGNMENT(q) = 3
4329 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4330 }
4331
4332 -- Possibility 1: we do loop versioning:
4333 if (p is aligned) {
4334 for (i=0; i<N; i++){ # loop 1A
4335 x = q[i]; # DR_MISALIGNMENT(q) = 3
4336 p[i] = y; # DR_MISALIGNMENT(p) = 0
4337 }
4338 }
4339 else {
4340 for (i=0; i<N; i++){ # loop 1B
4341 x = q[i]; # DR_MISALIGNMENT(q) = 3
4342 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4343 }
4344 }
4345
4346 -- Possibility 2: we do loop peeling:
4347 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4348 x = q[i];
4349 p[i] = y;
4350 }
4351 for (i = 3; i < N; i++){ # loop 2A
4352 x = q[i]; # DR_MISALIGNMENT(q) = 0
4353 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4354 }
4355
4356 -- Possibility 3: combination of loop peeling and versioning:
4357 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4358 x = q[i];
4359 p[i] = y;
4360 }
4361 if (p is aligned) {
4362 for (i = 3; i<N; i++){ # loop 3A
4363 x = q[i]; # DR_MISALIGNMENT(q) = 0
4364 p[i] = y; # DR_MISALIGNMENT(p) = 0
4365 }
4366 }
4367 else {
4368 for (i = 3; i<N; i++){ # loop 3B
4369 x = q[i]; # DR_MISALIGNMENT(q) = 0
4370 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4371 }
4372 }
4373
4374 These loops are later passed to loop_transform to be vectorized. The
4375 vectorizer will use the alignment information to guide the transformation
4376 (whether to generate regular loads/stores, or with special handling for
4377 misalignment).
4378 */
4379}
4380
4381
4382/* Function vect_analyze_data_refs_alignment
4383
4384 Analyze the alignment of the data-references in the loop.
 4385	   FOR NOW: Until support for misaligned accesses is in place, only if all
4386 accesses are aligned can the loop be vectorized. This restriction will be
4387 relaxed. */
4388
4389static bool
4390vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4391{
4392 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
a023975e 4393 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7ccf35ed
DN
4394 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
4395
79fe1b3b 4396 unsigned int i;
a023975e 4397 unsigned int decide_peeling_count = 0;
79fe1b3b
DN
4398
4399 if (vect_debug_details (NULL))
4400 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4401
4402
4403 /* This pass may take place at function granularity instead of at loop
4404 granularity. */
4405
4406 vect_compute_data_refs_alignment (loop_vinfo);
4407
4408
4409 /* This pass will use loop versioning and loop peeling in order to enhance
4410 the alignment of data references in the loop.
4411 FOR NOW: we assume that whatever versioning/peeling took place, the
4412 original loop is to be vectorized. Any other loops that were created by
4413 the transformations performed in this pass - are not supposed to be
4414 vectorized. This restriction will be relaxed. */
4415
4416 vect_enhance_data_refs_alignment (loop_vinfo);
4417
4418
 4419	  /* Finally, check that the loop can be vectorized.
a023975e
OG
4420 FOR NOW: Until support for misaligned stores is in place, only if all
4421 stores are aligned can the loop be vectorized. This restriction will be
 4422	     relaxed. In the meantime, we can force the alignment of one of the
4423 data-references in the loop using peeling. We currently use a heuristic
4424 that peels the first misaligned store, but we plan to develop a
4425 better cost model to guide the decision on which data-access to peel for.
4426 */
79fe1b3b
DN
4427
4428 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4429 {
4430 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4431 if (!aligned_access_p (dr))
4432 {
a023975e
OG
4433 /* Decide here whether we need peeling for alignment. */
4434 decide_peeling_count++;
4435 if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS)
4436 {
4437 if (vect_debug_stats (loop) || vect_debug_details (loop))
4438 fprintf (dump_file,
4439 "not vectorized: multiple misaligned stores.");
4440 return false;
4441 }
4442 else
4443 {
4444 LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr;
4445 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4446 }
79fe1b3b
DN
4447 }
4448 }
4449
7ccf35ed
DN
4450 /* The vectorizer now supports misaligned loads, so we don't fail anymore
4451 in the presence of a misaligned read dataref. For some targets however
4452 it may be preferable not to vectorize in such a case as misaligned
4453 accesses are very costly. This should be considered in the future. */
4454/*
79fe1b3b
DN
4455 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4456 {
4457 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4458 if (!aligned_access_p (dr))
4459 {
4460 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4461 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4462 fprintf (dump_file, "not vectorized: unaligned load.");
4463 return false;
4464 }
4465 }
7ccf35ed 4466*/
79fe1b3b
DN
4467
4468 return true;
4469}
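
/* Illustrative example (hypothetical arrays).  In

     for (i = 0; i < N; i++)
       p[i] = q[i];          # store p[i] misaligned, load q[i] misaligned

   the single misaligned store p[i] is recorded in LOOP_UNALIGNED_DR and
   LOOP_DO_PEELING_FOR_ALIGNMENT is set, so iterations will later be peeled
   until the store is aligned.  A second misaligned store in the same loop
   trips the MAX_NUMBER_OF_UNALIGNED_DATA_REFS check above ("multiple
   misaligned stores") and the loop is rejected, while the misaligned load
   q[i] no longer causes a rejection by itself.  */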
4470
4471
4472/* Function vect_analyze_data_ref_access.
4473
4474 Analyze the access pattern of the data-reference DR. For now, a data access
 4475	   has to be consecutive and aligned to be considered vectorizable.  */
4476
4477static bool
4478vect_analyze_data_ref_access (struct data_reference *dr)
4479{
4480 varray_type access_fns = DR_ACCESS_FNS (dr);
4481 tree access_fn;
4482 tree init, step;
6775f1f3 4483 unsigned int dimensions, i;
79fe1b3b 4484
6775f1f3
IR
4485 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4486 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4487 access is contiguous). */
4488 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4489
4490 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
79fe1b3b 4491 {
6775f1f3 4492 access_fn = DR_ACCESS_FN (dr, i);
79fe1b3b 4493
6775f1f3
IR
4494 if (evolution_part_in_loop_num (access_fn,
4495 loop_containing_stmt (DR_STMT (dr))->num))
4496 {
a023975e
OG
4497 /* Evolution part is not NULL in this loop (it is neither constant
4498 nor invariant). */
6775f1f3
IR
4499 if (vect_debug_details (NULL))
4500 {
4501 fprintf (dump_file,
a023975e 4502 "not vectorized: complicated multidim. array access.");
6775f1f3
IR
4503 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4504 }
4505 return false;
4506 }
4507 }
4508
4509 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4510 if (!evolution_function_is_constant_p (access_fn)
4511 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4512 access_fn, &init, &step, true))
79fe1b3b
DN
4513 {
4514 if (vect_debug_details (NULL))
4515 {
a023975e 4516 fprintf (dump_file, "not vectorized: complicated access function.");
79fe1b3b
DN
4517 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4518 }
4519 return false;
4520 }
6775f1f3 4521
79fe1b3b
DN
4522 return true;
4523}
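
/* Illustrative example (hypothetical 2-D array; the inner loop runs over 'j'):

     for (j = 0; j < M; j++)
       b[j] = A[i][j];       # 'i' is invariant in the loop and the last
                             # dimension evolves as {0, +, 1}: accepted

     for (j = 0; j < M; j++)
       b[j] = A[j][i];       # the outer dimension evolves in the loop, so
                             # the access is strided: rejected

   Likewise A[i][2*j] is rejected because its last-dimension access function
   {0, +, 2} fails the strict (unit-step) IV check.  */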
4524
4525
4526/* Function vect_analyze_data_ref_accesses.
4527
4528 Analyze the access pattern of all the data references in the loop.
4529
4530 FORNOW: the only access pattern that is considered vectorizable is a
4531 simple step 1 (consecutive) access.
4532
6775f1f3 4533 FORNOW: handle only arrays and pointer accesses. */
79fe1b3b
DN
4534
4535static bool
4536vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4537{
4538 unsigned int i;
4539 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4540 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4541
4542 if (vect_debug_details (NULL))
4543 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4544
4545 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4546 {
4547 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4548 bool ok = vect_analyze_data_ref_access (dr);
4549 if (!ok)
4550 {
4551 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4552 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4553 fprintf (dump_file, "not vectorized: complicated access pattern.");
4554 return false;
4555 }
4556 }
4557
4558 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4559 {
4560 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4561 bool ok = vect_analyze_data_ref_access (dr);
4562 if (!ok)
4563 {
4564 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4565 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4566 fprintf (dump_file, "not vectorized: complicated access pattern.");
4567 return false;
4568 }
4569 }
4570
4571 return true;
4572}
4573
4574
4575/* Function vect_analyze_pointer_ref_access.
4576
4577 Input:
4578 STMT - a stmt that contains a data-ref
4579 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4580
4581 If the data-ref access is vectorizable, return a data_reference structure
4582 that represents it (DR). Otherwise - return NULL. */
4583
4584static struct data_reference *
4585vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4586{
4587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4588 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4589 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4590 tree init, step;
4591 int step_val;
4592 tree reftype, innertype;
4593 enum machine_mode innermode;
4594 tree indx_access_fn;
4595 int loopnum = loop->num;
4596 struct data_reference *dr;
4597
4598 if (!access_fn)
4599 {
4600 if (vect_debug_stats (loop) || vect_debug_details (loop))
4601 fprintf (dump_file, "not vectorized: complicated pointer access.");
4602 return NULL;
4603 }
4604
4605 if (vect_debug_details (NULL))
4606 {
4607 fprintf (dump_file, "Access function of ptr: ");
4608 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4609 }
4610
4611 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4612 {
4613 if (vect_debug_stats (loop) || vect_debug_details (loop))
4614 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4615 return NULL;
4616 }
4617
6775f1f3
IR
4618 STRIP_NOPS (init);
4619
4620 if (!host_integerp (step,0))
79fe1b3b
DN
4621 {
4622 if (vect_debug_stats (loop) || vect_debug_details (loop))
4623 fprintf (dump_file,
6775f1f3 4624 "not vectorized: non constant step for pointer access.");
79fe1b3b
DN
4625 return NULL;
4626 }
4627
4628 step_val = TREE_INT_CST_LOW (step);
4629
4630 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4631 if (TREE_CODE (reftype) != POINTER_TYPE)
4632 {
4633 if (vect_debug_stats (loop) || vect_debug_details (loop))
4634 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4635 return NULL;
4636 }
4637
4638 reftype = TREE_TYPE (init);
4639 if (TREE_CODE (reftype) != POINTER_TYPE)
4640 {
4641 if (vect_debug_stats (loop) || vect_debug_details (loop))
4642 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4643 return NULL;
4644 }
4645
4646 innertype = TREE_TYPE (reftype);
4647 innermode = TYPE_MODE (innertype);
4648 if (GET_MODE_SIZE (innermode) != step_val)
4649 {
4650 /* FORNOW: support only consecutive access */
4651 if (vect_debug_stats (loop) || vect_debug_details (loop))
4652 fprintf (dump_file, "not vectorized: non consecutive access.");
4653 return NULL;
4654 }
4655
4656 indx_access_fn =
4657 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4658 if (vect_debug_details (NULL))
4659 {
4660 fprintf (dump_file, "Access function of ptr indx: ");
4661 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4662 }
4663 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4664 return dr;
4665}
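
/* Illustrative example (hypothetical pointer 'p' of type 'int *'):

     for (i = 0; i < N; i++){
       *p = 0;               # access fn of the pointer: {p_0, +, 4}
       p = p + 1;            # step 4 == GET_MODE_SIZE of 'int': consecutive
     }

   is accepted, and the data-ref is described with the index access function
   {0, +, 1}_loop built above.  Had the increment been 'p = p + 2', the step
   (8) would differ from the size of the pointed-to type and the access would
   be rejected as non consecutive.  */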
4666
4667
6775f1f3
IR
4668/* Function vect_get_symbl_and_dr.
4669
4671 The function returns SYMBL - the variable relevant for
4672 the memory tag (for aliasing purposes).
4673 It also creates the data reference structure DR.
4673
4674 Input:
4675 MEMREF - data reference in STMT
4676 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4677
4678 Output:
4679 DR - data_reference struct for MEMREF
4680 return value - the relevant variable for memory tag (for aliasing purposes).
4681
4682*/
4683
4684static tree
4685vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4686 loop_vec_info loop_vinfo, struct data_reference **dr)
4687{
4688 tree symbl, oprnd0, oprnd1;
4689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4690 tree offset;
4691 tree array_base, base;
4692 struct data_reference *new_dr;
4693 bool base_aligned_p;
4694
4695 *dr = NULL;
4696 switch (TREE_CODE (memref))
4697 {
4698 case INDIRECT_REF:
4699 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4700 if (! new_dr)
4701 return NULL_TREE;
4702 *dr = new_dr;
4703 symbl = DR_BASE_NAME (new_dr);
4704 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4705
4706 switch (TREE_CODE (symbl))
4707 {
4708 case PLUS_EXPR:
4709 case MINUS_EXPR:
4710 oprnd0 = TREE_OPERAND (symbl, 0);
4711 oprnd1 = TREE_OPERAND (symbl, 1);
4712
4713 STRIP_NOPS(oprnd1);
4714 /* Only {address_base + offset} expressions are supported,
d4a9b3a3 4715 where address_base can be POINTER_TYPE or ARRAY_TYPE and
6775f1f3
IR
4716 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4717 TODO: swap operands if {offset + address_base}. */
4718 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4719 && TREE_CODE (oprnd1) != INTEGER_CST)
4720 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4721 return NULL_TREE;
4722
4723 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4724 symbl = oprnd0;
4725 else
4726 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4727 loop_vinfo, &new_dr);
4728
4729 case SSA_NAME:
4730 case ADDR_EXPR:
4731 /* symbl remains unchanged. */
4732 break;
4733
4734 default:
4735 if (vect_debug_details (NULL))
4736 {
4737 fprintf (dump_file, "unhandled data ref: ");
4738 print_generic_expr (dump_file, memref, TDF_SLIM);
4739 fprintf (dump_file, " (symbl ");
4740 print_generic_expr (dump_file, symbl, TDF_SLIM);
4741 fprintf (dump_file, ") in stmt ");
4742 print_generic_expr (dump_file, stmt, TDF_SLIM);
4743 }
4744 return NULL_TREE;
4745 }
4746 break;
4747
4748 case ARRAY_REF:
4749 offset = size_zero_node;
6775f1f3
IR
4750
4751 /* Store the array base in the stmt info.
4752 For one dimensional array ref a[i], the base is a,
4753 for multidimensional a[i1][i2]..[iN], the base is
4754 a[i1][i2]..[iN-1]. */
4755 array_base = TREE_OPERAND (memref, 0);
4756 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4757
4758 new_dr = analyze_array (stmt, memref, is_read);
4759 *dr = new_dr;
4760
4761 /* Find the relevant symbol for aliasing purposes. */
4762 base = DR_BASE_NAME (new_dr);
4763 switch (TREE_CODE (base))
4764 {
4765 case VAR_DECL:
4766 symbl = base;
4767 break;
4768
4769 case INDIRECT_REF:
4770 symbl = TREE_OPERAND (base, 0);
4771 break;
4772
4773 case COMPONENT_REF:
4774 /* Could have recorded more accurate information -
 4775	         i.e., the actual FIELD_DECL that is being referenced -
4776 but later passes expect VAR_DECL as the nmt. */
4777 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4778 loop_vinfo, &offset, &base_aligned_p);
4779 if (symbl)
4780 break;
4781 /* fall through */
4782 default:
4783 if (vect_debug_details (NULL))
4784 {
4785 fprintf (dump_file, "unhandled struct/class field access ");
4786 print_generic_expr (dump_file, stmt, TDF_SLIM);
4787 }
4788 return NULL_TREE;
4789 }
4790 break;
4791
4792 default:
4793 if (vect_debug_details (NULL))
4794 {
4795 fprintf (dump_file, "unhandled data ref: ");
4796 print_generic_expr (dump_file, memref, TDF_SLIM);
4797 fprintf (dump_file, " in stmt ");
4798 print_generic_expr (dump_file, stmt, TDF_SLIM);
4799 }
4800 return NULL_TREE;
4801 }
4802 return symbl;
4803}
4804
4805
79fe1b3b
DN
4806/* Function vect_analyze_data_refs.
4807
4808 Find all the data references in the loop.
4809
6775f1f3 4810 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
79fe1b3b
DN
 4811	   whose base is really an array (not a pointer) and whose alignment
4812 can be forced. This restriction will be relaxed. */
4813
4814static bool
4815vect_analyze_data_refs (loop_vec_info loop_vinfo)
4816{
4817 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4818 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4819 int nbbs = loop->num_nodes;
4820 block_stmt_iterator si;
4821 int j;
4822 struct data_reference *dr;
6775f1f3
IR
4823 tree tag;
4824 tree address_base;
322ae40b
IR
4825 bool base_aligned_p;
4826 tree offset;
79fe1b3b
DN
4827
4828 if (vect_debug_details (NULL))
4829 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4830
4831 for (j = 0; j < nbbs; j++)
4832 {
4833 basic_block bb = bbs[j];
4834 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4835 {
4836 bool is_read = false;
4837 tree stmt = bsi_stmt (si);
4838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4839 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4840 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4841 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4842 varray_type *datarefs = NULL;
4843 int nvuses, nv_may_defs, nv_must_defs;
4844 tree memref = NULL;
79fe1b3b
DN
4845 tree symbl;
4846
4847 /* Assumption: there exists a data-ref in stmt, if and only if
4848 it has vuses/vdefs. */
4849
4850 if (!vuses && !v_may_defs && !v_must_defs)
4851 continue;
4852
4853 nvuses = NUM_VUSES (vuses);
4854 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4855 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4856
4857 if (nvuses && (nv_may_defs || nv_must_defs))
4858 {
4859 if (vect_debug_details (NULL))
4860 {
4861 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4862 print_generic_expr (dump_file, stmt, TDF_SLIM);
4863 }
4864 return false;
4865 }
4866
4867 if (TREE_CODE (stmt) != MODIFY_EXPR)
4868 {
4869 if (vect_debug_details (NULL))
4870 {
4871 fprintf (dump_file, "unexpected vops in stmt: ");
4872 print_generic_expr (dump_file, stmt, TDF_SLIM);
4873 }
4874 return false;
4875 }
4876
4877 if (vuses)
4878 {
4879 memref = TREE_OPERAND (stmt, 1);
4880 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4881 is_read = true;
4882 }
4883 else /* vdefs */
4884 {
4885 memref = TREE_OPERAND (stmt, 0);
4886 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4887 is_read = false;
4888 }
4889
6775f1f3
IR
4890 /* Analyze MEMREF. If it is of a supported form, build data_reference
4891 struct for it (DR) and find the relevant symbol for aliasing
4892 purposes. */
a023975e
OG
4893 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4894 &dr);
6775f1f3 4895 if (!symbl)
79fe1b3b
DN
4896 {
4897 if (vect_debug_stats (loop) || vect_debug_details (loop))
4898 {
6775f1f3 4899 fprintf (dump_file, "not vectorized: unhandled data ref: ");
79fe1b3b
DN
4900 print_generic_expr (dump_file, stmt, TDF_SLIM);
4901 }
4902 return false;
4903 }
6775f1f3 4904
79fe1b3b 4905 /* Find and record the memtag assigned to this data-ref. */
6775f1f3 4906 switch (TREE_CODE (symbl))
79fe1b3b 4907 {
6775f1f3
IR
4908 case VAR_DECL:
4909 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4910 break;
4911
4912 case SSA_NAME:
79fe1b3b
DN
4913 symbl = SSA_NAME_VAR (symbl);
4914 tag = get_var_ann (symbl)->type_mem_tag;
4915 if (!tag)
4916 {
4917 tree ptr = TREE_OPERAND (memref, 0);
4918 if (TREE_CODE (ptr) == SSA_NAME)
4919 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4920 }
4921 if (!tag)
4922 {
4923 if (vect_debug_stats (loop) || vect_debug_details (loop))
4924 fprintf (dump_file, "not vectorized: no memtag for ref.");
4925 return false;
4926 }
4927 STMT_VINFO_MEMTAG (stmt_info) = tag;
6775f1f3
IR
4928 break;
4929
4930 case ADDR_EXPR:
4931 address_base = TREE_OPERAND (symbl, 0);
4932
4933 switch (TREE_CODE (address_base))
4934 {
4935 case ARRAY_REF:
a023975e
OG
4936 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
4937 DR_IS_READ(dr));
322ae40b
IR
4938 STMT_VINFO_MEMTAG (stmt_info) =
4939 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
4940 loop_vinfo, &offset,
4941 &base_aligned_p);
6775f1f3
IR
4942 break;
4943
4944 case VAR_DECL:
4945 STMT_VINFO_MEMTAG (stmt_info) = address_base;
4946 break;
4947
4948 default:
4949 if (vect_debug_stats (loop) || vect_debug_details (loop))
4950 {
a023975e
OG
4951 fprintf (dump_file,
4952 "not vectorized: unhandled address expr: ");
6775f1f3
IR
4953 print_generic_expr (dump_file, stmt, TDF_SLIM);
4954 }
4955 return false;
4956 }
4957 break;
4958
4959 default:
79fe1b3b
DN
4960 if (vect_debug_stats (loop) || vect_debug_details (loop))
4961 {
4962 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
4963 print_generic_expr (dump_file, memref, TDF_SLIM);
4964 }
4965 return false;
6775f1f3 4966 }
79fe1b3b
DN
4967
4968 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4969 STMT_VINFO_DATA_REF (stmt_info) = dr;
4970 }
4971 }
4972
4973 return true;
4974}
4975
4976
8c27b7d4 4977/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
79fe1b3b
DN
4978
4979/* Function vect_mark_relevant.
4980
4981 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
4982
4983static void
4984vect_mark_relevant (varray_type worklist, tree stmt)
4985{
4986 stmt_vec_info stmt_info;
4987
4988 if (vect_debug_details (NULL))
4989 fprintf (dump_file, "mark relevant.");
4990
4991 if (TREE_CODE (stmt) == PHI_NODE)
4992 {
4993 VARRAY_PUSH_TREE (worklist, stmt);
4994 return;
4995 }
4996
4997 stmt_info = vinfo_for_stmt (stmt);
4998
4999 if (!stmt_info)
5000 {
5001 if (vect_debug_details (NULL))
5002 {
5003 fprintf (dump_file, "mark relevant: no stmt info!!.");
5004 print_generic_expr (dump_file, stmt, TDF_SLIM);
5005 }
5006 return;
5007 }
5008
5009 if (STMT_VINFO_RELEVANT_P (stmt_info))
5010 {
5011 if (vect_debug_details (NULL))
5012 fprintf (dump_file, "already marked relevant.");
5013 return;
5014 }
5015
5016 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5017 VARRAY_PUSH_TREE (worklist, stmt);
5018}
5019
5020
5021/* Function vect_stmt_relevant_p.
5022
5023 Return true if STMT in loop that is represented by LOOP_VINFO is
5024 "relevant for vectorization".
5025
5026 A stmt is considered "relevant for vectorization" if:
5027 - it has uses outside the loop.
5028 - it has vdefs (it alters memory).
 5029	   - it is a control stmt in the loop (other than the exit condition).
5030
5031 CHECKME: what other side effects would the vectorizer allow? */
5032
5033static bool
5034vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5035{
5036 v_may_def_optype v_may_defs;
5037 v_must_def_optype v_must_defs;
5038 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5039 int i;
5040 dataflow_t df;
5041 int num_uses;
5042
5043 /* cond stmt other than loop exit cond. */
5044 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5045 return true;
5046
5047 /* changing memory. */
5048 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5049 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5050 if (v_may_defs || v_must_defs)
5051 {
5052 if (vect_debug_details (NULL))
5053 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5054 return true;
5055 }
5056
5057 /* uses outside the loop. */
5058 df = get_immediate_uses (stmt);
5059 num_uses = num_immediate_uses (df);
5060 for (i = 0; i < num_uses; i++)
5061 {
5062 tree use = immediate_use (df, i);
5063 basic_block bb = bb_for_stmt (use);
5064 if (!flow_bb_inside_loop_p (loop, bb))
5065 {
5066 if (vect_debug_details (NULL))
5067 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5068 return true;
5069 }
5070 }
5071
5072 return false;
5073}
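
/* Illustrative example (hypothetical code):

     for (i = 0; i < N; i++){
       t = b[i] + 1;         # no vdef, no use outside the loop: not relevant
       a[i] = t;             # has a vdef (stores to memory): relevant
       last = b[i];          # 'last' is read after the loop: relevant
     }
     ... = last;

   The stmt defining 't' is not relevant by itself, but it is marked later by
   vect_mark_stmts_to_be_vectorized because the relevant store uses 't'.  */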
5074
5075
5076/* Function vect_mark_stmts_to_be_vectorized.
5077
5078 Not all stmts in the loop need to be vectorized. For example:
5079
5080 for i...
5081 for j...
5082 1. T0 = i + j
5083 2. T1 = a[T0]
5084
5085 3. j = j + 1
5086
5087 Stmt 1 and 3 do not need to be vectorized, because loop control and
5088 addressing of vectorized data-refs are handled differently.
5089
5090 This pass detects such stmts. */
5091
5092static bool
5093vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5094{
5095 varray_type worklist;
5096 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5097 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5098 unsigned int nbbs = loop->num_nodes;
5099 block_stmt_iterator si;
5100 tree stmt;
5101 stmt_ann_t ann;
5102 unsigned int i;
5103 int j;
5104 use_optype use_ops;
5105 stmt_vec_info stmt_info;
5106
5107 if (vect_debug_details (NULL))
5108 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5109
5110 VARRAY_TREE_INIT (worklist, 64, "work list");
5111
5112 /* 1. Init worklist. */
5113
5114 for (i = 0; i < nbbs; i++)
5115 {
5116 basic_block bb = bbs[i];
5117 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5118 {
5119 stmt = bsi_stmt (si);
5120
5121 if (vect_debug_details (NULL))
5122 {
5123 fprintf (dump_file, "init: stmt relevant? ");
5124 print_generic_expr (dump_file, stmt, TDF_SLIM);
5125 }
5126
5127 stmt_info = vinfo_for_stmt (stmt);
5128 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5129
5130 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5131 vect_mark_relevant (worklist, stmt);
5132 }
5133 }
5134
5135
5136 /* 2. Process_worklist */
5137
5138 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5139 {
5140 stmt = VARRAY_TOP_TREE (worklist);
5141 VARRAY_POP (worklist);
5142
5143 if (vect_debug_details (NULL))
5144 {
5145 fprintf (dump_file, "worklist: examine stmt: ");
5146 print_generic_expr (dump_file, stmt, TDF_SLIM);
5147 }
5148
5149 /* Examine the USES in this statement. Mark all the statements which
5150 feed this statement's uses as "relevant", unless the USE is used as
5151 an array index. */
5152
5153 if (TREE_CODE (stmt) == PHI_NODE)
5154 {
5155 /* follow the def-use chain inside the loop. */
5156 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5157 {
5158 tree arg = PHI_ARG_DEF (stmt, j);
5159 tree def_stmt = NULL_TREE;
5160 basic_block bb;
5161 if (!vect_is_simple_use (arg, loop, &def_stmt))
5162 {
5163 if (vect_debug_details (NULL))
5164 fprintf (dump_file, "worklist: unsupported use.");
5165 varray_clear (worklist);
5166 return false;
5167 }
5168 if (!def_stmt)
5169 continue;
5170
5171 if (vect_debug_details (NULL))
5172 {
5173 fprintf (dump_file, "worklist: def_stmt: ");
5174 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5175 }
5176
5177 bb = bb_for_stmt (def_stmt);
5178 if (flow_bb_inside_loop_p (loop, bb))
5179 vect_mark_relevant (worklist, def_stmt);
5180 }
5181 }
5182
5183 ann = stmt_ann (stmt);
5184 use_ops = USE_OPS (ann);
5185
5186 for (i = 0; i < NUM_USES (use_ops); i++)
5187 {
5188 tree use = USE_OP (use_ops, i);
5189
5190 /* We are only interested in uses that need to be vectorized. Uses
5191 that are used for address computation are not considered relevant. */
5193 if (exist_non_indexing_operands_for_use_p (use, stmt))
5194 {
5195 tree def_stmt = NULL_TREE;
5196 basic_block bb;
5197 if (!vect_is_simple_use (use, loop, &def_stmt))
5198 {
5199 if (vect_debug_details (NULL))
5200 fprintf (dump_file, "worklist: unsupported use.");
5201 varray_clear (worklist);
5202 return false;
5203 }
5204
5205 if (!def_stmt)
5206 continue;
5207
5208 if (vect_debug_details (NULL))
5209 {
5210 fprintf (dump_file, "worklist: examine use %d: ", i);
5211 print_generic_expr (dump_file, use, TDF_SLIM);
5212 }
5213
5214 bb = bb_for_stmt (def_stmt);
5215 if (flow_bb_inside_loop_p (loop, bb))
5216 vect_mark_relevant (worklist, def_stmt);
5217 }
5218 }
5219 } /* while worklist */
5220
5221 varray_clear (worklist);
5222 return true;
5223}
5224
5225
5226/* Function vect_analyze_loop_with_symbolic_num_of_iters.
5227
5228 In case the number of iterations that LOOP iterates is unknown at compile
5229 time, an epilog loop will be generated, and the loop induction variables
5230 (IVs) will be "advanced" to the value they are supposed to take just before
5231 the epilog loop. Here we check that the access function of the loop IVs
5232 and the expression that represents the loop bound are simple enough.
5233 These restrictions will be relaxed in the future. */
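/* Illustration (editor's sketch, not from the original sources): for

     for (i = 0; i < n; i++)      with n unknown at compile time
       a[i] = b[i] + c[i];

   and a vectorization factor VF, the vectorized loop executes n/VF
   iterations and an epilog loop handles the remaining n%VF iterations.
   The IV i has the simple evolution {0, +, 1}_loop, so it can be advanced
   to (n/VF)*VF just before the epilog.  IVs whose evolution part is itself
   a chrec (polynomial of degree >= 2) are rejected below.  */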
5234
5235static bool
5236vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5237 struct loop *loop)
5238{
5239 basic_block bb = loop->header;
5240 tree phi;
5241
5242 if (vect_debug_details (NULL))
5243 fprintf (dump_file,
5244 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5245
5246 if (!niters)
5247 {
5248 if (vect_debug_details (NULL))
5249 fprintf (dump_file, "niters is NULL pointer.");
5250 return false;
5251 }
5252
5253 if (chrec_contains_undetermined (niters))
5254 {
5255 if (vect_debug_details (NULL))
5256 fprintf (dump_file, "number of iterations cannot be determined.");
5257 return false;
5258 }
5259
5260 if (vect_debug_details (NULL))
5261 {
5262 fprintf (dump_file, "Symbolic number of iterations is ");
5263 print_generic_expr (dump_file, niters, TDF_DETAILS);
5264 }
5265
5266 /* Analyze phi functions of the loop header. */
5267
5268 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
5269 {
5270 tree access_fn = NULL;
5271 tree evolution_part;
5272
5273 if (vect_debug_details (NULL))
5274 {
5275 fprintf (dump_file, "Analyze phi: ");
5276 print_generic_expr (dump_file, phi, TDF_SLIM);
5277 }
5278
5279 /* Skip virtual phis. The data dependences that are associated with
5280 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5281
5282 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5283 {
5284 if (vect_debug_details (NULL))
5285 fprintf (dump_file, "virtual phi. skip.");
5286 continue;
5287 }
5288
5289 /* Analyze the evolution function. */
5290
5291 access_fn = instantiate_parameters
5292 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5293
5294 if (!access_fn)
5295 {
5296 if (vect_debug_details (NULL))
5297 fprintf (dump_file, "No Access function.");
5298 return false;
5299 }
5300
5301 if (vect_debug_details (NULL))
5302 {
5303 fprintf (dump_file, "Access function of PHI: ");
5304 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5305 }
5306
5307 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5308
5309 if (evolution_part == NULL_TREE)
5310 return false;
5311
5312 /* FORNOW: We do not transform initial conditions of IVs
5313 whose evolution functions are polynomials of degree >= 2. */
5314
5315 if (tree_is_chrec (evolution_part))
5316 return false;
5317 }
5318
5319 return true;
5320}
5321
5322
5323/* Function vect_get_loop_niters.
5324
5325 Determine how many iterations the loop executes. */
5326
5327static tree
5328vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5329{
5330 tree niters;
5331
5332 if (vect_debug_details (NULL))
5333 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5334
5335 niters = number_of_iterations_in_loop (loop);
5336
5337 if (niters != NULL_TREE
5338 && niters != chrec_dont_know)
5339 {
5340 *number_of_iterations = niters;
5341
5342 if (vect_debug_details (NULL))
5343 {
5344 fprintf (dump_file, "==> get_loop_niters:");
5345 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5346 }
5347 }
5348
5349 return get_loop_exit_condition (loop);
5350}
5351
5352
5353/* Function vect_analyze_loop_form.
5354
5355 Verify the following restrictions (some may be relaxed in the future):
5356 - it's an inner-most loop
5357 - number of BBs = 2 (which are the loop header and the latch)
5358 - the loop has a pre-header
5359 - the loop has a single entry and exit
5360 - the loop exit condition is simple enough, and the number of iterations
5361 can be analyzed (a countable loop). */
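/* Illustration (editor's sketch, not part of the original sources):
   the accepted form corresponds to a rotated (do-while style) loop:

     pre-header:
       ...
     header:                       <-- all executable stmts live here
       i_1 = PHI <0 (pre-header), i_2 (latch)>
       ...
       i_2 = i_1 + 1;
       if (i_2 < n) goto latch; else goto exit;
     latch:                        <-- empty
       goto header;

   i.e. exactly two basic blocks, a single entry and a single exit.
   Nested loops, multiple exits, or stmts in the latch are rejected.  */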
5362
5363static loop_vec_info
5364vect_analyze_loop_form (struct loop *loop)
5365{
5366 loop_vec_info loop_vinfo;
5367 tree loop_cond;
5368 tree number_of_iterations = NULL;
5369
5370 if (vect_debug_details (loop))
5371 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5372
5373 if (loop->inner
5374 || !loop->single_exit
5375 || loop->num_nodes != 2)
5376 {
5377 if (vect_debug_stats (loop) || vect_debug_details (loop))
5378 {
5379 fprintf (dump_file, "not vectorized: bad loop form. ");
5380 if (loop->inner)
5381 fprintf (dump_file, "nested loop.");
5382 else if (!loop->single_exit)
5383 fprintf (dump_file, "multiple exits.");
5384 else if (loop->num_nodes != 2)
5385 fprintf (dump_file, "too many BBs in loop.");
5386 }
5387
5388 return NULL;
5389 }
5390
5391 /* We assume that the loop exit condition is at the end of the loop, i.e.,
5392 that the loop is represented as a do-while (with a proper if-guard
5393 before the loop if needed), where the loop header contains all the
5394 executable statements, and the latch is empty. */
5395 if (!empty_block_p (loop->latch))
5396 {
5397 if (vect_debug_stats (loop) || vect_debug_details (loop))
5398 fprintf (dump_file, "not vectorized: unexpected loop form.");
5399 return NULL;
5400 }
5401
5402 if (empty_block_p (loop->header))
5403 {
5404 if (vect_debug_stats (loop) || vect_debug_details (loop))
5405 fprintf (dump_file, "not vectorized: empty loop.");
5406 return NULL;
5407 }
5408
5409 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5410 if (!loop_cond)
5411 {
5412 if (vect_debug_stats (loop) || vect_debug_details (loop))
5413 fprintf (dump_file, "not vectorized: complicated exit condition.");
5414 return NULL;
5415 }
5416
5417 if (!number_of_iterations)
5418 {
5419 if (vect_debug_stats (loop) || vect_debug_details (loop))
5420 fprintf (dump_file,
5421 "not vectorized: number of iterations cannot be computed.");
5422 return NULL;
5423 }
5424
5425 loop_vinfo = new_loop_vec_info (loop);
5426 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5427 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5428 {
5429 if (vect_debug_stats (loop) || vect_debug_details (loop))
5430 fprintf (dump_file, "loop bound unknown.");
5431
5432 /* Unknown loop bound. */
5433 if (!vect_analyze_loop_with_symbolic_num_of_iters
5434 (number_of_iterations, loop))
5435 {
5436 if (vect_debug_stats (loop) || vect_debug_details (loop))
5437 fprintf (dump_file,
5438 "not vectorized: can't determine loop bound.");
5439 return NULL;
5440 }
5441 else
5442 {
5443 /* We need only one loop entry for unknown loop bound support. */
5444 if (loop->num_entries != 1 || !loop->pre_header)
5445 {
5446 if (vect_debug_stats (loop) || vect_debug_details (loop))
5447 fprintf (dump_file,
5448 "not vectorized: more than one loop entry.");
5449 return NULL;
5450 }
5451 }
5452 }
5453 else
5454 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5455 {
5456 if (vect_debug_stats (loop) || vect_debug_details (loop))
5457 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5458 return NULL;
5459 }
5460
5461 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5462
5463 return loop_vinfo;
5464}
5465
5466
5467/* Function vect_analyze_loop.
5468
5469 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5470 for it. The different analyses will record information in the
5471 loop_vec_info struct. */
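/* Outline of the analyses driven below (editor's summary):

     vect_analyze_loop_form              loop shape; countable niters
     vect_analyze_data_refs              collect array/pointer data-refs
     vect_mark_stmts_to_be_vectorized    relevance data-flow
     vect_analyze_scalar_cycles          cross-iteration scalar cycles
     vect_analyze_data_ref_dependences   data dependences
     vect_analyze_data_ref_accesses      access patterns (consecutive)
     vect_analyze_data_refs_alignment    alignment of data-refs
     vect_analyze_operations             target support for each stmt

   The first analysis that fails aborts the whole process and the
   loop_vec_info is destroyed.  */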
5472
5473static loop_vec_info
5474vect_analyze_loop (struct loop *loop)
5475{
5476 bool ok;
5477 loop_vec_info loop_vinfo;
5478
5479 if (vect_debug_details (NULL))
5480 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5481
5482 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
5483
5484 loop_vinfo = vect_analyze_loop_form (loop);
5485 if (!loop_vinfo)
5486 {
5487 if (vect_debug_details (loop))
5488 fprintf (dump_file, "bad loop form.");
5489 return NULL;
5490 }
5491
5492 /* Find all data references in the loop (which correspond to vdefs/vuses)
5493 and analyze their evolution in the loop.
5494
5495 FORNOW: Handle only simple array references whose
5496 alignment can be forced, and aligned pointer-references. */
5497
5498 ok = vect_analyze_data_refs (loop_vinfo);
5499 if (!ok)
5500 {
5501 if (vect_debug_details (loop))
5502 fprintf (dump_file, "bad data references.");
5503 destroy_loop_vec_info (loop_vinfo);
5504 return NULL;
5505 }
5506
5507 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5508
5509 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5510 if (!ok)
5511 {
5512 if (vect_debug_details (loop))
5513 fprintf (dump_file, "not vectorized: unexpected pattern.");
5516 destroy_loop_vec_info (loop_vinfo);
5517 return NULL;
5518 }
5519
5520 /* Check that all cross-iteration scalar data-flow cycles are OK.
5521 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5522
5523 ok = vect_analyze_scalar_cycles (loop_vinfo);
5524 if (!ok)
5525 {
5526 if (vect_debug_details (loop))
5527 fprintf (dump_file, "bad scalar cycle.");
5528 destroy_loop_vec_info (loop_vinfo);
5529 return NULL;
5530 }
5531
5532 /* Analyze data dependences between the data-refs in the loop.
5533 FORNOW: fail at the first data dependence that we encounter. */
5534
5535 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5536 if (!ok)
5537 {
5538 if (vect_debug_details (loop))
5539 fprintf (dump_file, "bad data dependence.");
5540 destroy_loop_vec_info (loop_vinfo);
5541 return NULL;
5542 }
5543
5544 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5545 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5546
5547 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5548 if (!ok)
5549 {
5550 if (vect_debug_details (loop))
5551 fprintf (dump_file, "bad data access.");
5552 destroy_loop_vec_info (loop_vinfo);
5553 return NULL;
5554 }
5555
5556 /* Analyze the alignment of the data-refs in the loop.
5557 FORNOW: Only aligned accesses are handled. */
5558
5559 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5560 if (!ok)
5561 {
5562 if (vect_debug_details (loop))
5563 fprintf (dump_file, "bad data alignment.");
5564 destroy_loop_vec_info (loop_vinfo);
5565 return NULL;
5566 }
5567
5568 /* Scan all the operations in the loop and make sure they are
5569 vectorizable. */
5570
5571 ok = vect_analyze_operations (loop_vinfo);
5572 if (!ok)
5573 {
5574 if (vect_debug_details (loop))
5575 fprintf (dump_file, "bad operation or unsupported loop bound.");
5576 destroy_loop_vec_info (loop_vinfo);
5577 return NULL;
5578 }
5579
5580 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5581
5582 return loop_vinfo;
5583}
5584
5585
5586/* Function need_imm_uses_for.
5587
5588 Return whether we ought to include information for 'var'
5589 when calculating immediate uses. For this pass we only want use
5590 information for non-virtual variables. */
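/* E.g. (editor's illustration): for a stmt "a[i_3] = x_5;", the scalar
   SSA names i_3 and x_5 are GIMPLE registers and get immediate-use
   information, whereas the virtual operands describing the store into
   'a' do not.  */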
5591
5592static bool
5593need_imm_uses_for (tree var)
5594{
5595 return is_gimple_reg (var);
5596}
5597
5598
5599/* Function vectorize_loops.
5600
5601 Entry Point to loop vectorization phase. */
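/* In outline (editor's summary of the code below): check that the target
   has vector units at all, compute immediate uses for real (non-virtual)
   operands, analyze each of the original loops with vect_analyze_loop,
   transform the vectorizable ones with vect_transform_loop, then release
   the per-loop info and rewrite into (loop-closed) SSA form as needed.  */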
5602
5603void
5604vectorize_loops (struct loops *loops)
5605{
5606 unsigned int i, loops_num;
5607 unsigned int num_vectorized_loops = 0;
5608
5609 /* Does the target support SIMD? */
5610 /* FORNOW: until more sophisticated machine modelling is in place. */
5611 if (!UNITS_PER_SIMD_WORD)
5612 {
5613 if (vect_debug_details (NULL))
5614 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5615 return;
5616 }
5617
5618 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5619
5620 /* ----------- Analyze loops. ----------- */
5621
5622 /* If some loop was duplicated, it gets a bigger number
5623 than all previously defined loops. This fact allows us to run
5624 only over the initial loops, skipping newly generated ones. */
5625 loops_num = loops->num;
5626 for (i = 1; i < loops_num; i++)
5627 {
5628 loop_vec_info loop_vinfo;
5629 struct loop *loop = loops->parray[i];
5630
5631 if (!loop)
5632 continue;
5633
5634 loop_vinfo = vect_analyze_loop (loop);
5635 loop->aux = loop_vinfo;
5636
5637 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5638 continue;
5639
5640 vect_transform_loop (loop_vinfo, loops);
5641 num_vectorized_loops++;
5642 }
5643
5644 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5645 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5646 num_vectorized_loops);
5647
5648 /* ----------- Finalize. ----------- */
5649
5650 free_df ();
5651 for (i = 1; i < loops_num; i++)
5652 {
5653 struct loop *loop = loops->parray[i];
5654 loop_vec_info loop_vinfo;
5655
5656 if (!loop)
5657 continue;
5658 loop_vinfo = loop->aux;
5659 destroy_loop_vec_info (loop_vinfo);
5660 loop->aux = NULL;
5661 }
5662
5663 rewrite_into_ssa (false);
5664 if (bitmap_first_set_bit (vars_to_rename) >= 0)
5665 {
5666 /* The rewrite of ssa names may cause violation of loop closed ssa
5667 form invariants. TODO -- avoid these rewrites completely.
5668 Information in virtual phi nodes is sufficient for it. */
5669 rewrite_into_loop_closed_ssa ();
5670 }
5672 bitmap_clear (vars_to_rename);
5673}