]>
Commit | Line | Data |
---|---|---|
79fe1b3b DN |
1 | /* Loop Vectorization |
2 | Copyright (C) 2003, 2004 Free Software Foundation, Inc. | |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | /* Loop Vectorization Pass. | |
23 | ||
24 | This pass tries to vectorize loops. This first implementation focuses on | |
25 | simple inner-most loops, with no conditional control flow, and a set of | |
26 | simple operations which vector form can be expressed using existing | |
27 | tree codes (PLUS, MULT etc). | |
28 | ||
29 | For example, the vectorizer transforms the following simple loop: | |
30 | ||
31 | short a[N]; short b[N]; short c[N]; int i; | |
32 | ||
33 | for (i=0; i<N; i++){ | |
34 | a[i] = b[i] + c[i]; | |
35 | } | |
36 | ||
37 | as if it was manually vectorized by rewriting the source code into: | |
38 | ||
39 | typedef int __attribute__((mode(V8HI))) v8hi; | |
40 | short a[N]; short b[N]; short c[N]; int i; | |
41 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; | |
42 | v8hi va, vb, vc; | |
43 | ||
44 | for (i=0; i<N/8; i++){ | |
45 | vb = pb[i]; | |
46 | vc = pc[i]; | |
47 | va = vb + vc; | |
48 | pa[i] = va; | |
49 | } | |
50 | ||
51 | The main entry to this pass is vectorize_loops(), in which | |
52 | the vectorizer applies a set of analyses on a given set of loops, | |
53 | followed by the actual vectorization transformation for the loops that | |
54 | had successfully passed the analysis phase. | |
55 | ||
56 | Throughout this pass we make a distinction between two types of | |
57 | data: scalars (which are represented by SSA_NAMES), and memory references | |
58 | ("data-refs"). These two types of data require different handling both | |
59 | during analysis and transformation. The types of data-refs that the | |
60 | vectorizer currently supports are ARRAY_REFS that are one dimensional | |
61 | arrays which base is an array DECL (not a pointer), and INDIRECT_REFS | |
62 | through pointers; both array and pointer accesses are required to have a | |
63 | simple (consecutive) access pattern. | |
64 | ||
65 | Analysis phase: | |
66 | =============== | |
67 | The driver for the analysis phase is vect_analyze_loop_nest(). | |
68 | It applies a set of analyses, some of which rely on the scalar evolution | |
69 | analyzer (scev) developed by Sebastian Pop. | |
70 | ||
71 | During the analysis phase the vectorizer records some information | |
72 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the | |
73 | loop, as well as general information about the loop as a whole, which is | |
74 | recorded in a "loop_vec_info" struct attached to each loop. | |
75 | ||
76 | Transformation phase: | |
77 | ===================== | |
78 | The loop transformation phase scans all the stmts in the loop, and | |
79 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in | |
80 | the loop that needs to be vectorized. It insert the vector code sequence | |
81 | just before the scalar stmt S, and records a pointer to the vector code | |
82 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct | |
83 | attached to S). This pointer will be used for the vectorization of following | |
84 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; | |
85 | otherwise, we rely on dead code elimination for removing it. | |
86 | ||
87 | For example, say stmt S1 was vectorized into stmt VS1: | |
88 | ||
89 | VS1: vb = px[i]; | |
90 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
91 | S2: a = b; | |
92 | ||
93 | To vectorize stmt S2, the vectorizer first finds the stmt that defines | |
94 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the | |
95 | vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The | |
96 | resulting sequence would be: | |
97 | ||
98 | VS1: vb = px[i]; | |
99 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
100 | VS2: va = vb; | |
101 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 | |
102 | ||
103 | Operands that are not SSA_NAMEs, are data-refs that appear in | |
104 | load/store operations (like 'x[i]' in S1), and are handled differently. | |
105 | ||
106 | Target modeling: | |
107 | ================= | |
108 | Currently the only target specific information that is used is the | |
109 | size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can | |
110 | support different sizes of vectors, for now will need to specify one value | |
111 | for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future. | |
112 | ||
113 | Since we only vectorize operations which vector form can be | |
114 | expressed using existing tree codes, to verify that an operation is | |
115 | supported, the vectorizer checks the relevant optab at the relevant | |
116 | machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If | |
117 | the value found is CODE_FOR_nothing, then there's no target support, and | |
118 | we can't vectorize the stmt. | |
119 | ||
120 | For additional information on this project see: | |
121 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html | |
122 | */ | |
123 | ||
124 | #include "config.h" | |
125 | #include "system.h" | |
126 | #include "coretypes.h" | |
127 | #include "tm.h" | |
128 | #include "errors.h" | |
129 | #include "ggc.h" | |
130 | #include "tree.h" | |
131 | #include "target.h" | |
132 | ||
133 | #include "rtl.h" | |
134 | #include "basic-block.h" | |
135 | #include "diagnostic.h" | |
136 | #include "tree-flow.h" | |
137 | #include "tree-dump.h" | |
138 | #include "timevar.h" | |
139 | #include "cfgloop.h" | |
140 | #include "cfglayout.h" | |
141 | #include "expr.h" | |
142 | #include "optabs.h" | |
143 | #include "tree-chrec.h" | |
144 | #include "tree-data-ref.h" | |
145 | #include "tree-scalar-evolution.h" | |
146 | #include "tree-vectorizer.h" | |
147 | #include "tree-pass.h" | |
148 | ||
149 | /* Main analysis functions. */ | |
150 | static loop_vec_info vect_analyze_loop (struct loop *); | |
151 | static loop_vec_info vect_analyze_loop_form (struct loop *); | |
152 | static bool vect_analyze_data_refs (loop_vec_info); | |
153 | static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | |
154 | static bool vect_analyze_scalar_cycles (loop_vec_info); | |
155 | static bool vect_analyze_data_ref_accesses (loop_vec_info); | |
156 | static bool vect_analyze_data_refs_alignment (loop_vec_info); | |
157 | static void vect_compute_data_refs_alignment (loop_vec_info); | |
158 | static bool vect_analyze_operations (loop_vec_info); | |
159 | ||
160 | /* Main code transformation functions. */ | |
161 | static void vect_transform_loop (loop_vec_info, struct loops *); | |
162 | static void vect_transform_loop_bound (loop_vec_info); | |
163 | static bool vect_transform_stmt (tree, block_stmt_iterator *); | |
164 | static bool vectorizable_load (tree, block_stmt_iterator *, tree *); | |
165 | static bool vectorizable_store (tree, block_stmt_iterator *, tree *); | |
166 | static bool vectorizable_operation (tree, block_stmt_iterator *, tree *); | |
167 | static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); | |
168 | static void vect_align_data_ref (tree); | |
169 | static void vect_enhance_data_refs_alignment (loop_vec_info); | |
170 | ||
171 | /* Utility functions for the analyses. */ | |
172 | static bool vect_is_simple_use (tree , struct loop *, tree *); | |
173 | static bool exist_non_indexing_operands_for_use_p (tree, tree); | |
174 | static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); | |
175 | static void vect_mark_relevant (varray_type, tree); | |
176 | static bool vect_stmt_relevant_p (tree, loop_vec_info); | |
177 | static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *); | |
178 | static void vect_compute_data_ref_alignment | |
179 | (struct data_reference *, loop_vec_info); | |
180 | static bool vect_analyze_data_ref_access (struct data_reference *); | |
181 | static bool vect_get_first_index (tree, tree *); | |
182 | static bool vect_can_force_dr_alignment_p (tree, unsigned int); | |
183 | static tree vect_get_base_decl_and_bit_offset (tree, tree *); | |
184 | static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool); | |
185 | ||
186 | /* Utility functions for the code transformation. */ | |
187 | static tree vect_create_destination_var (tree, tree); | |
188 | static tree vect_create_data_ref (tree, block_stmt_iterator *); | |
189 | static tree vect_create_index_for_array_ref (tree, block_stmt_iterator *); | |
190 | static tree get_vectype_for_scalar_type (tree); | |
191 | static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | |
192 | static tree vect_get_vec_def_for_operand (tree, tree); | |
193 | static tree vect_init_vector (tree, tree); | |
194 | static void vect_finish_stmt_generation | |
195 | (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); | |
196 | ||
197 | /* Utilities for creation and deletion of vec_info structs. */ | |
198 | loop_vec_info new_loop_vec_info (struct loop *loop); | |
199 | void destroy_loop_vec_info (loop_vec_info); | |
200 | stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop); | |
201 | ||
202 | static bool vect_debug_stats (struct loop *loop); | |
203 | static bool vect_debug_details (struct loop *loop); | |
204 | ||
205 | ||
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT, which is
   a stmt inside LOOP.  The struct records per-stmt information gathered
   during the analysis phase (see file header).  The caller is
   responsible for attaching the result to the stmt's annotation and for
   eventually freeing it (see destroy_loop_vec_info).  */

stmt_vec_info
new_stmt_vec_info (tree stmt, struct loop *loop)
{
  stmt_vec_info res;
  /* xcalloc zero-fills, but every field is still set explicitly below
     so the initial state does not silently depend on the allocator.  */
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;  /* Not yet classified.  */
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP (res) = loop;
  STMT_VINFO_RELEVANT_P (res) = 0;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;   /* Filled in during transformation.  */
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_MEMTAG (res) = NULL;

  return res;
}
227 | ||
228 | ||
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  The returned struct
   owns the basic-block array obtained from get_loop_body; it is released
   by destroy_loop_vec_info.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));

  bbs = get_loop_body (loop);

  /* Create stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  tree stmt = bsi_stmt (si);
	  stmt_ann_t ann;

	  /* Bring the stmt's operand caches up to date before hanging
	     vectorizer info off its annotation.  */
	  get_stmt_operands (stmt);
	  ann = stmt_ann (stmt);
	  set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
	}
    }

  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;            /* RES takes ownership of BBS.  */
  LOOP_VINFO_EXIT_COND (res) = NULL;
  LOOP_VINFO_NITERS (res) = -1;          /* -1: iteration count unknown.  */
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;      /* Determined during analysis.  */
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
			   "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
			   "loop_read_datarefs");
  return res;
}
273 | ||
274 | ||
/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all
   the stmts in the loop.  Safe to call with a NULL LOOP_VINFO.  Clears
   each stmt annotation's info pointer so no dangling reference remains
   after the per-stmt structs are freed.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  block_stmt_iterator si;
  int j;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  /* Free the stmt_vec_info of every stmt in the loop, and detach it
     from the stmt's annotation before it becomes a dangling pointer.  */
  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  tree stmt = bsi_stmt (si);
	  stmt_ann_t ann = stmt_ann (stmt);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  free (stmt_info);
	  set_stmt_info (ann, NULL);
	}
    }

  /* Release the resources owned by the loop_vec_info itself (the bbs
     array allocated by get_loop_body, and the two dataref varrays),
     then the struct itself.  */
  free (LOOP_VINFO_BBS (loop_vinfo));
  varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
  varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));

  free (loop_vinfo);
}
316 | ||
317 | ||
318 | /* Function debug_loop_stats. | |
319 | ||
320 | For vectorization statistics dumps. */ | |
321 | ||
322 | static bool | |
323 | vect_debug_stats (struct loop *loop) | |
324 | { | |
325 | basic_block bb; | |
326 | block_stmt_iterator si; | |
327 | tree node = NULL_TREE; | |
328 | ||
329 | if (!dump_file || !(dump_flags & TDF_STATS)) | |
330 | return false; | |
331 | ||
332 | if (!loop) | |
333 | { | |
334 | fprintf (dump_file, "\n"); | |
335 | return true; | |
336 | } | |
337 | ||
338 | if (!loop->header) | |
339 | return false; | |
340 | ||
341 | bb = loop->header; | |
342 | ||
343 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
344 | { | |
345 | node = bsi_stmt (si); | |
346 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
347 | break; | |
348 | } | |
349 | ||
350 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
351 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
352 | { | |
353 | fprintf (dump_file, "\nloop at %s:%d: ", | |
354 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
355 | return true; | |
356 | } | |
357 | ||
358 | return false; | |
359 | } | |
360 | ||
361 | ||
362 | /* Function debug_loop_details. | |
363 | ||
364 | For vectorization debug dumps. */ | |
365 | ||
366 | static bool | |
367 | vect_debug_details (struct loop *loop) | |
368 | { | |
369 | basic_block bb; | |
370 | block_stmt_iterator si; | |
371 | tree node = NULL_TREE; | |
372 | ||
373 | if (!dump_file || !(dump_flags & TDF_DETAILS)) | |
374 | return false; | |
375 | ||
376 | if (!loop) | |
377 | { | |
378 | fprintf (dump_file, "\n"); | |
379 | return true; | |
380 | } | |
381 | ||
382 | if (!loop->header) | |
383 | return false; | |
384 | ||
385 | bb = loop->header; | |
386 | ||
387 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
388 | { | |
389 | node = bsi_stmt (si); | |
390 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
391 | break; | |
392 | } | |
393 | ||
394 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
395 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
396 | { | |
397 | fprintf (dump_file, "\nloop at %s:%d: ", | |
398 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
399 | return true; | |
400 | } | |
401 | ||
402 | return false; | |
403 | } | |
404 | ||
79fe1b3b DN |
/* Function vect_get_base_decl_and_bit_offset

   Get the decl from which the data reference REF is based,
   and compute the OFFSET from it in bits on the way.
   Returns NULL_TREE if the base decl, or a constant bit offset from it,
   cannot be determined.

   NOTE(review): the VAR_DECL base case returns without touching
   *OFFSET, so the caller is expected to pass in an already-initialized
   offset accumulator — confirm at call sites.

   FORNOW: Handle only component-refs that consist of
   VAR_DECLs (no ARRAY_REF or INDIRECT_REF).  */

static tree
vect_get_base_decl_and_bit_offset (tree ref, tree *offset)
{
  tree decl;
  /* Base case: reached the underlying variable.  */
  if (TREE_CODE (ref) == VAR_DECL)
    return ref;

  if (TREE_CODE (ref) == COMPONENT_REF)
    {
      tree this_offset;
      tree oprnd0 = TREE_OPERAND (ref, 0);  /* The containing object.  */
      tree oprnd1 = TREE_OPERAND (ref, 1);  /* The FIELD_DECL.  */

      /* Bit offset of this field within its record; must be a
	 host-representable unsigned constant.  */
      this_offset = bit_position (oprnd1);
      if (!host_integerp (this_offset,1))
	return NULL_TREE;

      /* Recurse into the containing object, accumulating offsets.  */
      decl = vect_get_base_decl_and_bit_offset (oprnd0, offset);

      if (decl)
	{
	  *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);

	  /* Give up if the accumulated offset overflowed or is no
	     longer host-representable.  */
	  if (!host_integerp (*offset,1) || TREE_OVERFLOW (*offset))
	    return NULL_TREE;

	  if (vect_debug_details (NULL))
	    {
	      print_generic_expr (dump_file, ref, TDF_SLIM);
	      fprintf (dump_file, " --> total offset for ref: ");
	      print_generic_expr (dump_file, *offset, TDF_SLIM);
	    }
	}

      return decl;
    }

  /* TODO: extend to handle more cases.  */
  return NULL_TREE;
}
452 | ||
453 | ||
454 | /* Function vect_force_dr_alignment_p. | |
455 | ||
456 | Returns whether the alignment of a DECL can be forced to be aligned | |
457 | on ALIGNMENT bit boundary. */ | |
458 | ||
459 | static bool | |
460 | vect_can_force_dr_alignment_p (tree decl, unsigned int alignment) | |
461 | { | |
462 | if (TREE_CODE (decl) != VAR_DECL) | |
463 | return false; | |
464 | ||
465 | if (DECL_EXTERNAL (decl)) | |
466 | return false; | |
467 | ||
468 | if (TREE_STATIC (decl)) | |
469 | return (alignment <= MAX_OFILE_ALIGNMENT); | |
470 | else | |
7a8554ce DN |
471 | /* This is not 100% correct. The absolute correct stack alignment |
472 | is STACK_BOUNDARY. We're supposed to hope, but not assume, that | |
473 | PREFERRED_STACK_BOUNDARY is honored by all translation units. | |
474 | However, until someone implements forced stack alignment, SSE | |
475 | isn't really usable without this. */ | |
476 | return (alignment <= PREFERRED_STACK_BOUNDARY); | |
79fe1b3b DN |
477 | } |
478 | ||
479 | ||
480 | /* Function vect_get_new_vect_var. | |
481 | ||
482 | Returns a name for a new variable. The current naming scheme appends the | |
483 | prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to | |
484 | the name of vectorizer generated variables, and appends that to NAME if | |
485 | provided. */ | |
486 | ||
487 | static tree | |
488 | vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) | |
489 | { | |
490 | const char *prefix; | |
491 | int prefix_len; | |
492 | tree new_vect_var; | |
493 | ||
494 | if (var_kind == vect_simple_var) | |
495 | prefix = "vect_"; | |
496 | else | |
497 | prefix = "vect_p"; | |
498 | ||
499 | prefix_len = strlen (prefix); | |
500 | ||
501 | if (name) | |
502 | new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); | |
503 | else | |
504 | new_vect_var = create_tmp_var (type, prefix); | |
505 | ||
506 | return new_vect_var; | |
507 | } | |
508 | ||
509 | ||
/* Function vect_create_index_for_array_ref.

   Create (and return) an index variable, along with it's update chain in the
   loop.  This variable will be used to access a memory location in a vector
   operation.

   Input:
   STMT: The stmt that contains a memory data-ref.
   BSI: The block_stmt_iterator where STMT is.  Any new stmts created by this
	function can be added here, or in the loop pre-header.

   Output:
   Returns the SSA name of the index before the in-loop increment
   (INDX_BEFORE_INCR from create_iv).

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_index_for_array_ref (tree stmt, block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree expr = DR_REF (dr);
  tree access_fn;
  tree init, step;
  loop_vec_info loop_info = loop->aux;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_info);
  tree vf;                   /* Vectorization factor as a tree constant.  */
  tree array_first_index;    /* Lower bound of the array's index range.  */
  tree indx_before_incr, indx_after_incr;
  int loopnum = loop->num;
  bool ok;
#ifdef ENABLE_CHECKING
  varray_type access_fns = DR_ACCESS_FNS (dr);

  /* FORNOW: handling only one dimensional arrays.  */
  if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
    abort ();

  if (!vectorization_factor)
    abort ();
#endif

  /* The scalar access function must be a simple IV evolution
     {init, +, step}, and the array's first index must be known.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, true)
	&& vect_get_first_index (expr, &array_first_index);

#ifdef ENABLE_CHECKING
  if (!ok)
    abort ();

  /* FORNOW: Handling only constant 'init'.  */
  if (TREE_CODE (init) != INTEGER_CST)
    abort ();
#endif

  vf = build_int_cst (unsigned_type_node, vectorization_factor);

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "int vf = %d",vectorization_factor);
      fprintf (dump_file, ", vf:");
      print_generic_expr (dump_file, vf, TDF_SLIM);
      fprintf (dump_file, ", init:");
      print_generic_expr (dump_file, init, TDF_SLIM);
      fprintf (dump_file, ", array_first_index:");
      print_generic_expr (dump_file, array_first_index, TDF_SLIM);
    }

  /* Calculate the 'init' of the new index.
     init = (init - array_first_index) / vectorization_factor  */
  init = int_const_binop (TRUNC_DIV_EXPR,
	int_const_binop (MINUS_EXPR, init, array_first_index, 1),
	vf, 1);

  /* Calculate the 'step' of the new index.  FORNOW: always 1.  */
  step = size_one_node;

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "create iv for (");
      print_generic_expr (dump_file, init, TDF_SLIM);
      fprintf (dump_file, ", + ,");
      print_generic_expr (dump_file, step, TDF_SLIM);
      fprintf (dump_file, ")");
    }

  /* Materialize the IV: init in the preheader, increment in the loop.  */
  create_iv (init, step, NULL_TREE, loop, bsi, false,
	&indx_before_incr, &indx_after_incr);

  return indx_before_incr;
}
599 | ||
600 | ||
601 | /* Function get_vectype_for_scalar_type. | |
602 | ||
603 | Returns the vector type corresponding to SCALAR_TYPE as supported | |
604 | by the target. */ | |
605 | ||
606 | static tree | |
607 | get_vectype_for_scalar_type (tree scalar_type) | |
608 | { | |
609 | enum machine_mode inner_mode = TYPE_MODE (scalar_type); | |
610 | int nbytes = GET_MODE_SIZE (inner_mode); | |
611 | int nunits; | |
612 | ||
613 | if (nbytes == 0) | |
614 | return NULL_TREE; | |
615 | ||
616 | /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) | |
617 | is expected. */ | |
618 | nunits = UNITS_PER_SIMD_WORD / nbytes; | |
619 | ||
620 | return build_vector_type (scalar_type, nunits); | |
621 | } | |
622 | ||
623 | ||
/* Function vect_align_data_ref.

   Handle misalignment of a memory accesses.

   FORNOW: Can't handle misaligned accesses.
   Make sure that the dataref is aligned — aborts (internal error)
   otherwise, since the analysis phase should have rejected any loop
   containing a misaligned access.  */

static void
vect_align_data_ref (tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  /* FORNOW: can't handle misaligned accesses;
     all accesses expected to be aligned.  */
  if (!aligned_access_p (dr))
    abort ();
}
642 | ||
643 | ||
/* Function vect_create_data_ref.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt.

   Input:
   STMT: a stmt that references memory.  expected to be of the form
	 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   BSI: block_stmt_iterator where new stmts can be added.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the array base.
      For example, for vector of type V8HI:
      v8hi *p0;
      p0 = (v8hi *)&a;
   2. Create a data-reference based on the new vector pointer p0, and using
      a new index variable 'idx'.  Return the expression '(*p0)[idx]'.

   Side effects: inserts the pointer-initialization stmts on the loop
   preheader edge, and marks all variables aliased with the access (the
   may-aliases of the type-mem-tag) for SSA renaming.

   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
{
  tree new_base;
  tree data_ref;
  tree idx;
  tree vec_stmt;
  tree new_temp;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree addr_ref;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree array_type;
  tree base_addr = NULL_TREE;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe;                   /* Loop preheader edge, for new stmts.  */
  tree tag;                  /* Type memory tag, for aliasing.  */
  tree addr_expr;
  tree scalar_ptr_type;
  tree use;
  ssa_op_iter iter;

  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);

  addr_ref = DR_BASE_NAME (dr);

  /* Build 'array-of-vectype' so the final ref can be an ARRAY_REF
     indexed by the new IV; align it like the original base object.  */
  array_type = build_array_type (vectype, 0);
  TYPE_ALIGN (array_type) = TYPE_ALIGN (TREE_TYPE (addr_ref));
  vect_ptr_type = build_pointer_type (array_type);
  scalar_ptr_type = build_pointer_type (TREE_TYPE (addr_ref));

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
    }

  /*** create: vectype_array *p; ***/
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
		get_name (addr_ref));
  add_referenced_tmp_var (vect_ptr);

#ifdef ENABLE_CHECKING
  if (TREE_CODE (addr_ref) != VAR_DECL
      && TREE_CODE (addr_ref) != COMPONENT_REF
      && TREE_CODE (addr_ref) != SSA_NAME)
    abort ();
#endif

  if (vect_debug_details (NULL))
    {
      if (TREE_CODE (addr_ref) == VAR_DECL)
	fprintf (dump_file, "vectorizing an array ref: ");
      else if (TREE_CODE (addr_ref) == SSA_NAME)
	fprintf (dump_file, "vectorizing a pointer ref: ");
      else if (TREE_CODE (addr_ref) == COMPONENT_REF)
	fprintf (dump_file, "vectorizing a record ref: ");
      print_generic_expr (dump_file, addr_ref, TDF_SLIM);
    }

  /* Get base address: an SSA_NAME is already an address (pointer
     access); otherwise take the address of the decl/component.  */
  if (TREE_CODE (addr_ref) == SSA_NAME)
    base_addr = addr_ref;
  else
    base_addr = build_fold_addr_expr (addr_ref);

  /* Handle aliasing: give the new pointer the same type-mem-tag as the
     original access so alias analysis stays correct.  */
  tag = STMT_VINFO_MEMTAG (stmt_info);
#ifdef ENABLE_CHECKING
  if (!tag)
    abort ();
#endif
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag) */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter,
			     (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))
    {
      if (TREE_CODE (use) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }

  pe = loop_preheader_edge (loop);

  /*** create: p = (vectype *)&a; ***/

  /* addr_expr = &a */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
		get_name (addr_ref));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, base_addr);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_on_edge (pe, vec_stmt);

  /* vect_ptr = (vectype_array *)&a; */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_on_edge (pe, vec_stmt);

  /*** create data ref: '(*p)[idx]' ***/

  idx = vect_create_index_for_array_ref (stmt, bsi);

  new_base = build_fold_indirect_ref (new_temp);
  data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE);

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "created new data-ref: ");
      print_generic_expr (dump_file, data_ref, TDF_SLIM);
    }

  return data_ref;
}
785 | ||
786 | ||
787 | /* Function vect_create_destination_var. | |
788 | ||
789 | Create a new temporary of type VECTYPE. */ | |
790 | ||
791 | static tree | |
792 | vect_create_destination_var (tree scalar_dest, tree vectype) | |
793 | { | |
794 | tree vec_dest; | |
795 | const char *new_name; | |
796 | ||
797 | #ifdef ENABLE_CHECKING | |
798 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
799 | abort (); | |
800 | #endif | |
801 | ||
802 | new_name = get_name (scalar_dest); | |
803 | if (!new_name) | |
804 | new_name = "var_"; | |
805 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); | |
806 | add_referenced_tmp_var (vec_dest); | |
807 | ||
808 | return vec_dest; | |
809 | } | |
810 | ||
811 | ||
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will
   be used in the vectorization of STMT.  The init stmt is placed on the
   loop preheader edge, so the vector value is computed once before the
   loop rather than on every iteration.  */

static tree
vect_init_vector (tree stmt, tree vector_var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
  tree new_var;
  tree init_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  tree vec_oprnd;
  edge pe;
  tree new_temp;

  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
  add_referenced_tmp_var (new_var);

  /* Build 'new_var = vector_var' and rewrite the LHS into SSA form.  */
  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  TREE_OPERAND (init_stmt, 0) = new_temp;

  /* Hoist the initialization to the loop preheader.  */
  pe = loop_preheader_edge (loop);
  bsi_insert_on_edge (pe, init_stmt);

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "created new init_stmt: ");
      print_generic_expr (dump_file, init_stmt, TDF_SLIM);
    }

  /* Return the SSA def produced by the init stmt.  */
  vec_oprnd = TREE_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
849 | ||
850 | ||
851 | /* Function vect_get_vec_def_for_operand. | |
852 | ||
853 | OP is an operand in STMT. This function returns a (vector) def that will be | |
854 | used in the vectorized stmt for STMT. | |
855 | ||
856 | In the case that OP is an SSA_NAME which is defined in the loop, then | |
857 | STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. | |
858 | ||
859 | In case OP is an invariant or constant, a new stmt that creates a vector def | |
860 | needs to be introduced. */ | |
861 | ||
862 | static tree | |
863 | vect_get_vec_def_for_operand (tree op, tree stmt) | |
864 | { | |
865 | tree vec_oprnd; | |
866 | tree vec_stmt; | |
867 | tree def_stmt; | |
868 | stmt_vec_info def_stmt_info = NULL; | |
869 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
870 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
871 | int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
872 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
873 | basic_block bb; | |
874 | tree vec_inv; | |
875 | tree t = NULL_TREE; | |
876 | tree def; | |
877 | int i; | |
878 | ||
879 | if (vect_debug_details (NULL)) | |
880 | { | |
881 | fprintf (dump_file, "vect_get_vec_def_for_operand: "); | |
882 | print_generic_expr (dump_file, op, TDF_SLIM); | |
883 | } | |
884 | ||
885 | /** ===> Case 1: operand is a constant. **/ | |
886 | ||
887 | if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST) | |
888 | { | |
889 | /* Create 'vect_cst_ = {cst,cst,...,cst}' */ | |
890 | ||
891 | tree vec_cst; | |
892 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
893 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
894 | int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
895 | tree t = NULL_TREE; | |
896 | int i; | |
897 | ||
898 | /* Build a tree with vector elements. */ | |
899 | if (vect_debug_details (NULL)) | |
900 | fprintf (dump_file, "Create vector_cst. nunits = %d", nunits); | |
901 | ||
902 | for (i = nunits - 1; i >= 0; --i) | |
903 | { | |
904 | t = tree_cons (NULL_TREE, op, t); | |
905 | } | |
906 | vec_cst = build_vector (vectype, t); | |
907 | return vect_init_vector (stmt, vec_cst); | |
908 | } | |
909 | ||
910 | #ifdef ENABLE_CHECKING | |
911 | if (TREE_CODE (op) != SSA_NAME) | |
912 | abort (); | |
913 | #endif | |
914 | ||
915 | /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/ | |
916 | ||
917 | def_stmt = SSA_NAME_DEF_STMT (op); | |
918 | def_stmt_info = vinfo_for_stmt (def_stmt); | |
919 | ||
920 | if (vect_debug_details (NULL)) | |
921 | { | |
922 | fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: "); | |
923 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
924 | } | |
925 | ||
926 | ||
927 | /** ==> Case 2.1: operand is defined inside the loop. **/ | |
928 | ||
929 | if (def_stmt_info) | |
930 | { | |
931 | /* Get the def from the vectorized stmt. */ | |
932 | ||
933 | vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); | |
934 | #ifdef ENABLE_CHECKING | |
935 | if (!vec_stmt) | |
936 | abort (); | |
937 | #endif | |
938 | vec_oprnd = TREE_OPERAND (vec_stmt, 0); | |
939 | return vec_oprnd; | |
940 | } | |
941 | ||
942 | ||
943 | /** ==> Case 2.2: operand is defined by the loop-header phi-node - | |
944 | it is a reduction/induction. **/ | |
945 | ||
946 | bb = bb_for_stmt (def_stmt); | |
947 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
948 | { | |
949 | if (vect_debug_details (NULL)) | |
950 | fprintf (dump_file, "reduction/induction - unsupported."); | |
951 | abort (); /* FORNOW no support for reduction/induction. */ | |
952 | } | |
953 | ||
954 | ||
955 | /** ==> Case 2.3: operand is defined outside the loop - | |
956 | it is a loop invariant. */ | |
957 | ||
958 | switch (TREE_CODE (def_stmt)) | |
959 | { | |
960 | case PHI_NODE: | |
961 | def = PHI_RESULT (def_stmt); | |
962 | break; | |
963 | case MODIFY_EXPR: | |
964 | def = TREE_OPERAND (def_stmt, 0); | |
965 | break; | |
966 | case NOP_EXPR: | |
967 | def = TREE_OPERAND (def_stmt, 0); | |
968 | #ifdef ENABLE_CHECKING | |
969 | if (!IS_EMPTY_STMT (def_stmt)) | |
970 | abort (); | |
971 | #endif | |
972 | def = op; | |
973 | break; | |
974 | default: | |
975 | if (vect_debug_details (NULL)) | |
976 | { | |
977 | fprintf (dump_file, "unsupported defining stmt: "); | |
978 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
979 | } | |
980 | abort (); | |
981 | } | |
982 | ||
983 | /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */ | |
984 | ||
985 | if (vect_debug_details (NULL)) | |
986 | fprintf (dump_file, "Create vector_inv."); | |
987 | ||
988 | for (i = nunits - 1; i >= 0; --i) | |
989 | { | |
990 | t = tree_cons (NULL_TREE, def, t); | |
991 | } | |
992 | ||
993 | vec_inv = build_constructor (vectype, t); | |
994 | return vect_init_vector (stmt, vec_inv); | |
995 | } | |
996 | ||
997 | ||
998 | /* Function vect_finish_stmt_generation. | |
999 | ||
1000 | Insert a new stmt. */ | |
1001 | ||
1002 | static void | |
1003 | vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) | |
1004 | { | |
1005 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
1006 | ||
1007 | if (vect_debug_details (NULL)) | |
1008 | { | |
1009 | fprintf (dump_file, "add new stmt: "); | |
1010 | print_generic_expr (dump_file, vec_stmt, TDF_SLIM); | |
1011 | } | |
1012 | ||
1013 | /* Make sure bsi points to the stmt that is being vectorized. */ | |
1014 | ||
1015 | /* Assumption: any stmts created for the vectorization of smtmt S are | |
1016 | inserted before S. BSI may point to S or some new stmt before it. */ | |
1017 | ||
1018 | while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) | |
1019 | bsi_next (bsi); | |
1020 | #ifdef ENABLE_CHECKING | |
1021 | if (stmt != bsi_stmt (*bsi)) | |
1022 | abort (); | |
1023 | #endif | |
1024 | } | |
1025 | ||
1026 | ||
1027 | /* Function vectorizable_assignment. | |
1028 | ||
1029 | Check if STMT performs an assignment (copy) that can be vectorized. | |
1030 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1031 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1032 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1033 | ||
1034 | static bool | |
1035 | vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1036 | { | |
1037 | tree vec_dest; | |
1038 | tree scalar_dest; | |
1039 | tree op; | |
1040 | tree vec_oprnd; | |
1041 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1042 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1043 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1044 | tree new_temp; | |
1045 | ||
1046 | /* Is vectorizable assignment? */ | |
1047 | ||
1048 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1049 | return false; | |
1050 | ||
1051 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1052 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
1053 | return false; | |
1054 | ||
1055 | op = TREE_OPERAND (stmt, 1); | |
1056 | if (!vect_is_simple_use (op, loop, NULL)) | |
1057 | { | |
1058 | if (vect_debug_details (NULL)) | |
1059 | fprintf (dump_file, "use not simple."); | |
1060 | return false; | |
1061 | } | |
1062 | ||
1063 | if (!vec_stmt) /* transformation not required. */ | |
1064 | { | |
1065 | STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; | |
1066 | return true; | |
1067 | } | |
1068 | ||
1069 | /** Trasform. **/ | |
1070 | if (vect_debug_details (NULL)) | |
1071 | fprintf (dump_file, "transform assignment."); | |
1072 | ||
1073 | /* Handle def. */ | |
1074 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1075 | ||
1076 | /* Handle use. */ | |
1077 | op = TREE_OPERAND (stmt, 1); | |
1078 | vec_oprnd = vect_get_vec_def_for_operand (op, stmt); | |
1079 | ||
1080 | /* Arguments are ready. create the new vector stmt. */ | |
1081 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); | |
1082 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
1083 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
1084 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1085 | ||
1086 | return true; | |
1087 | } | |
1088 | ||
1089 | ||
1090 | /* Function vectorizable_operation. | |
1091 | ||
1092 | Check if STMT performs a binary or unary operation that can be vectorized. | |
1093 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1094 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1095 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1096 | ||
1097 | static bool | |
1098 | vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1099 | { | |
1100 | tree vec_dest; | |
1101 | tree scalar_dest; | |
1102 | tree operation; | |
1103 | tree op0, op1 = NULL; | |
1104 | tree vec_oprnd0, vec_oprnd1=NULL; | |
1105 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1106 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1107 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1108 | int i; | |
1109 | enum tree_code code; | |
1110 | enum machine_mode vec_mode; | |
1111 | tree new_temp; | |
1112 | int op_type; | |
1113 | tree op; | |
1114 | optab optab; | |
1115 | ||
1116 | /* Is STMT a vectorizable binary/unary operation? */ | |
1117 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1118 | return false; | |
1119 | ||
1120 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) | |
1121 | return false; | |
1122 | ||
1123 | operation = TREE_OPERAND (stmt, 1); | |
1124 | code = TREE_CODE (operation); | |
1125 | optab = optab_for_tree_code (code, vectype); | |
1126 | ||
1127 | /* Support only unary or binary operations. */ | |
1128 | op_type = TREE_CODE_LENGTH (code); | |
1129 | if (op_type != unary_op && op_type != binary_op) | |
1130 | { | |
1131 | if (vect_debug_details (NULL)) | |
1132 | fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type); | |
1133 | return false; | |
1134 | } | |
1135 | ||
1136 | for (i = 0; i < op_type; i++) | |
1137 | { | |
1138 | op = TREE_OPERAND (operation, i); | |
1139 | if (!vect_is_simple_use (op, loop, NULL)) | |
1140 | { | |
1141 | if (vect_debug_details (NULL)) | |
1142 | fprintf (dump_file, "use not simple."); | |
1143 | return false; | |
1144 | } | |
1145 | } | |
1146 | ||
1147 | /* Supportable by target? */ | |
1148 | if (!optab) | |
1149 | { | |
1150 | if (vect_debug_details (NULL)) | |
1151 | fprintf (dump_file, "no optab."); | |
1152 | return false; | |
1153 | } | |
1154 | vec_mode = TYPE_MODE (vectype); | |
1155 | if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) | |
1156 | { | |
1157 | if (vect_debug_details (NULL)) | |
1158 | fprintf (dump_file, "op not supported by target."); | |
1159 | return false; | |
1160 | } | |
1161 | ||
1162 | if (!vec_stmt) /* transformation not required. */ | |
1163 | { | |
1164 | STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; | |
1165 | return true; | |
1166 | } | |
1167 | ||
1168 | /** Trasform. **/ | |
1169 | ||
1170 | if (vect_debug_details (NULL)) | |
1171 | fprintf (dump_file, "transform binary/unary operation."); | |
1172 | ||
1173 | /* Handle def. */ | |
1174 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1175 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1176 | ||
1177 | /* Handle uses. */ | |
1178 | op0 = TREE_OPERAND (operation, 0); | |
1179 | vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); | |
1180 | ||
1181 | if (op_type == binary_op) | |
1182 | { | |
1183 | op1 = TREE_OPERAND (operation, 1); | |
1184 | vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); | |
1185 | } | |
1186 | ||
1187 | /* Arguments are ready. create the new vector stmt. */ | |
1188 | ||
1189 | if (op_type == binary_op) | |
1190 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, | |
1191 | build2 (code, vectype, vec_oprnd0, vec_oprnd1)); | |
1192 | else | |
1193 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, | |
1194 | build1 (code, vectype, vec_oprnd0)); | |
1195 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
1196 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
1197 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1198 | ||
1199 | return true; | |
1200 | } | |
1201 | ||
1202 | ||
1203 | /* Function vectorizable_store. | |
1204 | ||
1205 | Check if STMT defines a non scalar data-ref (array/pointer/structure) that | |
1206 | can be vectorized. | |
1207 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1208 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1209 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1210 | ||
1211 | static bool | |
1212 | vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1213 | { | |
1214 | tree scalar_dest; | |
1215 | tree data_ref; | |
1216 | tree op; | |
1217 | tree vec_oprnd1; | |
1218 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1219 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1220 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1221 | enum machine_mode vec_mode; | |
1222 | ||
1223 | /* Is vectorizable store? */ | |
1224 | ||
1225 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1226 | return false; | |
1227 | ||
1228 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1229 | if (TREE_CODE (scalar_dest) != ARRAY_REF | |
1230 | && TREE_CODE (scalar_dest) != INDIRECT_REF) | |
1231 | return false; | |
1232 | ||
1233 | op = TREE_OPERAND (stmt, 1); | |
1234 | if (!vect_is_simple_use (op, loop, NULL)) | |
1235 | { | |
1236 | if (vect_debug_details (NULL)) | |
1237 | fprintf (dump_file, "use not simple."); | |
1238 | return false; | |
1239 | } | |
1240 | ||
1241 | vec_mode = TYPE_MODE (vectype); | |
1242 | /* FORNOW. In some cases can vectorize even if data-type not supported | |
1243 | (e.g. - array initialization with 0). */ | |
1244 | if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) | |
1245 | return false; | |
1246 | ||
1247 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
1248 | return false; | |
1249 | ||
1250 | if (!vec_stmt) /* transformation not required. */ | |
1251 | { | |
1252 | STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; | |
1253 | return true; | |
1254 | } | |
1255 | ||
1256 | /** Trasform. **/ | |
1257 | ||
1258 | if (vect_debug_details (NULL)) | |
1259 | fprintf (dump_file, "transform store"); | |
1260 | ||
1261 | /* Handle use - get the vectorized def from the defining stmt. */ | |
1262 | vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); | |
1263 | ||
1264 | /* Handle def. */ | |
1265 | data_ref = vect_create_data_ref (stmt, bsi); | |
1266 | ||
1267 | /* Arguments are ready. create the new vector stmt. */ | |
1268 | *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); | |
1269 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1270 | ||
1271 | return true; | |
1272 | } | |
1273 | ||
1274 | ||
1275 | /* vectorizable_load. | |
1276 | ||
1277 | Check if STMT reads a non scalar data-ref (array/pointer/structure) that | |
1278 | can be vectorized. | |
1279 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1280 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1281 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1282 | ||
1283 | static bool | |
1284 | vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1285 | { | |
1286 | tree scalar_dest; | |
1287 | tree vec_dest = NULL; | |
1288 | tree data_ref = NULL; | |
1289 | tree op; | |
1290 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1291 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1292 | tree new_temp; | |
1293 | enum machine_mode vec_mode; | |
1294 | ||
1295 | /* Is vectorizable load? */ | |
1296 | ||
1297 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1298 | return false; | |
1299 | ||
1300 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1301 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
1302 | return false; | |
1303 | ||
1304 | op = TREE_OPERAND (stmt, 1); | |
1305 | if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) | |
1306 | return false; | |
1307 | ||
1308 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
1309 | return false; | |
1310 | ||
1311 | vec_mode = TYPE_MODE (vectype); | |
1312 | /* FORNOW. In some cases can vectorize even if data-type not supported | |
1313 | (e.g. - data copies). */ | |
1314 | if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) | |
1315 | return false; | |
1316 | ||
1317 | if (!vec_stmt) /* transformation not required. */ | |
1318 | { | |
1319 | STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; | |
1320 | return true; | |
1321 | } | |
1322 | ||
1323 | /** Trasform. **/ | |
1324 | ||
1325 | if (vect_debug_details (NULL)) | |
1326 | fprintf (dump_file, "transform load."); | |
1327 | ||
1328 | /* Handle def. */ | |
1329 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1330 | ||
1331 | /* Handle use. */ | |
1332 | op = TREE_OPERAND (stmt, 1); | |
1333 | data_ref = vect_create_data_ref (stmt, bsi); | |
1334 | ||
1335 | /* Arguments are ready. create the new vector stmt. */ | |
1336 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
1337 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
1338 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
1339 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1340 | ||
1341 | return true; | |
1342 | } | |
1343 | ||
1344 | ||
1345 | /* Function vect_transform_stmt. | |
1346 | ||
1347 | Create a vectorized stmt to replace STMT, and insert it at BSI. */ | |
1348 | ||
1349 | static bool | |
1350 | vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) | |
1351 | { | |
1352 | bool is_store = false; | |
1353 | tree vec_stmt = NULL_TREE; | |
1354 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1355 | ||
1356 | switch (STMT_VINFO_TYPE (stmt_info)) | |
1357 | { | |
1358 | case op_vec_info_type: | |
1359 | if (!vectorizable_operation (stmt, bsi, &vec_stmt)) | |
1360 | abort (); | |
1361 | break; | |
1362 | ||
1363 | case assignment_vec_info_type: | |
1364 | if (!vectorizable_assignment (stmt, bsi, &vec_stmt)) | |
1365 | abort (); | |
1366 | break; | |
1367 | ||
1368 | case load_vec_info_type: | |
1369 | if (!vectorizable_load (stmt, bsi, &vec_stmt)) | |
1370 | abort (); | |
1371 | break; | |
1372 | ||
1373 | case store_vec_info_type: | |
1374 | if (!vectorizable_store (stmt, bsi, &vec_stmt)) | |
1375 | abort (); | |
1376 | is_store = true; | |
1377 | break; | |
1378 | default: | |
1379 | if (vect_debug_details (NULL)) | |
1380 | fprintf (dump_file, "stmt not supported."); | |
1381 | abort (); | |
1382 | } | |
1383 | ||
1384 | STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | |
1385 | ||
1386 | return is_store; | |
1387 | } | |
1388 | ||
1389 | ||
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop: introduce a fresh counter
   IV that runs 0,1,2,... and compare it against NITERS/VF (the number
   of vector iterations), replacing the original scalar exit test.
   FORNOW: requires a known iteration count that divides evenly by the
   vectorization factor (checked below).  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  tree cond;
  tree lb_type;

#ifdef ENABLE_CHECKING
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    abort ();
#endif
  old_N = LOOP_VINFO_NITERS (loop_vinfo);
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

#ifdef ENABLE_CHECKING
  /* FORNOW:
     assuming number-of-iterations divides by the vectorization factor.  */
  if (old_N % vf)
    abort ();
#endif

  /* The exit condition must be the last stmt in the exit block; the
     analysis phase recorded it in LOOP_VINFO_EXIT_COND.  */
  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
#ifdef ENABLE_CHECKING
  if (!orig_cond_expr)
    abort ();
#endif
  if (orig_cond_expr != bsi_stmt (loop_exit_bsi))
    abort ();

  /* Create the counter IV 0,1,2,... for the vectorized loop; the
     increment is inserted at LOOP_EXIT_BSI.  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  if (bsi_stmt (loop_exit_bsi) != orig_cond_expr)
    abort ();

  /* new loop exit test:  */
  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
  new_loop_bound = build_int_cst (lb_type, old_N/vf);

  /* Pick the comparison code so the new test preserves which edge
     (then/else) leaves the loop.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);

  /* Keep the original COND_EXPR's goto arms; only the condition
     changes.  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
1460 | ||
1461 | ||
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

static void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
#ifdef ENABLE_CHECKING
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
#endif

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  /* Exactly two predecessors: pred and pred->pred_next exist, but no
     third.  */
  if (!loop->header->pred->pred_next
      || loop->header->pred->pred_next->pred_next)
    abort ();

  /* Split the preheader edge so loop-invariant init stmts (inserted by
     vect_init_vector via bsi_insert_on_edge) land in a dedicated
     block.  */
  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Note: SI is advanced manually because stores are removed
	 in-place (bsi_remove already moves the iterator forward).  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;
#ifdef ENABLE_CHECKING
	  tree vectype;
#endif

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "------>vectorizing statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
#ifdef ENABLE_CHECKING
	  if (!stmt_info)
	    abort ();
#endif
	  /* Irrelevant stmts (loop control, indexing) are left as-is.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
#ifdef ENABLE_CHECKING
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  vectype = STMT_VINFO_VECTYPE (stmt_info);
	  if (GET_MODE_NUNITS (TYPE_MODE (vectype)) != vectorization_factor)
	    abort ();
#endif
	  /* -------- vectorize statement ------------ */
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* free the attached stmt_vec_info and remove the stmt.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  /* Replace the scalar exit test with one counting vector
     iterations.  */
  vect_transform_loop_bound (loop_vinfo);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
  if (vect_debug_stats (loop))
    fprintf (dump_file, "LOOP VECTORIZED.");
}
1560 | ||
1561 | ||
1562 | /* Function vect_is_simple_use. | |
1563 | ||
1564 | Input: | |
1565 | LOOP - the loop that is being vectorized. | |
1566 | OPERAND - operand of a stmt in LOOP. | |
1567 | DEF - the defining stmt in case OPERAND is an SSA_NAME. | |
1568 | ||
1569 | Returns whether a stmt with OPERAND can be vectorized. | |
1570 | Supportable operands are constants, loop invariants, and operands that are | |
1571 | defined by the current iteration of the loop. Unsupportable opernads are | |
1572 | those that are defined by a previous iteration of the loop (as is the case | |
1573 | in reduction/induction computations). */ | |
1574 | ||
1575 | static bool | |
1576 | vect_is_simple_use (tree operand, struct loop *loop, tree *def) | |
1577 | { | |
1578 | tree def_stmt; | |
1579 | basic_block bb; | |
1580 | ||
1581 | if (def) | |
1582 | *def = NULL_TREE; | |
1583 | ||
1584 | if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) | |
1585 | return true; | |
1586 | ||
1587 | if (TREE_CODE (operand) != SSA_NAME) | |
1588 | return false; | |
1589 | ||
1590 | def_stmt = SSA_NAME_DEF_STMT (operand); | |
1591 | if (def_stmt == NULL_TREE ) | |
1592 | { | |
1593 | if (vect_debug_details (NULL)) | |
1594 | fprintf (dump_file, "no def_stmt."); | |
1595 | return false; | |
1596 | } | |
1597 | ||
1598 | /* empty stmt is expected only in case of a function argument. | |
1599 | (Otherwise - we expect a phi_node or a modify_expr). */ | |
1600 | if (IS_EMPTY_STMT (def_stmt)) | |
1601 | { | |
1602 | tree arg = TREE_OPERAND (def_stmt, 0); | |
1603 | if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) | |
1604 | return true; | |
1605 | if (vect_debug_details (NULL)) | |
1606 | { | |
1607 | fprintf (dump_file, "Unexpected empty stmt: "); | |
1608 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
1609 | } | |
1610 | return false; | |
1611 | } | |
1612 | ||
1613 | /* phi_node inside the loop indicates an induction/reduction pattern. | |
1614 | This is not supported yet. */ | |
1615 | bb = bb_for_stmt (def_stmt); | |
1616 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
1617 | { | |
1618 | if (vect_debug_details (NULL)) | |
1619 | fprintf (dump_file, "reduction/induction - unsupported."); | |
1620 | return false; /* FORNOW: not supported yet. */ | |
1621 | } | |
1622 | ||
1623 | /* Expecting a modify_expr or a phi_node. */ | |
1624 | if (TREE_CODE (def_stmt) == MODIFY_EXPR | |
1625 | || TREE_CODE (def_stmt) == PHI_NODE) | |
1626 | { | |
1627 | if (def) | |
1628 | *def = def_stmt; | |
1629 | return true; | |
1630 | } | |
1631 | ||
1632 | return false; | |
1633 | } | |
1634 | ||
1635 | ||
1636 | /* Function vect_analyze_operations. | |
1637 | ||
1638 | Scan the loop stmts and make sure they are all vectorizable. */ | |
1639 | ||
1640 | static bool | |
1641 | vect_analyze_operations (loop_vec_info loop_vinfo) | |
1642 | { | |
1643 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1644 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
1645 | int nbbs = loop->num_nodes; | |
1646 | block_stmt_iterator si; | |
1647 | int vectorization_factor = 0; | |
1648 | int i; | |
1649 | bool ok; | |
1650 | tree scalar_type; | |
1651 | ||
1652 | if (vect_debug_details (NULL)) | |
1653 | fprintf (dump_file, "\n<<vect_analyze_operations>>\n"); | |
1654 | ||
1655 | for (i = 0; i < nbbs; i++) | |
1656 | { | |
1657 | basic_block bb = bbs[i]; | |
1658 | ||
1659 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1660 | { | |
1661 | tree stmt = bsi_stmt (si); | |
1662 | int nunits; | |
1663 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1664 | tree vectype; | |
1665 | ||
1666 | if (vect_debug_details (NULL)) | |
1667 | { | |
1668 | fprintf (dump_file, "==> examining statement: "); | |
1669 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
1670 | } | |
1671 | #ifdef ENABLE_CHECKING | |
1672 | if (!stmt_info) | |
1673 | abort (); | |
1674 | #endif | |
1675 | /* skip stmts which do not need to be vectorized. | |
1676 | this is expected to include: | |
1677 | - the COND_EXPR which is the loop exit condition | |
1678 | - any LABEL_EXPRs in the loop | |
1679 | - computations that are used only for array indexing or loop | |
1680 | control */ | |
1681 | ||
1682 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) | |
1683 | { | |
1684 | if (vect_debug_details (NULL)) | |
1685 | fprintf (dump_file, "irrelevant."); | |
1686 | continue; | |
1687 | } | |
1688 | ||
1689 | if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) | |
1690 | { | |
1691 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1692 | { | |
1693 | fprintf (dump_file, "not vectorized: vector stmt in loop:"); | |
1694 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
1695 | } | |
1696 | return false; | |
1697 | } | |
1698 | ||
1699 | if (STMT_VINFO_DATA_REF (stmt_info)) | |
1700 | scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); | |
1701 | else if (TREE_CODE (stmt) == MODIFY_EXPR) | |
1702 | scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); | |
1703 | else | |
1704 | scalar_type = TREE_TYPE (stmt); | |
1705 | ||
1706 | if (vect_debug_details (NULL)) | |
1707 | { | |
1708 | fprintf (dump_file, "get vectype for scalar type: "); | |
1709 | print_generic_expr (dump_file, scalar_type, TDF_SLIM); | |
1710 | } | |
1711 | ||
1712 | vectype = get_vectype_for_scalar_type (scalar_type); | |
1713 | if (!vectype) | |
1714 | { | |
1715 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1716 | { | |
1717 | fprintf (dump_file, "not vectorized: unsupported data-type "); | |
1718 | print_generic_expr (dump_file, scalar_type, TDF_SLIM); | |
1719 | } | |
1720 | return false; | |
1721 | } | |
1722 | ||
1723 | if (vect_debug_details (NULL)) | |
1724 | { | |
1725 | fprintf (dump_file, "vectype: "); | |
1726 | print_generic_expr (dump_file, vectype, TDF_SLIM); | |
1727 | } | |
1728 | STMT_VINFO_VECTYPE (stmt_info) = vectype; | |
1729 | ||
1730 | ok = (vectorizable_operation (stmt, NULL, NULL) | |
1731 | || vectorizable_assignment (stmt, NULL, NULL) | |
1732 | || vectorizable_load (stmt, NULL, NULL) | |
1733 | || vectorizable_store (stmt, NULL, NULL)); | |
1734 | ||
1735 | if (!ok) | |
1736 | { | |
1737 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1738 | { | |
1739 | fprintf (dump_file, "not vectorized: stmt not supported: "); | |
1740 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
1741 | } | |
1742 | return false; | |
1743 | } | |
1744 | ||
1745 | nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
1746 | if (vect_debug_details (NULL)) | |
1747 | fprintf (dump_file, "nunits = %d", nunits); | |
1748 | ||
1749 | if (vectorization_factor) | |
1750 | { | |
1751 | /* FORNOW: don't allow mixed units. | |
1752 | This restriction will be relaxed in the future. */ | |
1753 | if (nunits != vectorization_factor) | |
1754 | { | |
1755 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1756 | fprintf (dump_file, "not vectorized: mixed data-types"); | |
1757 | return false; | |
1758 | } | |
1759 | } | |
1760 | else | |
1761 | vectorization_factor = nunits; | |
1762 | } | |
1763 | } | |
1764 | ||
1765 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ | |
1766 | if (!vectorization_factor) | |
1767 | { | |
1768 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1769 | fprintf (dump_file, "not vectorized: unsupported data-type"); | |
1770 | return false; | |
1771 | } | |
1772 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; | |
1773 | ||
1774 | /* FORNOW: handle only cases where the loop bound divides by the | |
1775 | vectorization factor. */ | |
1776 | ||
1777 | if (vect_debug_details (NULL)) | |
1778 | fprintf (dump_file, | |
1779 | "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, | |
1780 | vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo)); | |
1781 | ||
1782 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |
1783 | { | |
1784 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1785 | fprintf (dump_file, "not vectorized: Unknown loop bound."); | |
1786 | return false; | |
1787 | } | |
1788 | ||
1789 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
1790 | && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0) | |
1791 | { | |
1792 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1793 | fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.", | |
1794 | vectorization_factor); | |
1795 | return false; | |
1796 | } | |
1797 | ||
1798 | return true; | |
1799 | } | |
1800 | ||
1801 | ||
1802 | /* Function exist_non_indexing_operands_for_use_p | |
1803 | ||
1804 | USE is one of the uses attached to STMT. Check if USE is | |
1805 | used in STMT for anything other than indexing an array. */ | |
1806 | ||
1807 | static bool | |
1808 | exist_non_indexing_operands_for_use_p (tree use, tree stmt) | |
1809 | { | |
1810 | tree operand; | |
1811 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1812 | ||
1813 | /* USE corresponds to some operand in STMT. If there is no data | |
1814 | reference in STMT, then any operand that corresponds to USE | |
1815 | is not indexing an array. */ | |
1816 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
1817 | return true; | |
1818 | ||
1819 | /* STMT has a data_ref. FORNOW this means that its of one of | |
1820 | the following forms: | |
1821 | -1- ARRAY_REF = var | |
1822 | -2- var = ARRAY_REF | |
1823 | (This should have been verified in analyze_data_refs). | |
1824 | ||
1825 | 'var' in the second case corresponds to a def, not a use, | |
1826 | so USE cannot correspond to any operands that are not used | |
1827 | for array indexing. | |
1828 | ||
1829 | Therefore, all we need to check is if STMT falls into the | |
1830 | first case, and whether var corresponds to USE. */ | |
1831 | ||
1832 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) | |
1833 | return false; | |
1834 | ||
1835 | operand = TREE_OPERAND (stmt, 1); | |
1836 | ||
1837 | if (TREE_CODE (operand) != SSA_NAME) | |
1838 | return false; | |
1839 | ||
1840 | if (operand == use) | |
1841 | return true; | |
1842 | ||
1843 | return false; | |
1844 | } | |
1845 | ||
1846 | ||
1847 | /* Function vect_is_simple_iv_evolution. | |
1848 | ||
1849 | FORNOW: A simple evolution of an induction variables in the loop is | |
1850 | considered a polynomial evolution with constant step. */ | |
1851 | ||
1852 | static bool | |
1853 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, | |
1854 | tree * step, bool strict) | |
1855 | { | |
1856 | tree init_expr; | |
1857 | tree step_expr; | |
1858 | ||
1859 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); | |
1860 | ||
1861 | /* When there is no evolution in this loop, the evolution function | |
1862 | is not "simple". */ | |
1863 | if (evolution_part == NULL_TREE) | |
1864 | return false; | |
1865 | ||
1866 | /* When the evolution is a polynomial of degree >= 2 | |
1867 | the evolution function is not "simple". */ | |
1868 | if (tree_is_chrec (evolution_part)) | |
1869 | return false; | |
1870 | ||
1871 | step_expr = evolution_part; | |
1872 | init_expr = initial_condition (access_fn); | |
1873 | ||
1874 | if (vect_debug_details (NULL)) | |
1875 | { | |
1876 | fprintf (dump_file, "step: "); | |
1877 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
1878 | fprintf (dump_file, ", init: "); | |
1879 | print_generic_expr (dump_file, init_expr, TDF_SLIM); | |
1880 | } | |
1881 | ||
1882 | *init = init_expr; | |
1883 | *step = step_expr; | |
1884 | ||
1885 | if (TREE_CODE (step_expr) != INTEGER_CST) | |
1886 | { | |
1887 | if (vect_debug_details (NULL)) | |
1888 | fprintf (dump_file, "step unknown."); | |
1889 | return false; | |
1890 | } | |
1891 | ||
1892 | if (strict) | |
1893 | if (!integer_onep (step_expr)) | |
1894 | { | |
1895 | if (vect_debug_details (NULL)) | |
1896 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
1897 | return false; | |
1898 | } | |
1899 | ||
1900 | return true; | |
1901 | } | |
1902 | ||
1903 | ||
1904 | /* Function vect_analyze_scalar_cycles. | |
1905 | ||
1906 | Examine the cross iteration def-use cycles of scalar variables, by | |
1907 | analyzing the loop (scalar) PHIs; verify that the cross iteration def-use | |
1908 | cycles that they represent do not impede vectorization. | |
1909 | ||
1910 | FORNOW: Reduction as in the following loop, is not supported yet: | |
1911 | loop1: | |
1912 | for (i=0; i<N; i++) | |
1913 | sum += a[i]; | |
1914 | The cross-iteration cycle corresponding to variable 'sum' will be | |
1915 | considered too complicated and will impede vectorization. | |
1916 | ||
1917 | FORNOW: Induction as in the following loop, is not supported yet: | |
1918 | loop2: | |
1919 | for (i=0; i<N; i++) | |
1920 | a[i] = i; | |
1921 | ||
1922 | However, the following loop *is* vectorizable: | |
1923 | loop3: | |
1924 | for (i=0; i<N; i++) | |
1925 | a[i] = b[i]; | |
1926 | ||
1927 | In both loops there exists a def-use cycle for the variable i: | |
1928 | loop: i_2 = PHI (i_0, i_1) | |
1929 | a[i_2] = ...; | |
1930 | i_1 = i_2 + 1; | |
1931 | GOTO loop; | |
1932 | ||
1933 | The evolution of the above cycle is considered simple enough, | |
1934 | however, we also check that the cycle does not need to be | |
1935 | vectorized, i.e - we check that the variable that this cycle | |
1936 | defines is only used for array indexing or in stmts that do not | |
1937 | need to be vectorized. This is not the case in loop2, but it | |
1938 | *is* the case in loop3. */ | |
1939 | ||
1940 | static bool | |
1941 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) | |
1942 | { | |
1943 | tree phi; | |
1944 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1945 | basic_block bb = loop->header; | |
1946 | tree dummy; | |
1947 | ||
1948 | if (vect_debug_details (NULL)) | |
1949 | fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n"); | |
1950 | ||
1951 | for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) | |
1952 | { | |
1953 | tree access_fn = NULL; | |
1954 | ||
1955 | if (vect_debug_details (NULL)) | |
1956 | { | |
1957 | fprintf (dump_file, "Analyze phi: "); | |
1958 | print_generic_expr (dump_file, phi, TDF_SLIM); | |
1959 | } | |
1960 | ||
1961 | /* Skip virtual phi's. The data dependences that are associated with | |
1962 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | |
1963 | ||
1964 | if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) | |
1965 | { | |
1966 | if (vect_debug_details (NULL)) | |
1967 | fprintf (dump_file, "virtual phi. skip."); | |
1968 | continue; | |
1969 | } | |
1970 | ||
1971 | /* Analyze the evolution function. */ | |
1972 | ||
1973 | /* FORNOW: The only scalar cross-iteration cycles that we allow are | |
1974 | those of loop induction variables; This property is verified here. | |
1975 | ||
1976 | Furthermore, if that induction variable is used in an operation | |
1977 | that needs to be vectorized (i.e, is not solely used to index | |
1978 | arrays and check the exit condition) - we do not support its | |
1979 | vectorization yet. This property is verified in vect_is_simple_use, | |
1980 | during vect_analyze_operations. */ | |
1981 | ||
1982 | access_fn = instantiate_parameters | |
1983 | (loop, | |
1984 | analyze_scalar_evolution (loop, PHI_RESULT (phi))); | |
1985 | ||
1986 | if (!access_fn) | |
1987 | { | |
1988 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
1989 | fprintf (dump_file, "not vectorized: unsupported scalar cycle."); | |
1990 | return false; | |
1991 | } | |
1992 | ||
1993 | if (vect_debug_details (NULL)) | |
1994 | { | |
1995 | fprintf (dump_file, "Access function of PHI: "); | |
1996 | print_generic_expr (dump_file, access_fn, TDF_SLIM); | |
1997 | } | |
1998 | ||
1999 | if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, | |
2000 | &dummy, false)) | |
2001 | { | |
2002 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2003 | fprintf (dump_file, "not vectorized: unsupported scalar cycle."); | |
2004 | return false; | |
2005 | } | |
2006 | } | |
2007 | ||
2008 | return true; | |
2009 | } | |
2010 | ||
2011 | ||
2012 | /* Function vect_analyze_data_ref_dependence. | |
2013 | ||
2014 | Return TRUE if there (might) exist a dependence between a memory-reference | |
2015 | DRA and a memory-reference DRB. */ | |
2016 | ||
2017 | static bool | |
2018 | vect_analyze_data_ref_dependence (struct data_reference *dra, | |
2019 | struct data_reference *drb, | |
2020 | struct loop *loop) | |
2021 | { | |
2022 | bool differ_p; | |
2023 | struct data_dependence_relation *ddr; | |
2024 | ||
2025 | if (!array_base_name_differ_p (dra, drb, &differ_p)) | |
2026 | { | |
2027 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2028 | { | |
2029 | fprintf (dump_file, | |
2030 | "not vectorized: can't determine dependence between: "); | |
2031 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); | |
2032 | fprintf (dump_file, " and "); | |
2033 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
2034 | } | |
2035 | return true; | |
2036 | } | |
2037 | ||
2038 | if (differ_p) | |
2039 | return false; | |
2040 | ||
2041 | ddr = initialize_data_dependence_relation (dra, drb); | |
2042 | compute_affine_dependence (ddr); | |
2043 | ||
2044 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
2045 | return false; | |
2046 | ||
2047 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2048 | { | |
2049 | fprintf (dump_file, | |
2050 | "not vectorized: possible dependence between data-refs "); | |
2051 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); | |
2052 | fprintf (dump_file, " and "); | |
2053 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
2054 | } | |
2055 | ||
2056 | return true; | |
2057 | } | |
2058 | ||
2059 | ||
2060 | /* Function vect_analyze_data_ref_dependences. | |
2061 | ||
2062 | Examine all the data references in the loop, and make sure there do not | |
2063 | exist any data dependences between them. | |
2064 | ||
2065 | TODO: dependences which distance is greater than the vectorization factor | |
2066 | can be ignored. */ | |
2067 | ||
2068 | static bool | |
2069 | vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) | |
2070 | { | |
2071 | unsigned int i, j; | |
2072 | varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
2073 | varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
2074 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2075 | ||
2076 | /* Examine store-store (output) dependences. */ | |
2077 | ||
2078 | if (vect_debug_details (NULL)) | |
2079 | fprintf (dump_file, "\n<<vect_analyze_dependences>>\n"); | |
2080 | ||
2081 | if (vect_debug_details (NULL)) | |
2082 | fprintf (dump_file, "compare all store-store pairs."); | |
2083 | ||
2084 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++) | |
2085 | { | |
2086 | for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
2087 | { | |
2088 | struct data_reference *dra = | |
2089 | VARRAY_GENERIC_PTR (loop_write_refs, i); | |
2090 | struct data_reference *drb = | |
2091 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
2092 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
2093 | return false; | |
2094 | } | |
2095 | } | |
2096 | ||
2097 | /* Examine load-store (true/anti) dependences. */ | |
2098 | ||
2099 | if (vect_debug_details (NULL)) | |
2100 | fprintf (dump_file, "compare all load-store pairs."); | |
2101 | ||
2102 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++) | |
2103 | { | |
2104 | for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
2105 | { | |
2106 | struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i); | |
2107 | struct data_reference *drb = | |
2108 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
2109 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
2110 | return false; | |
2111 | } | |
2112 | } | |
2113 | ||
2114 | return true; | |
2115 | } | |
2116 | ||
2117 | ||
2118 | /* Function vect_get_first_index. | |
2119 | ||
2120 | REF is a data reference. | |
2121 | If it is an ARRAY_REF: if its lower bound is simple enough, | |
2122 | put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE. | |
2123 | If it is not an ARRAY_REF: REF has no "first index"; | |
2124 | ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */ | |
2125 | ||
2126 | static bool | |
2127 | vect_get_first_index (tree ref, tree *array_first_index) | |
2128 | { | |
2129 | tree array_start; | |
2130 | ||
2131 | if (TREE_CODE (ref) != ARRAY_REF) | |
2132 | *array_first_index = size_zero_node; | |
2133 | else | |
2134 | { | |
2135 | array_start = array_ref_low_bound (ref); | |
2136 | if (!host_integerp (array_start,0)) | |
2137 | { | |
2138 | if (vect_debug_details (NULL)) | |
2139 | { | |
2140 | fprintf (dump_file, "array min val not simple integer cst."); | |
2141 | print_generic_expr (dump_file, array_start, TDF_DETAILS); | |
2142 | } | |
2143 | return false; | |
2144 | } | |
2145 | *array_first_index = array_start; | |
2146 | } | |
2147 | ||
2148 | return true; | |
2149 | } | |
2150 | ||
2151 | ||
2152 | /* Function vect_compute_data_ref_alignment | |
2153 | ||
2154 | Compute the misalignment of the data reference DR. | |
2155 | ||
2156 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
2157 | only for trivial cases. TODO. */ | |
2158 | ||
2159 | static void | |
2160 | vect_compute_data_ref_alignment (struct data_reference *dr, | |
2161 | loop_vec_info loop_vinfo ATTRIBUTE_UNUSED) | |
2162 | { | |
2163 | tree stmt = DR_STMT (dr); | |
2164 | tree ref = DR_REF (dr); | |
2165 | tree vectype; | |
2166 | tree access_fn = DR_ACCESS_FN (dr, 0); /* FORNOW: single access_fn. */ | |
2167 | tree init; | |
2168 | tree scalar_type; | |
2169 | tree misalign; | |
2170 | tree array_first_index; | |
2171 | tree array_base = DR_BASE_NAME (dr); | |
2172 | tree base_decl = NULL_TREE; | |
2173 | tree bit_offset = size_zero_node; | |
2174 | tree offset = size_zero_node; | |
7d60be94 | 2175 | tree unit_bits = build_int_cst (unsigned_type_node, BITS_PER_UNIT); |
79fe1b3b DN |
2176 | tree nunits; |
2177 | tree alignment; | |
2178 | ||
2179 | if (vect_debug_details (NULL)) | |
2180 | fprintf (dump_file, "vect_compute_data_ref_alignment:"); | |
2181 | ||
2182 | /* Initialize misalignment to unknown. */ | |
2183 | DR_MISALIGNMENT (dr) = -1; | |
2184 | ||
2185 | scalar_type = TREE_TYPE (ref); | |
2186 | vectype = get_vectype_for_scalar_type (scalar_type); | |
2187 | if (!vectype) | |
2188 | { | |
2189 | if (vect_debug_details (NULL)) | |
2190 | { | |
2191 | fprintf (dump_file, "no vectype for stmt: "); | |
2192 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
2193 | fprintf (dump_file, "scalar_type: "); | |
2194 | print_generic_expr (dump_file, scalar_type, TDF_DETAILS); | |
2195 | } | |
2196 | return; | |
2197 | } | |
2198 | ||
2199 | if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (array_base))) < TYPE_ALIGN (vectype)) | |
2200 | { | |
2201 | base_decl = vect_get_base_decl_and_bit_offset (array_base, &bit_offset); | |
2202 | if (!base_decl) | |
2203 | { | |
2204 | if (vect_debug_details (NULL)) | |
2205 | fprintf (dump_file, "Unknown alignment for access"); | |
2206 | return; | |
2207 | } | |
2208 | ||
2209 | offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1); | |
2210 | bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1); | |
2211 | if (!integer_zerop (bit_offset)) | |
2212 | { | |
2213 | if (vect_debug_details (NULL)) | |
2214 | { | |
2215 | fprintf (dump_file, "bit offset alignment: "); | |
2216 | print_generic_expr (dump_file, bit_offset, TDF_SLIM); | |
2217 | } | |
2218 | return; | |
2219 | } | |
2220 | ||
2221 | if (!base_decl || | |
2222 | (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype) | |
2223 | && !vect_can_force_dr_alignment_p (base_decl, TYPE_ALIGN (vectype)))) | |
2224 | { | |
2225 | if (vect_debug_details (NULL)) | |
2226 | { | |
2227 | fprintf (dump_file, "can't force alignment of ref: "); | |
2228 | print_generic_expr (dump_file, array_base, TDF_SLIM); | |
2229 | } | |
2230 | return; | |
2231 | } | |
2232 | ||
2233 | if (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype)) | |
2234 | { | |
2235 | /* Force the alignment of the decl. | |
2236 | NOTE: This is the only change to the code we make during | |
2237 | the analysis phase, before deciding to vectorize the loop. */ | |
2238 | if (vect_debug_details (NULL)) | |
2239 | fprintf (dump_file, "force alignment"); | |
2240 | DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype); | |
2241 | DECL_USER_ALIGN (base_decl) = TYPE_ALIGN (vectype); | |
2242 | } | |
2243 | } | |
2244 | ||
2245 | /* The misalignement is: | |
2246 | (base_alignment + offset + index_access_fn_init) % alignment. | |
2247 | At this point we already guaranteed that base_alignment == 0, | |
2248 | and computed the offset. | |
2249 | It remains to check the first index accessed. */ | |
2250 | ||
2251 | if (!vect_get_first_index (ref, &array_first_index)) | |
2252 | { | |
2253 | if (vect_debug_details (NULL)) | |
2254 | fprintf (dump_file, "no first_index for array."); | |
2255 | return; | |
2256 | } | |
2257 | ||
2258 | /* Check the index of the array_ref. */ | |
2259 | ||
2260 | init = initial_condition (access_fn); | |
2261 | ||
2262 | /* FORNOW: In order to simplify the handling of alignment, we make sure | |
2263 | that the first location at which the array is accessed ('init') is on an | |
2264 | 'NUNITS' boundary, since we are assuming here that 'array base' is aligned. | |
2265 | This is too conservative, since we require that | |
2266 | both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of | |
2267 | NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}. | |
2268 | This should be relaxed in the future. */ | |
2269 | ||
2270 | if (!init || !host_integerp (init,0)) | |
2271 | { | |
2272 | if (vect_debug_details (NULL)) | |
2273 | fprintf (dump_file, "init not simple INTEGER_CST."); | |
2274 | return; | |
2275 | } | |
2276 | ||
2277 | /* alignment required, in bytes: */ | |
2278 | alignment = build_int_cst (unsigned_type_node, | |
7d60be94 | 2279 | TYPE_ALIGN (vectype)/BITS_PER_UNIT); |
79fe1b3b DN |
2280 | /* bytes per scalar element: */ |
2281 | nunits = build_int_cst (unsigned_type_node, | |
7d60be94 | 2282 | GET_MODE_SIZE (TYPE_MODE (scalar_type))); |
79fe1b3b DN |
2283 | |
2284 | /* misalign = (offset + (init-array_first_index)*nunits) % alignment */ | |
2285 | if (vect_debug_details (NULL)) | |
2286 | { | |
2287 | fprintf (dump_file, "misalign = ( offset <"); | |
2288 | print_generic_expr (dump_file, offset, TDF_SLIM); | |
2289 | fprintf (dump_file, "> + (init <"); | |
2290 | print_generic_expr (dump_file, init, TDF_SLIM); | |
2291 | fprintf (dump_file, "> - first_indx <"); | |
2292 | print_generic_expr (dump_file, array_first_index, TDF_SLIM); | |
2293 | fprintf (dump_file, ">) * nunits <"); | |
2294 | print_generic_expr (dump_file, nunits, TDF_SLIM); | |
2295 | fprintf (dump_file, ">) mod alignment <"); | |
2296 | print_generic_expr (dump_file, alignment, TDF_SLIM); | |
2297 | fprintf (dump_file, ">"); | |
2298 | } | |
2299 | ||
2300 | misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0); | |
2301 | misalign = int_const_binop (MULT_EXPR, misalign, nunits, 0); | |
2302 | misalign = int_const_binop (PLUS_EXPR, misalign, offset, 0); | |
2303 | misalign = int_const_binop (TRUNC_MOD_EXPR, misalign, alignment, 0); | |
2304 | ||
2305 | if (vect_debug_details (NULL)) | |
2306 | { | |
2307 | fprintf (dump_file, "misalign = "); | |
2308 | print_generic_expr (dump_file, misalign, TDF_SLIM); | |
2309 | } | |
2310 | ||
2311 | if (!host_integerp (misalign,1) || TREE_OVERFLOW (misalign)) | |
2312 | { | |
2313 | if (vect_debug_details (NULL)) | |
2314 | fprintf (dump_file, "unexpected misalign value"); | |
2315 | return; | |
2316 | } | |
2317 | ||
2318 | DR_MISALIGNMENT (dr) = tree_low_cst (misalign,1); | |
2319 | ||
2320 | if (vect_debug_details (NULL)) | |
2321 | fprintf (dump_file, "misalign = %d",DR_MISALIGNMENT (dr)); | |
2322 | } | |
2323 | ||
2324 | ||
2325 | /* Function vect_compute_data_refs_alignment | |
2326 | ||
2327 | Compute the misalignment of data references in the loop. | |
2328 | This pass may take place at function granularity instead of at loop | |
2329 | granularity. | |
2330 | ||
2331 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
2332 | only for trivial cases. TODO. */ | |
2333 | ||
2334 | static void | |
2335 | vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) | |
2336 | { | |
2337 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
2338 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
2339 | unsigned int i; | |
2340 | ||
2341 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
2342 | { | |
2343 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
2344 | vect_compute_data_ref_alignment (dr, loop_vinfo); | |
2345 | } | |
2346 | ||
2347 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
2348 | { | |
2349 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
2350 | vect_compute_data_ref_alignment (dr, loop_vinfo); | |
2351 | } | |
2352 | } | |
2353 | ||
2354 | ||
2355 | /* Function vect_enhance_data_refs_alignment | |
2356 | ||
2357 | This pass will use loop versioning and loop peeling in order to enhance | |
2358 | the alignment of data references in the loop. | |
2359 | ||
2360 | FOR NOW: we assume that whatever versioning/peeling takes place, only the | |
2361 | original loop is to be vectorized; Any other loops that are created by | |
2362 | the transformations performed in this pass - are not supposed to be | |
2363 | vectorized. This restriction will be relaxed. | |
2364 | ||
2365 | FOR NOW: No transformation is actually performed. TODO. */ | |
2366 | ||
2367 | static void | |
2368 | vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED) | |
2369 | { | |
2370 | /* | |
2371 | This pass will require a cost model to guide it whether to apply peeling | |
2372 | or versioning or a combination of the two. For example, the scheme that | |
2373 | intel uses when given a loop with several memory accesses, is as follows: | |
2374 | choose one memory access ('p') which alignment you want to force by doing | |
2375 | peeling. Then, either (1) generate a loop in which 'p' is aligned and all | |
2376 | other accesses are not necessarily aligned, or (2) use loop versioning to | |
2377 | generate one loop in which all accesses are aligned, and another loop in | |
2378 | which only 'p' is necessarily aligned. | |
2379 | ||
2380 | ("Automatic Intra-Register Vectorization for the Intel Architecture", | |
2381 | Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International | |
2382 | Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) | |
2383 | ||
2384 | Devising a cost model is the most critical aspect of this work. It will | |
2385 | guide us on which access to peel for, whether to use loop versioning, how | |
2386 | many versions to create, etc. The cost model will probably consist of | |
2387 | generic considerations as well as target specific considerations (on | |
2388 | powerpc for example, misaligned stores are more painful than misaligned | |
2389 | loads). | |
2390 | ||
2391 | Here is the general steps involved in alignment enhancements: | |
2392 | ||
2393 | -- original loop, before alignment analysis: | |
2394 | for (i=0; i<N; i++){ | |
2395 | x = q[i]; # DR_MISALIGNMENT(q) = unknown | |
2396 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
2397 | } | |
2398 | ||
2399 | -- After vect_compute_data_refs_alignment: | |
2400 | for (i=0; i<N; i++){ | |
2401 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
2402 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
2403 | } | |
2404 | ||
2405 | -- Possibility 1: we do loop versioning: | |
2406 | if (p is aligned) { | |
2407 | for (i=0; i<N; i++){ # loop 1A | |
2408 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
2409 | p[i] = y; # DR_MISALIGNMENT(p) = 0 | |
2410 | } | |
2411 | } | |
2412 | else { | |
2413 | for (i=0; i<N; i++){ # loop 1B | |
2414 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
2415 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned | |
2416 | } | |
2417 | } | |
2418 | ||
2419 | -- Possibility 2: we do loop peeling: | |
2420 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). | |
2421 | x = q[i]; | |
2422 | p[i] = y; | |
2423 | } | |
2424 | for (i = 3; i < N; i++){ # loop 2A | |
2425 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
2426 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
2427 | } | |
2428 | ||
2429 | -- Possibility 3: combination of loop peeling and versioning: | |
2430 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). | |
2431 | x = q[i]; | |
2432 | p[i] = y; | |
2433 | } | |
2434 | if (p is aligned) { | |
2435 | for (i = 3; i<N; i++){ # loop 3A | |
2436 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
2437 | p[i] = y; # DR_MISALIGNMENT(p) = 0 | |
2438 | } | |
2439 | } | |
2440 | else { | |
2441 | for (i = 3; i<N; i++){ # loop 3B | |
2442 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
2443 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned | |
2444 | } | |
2445 | } | |
2446 | ||
2447 | These loops are later passed to loop_transform to be vectorized. The | |
2448 | vectorizer will use the alignment information to guide the transformation | |
2449 | (whether to generate regular loads/stores, or with special handling for | |
2450 | misalignment). | |
2451 | */ | |
2452 | } | |
2453 | ||
2454 | ||
2455 | /* Function vect_analyze_data_refs_alignment | |
2456 | ||
2457 | Analyze the alignment of the data-references in the loop. | |
2458 | FOR NOW: Until support for misliagned accesses is in place, only if all | |
2459 | accesses are aligned can the loop be vectorized. This restriction will be | |
2460 | relaxed. */ | |
2461 | ||
2462 | static bool | |
2463 | vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) | |
2464 | { | |
2465 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
2466 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
2467 | unsigned int i; | |
2468 | ||
2469 | if (vect_debug_details (NULL)) | |
2470 | fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n"); | |
2471 | ||
2472 | ||
2473 | /* This pass may take place at function granularity instead of at loop | |
2474 | granularity. */ | |
2475 | ||
2476 | vect_compute_data_refs_alignment (loop_vinfo); | |
2477 | ||
2478 | ||
2479 | /* This pass will use loop versioning and loop peeling in order to enhance | |
2480 | the alignment of data references in the loop. | |
2481 | FOR NOW: we assume that whatever versioning/peeling took place, the | |
2482 | original loop is to be vectorized. Any other loops that were created by | |
2483 | the transformations performed in this pass - are not supposed to be | |
2484 | vectorized. This restriction will be relaxed. */ | |
2485 | ||
2486 | vect_enhance_data_refs_alignment (loop_vinfo); | |
2487 | ||
2488 | ||
2489 | /* Finally, check that loop can be vectorized. | |
2490 | FOR NOW: Until support for misaligned accesses is in place, only if all | |
2491 | accesses are aligned can the loop be vectorized. This restriction will be | |
2492 | relaxed. */ | |
2493 | ||
2494 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
2495 | { | |
2496 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
2497 | if (!aligned_access_p (dr)) | |
2498 | { | |
2499 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
2500 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
2501 | fprintf (dump_file, "not vectorized: unaligned store."); | |
2502 | return false; | |
2503 | } | |
2504 | } | |
2505 | ||
2506 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
2507 | { | |
2508 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
2509 | if (!aligned_access_p (dr)) | |
2510 | { | |
2511 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
2512 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
2513 | fprintf (dump_file, "not vectorized: unaligned load."); | |
2514 | return false; | |
2515 | } | |
2516 | } | |
2517 | ||
2518 | return true; | |
2519 | } | |
2520 | ||
2521 | ||
2522 | /* Function vect_analyze_data_ref_access. | |
2523 | ||
2524 | Analyze the access pattern of the data-reference DR. For now, a data access | |
2525 | has to consecutive and aligned to be considered vectorizable. */ | |
2526 | ||
2527 | static bool | |
2528 | vect_analyze_data_ref_access (struct data_reference *dr) | |
2529 | { | |
2530 | varray_type access_fns = DR_ACCESS_FNS (dr); | |
2531 | tree access_fn; | |
2532 | tree init, step; | |
2533 | ||
2534 | /* FORNOW: handle only one dimensional arrays. | |
2535 | This restriction will be relaxed in the future. */ | |
2536 | if (VARRAY_ACTIVE_SIZE (access_fns) != 1) | |
2537 | { | |
2538 | if (vect_debug_details (NULL)) | |
2539 | fprintf (dump_file, "multi dimensional array reference."); | |
2540 | return false; | |
2541 | } | |
2542 | access_fn = DR_ACCESS_FN (dr, 0); | |
2543 | ||
2544 | if (!vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num, | |
2545 | access_fn, &init, &step, true)) | |
2546 | { | |
2547 | if (vect_debug_details (NULL)) | |
2548 | { | |
2549 | fprintf (dump_file, "too complicated access function."); | |
2550 | print_generic_expr (dump_file, access_fn, TDF_SLIM); | |
2551 | } | |
2552 | return false; | |
2553 | } | |
2554 | ||
2555 | return true; | |
2556 | } | |
2557 | ||
2558 | ||
2559 | /* Function vect_analyze_data_ref_accesses. | |
2560 | ||
2561 | Analyze the access pattern of all the data references in the loop. | |
2562 | ||
2563 | FORNOW: the only access pattern that is considered vectorizable is a | |
2564 | simple step 1 (consecutive) access. | |
2565 | ||
2566 | FORNOW: handle only one dimensional arrays, and pointer accesses. */ | |
2567 | ||
2568 | static bool | |
2569 | vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) | |
2570 | { | |
2571 | unsigned int i; | |
2572 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
2573 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
2574 | ||
2575 | if (vect_debug_details (NULL)) | |
2576 | fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n"); | |
2577 | ||
2578 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
2579 | { | |
2580 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
2581 | bool ok = vect_analyze_data_ref_access (dr); | |
2582 | if (!ok) | |
2583 | { | |
2584 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
2585 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
2586 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
2587 | return false; | |
2588 | } | |
2589 | } | |
2590 | ||
2591 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
2592 | { | |
2593 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
2594 | bool ok = vect_analyze_data_ref_access (dr); | |
2595 | if (!ok) | |
2596 | { | |
2597 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
2598 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
2599 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
2600 | return false; | |
2601 | } | |
2602 | } | |
2603 | ||
2604 | return true; | |
2605 | } | |
2606 | ||
2607 | ||
2608 | /* Function vect_analyze_pointer_ref_access. | |
2609 | ||
2610 | Input: | |
2611 | STMT - a stmt that contains a data-ref | |
2612 | MEMREF - a data-ref in STMT, which is an INDIRECT_REF. | |
2613 | ||
2614 | If the data-ref access is vectorizable, return a data_reference structure | |
2615 | that represents it (DR). Otherwise - return NULL. */ | |
2616 | ||
2617 | static struct data_reference * | |
2618 | vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read) | |
2619 | { | |
2620 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2621 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2622 | tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0)); | |
2623 | tree init, step; | |
2624 | int step_val; | |
2625 | tree reftype, innertype; | |
2626 | enum machine_mode innermode; | |
2627 | tree indx_access_fn; | |
2628 | int loopnum = loop->num; | |
2629 | struct data_reference *dr; | |
2630 | ||
2631 | if (!access_fn) | |
2632 | { | |
2633 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2634 | fprintf (dump_file, "not vectorized: complicated pointer access."); | |
2635 | return NULL; | |
2636 | } | |
2637 | ||
2638 | if (vect_debug_details (NULL)) | |
2639 | { | |
2640 | fprintf (dump_file, "Access function of ptr: "); | |
2641 | print_generic_expr (dump_file, access_fn, TDF_SLIM); | |
2642 | } | |
2643 | ||
2644 | if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false)) | |
2645 | { | |
2646 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2647 | fprintf (dump_file, "not vectorized: pointer access is not simple."); | |
2648 | return NULL; | |
2649 | } | |
2650 | ||
2651 | if (TREE_CODE (init) != SSA_NAME /* FORNOW */ | |
2652 | || !host_integerp (step,0)) | |
2653 | { | |
2654 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2655 | fprintf (dump_file, | |
2656 | "not vectorized: non constant init/step for pointer access."); | |
2657 | return NULL; | |
2658 | } | |
2659 | ||
2660 | step_val = TREE_INT_CST_LOW (step); | |
2661 | ||
2662 | reftype = TREE_TYPE (TREE_OPERAND (memref, 0)); | |
2663 | if (TREE_CODE (reftype) != POINTER_TYPE) | |
2664 | { | |
2665 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2666 | fprintf (dump_file, "not vectorized: unexpected pointer access form."); | |
2667 | return NULL; | |
2668 | } | |
2669 | ||
2670 | reftype = TREE_TYPE (init); | |
2671 | if (TREE_CODE (reftype) != POINTER_TYPE) | |
2672 | { | |
2673 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2674 | fprintf (dump_file, "not vectorized: unexpected pointer access form."); | |
2675 | return NULL; | |
2676 | } | |
2677 | ||
2678 | innertype = TREE_TYPE (reftype); | |
2679 | innermode = TYPE_MODE (innertype); | |
2680 | if (GET_MODE_SIZE (innermode) != step_val) | |
2681 | { | |
2682 | /* FORNOW: support only consecutive access */ | |
2683 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2684 | fprintf (dump_file, "not vectorized: non consecutive access."); | |
2685 | return NULL; | |
2686 | } | |
2687 | ||
2688 | indx_access_fn = | |
2689 | build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node); | |
2690 | if (vect_debug_details (NULL)) | |
2691 | { | |
2692 | fprintf (dump_file, "Access function of ptr indx: "); | |
2693 | print_generic_expr (dump_file, indx_access_fn, TDF_SLIM); | |
2694 | } | |
2695 | dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read); | |
2696 | return dr; | |
2697 | } | |
2698 | ||
2699 | ||
2700 | /* Function vect_analyze_data_refs. | |
2701 | ||
2702 | Find all the data references in the loop. | |
2703 | ||
   FORNOW: Handle aligned INDIRECT_REFs and one-dimensional ARRAY_REFs
   whose base is really an array (not a pointer) and whose alignment
   can be forced.  This restriction will be relaxed.  */
2707 | ||
static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int j;
  struct data_reference *dr;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");

  /* Scan every stmt in every basic block of the loop.  */
  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          bool is_read = false;
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
          v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
          vuse_optype vuses = STMT_VUSE_OPS (stmt);
          varray_type *datarefs = NULL;
          int nvuses, nv_may_defs, nv_must_defs;
          tree memref = NULL;
          tree array_base;
          tree symbl;

          /* Assumption: there exists a data-ref in stmt, if and only if
             it has vuses/vdefs.  */

          if (!vuses && !v_may_defs && !v_must_defs)
            continue;

          nvuses = NUM_VUSES (vuses);
          nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
          nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);

          /* A stmt that both reads and writes memory is not handled.  */
          if (nvuses && (nv_may_defs || nv_must_defs))
            {
              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Only plain assignments are expected to carry virtual
             operands at this point.  */
          if (TREE_CODE (stmt) != MODIFY_EXPR)
            {
              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "unexpected vops in stmt: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* vuses => the data-ref is on the RHS (a read);
             vdefs => the data-ref is on the LHS (a write).  */
          if (vuses)
            {
              memref = TREE_OPERAND (stmt, 1);
              datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
              is_read = true;
            }
          else /* vdefs */
            {
              memref = TREE_OPERAND (stmt, 0);
              datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
              is_read = false;
            }

          if (TREE_CODE (memref) == INDIRECT_REF)
            {
              dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
              if (! dr)
                return false;
              symbl = DR_BASE_NAME (dr);
            }
          else if (TREE_CODE (memref) == ARRAY_REF)
            {
              tree base;
              tree offset = size_zero_node;
              array_base = TREE_OPERAND (memref, 0);

              /* FORNOW: make sure that the array is one dimensional.
                 This restriction will be relaxed in the future.  */
              if (TREE_CODE (array_base) == ARRAY_REF)
                {
                  if (vect_debug_stats (loop) || vect_debug_details (loop))
                    {
                      fprintf (dump_file,
                               "not vectorized: multi-dimensional array.");
                      print_generic_expr (dump_file, stmt, TDF_SLIM);
                    }
                  return false;
                }

              dr = analyze_array (stmt, memref, is_read);

              /* Find the relevant symbol for aliasing purposes.  */
              base = DR_BASE_NAME (dr);
              switch (TREE_CODE (base))
                {
                case VAR_DECL:
                  symbl = base;
                  break;
                /* FORNOW: Disabled.
                case INDIRECT_REF:
                  symbl = TREE_OPERAND (base, 0);
                  break;
                */
                case COMPONENT_REF:
                  /* CHECKME: could have recorded more accurate information -
                     i.e, the actual FIELD_DECL that is being referenced -
                     but later passes expect VAR_DECL as the nmt.  */
                  symbl = vect_get_base_decl_and_bit_offset (base, &offset);
                  if (symbl)
                    break;
                  /* fall through */
                default:
                  if (vect_debug_stats (loop) || vect_debug_details (loop))
                    {
                      fprintf (dump_file,
                               "not vectorized: unhandled struct/class field access ");
                      print_generic_expr (dump_file, stmt, TDF_SLIM);
                    }
                  return false;
                } /* switch */
            }
          else
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: unhandled data ref: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Find and record the memtag assigned to this data-ref.  */
          if (TREE_CODE (symbl) == VAR_DECL)
            STMT_VINFO_MEMTAG (stmt_info) = symbl;
          else if (TREE_CODE (symbl) == SSA_NAME)
            {
              tree tag;
              symbl = SSA_NAME_VAR (symbl);
              tag = get_var_ann (symbl)->type_mem_tag;
              if (!tag)
                {
                  /* Fall back to the memtag of the pointer operand of
                     the INDIRECT_REF itself.  */
                  tree ptr = TREE_OPERAND (memref, 0);
                  if (TREE_CODE (ptr) == SSA_NAME)
                    tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
                }
              if (!tag)
                {
                  if (vect_debug_stats (loop) || vect_debug_details (loop))
                    fprintf (dump_file, "not vectorized: no memtag for ref.");
                  return false;
                }
              STMT_VINFO_MEMTAG (stmt_info) = tag;
            }
          else
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: unsupported data-ref: ");
                  print_generic_expr (dump_file, memref, TDF_SLIM);
                }
              return false;
            }

          /* Record the data-ref in the loop-level read/write list and in
             the stmt's own info.  */
          VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
          STMT_VINFO_DATA_REF (stmt_info) = dr;
        }
    }

  return true;
}
2888 | ||
2889 | ||
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
2891 | |
2892 | /* Function vect_mark_relevant. | |
2893 | ||
2894 | Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ | |
2895 | ||
2896 | static void | |
2897 | vect_mark_relevant (varray_type worklist, tree stmt) | |
2898 | { | |
2899 | stmt_vec_info stmt_info; | |
2900 | ||
2901 | if (vect_debug_details (NULL)) | |
2902 | fprintf (dump_file, "mark relevant."); | |
2903 | ||
2904 | if (TREE_CODE (stmt) == PHI_NODE) | |
2905 | { | |
2906 | VARRAY_PUSH_TREE (worklist, stmt); | |
2907 | return; | |
2908 | } | |
2909 | ||
2910 | stmt_info = vinfo_for_stmt (stmt); | |
2911 | ||
2912 | if (!stmt_info) | |
2913 | { | |
2914 | if (vect_debug_details (NULL)) | |
2915 | { | |
2916 | fprintf (dump_file, "mark relevant: no stmt info!!."); | |
2917 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
2918 | } | |
2919 | return; | |
2920 | } | |
2921 | ||
2922 | if (STMT_VINFO_RELEVANT_P (stmt_info)) | |
2923 | { | |
2924 | if (vect_debug_details (NULL)) | |
2925 | fprintf (dump_file, "already marked relevant."); | |
2926 | return; | |
2927 | } | |
2928 | ||
2929 | STMT_VINFO_RELEVANT_P (stmt_info) = 1; | |
2930 | VARRAY_PUSH_TREE (worklist, stmt); | |
2931 | } | |
2932 | ||
2933 | ||
2934 | /* Function vect_stmt_relevant_p. | |
2935 | ||
2936 | Return true if STMT in loop that is represented by LOOP_VINFO is | |
2937 | "relevant for vectorization". | |
2938 | ||
2939 | A stmt is considered "relevant for vectorization" if: | |
2940 | - it has uses outside the loop. | |
2941 | - it has vdefs (it alters memory). | |
2942 | - control stmts in the loop (except for the exit condition). | |
2943 | ||
2944 | CHECKME: what other side effects would the vectorizer allow? */ | |
2945 | ||
2946 | static bool | |
2947 | vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo) | |
2948 | { | |
2949 | v_may_def_optype v_may_defs; | |
2950 | v_must_def_optype v_must_defs; | |
2951 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2952 | int i; | |
2953 | dataflow_t df; | |
2954 | int num_uses; | |
2955 | ||
2956 | /* cond stmt other than loop exit cond. */ | |
2957 | if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) | |
2958 | return true; | |
2959 | ||
2960 | /* changing memory. */ | |
2961 | v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
2962 | v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
2963 | if (v_may_defs || v_must_defs) | |
2964 | { | |
2965 | if (vect_debug_details (NULL)) | |
2966 | fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs."); | |
2967 | return true; | |
2968 | } | |
2969 | ||
2970 | /* uses outside the loop. */ | |
2971 | df = get_immediate_uses (stmt); | |
2972 | num_uses = num_immediate_uses (df); | |
2973 | for (i = 0; i < num_uses; i++) | |
2974 | { | |
2975 | tree use = immediate_use (df, i); | |
2976 | basic_block bb = bb_for_stmt (use); | |
2977 | if (!flow_bb_inside_loop_p (loop, bb)) | |
2978 | { | |
2979 | if (vect_debug_details (NULL)) | |
2980 | fprintf (dump_file, "vec_stmt_relevant_p: used out of loop."); | |
2981 | return true; | |
2982 | } | |
2983 | } | |
2984 | ||
2985 | return false; | |
2986 | } | |
2987 | ||
2988 | ||
2989 | /* Function vect_mark_stmts_to_be_vectorized. | |
2990 | ||
2991 | Not all stmts in the loop need to be vectorized. For example: | |
2992 | ||
2993 | for i... | |
2994 | for j... | |
2995 | 1. T0 = i + j | |
2996 | 2. T1 = a[T0] | |
2997 | ||
2998 | 3. j = j + 1 | |
2999 | ||
3000 | Stmt 1 and 3 do not need to be vectorized, because loop control and | |
3001 | addressing of vectorized data-refs are handled differently. | |
3002 | ||
3003 | This pass detects such stmts. */ | |
3004 | ||
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist: seed it with the stmts that are relevant by
     themselves (see vect_stmt_relevant_p), clearing every stmt's
     RELEVANT_P flag first.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "init: stmt relevant? ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);
          STMT_VINFO_RELEVANT_P (stmt_info) = 0;

          if (vect_stmt_relevant_p (stmt, loop_vinfo))
            vect_mark_relevant (worklist, stmt);
        }
    }


  /* 2. Process_worklist: propagate relevance backwards along def-use
     chains until a fixed point (vect_mark_relevant only requeues stmts
     not yet marked).  */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "worklist: examine stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
        }

      /* Examine the USES in this statement. Mark all the statements which
         feed this statement's uses as "relevant", unless the USE is used as
         an array index. */

      if (TREE_CODE (stmt) == PHI_NODE)
        {
          /* follow the def-use chain inside the loop. */
          for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
            {
              tree arg = PHI_ARG_DEF (stmt, j);
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (arg, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }
              /* NULL def_stmt: the arg is defined outside the loop or is
                 an invariant - nothing to mark.  */
              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: def_stmt: ");
                  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
                }

              /* Only defs inside the loop are relevant.  */
              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (worklist, def_stmt);
            }
        }

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
        {
          tree use = USE_OP (use_ops, i);

          /* We are only interested in uses that need to be vectorized. Uses
             that are used for address computation are not considered relevant.
           */
          if (exist_non_indexing_operands_for_use_p (use, stmt))
            {
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (use, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }

              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: examine use %d: ", i);
                  print_generic_expr (dump_file, use, TDF_SLIM);
                }

              /* Only defs inside the loop are relevant.  */
              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (worklist, def_stmt);
            }
        }
    } /* while worklist */

  varray_clear (worklist);
  return true;
}
3137 | ||
3138 | ||
3139 | /* Function vect_get_loop_niters. | |
3140 | ||
3141 | Determine how many iterations the loop is executed. */ | |
3142 | ||
3143 | static tree | |
3144 | vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations) | |
3145 | { | |
3146 | tree niters; | |
3147 | ||
3148 | if (vect_debug_details (NULL)) | |
3149 | fprintf (dump_file, "\n<<get_loop_niters>>\n"); | |
3150 | ||
3151 | niters = number_of_iterations_in_loop (loop); | |
3152 | ||
3153 | if (niters != NULL_TREE | |
3154 | && niters != chrec_dont_know | |
3155 | && host_integerp (niters,0)) | |
3156 | { | |
3157 | *number_of_iterations = TREE_INT_CST_LOW (niters); | |
3158 | ||
3159 | if (vect_debug_details (NULL)) | |
3160 | fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC, | |
3161 | *number_of_iterations); | |
3162 | } | |
3163 | ||
3164 | return get_loop_exit_condition (loop); | |
3165 | } | |
3166 | ||
3167 | ||
3168 | /* Function vect_analyze_loop_form. | |
3169 | ||
3170 | Verify the following restrictions (some may be relaxed in the future): | |
3171 | - it's an inner-most loop | |
3172 | - number of BBs = 2 (which are the loop header and the latch) | |
3173 | - the loop has a pre-header | |
3174 | - the loop has a single entry and exit | |
3175 | - the loop exit condition is simple enough, and the number of iterations | |
3176 | can be analyzed (a countable loop). */ | |
3177 | ||
3178 | static loop_vec_info | |
3179 | vect_analyze_loop_form (struct loop *loop) | |
3180 | { | |
3181 | loop_vec_info loop_vinfo; | |
3182 | tree loop_cond; | |
3183 | HOST_WIDE_INT number_of_iterations = -1; | |
3184 | ||
3185 | if (vect_debug_details (loop)) | |
3186 | fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n"); | |
3187 | ||
82b85a85 ZD |
3188 | if (loop->inner |
3189 | || !loop->single_exit | |
3190 | || loop->num_nodes != 2) | |
79fe1b3b DN |
3191 | { |
3192 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3193 | { | |
3194 | fprintf (dump_file, "not vectorized: bad loop form. "); | |
82b85a85 | 3195 | if (loop->inner) |
79fe1b3b | 3196 | fprintf (dump_file, "nested loop."); |
82b85a85 ZD |
3197 | else if (!loop->single_exit) |
3198 | fprintf (dump_file, "multiple exits."); | |
3199 | else if (loop->num_nodes != 2) | |
79fe1b3b | 3200 | fprintf (dump_file, "too many BBs in loop."); |
79fe1b3b DN |
3201 | } |
3202 | ||
3203 | return NULL; | |
3204 | } | |
3205 | ||
3206 | /* We assume that the loop exit condition is at the end of the loop. i.e, | |
3207 | that the loop is represented as a do-while (with a proper if-guard | |
3208 | before the loop if needed), where the loop header contains all the | |
3209 | executable statements, and the latch is empty. */ | |
3210 | if (!empty_block_p (loop->latch)) | |
3211 | { | |
3212 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3213 | fprintf (dump_file, "not vectorized: unexpectd loop form."); | |
3214 | return NULL; | |
3215 | } | |
3216 | ||
3217 | if (empty_block_p (loop->header)) | |
3218 | { | |
3219 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3220 | fprintf (dump_file, "not vectorized: empty loop."); | |
3221 | return NULL; | |
3222 | } | |
3223 | ||
3224 | loop_cond = vect_get_loop_niters (loop, &number_of_iterations); | |
3225 | if (!loop_cond) | |
3226 | { | |
3227 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3228 | fprintf (dump_file, "not vectorized: complicated exit condition."); | |
3229 | return NULL; | |
3230 | } | |
3231 | ||
3232 | if (number_of_iterations < 0) | |
3233 | { | |
3234 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3235 | fprintf (dump_file, "not vectorized: unknown loop bound."); | |
3236 | return NULL; | |
3237 | } | |
3238 | ||
3239 | if (number_of_iterations == 0) /* CHECKME: can this happen? */ | |
3240 | { | |
3241 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3242 | fprintf (dump_file, "not vectorized: number of iterations = 0."); | |
3243 | return NULL; | |
3244 | } | |
3245 | ||
3246 | loop_vinfo = new_loop_vec_info (loop); | |
3247 | LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; | |
3248 | LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; | |
3249 | ||
3250 | return loop_vinfo; | |
3251 | } | |
3252 | ||
3253 | ||
3254 | /* Function vect_analyze_loop. | |
3255 | ||
3256 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
3257 | for it. The different analyses will record information in the | |
3258 | loop_vec_info struct. */ | |
3259 | ||
3260 | static loop_vec_info | |
3261 | vect_analyze_loop (struct loop *loop) | |
3262 | { | |
3263 | bool ok; | |
3264 | loop_vec_info loop_vinfo; | |
3265 | ||
3266 | if (vect_debug_details (NULL)) | |
3267 | fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n"); | |
3268 | ||
3269 | /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ | |
3270 | ||
3271 | loop_vinfo = vect_analyze_loop_form (loop); | |
3272 | if (!loop_vinfo) | |
3273 | { | |
3274 | if (vect_debug_details (loop)) | |
3275 | fprintf (dump_file, "bad loop form."); | |
3276 | return NULL; | |
3277 | } | |
3278 | ||
3279 | /* Find all data references in the loop (which correspond to vdefs/vuses) | |
3280 | and analyze their evolution in the loop. | |
3281 | ||
3282 | FORNOW: Handle only simple, one-dimensional, array references, which | |
3283 | alignment can be forced, and aligned pointer-references. */ | |
3284 | ||
3285 | ok = vect_analyze_data_refs (loop_vinfo); | |
3286 | if (!ok) | |
3287 | { | |
3288 | if (vect_debug_details (loop)) | |
3289 | fprintf (dump_file, "bad data references."); | |
3290 | destroy_loop_vec_info (loop_vinfo); | |
3291 | return NULL; | |
3292 | } | |
3293 | ||
3294 | ||
3295 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ | |
3296 | ||
3297 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); | |
3298 | if (!ok) | |
3299 | { | |
3300 | if (vect_debug_details (loop)) | |
3301 | fprintf (dump_file, "unexpected pattern."); | |
3302 | if (vect_debug_details (loop)) | |
3303 | fprintf (dump_file, "not vectorized: unexpected pattern."); | |
3304 | destroy_loop_vec_info (loop_vinfo); | |
3305 | return NULL; | |
3306 | } | |
3307 | ||
3308 | ||
3309 | /* Check that all cross-iteration scalar data-flow cycles are OK. | |
3310 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | |
3311 | ||
3312 | ok = vect_analyze_scalar_cycles (loop_vinfo); | |
3313 | if (!ok) | |
3314 | { | |
3315 | if (vect_debug_details (loop)) | |
3316 | fprintf (dump_file, "bad scalar cycle."); | |
3317 | destroy_loop_vec_info (loop_vinfo); | |
3318 | return NULL; | |
3319 | } | |
3320 | ||
3321 | ||
3322 | /* Analyze data dependences between the data-refs in the loop. | |
3323 | FORNOW: fail at the first data dependence that we encounter. */ | |
3324 | ||
3325 | ok = vect_analyze_data_ref_dependences (loop_vinfo); | |
3326 | if (!ok) | |
3327 | { | |
3328 | if (vect_debug_details (loop)) | |
3329 | fprintf (dump_file, "bad data dependence."); | |
3330 | destroy_loop_vec_info (loop_vinfo); | |
3331 | return NULL; | |
3332 | } | |
3333 | ||
3334 | ||
3335 | /* Analyze the access patterns of the data-refs in the loop (consecutive, | |
3336 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | |
3337 | ||
3338 | ok = vect_analyze_data_ref_accesses (loop_vinfo); | |
3339 | if (!ok) | |
3340 | { | |
3341 | if (vect_debug_details (loop)) | |
3342 | fprintf (dump_file, "bad data access."); | |
3343 | destroy_loop_vec_info (loop_vinfo); | |
3344 | return NULL; | |
3345 | } | |
3346 | ||
3347 | ||
3348 | /* Analyze the alignment of the data-refs in the loop. | |
3349 | FORNOW: Only aligned accesses are handled. */ | |
3350 | ||
3351 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | |
3352 | if (!ok) | |
3353 | { | |
3354 | if (vect_debug_details (loop)) | |
3355 | fprintf (dump_file, "bad data alignment."); | |
3356 | destroy_loop_vec_info (loop_vinfo); | |
3357 | return NULL; | |
3358 | } | |
3359 | ||
3360 | ||
3361 | /* Scan all the operations in the loop and make sure they are | |
3362 | vectorizable. */ | |
3363 | ||
3364 | ok = vect_analyze_operations (loop_vinfo); | |
3365 | if (!ok) | |
3366 | { | |
3367 | if (vect_debug_details (loop)) | |
3368 | fprintf (dump_file, "bad operation or unsupported loop bound."); | |
3369 | destroy_loop_vec_info (loop_vinfo); | |
3370 | return NULL; | |
3371 | } | |
3372 | ||
3373 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | |
3374 | ||
3375 | return loop_vinfo; | |
3376 | } | |
3377 | ||
3378 | ||
3379 | /* Function need_imm_uses_for. | |
3380 | ||
3381 | Return whether we ought to include information for 'var' | |
3382 | when calculating immediate uses. For this pass we only want use | |
3383 | information for non-virtual variables. */ | |
3384 | ||
3385 | static bool | |
3386 | need_imm_uses_for (tree var) | |
3387 | { | |
3388 | return is_gimple_reg (var); | |
3389 | } | |
3390 | ||
3391 | ||
3392 | /* Function vectorize_loops. | |
3393 | ||
3394 | Entry Point to loop vectorization phase. */ | |
3395 | ||
3396 | void | |
3397 | vectorize_loops (struct loops *loops) | |
3398 | { | |
3399 | unsigned int i, loops_num; | |
3400 | unsigned int num_vectorized_loops = 0; | |
3401 | ||
3402 | /* Does the target support SIMD? */ | |
3403 | /* FORNOW: until more sophisticated machine modelling is in place. */ | |
3404 | if (!UNITS_PER_SIMD_WORD) | |
3405 | { | |
3406 | if (vect_debug_details (NULL)) | |
3407 | fprintf (dump_file, "vectorizer: target vector size is not defined."); | |
3408 | return; | |
3409 | } | |
3410 | ||
3411 | compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for); | |
3412 | ||
3413 | /* ----------- Analyze loops. ----------- */ | |
3414 | ||
3415 | /* If some loop was duplicated, it gets bigger number | |
3416 | than all previously defined loops. This fact allows us to run | |
3417 | only over initial loops skipping newly generated ones. */ | |
3418 | loops_num = loops->num; | |
3419 | for (i = 1; i < loops_num; i++) | |
3420 | { | |
3421 | loop_vec_info loop_vinfo; | |
3422 | struct loop *loop = loops->parray[i]; | |
3423 | ||
3424 | if (!loop) | |
3425 | continue; | |
3426 | ||
79fe1b3b DN |
3427 | loop_vinfo = vect_analyze_loop (loop); |
3428 | loop->aux = loop_vinfo; | |
3429 | ||
3430 | if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) | |
3431 | continue; | |
3432 | ||
3433 | vect_transform_loop (loop_vinfo, loops); | |
3434 | num_vectorized_loops++; | |
3435 | } | |
3436 | ||
3437 | if (vect_debug_stats (NULL) || vect_debug_details (NULL)) | |
3438 | fprintf (dump_file, "\nvectorized %u loops in function.\n", | |
3439 | num_vectorized_loops); | |
3440 | ||
3441 | /* ----------- Finalize. ----------- */ | |
3442 | ||
3443 | free_df (); | |
3444 | for (i = 1; i < loops_num; i++) | |
3445 | { | |
3446 | struct loop *loop = loops->parray[i]; | |
3447 | loop_vec_info loop_vinfo = loop->aux; | |
3448 | if (!loop) | |
3449 | continue; | |
3450 | destroy_loop_vec_info (loop_vinfo); | |
3451 | loop->aux = NULL; | |
3452 | } | |
3453 | ||
3454 | loop_commit_inserts (); | |
3455 | rewrite_into_ssa (false); | |
3456 | if (bitmap_first_set_bit (vars_to_rename) >= 0) | |
3457 | { | |
3458 | /* The rewrite of ssa names may cause violation of loop closed ssa | |
3459 | form invariants. TODO -- avoid these rewrites completely. | |
3460 | Information in virtual phi nodes is sufficient for it. */ | |
3461 | rewrite_into_loop_closed_ssa (); | |
3462 | } | |
3463 | bitmap_clear (vars_to_rename); | |
3464 | } |