]>
Commit | Line | Data |
---|---|---|
79fe1b3b DN |
1 | /* Loop Vectorization |
2 | Copyright (C) 2003, 2004 Free Software Foundation, Inc. | |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | /* Loop Vectorization Pass. | |
23 | ||
24 | This pass tries to vectorize loops. This first implementation focuses on | |
25 | simple inner-most loops, with no conditional control flow, and a set of | |
26 | simple operations which vector form can be expressed using existing | |
27 | tree codes (PLUS, MULT etc). | |
28 | ||
29 | For example, the vectorizer transforms the following simple loop: | |
30 | ||
31 | short a[N]; short b[N]; short c[N]; int i; | |
32 | ||
33 | for (i=0; i<N; i++){ | |
34 | a[i] = b[i] + c[i]; | |
35 | } | |
36 | ||
37 | as if it was manually vectorized by rewriting the source code into: | |
38 | ||
39 | typedef int __attribute__((mode(V8HI))) v8hi; | |
40 | short a[N]; short b[N]; short c[N]; int i; | |
41 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; | |
42 | v8hi va, vb, vc; | |
43 | ||
44 | for (i=0; i<N/8; i++){ | |
45 | vb = pb[i]; | |
46 | vc = pc[i]; | |
47 | va = vb + vc; | |
48 | pa[i] = va; | |
49 | } | |
50 | ||
51 | The main entry to this pass is vectorize_loops(), in which | |
52 | the vectorizer applies a set of analyses on a given set of loops, | |
53 | followed by the actual vectorization transformation for the loops that | |
54 | had successfully passed the analysis phase. | |
55 | ||
56 | Throughout this pass we make a distinction between two types of | |
57 | data: scalars (which are represented by SSA_NAMES), and memory references | |
58 | ("data-refs"). These two types of data require different handling both | |
59 | during analysis and transformation. The types of data-refs that the | |
6775f1f3 IR |
60 | vectorizer currently supports are ARRAY_REFS which base is an array DECL |
61 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer | |
62 | accesses are required to have a simple (consecutive) access pattern. | |
79fe1b3b DN |
63 | |
64 | Analysis phase: | |
65 | =============== | |
66 | The driver for the analysis phase is vect_analyze_loop_nest(). | |
67 | It applies a set of analyses, some of which rely on the scalar evolution | |
68 | analyzer (scev) developed by Sebastian Pop. | |
69 | ||
70 | During the analysis phase the vectorizer records some information | |
71 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the | |
72 | loop, as well as general information about the loop as a whole, which is | |
73 | recorded in a "loop_vec_info" struct attached to each loop. | |
74 | ||
75 | Transformation phase: | |
76 | ===================== | |
77 | The loop transformation phase scans all the stmts in the loop, and | |
78 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in | |
79 | the loop that needs to be vectorized. It insert the vector code sequence | |
80 | just before the scalar stmt S, and records a pointer to the vector code | |
81 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct | |
82 | attached to S). This pointer will be used for the vectorization of following | |
83 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; | |
84 | otherwise, we rely on dead code elimination for removing it. | |
85 | ||
86 | For example, say stmt S1 was vectorized into stmt VS1: | |
87 | ||
88 | VS1: vb = px[i]; | |
89 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
90 | S2: a = b; | |
91 | ||
92 | To vectorize stmt S2, the vectorizer first finds the stmt that defines | |
93 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the | |
94 | vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The | |
95 | resulting sequence would be: | |
96 | ||
97 | VS1: vb = px[i]; | |
98 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
99 | VS2: va = vb; | |
100 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 | |
101 | ||
102 | Operands that are not SSA_NAMEs, are data-refs that appear in | |
103 | load/store operations (like 'x[i]' in S1), and are handled differently. | |
104 | ||
105 | Target modeling: | |
106 | ================= | |
107 | Currently the only target specific information that is used is the | |
108 | size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can | |
109 | support different sizes of vectors, for now will need to specify one value | |
110 | for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future. | |
111 | ||
112 | Since we only vectorize operations which vector form can be | |
113 | expressed using existing tree codes, to verify that an operation is | |
114 | supported, the vectorizer checks the relevant optab at the relevant | |
115 | machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If | |
116 | the value found is CODE_FOR_nothing, then there's no target support, and | |
117 | we can't vectorize the stmt. | |
118 | ||
119 | For additional information on this project see: | |
120 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html | |
121 | */ | |
122 | ||
123 | #include "config.h" | |
124 | #include "system.h" | |
125 | #include "coretypes.h" | |
126 | #include "tm.h" | |
127 | #include "errors.h" | |
128 | #include "ggc.h" | |
129 | #include "tree.h" | |
130 | #include "target.h" | |
131 | ||
132 | #include "rtl.h" | |
133 | #include "basic-block.h" | |
134 | #include "diagnostic.h" | |
135 | #include "tree-flow.h" | |
136 | #include "tree-dump.h" | |
137 | #include "timevar.h" | |
138 | #include "cfgloop.h" | |
139 | #include "cfglayout.h" | |
140 | #include "expr.h" | |
141 | #include "optabs.h" | |
a023975e | 142 | #include "toplev.h" |
79fe1b3b DN |
143 | #include "tree-chrec.h" |
144 | #include "tree-data-ref.h" | |
145 | #include "tree-scalar-evolution.h" | |
146 | #include "tree-vectorizer.h" | |
147 | #include "tree-pass.h" | |
148 | ||
149 | /* Main analysis functions. */ | |
150 | static loop_vec_info vect_analyze_loop (struct loop *); | |
151 | static loop_vec_info vect_analyze_loop_form (struct loop *); | |
152 | static bool vect_analyze_data_refs (loop_vec_info); | |
153 | static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | |
154 | static bool vect_analyze_scalar_cycles (loop_vec_info); | |
155 | static bool vect_analyze_data_ref_accesses (loop_vec_info); | |
156 | static bool vect_analyze_data_refs_alignment (loop_vec_info); | |
157 | static void vect_compute_data_refs_alignment (loop_vec_info); | |
158 | static bool vect_analyze_operations (loop_vec_info); | |
159 | ||
160 | /* Main code transformation functions. */ | |
161 | static void vect_transform_loop (loop_vec_info, struct loops *); | |
a023975e | 162 | static void vect_transform_loop_bound (loop_vec_info, tree niters); |
79fe1b3b DN |
163 | static bool vect_transform_stmt (tree, block_stmt_iterator *); |
164 | static bool vectorizable_load (tree, block_stmt_iterator *, tree *); | |
165 | static bool vectorizable_store (tree, block_stmt_iterator *, tree *); | |
166 | static bool vectorizable_operation (tree, block_stmt_iterator *, tree *); | |
167 | static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); | |
168 | static void vect_align_data_ref (tree); | |
169 | static void vect_enhance_data_refs_alignment (loop_vec_info); | |
170 | ||
171 | /* Utility functions for the analyses. */ | |
172 | static bool vect_is_simple_use (tree , struct loop *, tree *); | |
173 | static bool exist_non_indexing_operands_for_use_p (tree, tree); | |
174 | static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); | |
175 | static void vect_mark_relevant (varray_type, tree); | |
176 | static bool vect_stmt_relevant_p (tree, loop_vec_info); | |
a023975e | 177 | static tree vect_get_loop_niters (struct loop *, tree *); |
6775f1f3 | 178 | static bool vect_compute_data_ref_alignment |
79fe1b3b DN |
179 | (struct data_reference *, loop_vec_info); |
180 | static bool vect_analyze_data_ref_access (struct data_reference *); | |
181 | static bool vect_get_first_index (tree, tree *); | |
182 | static bool vect_can_force_dr_alignment_p (tree, unsigned int); | |
7ccf35ed DN |
183 | static struct data_reference * vect_analyze_pointer_ref_access |
184 | (tree, tree, bool); | |
a023975e OG |
185 | static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters, |
186 | struct loop *loop); | |
6775f1f3 IR |
187 | static tree vect_get_base_and_bit_offset |
188 | (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*); | |
189 | static struct data_reference * vect_analyze_pointer_ref_access | |
190 | (tree, tree, bool); | |
191 | static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *); | |
192 | static tree vect_compute_array_ref_alignment | |
193 | (struct data_reference *, loop_vec_info, tree, tree *); | |
194 | static tree vect_get_ptr_offset (tree, tree, tree *); | |
195 | static tree vect_get_symbl_and_dr | |
196 | (tree, tree, bool, loop_vec_info, struct data_reference **); | |
79fe1b3b DN |
197 | |
198 | /* Utility functions for the code transformation. */ | |
199 | static tree vect_create_destination_var (tree, tree); | |
7ccf35ed DN |
200 | static tree vect_create_data_ref_ptr |
201 | (tree, block_stmt_iterator *, tree, tree *, bool); | |
202 | static tree vect_create_index_for_vector_ref | |
203 | (struct loop *, block_stmt_iterator *); | |
204 | static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); | |
79fe1b3b DN |
205 | static tree get_vectype_for_scalar_type (tree); |
206 | static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | |
207 | static tree vect_get_vec_def_for_operand (tree, tree); | |
208 | static tree vect_init_vector (tree, tree); | |
a023975e | 209 | static tree vect_build_symbol_bound (tree, int, struct loop *); |
79fe1b3b DN |
210 | static void vect_finish_stmt_generation |
211 | (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); | |
212 | ||
a023975e OG |
213 | static void vect_generate_tmps_on_preheader (loop_vec_info, |
214 | tree *, tree *, | |
215 | tree *); | |
216 | static tree vect_build_loop_niters (loop_vec_info); | |
217 | static void vect_update_ivs_after_vectorizer (struct loop *, tree); | |
218 | ||
ed8b7558 | 219 | /* Loop transformations prior to vectorization. */ |
a023975e OG |
220 | |
221 | /* Loop transformations entry point function. | |
222 | It can be used outside of the vectorizer | |
223 | in case the loop to be manipulated answers conditions specified | |
224 | in function documentation. */ | |
225 | struct loop *tree_duplicate_loop_to_edge (struct loop *, | |
226 | struct loops *, edge, | |
227 | tree, tree, bool); | |
228 | ||
229 | static void allocate_new_names (bitmap); | |
230 | static void rename_use_op (use_operand_p); | |
231 | static void rename_def_op (def_operand_p, tree); | |
232 | static void rename_variables_in_bb (basic_block); | |
233 | static void free_new_names (bitmap); | |
234 | static void rename_variables_in_loop (struct loop *); | |
235 | static void copy_phi_nodes (struct loop *, struct loop *, bool); | |
236 | static void update_phis_for_duplicate_loop (struct loop *, | |
237 | struct loop *, | |
238 | bool after); | |
239 | static void update_phi_nodes_for_guard (edge, struct loop *); | |
240 | static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree); | |
241 | static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *, | |
242 | struct loops *, | |
243 | edge); | |
244 | static edge add_loop_guard (basic_block, tree, basic_block); | |
245 | static bool verify_loop_for_duplication (struct loop *, bool, edge); | |
246 | ||
247 | /* Utilities dealing with loop peeling (not peeling itself). */ | |
248 | static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); | |
249 | static void vect_update_niters_after_peeling (loop_vec_info, tree); | |
250 | static void vect_update_inits_of_dr (struct data_reference *, struct loop *, | |
251 | tree niters); | |
252 | static void vect_update_inits_of_drs (loop_vec_info, tree); | |
253 | static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); | |
254 | ||
79fe1b3b DN |
255 | /* Utilities for creation and deletion of vec_info structs. */ |
256 | loop_vec_info new_loop_vec_info (struct loop *loop); | |
257 | void destroy_loop_vec_info (loop_vec_info); | |
258 | stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop); | |
259 | ||
a023975e OG |
260 | static bool vect_debug_stats (struct loop *loop); |
261 | static bool vect_debug_details (struct loop *loop); | |
262 | ||
263 | \f | |
264 | /* Utilities to support loop peeling for vectorization purposes. */ | |
265 | ||
266 | ||
267 | /* For each definition in DEFINITIONS this function allocates | |
268 | new ssa name. */ | |
269 | ||
270 | static void | |
271 | allocate_new_names (bitmap definitions) | |
272 | { | |
273 | unsigned ver; | |
274 | bitmap_iterator bi; | |
275 | ||
276 | EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) | |
277 | { | |
278 | tree def = ssa_name (ver); | |
279 | tree *new_name_ptr = xmalloc (sizeof (tree)); | |
280 | ||
281 | bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def); | |
282 | ||
283 | *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def)); | |
284 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal; | |
285 | ||
286 | SSA_NAME_AUX (def) = new_name_ptr; | |
287 | } | |
288 | } | |
289 | ||
290 | ||
291 | /* Renames the use *OP_P. */ | |
292 | ||
293 | static void | |
294 | rename_use_op (use_operand_p op_p) | |
295 | { | |
296 | tree *new_name_ptr; | |
297 | ||
298 | if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) | |
299 | return; | |
300 | ||
301 | new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p)); | |
302 | ||
303 | /* Something defined outside of the loop. */ | |
304 | if (!new_name_ptr) | |
305 | return; | |
306 | ||
307 | /* An ordinary ssa name defined in the loop. */ | |
308 | ||
309 | SET_USE (op_p, *new_name_ptr); | |
310 | } | |
311 | ||
312 | ||
313 | /* Renames the def *OP_P in statement STMT. */ | |
314 | ||
315 | static void | |
316 | rename_def_op (def_operand_p op_p, tree stmt) | |
317 | { | |
318 | tree *new_name_ptr; | |
319 | ||
320 | if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME) | |
321 | return; | |
322 | ||
323 | new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p)); | |
324 | ||
325 | /* Something defined outside of the loop. */ | |
326 | if (!new_name_ptr) | |
327 | return; | |
328 | ||
329 | /* An ordinary ssa name defined in the loop. */ | |
330 | ||
331 | SET_DEF (op_p, *new_name_ptr); | |
332 | SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt; | |
333 | } | |
334 | ||
335 | ||
/* Renames the variables in basic block BB.

   Walks every phi result at the top of BB, every real and virtual
   operand of every statement in BB, and every phi argument on BB's
   outgoing edges, replacing each ssa name that has a replacement
   recorded in SSA_NAME_AUX (see allocate_new_names).  */

static void
rename_variables_in_bb (basic_block bb)
{
  tree phi;
  block_stmt_iterator bsi;
  tree stmt;
  stmt_ann_t ann;
  use_optype uses;
  vuse_optype vuses;
  def_optype defs;
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  unsigned i;
  edge e;
  edge_iterator ei;

  /* Phi results are defs; rename them first.  */
  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
    rename_def_op (PHI_RESULT_PTR (phi), phi);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);
      /* Make sure the operand caches are up to date before we read
	 them through the *_OPS accessors below.  */
      get_stmt_operands (stmt);
      ann = stmt_ann (stmt);

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
	rename_use_op (USE_OP_PTR (uses, i));

      defs = DEF_OPS (ann);
      for (i = 0; i < NUM_DEFS (defs); i++)
	rename_def_op (DEF_OP_PTR (defs, i), stmt);

      vuses = VUSE_OPS (ann);
      for (i = 0; i < NUM_VUSES (vuses); i++)
	rename_use_op (VUSE_OP_PTR (vuses, i));

      /* A V_MAY_DEF carries both a use (the incoming version) and a
	 result (the outgoing version) -- rename both.  */
      v_may_defs = V_MAY_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
	{
	  rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
	  rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
	}

      v_must_defs = V_MUST_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
	rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt);
    }

  /* Names defined in BB may also be used as phi arguments in BB's
     successors; rename those uses too.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
      rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
}
391 | ||
392 | ||
393 | /* Releases the structures holding the new ssa names. */ | |
394 | ||
395 | static void | |
396 | free_new_names (bitmap definitions) | |
397 | { | |
398 | unsigned ver; | |
399 | bitmap_iterator bi; | |
400 | ||
401 | EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) | |
402 | { | |
403 | tree def = ssa_name (ver); | |
404 | ||
405 | if (SSA_NAME_AUX (def)) | |
406 | { | |
407 | free (SSA_NAME_AUX (def)); | |
408 | SSA_NAME_AUX (def) = NULL; | |
409 | } | |
410 | } | |
411 | } | |
412 | ||
413 | ||
414 | /* Renames variables in new generated LOOP. */ | |
415 | ||
416 | static void | |
417 | rename_variables_in_loop (struct loop *loop) | |
418 | { | |
419 | unsigned i; | |
420 | basic_block *bbs; | |
421 | ||
422 | bbs = get_loop_body (loop); | |
423 | ||
424 | for (i = 0; i < loop->num_nodes; i++) | |
425 | rename_variables_in_bb (bbs[i]); | |
426 | ||
427 | free (bbs); | |
428 | } | |
429 | ||
430 | ||
/* This function copies phis from LOOP header to NEW_LOOP header.
   AFTER is as in update_phis_for_duplicate_loop: when true, NEW_LOOP
   executes after LOOP, so the value entering each copied phi is the
   one flowing around LOOP's latch; when false, it is the one entering
   LOOP through its preheader.

   NOTE(review): assumes the phi chains of both headers were created
   (by copy_bbs) in the same order and have equal length -- confirm
   at the caller; the lockstep walk below relies on it.  */

static void
copy_phi_nodes (struct loop *loop, struct loop *new_loop,
		bool after)
{
  tree phi, new_phi, def;
  edge new_e;
  /* The edge of LOOP whose phi arguments seed the copies.  */
  edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));

  /* For each phi of NEW_LOOP's header, add the argument coming in
     through NEW_LOOP's preheader edge.  The latch argument is filled
     in later by update_phis_for_duplicate_loop.  */
  for (phi = phi_nodes (loop->header),
       new_phi = phi_nodes (new_loop->header);
       phi;
       phi = TREE_CHAIN (phi),
       new_phi = TREE_CHAIN (new_phi))
    {
      new_e = loop_preheader_edge (new_loop);
      def = PHI_ARG_DEF_FROM_EDGE (phi, e);
      add_phi_arg (&new_phi, def, new_e);
    }
}
455 | ||
456 | ||
/* Update the PHI nodes of the NEW_LOOP.  AFTER is true if the NEW_LOOP
   executes after LOOP, and false if it executes before it.

   First copies LOOP's header phis into NEW_LOOP's header via
   copy_phi_nodes (which supplies the preheader arguments), then fills
   in the latch argument of each copied phi with the renamed version
   (from SSA_NAME_AUX) of the value flowing around LOOP's latch.  When
   NEW_LOOP runs before LOOP, LOOP's own preheader phi arguments are
   additionally redirected to the values NEW_LOOP computes.  */

static void
update_phis_for_duplicate_loop (struct loop *loop,
				struct loop *new_loop, bool after)
{
  edge old_latch;
  tree *new_name_ptr, new_ssa_name;
  tree phi_new, phi_old, def;
  edge orig_entry_e = loop_preheader_edge (loop);

  /* Copy phis from loop->header to new_loop->header.  */
  copy_phi_nodes (loop, new_loop, after);

  old_latch = loop_latch_edge (loop);

  /* Update PHI args for the new loop latch edge, and
     the old loop preheader edge; we know that the PHI nodes
     are ordered appropriately in copy_phi_nodes.  */
  for (phi_new = phi_nodes (new_loop->header),
       phi_old = phi_nodes (loop->header);
       phi_new && phi_old;
       phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);

      /* Constants and invariants need no renaming.  */
      if (TREE_CODE (def) != SSA_NAME)
	continue;

      new_name_ptr = SSA_NAME_AUX (def);

      /* Something defined outside of the loop.  */
      if (!new_name_ptr)
	continue;

      /* An ordinary ssa name defined in the loop.  */
      new_ssa_name = *new_name_ptr;

      add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* Update PHI args for the original loop pre-header edge, so
	 that LOOP consumes the values produced by NEW_LOOP.  */
      if (! after)
	SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
		 new_ssa_name);
    }
}
504 | ||
505 | ||
/* Update PHI nodes for a guard of the LOOP.

   LOOP is supposed to have a preheader bb at which a guard condition is
   located.  The true edge of this condition skips the LOOP and ends
   at the destination of the (unique) LOOP exit.  The loop exit bb is
   supposed to be an empty bb (created by this transformation) with one
   successor.

   This function creates phi nodes at the LOOP exit bb.  These phis need
   to be created as a result of adding the true edge coming from the
   guard.

   FORNOW: Only phis which have corresponding phi nodes at the header of
   the LOOP are created.  Here we use the assumption that after the LOOP
   there are no uses of defs generated in LOOP.

   After the phis creation, the function updates the values of phi nodes
   at the LOOP exit successor bb:

   Original loop:

   bb0: loop preheader
        goto bb1
   bb1: loop header
        if (exit_cond) goto bb3 else goto bb2
   bb2: loop latch
        goto bb1
   bb3:


   After guard creation (the loop before this function):

   bb0: loop preheader
	if (guard_condition) goto bb4 else goto bb1
   bb1: loop header
	if (exit_cond) goto bb4 else goto bb2
   bb2: loop latch
	goto bb1
   bb4: loop exit
	(new empty bb)
	goto bb3
   bb3:

   This function updates the phi nodes in bb4 and in bb3, to account for
   the new edge from bb0 to bb4.  */

static void
update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
{
  tree phi, phi1;

  for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
    {
      tree new_phi;
      tree phi_arg;

      /* Generate new phi node at the loop exit bb (bb4 above), merging
	 the skipped-loop and executed-loop values of this variable.  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
				 loop->exit_edges[0]->dest);

      /* Add argument coming from guard true edge: the value the
	 variable had before the (skipped) loop.  */
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
      add_phi_arg (&new_phi, phi_arg, guard_true_edge);

      /* Add argument coming from loop exit edge: the value flowing
	 around the latch, i.e. the one live when the loop exits.
	 NOTE(review): assumes the latch's single successor edge
	 carries the same value as the exit -- confirm this holds for
	 the loop forms accepted by the verifier.  */
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
      add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);

      /* Update all phi nodes at the loop exit successor (bb3 above):
	 any argument that used the pre-guard value must now use the
	 result of the merge phi just created.  */
      for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest);
	   phi1;
	   phi1 = TREE_CHAIN (phi1))
	{
	  tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1,
				EDGE_SUCC (loop->exit_edges[0]->dest, 0));
	  if (old_arg == phi_arg)
	    {
	      edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);

	      SET_PHI_ARG_DEF (phi1,
			       phi_arg_from_edge (phi1, e),
			       PHI_RESULT (new_phi));
	    }
	}
    }
}
590 | ||
591 | ||
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.  The
   original exit test of LOOP is replaced by a test on the new IV;
   BEGIN_LABEL and EXIT_LABEL are the labels of the loop body and of
   the block after the loop, used as the branch targets of the new
   COND_EXPR.  */

static void
make_loop_iterate_ntimes (struct loop *loop, tree niters,
			  tree begin_label, tree exit_label)
{
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
  tree orig_cond;
  edge exit_edge = loop->exit_edges[0];
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);

  /* Flow loop scan does not update loop->single_exit field.  */
  loop->single_exit = loop->exit_edges[0];
  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  /* Insert the new IV (0, 1, 2, ...) just before the exit test.  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
     back to the exit condition statement.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);


  /* Build the new exit test so its taken/not-taken sense matches the
     original exit edge.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);

  begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
  exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
  cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
		     begin_label, exit_label);
  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_stats (loop) || vect_debug_details (loop))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
634 | ||
635 | ||
636 | /* Given LOOP this function generates a new copy of it and puts it | |
637 | on E which is either the entry or exit of LOOP. */ | |
638 | ||
639 | static struct loop * | |
640 | tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, | |
641 | edge e) | |
642 | { | |
643 | struct loop *new_loop; | |
644 | basic_block *new_bbs, *bbs; | |
645 | bool at_exit; | |
646 | bool was_imm_dom; | |
647 | basic_block exit_dest; | |
648 | tree phi, phi_arg; | |
649 | ||
650 | at_exit = (e == loop->exit_edges[0]); | |
651 | if (!at_exit && e != loop_preheader_edge (loop)) | |
652 | { | |
653 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
654 | fprintf (dump_file, | |
655 | "Edge is not an entry nor an exit edge.\n"); | |
656 | return NULL; | |
657 | } | |
658 | ||
659 | bbs = get_loop_body (loop); | |
660 | ||
661 | /* Check whether duplication is possible. */ | |
662 | if (!can_copy_bbs_p (bbs, loop->num_nodes)) | |
663 | { | |
664 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
665 | fprintf (dump_file, | |
666 | "Cannot copy basic blocks.\n"); | |
667 | free (bbs); | |
668 | return NULL; | |
669 | } | |
670 | ||
671 | /* Generate new loop structure. */ | |
672 | new_loop = duplicate_loop (loops, loop, loop->outer); | |
673 | if (!new_loop) | |
674 | { | |
675 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
676 | fprintf (dump_file, | |
677 | "The duplicate_loop returns NULL.\n"); | |
678 | free (bbs); | |
679 | return NULL; | |
680 | } | |
681 | ||
682 | exit_dest = loop->exit_edges[0]->dest; | |
683 | was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, | |
684 | exit_dest) == loop->header ? | |
685 | true : false); | |
686 | ||
687 | new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes); | |
688 | ||
689 | copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL); | |
690 | ||
691 | /* Duplicating phi args at exit bbs as coming | |
692 | also from exit of duplicated loop. */ | |
693 | for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi)) | |
694 | { | |
695 | phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]); | |
696 | if (phi_arg) | |
697 | { | |
698 | edge new_loop_exit_edge; | |
699 | ||
700 | if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) | |
701 | new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); | |
702 | else | |
703 | new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); | |
704 | ||
705 | add_phi_arg (&phi, phi_arg, new_loop_exit_edge); | |
706 | } | |
707 | } | |
708 | ||
709 | if (at_exit) /* Add the loop copy at exit. */ | |
710 | { | |
711 | redirect_edge_and_branch_force (e, new_loop->header); | |
712 | set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); | |
713 | if (was_imm_dom) | |
714 | set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); | |
715 | } | |
716 | else /* Add the copy at entry. */ | |
717 | { | |
718 | edge new_exit_e; | |
719 | edge entry_e = loop_preheader_edge (loop); | |
720 | basic_block preheader = entry_e->src; | |
721 | ||
722 | if (!flow_bb_inside_loop_p (new_loop, | |
723 | EDGE_SUCC (new_loop->header, 0)->dest)) | |
724 | new_exit_e = EDGE_SUCC (new_loop->header, 0); | |
725 | else | |
726 | new_exit_e = EDGE_SUCC (new_loop->header, 1); | |
727 | ||
728 | redirect_edge_and_branch_force (new_exit_e, loop->header); | |
729 | set_immediate_dominator (CDI_DOMINATORS, loop->header, | |
730 | new_exit_e->src); | |
731 | ||
732 | /* We have to add phi args to the loop->header here as coming | |
733 | from new_exit_e edge. */ | |
734 | for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) | |
735 | { | |
736 | phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); | |
737 | if (phi_arg) | |
738 | add_phi_arg (&phi, phi_arg, new_exit_e); | |
739 | } | |
740 | ||
741 | redirect_edge_and_branch_force (entry_e, new_loop->header); | |
742 | set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); | |
743 | } | |
744 | ||
745 | flow_loop_scan (new_loop, LOOP_ALL); | |
746 | flow_loop_scan (loop, LOOP_ALL); | |
747 | free (new_bbs); | |
748 | free (bbs); | |
749 | ||
750 | return new_loop; | |
751 | } | |
752 | ||
753 | ||
754 | /* Given the condition statement COND, put it as the last statement | |
755 | of GUARD_BB; EXIT_BB is the basic block to skip the loop; | |
756 | Assumes that this is the single exit of the guarded loop. | |
757 | Returns the skip edge. */ | |
758 | ||
759 | static edge | |
760 | add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb) | |
761 | { | |
762 | block_stmt_iterator bsi; | |
763 | edge new_e, enter_e; | |
764 | tree cond_stmt, then_label, else_label; | |
765 | ||
766 | enter_e = EDGE_SUCC (guard_bb, 0); | |
767 | enter_e->flags &= ~EDGE_FALLTHRU; | |
768 | enter_e->flags |= EDGE_FALSE_VALUE; | |
769 | bsi = bsi_last (guard_bb); | |
770 | ||
771 | then_label = build1 (GOTO_EXPR, void_type_node, | |
772 | tree_block_label (exit_bb)); | |
773 | else_label = build1 (GOTO_EXPR, void_type_node, | |
774 | tree_block_label (enter_e->dest)); | |
775 | cond_stmt = build (COND_EXPR, void_type_node, cond, | |
776 | then_label, else_label); | |
777 | bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); | |
778 | /* Add new edge to connect entry block to the second loop. */ | |
779 | new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); | |
780 | set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb); | |
781 | return new_e; | |
782 | } | |
783 | ||
784 | ||
785 | /* This function verifies that certain restrictions apply to LOOP. */ | |
786 | ||
787 | static bool | |
788 | verify_loop_for_duplication (struct loop *loop, | |
789 | bool update_first_loop_count, edge e) | |
790 | { | |
791 | edge exit_e = loop->exit_edges [0]; | |
792 | edge entry_e = loop_preheader_edge (loop); | |
793 | ||
794 | /* We duplicate only innermost loops. */ | |
795 | if (loop->inner) | |
796 | { | |
797 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
798 | fprintf (dump_file, | |
799 | "Loop duplication failed. Loop is not innermost.\n"); | |
800 | return false; | |
801 | } | |
802 | ||
803 | /* Only loops with 1 exit. */ | |
804 | if (loop->num_exits != 1) | |
805 | { | |
806 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
807 | fprintf (dump_file, | |
808 | "More than one exit from loop.\n"); | |
809 | return false; | |
810 | } | |
811 | ||
812 | /* Only loops with 1 entry. */ | |
813 | if (loop->num_entries != 1) | |
814 | { | |
815 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
816 | fprintf (dump_file, | |
817 | "More than one exit from loop.\n"); | |
818 | return false; | |
819 | } | |
820 | ||
821 | /* All loops has outers, the only case loop->outer is NULL is for | |
822 | the function itself. */ | |
823 | if (!loop->outer) | |
824 | { | |
825 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
826 | fprintf (dump_file, | |
827 | "Loop is outer-most loop.\n"); | |
828 | return false; | |
829 | } | |
830 | ||
831 | /* Verify that new IV can be created and loop condition | |
832 | can be changed to make first loop iterate first_niters times. */ | |
833 | if (!update_first_loop_count) | |
834 | { | |
835 | tree orig_cond = get_loop_exit_condition (loop); | |
836 | block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); | |
837 | ||
838 | if (!orig_cond) | |
839 | { | |
840 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
841 | fprintf (dump_file, | |
842 | "Loop has no exit condition.\n"); | |
843 | return false; | |
844 | } | |
845 | if (orig_cond != bsi_stmt (loop_exit_bsi)) | |
846 | { | |
847 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
848 | fprintf (dump_file, | |
849 | "Loop exit condition is not loop header last stmt.\n"); | |
850 | return false; | |
851 | } | |
852 | } | |
853 | ||
854 | /* Make sure E is either an entry or an exit edge. */ | |
855 | if (e != exit_e && e != entry_e) | |
856 | { | |
857 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
858 | fprintf (dump_file, | |
859 | "E is not loop entry or exit edge.\n"); | |
860 | return false; | |
861 | } | |
862 | ||
863 | return true; | |
864 | } | |
865 | ||
866 | ||
/* Given LOOP this function duplicates it to the edge E.

   This transformation takes place before the loop is vectorized.
   For now, there are two main cases when it's used
   by the vectorizer: to support loops with unknown loop bounds
   (or loop bounds indivisible by vectorization factor) and to force the
   alignment of data references in the loop. In the first case, LOOP is
   duplicated to the exit edge, producing epilog loop. In the second case, LOOP
   is duplicated to the preheader edge thus generating prolog loop. In both
   cases, the original loop will be vectorized after the transformation.

   The edge E is supposed to be either preheader edge of the LOOP or
   its exit edge. If preheader edge is specified, the LOOP copy
   will precede the original one. Otherwise the copy will be located
   at the exit of the LOOP.

   FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
   the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
   loop will be iterated FIRST_NITERS times by introducing additional
   induction variable and replacing loop exit condition. If
   UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
   the caller to tree_duplicate_loop_to_edge is responsible for updating
   the first loop count.

   NITERS (also SSA_NAME) parameter defines the number of iteration the
   original loop iterated. The function generates two if-then guards:
   one prior to the first loop and the other prior to the second loop.
   The first guard will be:

   if (FIRST_NITERS == 0) then skip the first loop

   The second guard will be:

   if (FIRST_NITERS == NITERS) then skip the second loop

   Thus the equivalence to the original code is guaranteed by correct values
   of NITERS and FIRST_NITERS and generation of if-then loop guards.

   For now this function supports only loop forms that are candidate for
   vectorization. Such types are the following:

   (1) only innermost loops
   (2) loops built from 2 basic blocks
   (3) loops with one entry and one exit
   (4) loops without function calls
   (5) loops without defs that are used after the loop

   (1), (3) are checked in this function; (2) - in function
   vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
   (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
   when excluding induction/reduction support.

   The function returns NULL in case one of these checks or
   transformations failed.  */

struct loop*
tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
			     edge e, tree first_niters,
			     tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block first_exit_bb, second_exit_bb;
  basic_block pre_header_bb;
  edge exit_e = loop->exit_edges [0];

  gcc_assert (!any_marked_for_rewrite_p ());

  if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
    return NULL;

  /* We have to initialize cfg_hooks. Then, when calling
   cfg_hooks->split_edge, the function tree_split_edge
   is actually called and, when calling cfg_hooks->duplicate_block,
   the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();

  /* 1. Generate a copy of LOOP and put it on E (entry or exit).  */
  if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file,
		 "The tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  /* Here, using assumption (5), we do not propagate new names further
     than on phis of the exit from the second loop.  */
  rename_variables_in_loop (new_loop);
  free_new_names (definitions);

  /* The copy precedes the original when duplicating to the preheader
     edge; otherwise the copy becomes the epilog.  */
  if (e == exit_e)
    {
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      first_loop = new_loop;
      second_loop = loop;
    }

  /* 2. Generate bb between the loops.  */
  first_exit_bb = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (first_exit_bb, first_loop->outer);

  /* We need to update here first loop exit edge
     and second loop preheader edge.  */
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  /* 3. Make first loop iterate FIRST_NITERS times, if needed.  */
  if (!update_first_loop_count)
    {
      tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
      tree first_loop_exit_lbl = tree_block_label (first_exit_bb);

      make_loop_iterate_ntimes (first_loop, first_niters,
				first_loop_latch_lbl,
				first_loop_exit_lbl);
    }

  /* 4. Add the guard before first loop:

     if FIRST_NITERS == 0
       skip first loop
     else
       enter first loop  */

  /* 4a. Generate bb before first loop.  */
  pre_header_bb = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (pre_header_bb, first_loop->outer);

  /* First loop preheader edge is changed.  */
  flow_loop_scan (first_loop, LOOP_ALL);

  /* 4b. Generate guard condition.
     NOTE(review): the comment above says "== 0" but the code builds
     FIRST_NITERS <= 0 (LE_EXPR); equivalent only if FIRST_NITERS is
     known non-negative - TODO confirm.  */
  pre_condition = build (LE_EXPR, boolean_type_node,
			 first_niters, integer_zero_node);

  /* 4c. Add condition at the end of preheader bb.  */
  skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);

  /* 4d. Update phis at first loop exit and propagate changes
     to the phis of second loop.  */
  update_phi_nodes_for_guard (skip_e, first_loop);

  /* 5. Add the guard before second loop:

     if FIRST_NITERS == NITERS SKIP
       skip second loop
     else
       enter second loop  */

  /* 5a. Generate empty bb at the exit from the second loop.  */
  second_exit_bb = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (second_exit_bb, second_loop->outer);

  /* Second loop preheader edge is changed.  */
  flow_loop_scan (second_loop, LOOP_ALL);

  /* 5b. Generate guard condition.  */
  pre_condition = build (EQ_EXPR, boolean_type_node,
			 first_niters, niters);

  /* 5c. Add condition at the end of preheader bb.  */
  skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
  update_phi_nodes_for_guard (skip_e, second_loop);

  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
1046 | ||
1047 | ||
1048 | \f | |
1049 | /* Here the proper Vectorizer starts. */ | |
79fe1b3b DN |
1050 | |
1051 | /* Function new_stmt_vec_info. | |
1052 | ||
1053 | Create and initialize a new stmt_vec_info struct for STMT. */ | |
1054 | ||
1055 | stmt_vec_info | |
1056 | new_stmt_vec_info (tree stmt, struct loop *loop) | |
1057 | { | |
1058 | stmt_vec_info res; | |
1059 | res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); | |
1060 | ||
1061 | STMT_VINFO_TYPE (res) = undef_vec_info_type; | |
1062 | STMT_VINFO_STMT (res) = stmt; | |
1063 | STMT_VINFO_LOOP (res) = loop; | |
1064 | STMT_VINFO_RELEVANT_P (res) = 0; | |
1065 | STMT_VINFO_VECTYPE (res) = NULL; | |
1066 | STMT_VINFO_VEC_STMT (res) = NULL; | |
1067 | STMT_VINFO_DATA_REF (res) = NULL; | |
1068 | STMT_VINFO_MEMTAG (res) = NULL; | |
6775f1f3 | 1069 | STMT_VINFO_VECT_DR_BASE (res) = NULL; |
79fe1b3b DN |
1070 | |
1071 | return res; | |
1072 | } | |
1073 | ||
1074 | ||
1075 | /* Function new_loop_vec_info. | |
1076 | ||
1077 | Create and initialize a new loop_vec_info struct for LOOP, as well as | |
1078 | stmt_vec_info structs for all the stmts in LOOP. */ | |
1079 | ||
1080 | loop_vec_info | |
1081 | new_loop_vec_info (struct loop *loop) | |
1082 | { | |
1083 | loop_vec_info res; | |
1084 | basic_block *bbs; | |
1085 | block_stmt_iterator si; | |
1086 | unsigned int i; | |
1087 | ||
1088 | res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); | |
1089 | ||
1090 | bbs = get_loop_body (loop); | |
1091 | ||
1092 | /* Create stmt_info for all stmts in the loop. */ | |
1093 | for (i = 0; i < loop->num_nodes; i++) | |
1094 | { | |
1095 | basic_block bb = bbs[i]; | |
1096 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1097 | { | |
1098 | tree stmt = bsi_stmt (si); | |
1099 | stmt_ann_t ann; | |
1100 | ||
1101 | get_stmt_operands (stmt); | |
1102 | ann = stmt_ann (stmt); | |
1103 | set_stmt_info (ann, new_stmt_vec_info (stmt, loop)); | |
1104 | } | |
1105 | } | |
1106 | ||
1107 | LOOP_VINFO_LOOP (res) = loop; | |
1108 | LOOP_VINFO_BBS (res) = bbs; | |
1109 | LOOP_VINFO_EXIT_COND (res) = NULL; | |
a023975e | 1110 | LOOP_VINFO_NITERS (res) = NULL; |
79fe1b3b | 1111 | LOOP_VINFO_VECTORIZABLE_P (res) = 0; |
a023975e | 1112 | LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false; |
79fe1b3b DN |
1113 | LOOP_VINFO_VECT_FACTOR (res) = 0; |
1114 | VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, | |
1115 | "loop_write_datarefs"); | |
1116 | VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, | |
1117 | "loop_read_datarefs"); | |
a023975e OG |
1118 | |
1119 | for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++) | |
1120 | LOOP_UNALIGNED_DR (res, i) = NULL; | |
79fe1b3b DN |
1121 | return res; |
1122 | } | |
1123 | ||
1124 | ||
1125 | /* Function destroy_loop_vec_info. | |
1126 | ||
1127 | Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the | |
1128 | stmts in the loop. */ | |
1129 | ||
1130 | void | |
1131 | destroy_loop_vec_info (loop_vec_info loop_vinfo) | |
1132 | { | |
1133 | struct loop *loop; | |
1134 | basic_block *bbs; | |
1135 | int nbbs; | |
1136 | block_stmt_iterator si; | |
1137 | int j; | |
1138 | ||
1139 | if (!loop_vinfo) | |
1140 | return; | |
1141 | ||
1142 | loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1143 | ||
1144 | bbs = LOOP_VINFO_BBS (loop_vinfo); | |
1145 | nbbs = loop->num_nodes; | |
1146 | ||
1147 | for (j = 0; j < nbbs; j++) | |
1148 | { | |
1149 | basic_block bb = bbs[j]; | |
1150 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1151 | { | |
1152 | tree stmt = bsi_stmt (si); | |
1153 | stmt_ann_t ann = stmt_ann (stmt); | |
1154 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1155 | free (stmt_info); | |
1156 | set_stmt_info (ann, NULL); | |
1157 | } | |
1158 | } | |
1159 | ||
1160 | free (LOOP_VINFO_BBS (loop_vinfo)); | |
1161 | varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); | |
1162 | varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo)); | |
1163 | ||
1164 | free (loop_vinfo); | |
1165 | } | |
1166 | ||
1167 | ||
1168 | /* Function debug_loop_stats. | |
1169 | ||
1170 | For vectorization statistics dumps. */ | |
1171 | ||
1172 | static bool | |
1173 | vect_debug_stats (struct loop *loop) | |
1174 | { | |
1175 | basic_block bb; | |
1176 | block_stmt_iterator si; | |
1177 | tree node = NULL_TREE; | |
1178 | ||
1179 | if (!dump_file || !(dump_flags & TDF_STATS)) | |
1180 | return false; | |
1181 | ||
1182 | if (!loop) | |
1183 | { | |
1184 | fprintf (dump_file, "\n"); | |
1185 | return true; | |
1186 | } | |
1187 | ||
1188 | if (!loop->header) | |
1189 | return false; | |
1190 | ||
1191 | bb = loop->header; | |
1192 | ||
1193 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1194 | { | |
1195 | node = bsi_stmt (si); | |
1196 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
1197 | break; | |
1198 | } | |
1199 | ||
1200 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
1201 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
1202 | { | |
1203 | fprintf (dump_file, "\nloop at %s:%d: ", | |
1204 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
1205 | return true; | |
1206 | } | |
1207 | ||
1208 | return false; | |
1209 | } | |
1210 | ||
1211 | ||
1212 | /* Function debug_loop_details. | |
1213 | ||
1214 | For vectorization debug dumps. */ | |
1215 | ||
1216 | static bool | |
1217 | vect_debug_details (struct loop *loop) | |
1218 | { | |
1219 | basic_block bb; | |
1220 | block_stmt_iterator si; | |
1221 | tree node = NULL_TREE; | |
1222 | ||
1223 | if (!dump_file || !(dump_flags & TDF_DETAILS)) | |
1224 | return false; | |
1225 | ||
1226 | if (!loop) | |
1227 | { | |
1228 | fprintf (dump_file, "\n"); | |
1229 | return true; | |
1230 | } | |
1231 | ||
1232 | if (!loop->header) | |
1233 | return false; | |
1234 | ||
1235 | bb = loop->header; | |
1236 | ||
1237 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1238 | { | |
1239 | node = bsi_stmt (si); | |
1240 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
1241 | break; | |
1242 | } | |
1243 | ||
1244 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
1245 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
1246 | { | |
1247 | fprintf (dump_file, "\nloop at %s:%d: ", | |
1248 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
1249 | return true; | |
1250 | } | |
1251 | ||
1252 | return false; | |
1253 | } | |
1254 | ||
6775f1f3 IR |
1255 | |
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.

   Currently a stub: always returns NULL_TREE, which callers treat as
   "alignment unknown".  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
		     tree vectype ATTRIBUTE_UNUSED,
		     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
1268 | ||
1269 | ||
/* Function vect_get_base_and_bit_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the OFFSET from BASE in bits.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
   bits of 'a.b[i] + 4B' from a.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   OFFSET - offset of EXPR from BASE in bits
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   or if VECTYPE is given but OFFSET cannot be determined:
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_bit_offset (struct data_reference *dr,
			      tree expr,
			      tree vectype,
			      loop_vec_info loop_vinfo,
			      tree *offset,
			      bool *base_aligned_p)
{
  tree this_offset = size_zero_node;
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  struct data_reference *array_dr;
  enum tree_code code = TREE_CODE (expr);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
      /* A declaration is its own base; it is aligned if its declared
	 alignment covers the vector type's.  */
      *offset = size_zero_node;
      if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
	*base_aligned_p = true;
      return expr;

    case SSA_NAME:
      if (!vectype)
	return expr;

      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
	return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
	{
	  /* Pointed-to type is under-aligned; fall back to explicit
	     pointer alignment info (currently always NULL_TREE).  */
	  base = vect_get_ptr_offset (expr, vectype, offset);
	  if (base)
	    *base_aligned_p = true;
	}
      else
	{
	  *base_aligned_p = true;
	  *offset = size_zero_node;
	  base = expr;
	}
      return base;

    case INTEGER_CST:
      /* A constant byte offset; convert to bits.  */
      *offset = int_const_binop (MULT_EXPR, expr,
				 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
      return expr;

    /* These cases continue the recursion:  */
    case COMPONENT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* Field's bit offset within the containing record.  */
      this_offset = bit_position (oprnd1);
      if (vectype && !host_integerp (this_offset, 1))
	return NULL_TREE;
      next_ref = oprnd0;
      break;

    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case ARRAY_REF:
      if (DR_REF (dr) != expr)
	/* Build array data_reference struct if the existing DR_REF
	   doesn't match EXPR. This happens, for example, when the
	   EXPR is *T and T is initialized to &arr[indx]. The DR struct
	   contains information on the access of T, not of arr. In order
	   to continue the analysis, we create a new DR struct that
	   describes the access of arr.
	*/
	array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
      else
	array_dr = dr;

      next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
						   vectype, &this_offset);
      if (!next_ref)
	return NULL_TREE;

      if (vectype &&
	  TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
	{
	  /* The array's element alignment already satisfies the vector
	     type; no need to recurse further.  */
	  *offset = this_offset;
	  *base_aligned_p = true;
	  return next_ref;
	}
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* In case we have a PLUS_EXPR of the form
	 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
	 This is verified in vect_get_symbl_and_dr.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      base = vect_get_base_and_bit_offset
	(dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
      if (vectype && !base)
	return NULL_TREE;

      next_ref = oprnd0;
      break;

    default:
      return NULL_TREE;
    }

  /* Recurse toward the base; OFFSET accumulates from the inside out.  */
  base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
				       loop_vinfo, offset, base_aligned_p);

  if (vectype && base)
    {
      *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
      if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
	return NULL_TREE;

      if (vect_debug_details (NULL))
	{
	  print_generic_expr (dump_file, expr, TDF_SLIM);
	  fprintf (dump_file, " --> total offset for ref: ");
	  print_generic_expr (dump_file, *offset, TDF_SLIM);
	}
    }
  return base;
}
1433 | ||
1434 | ||
1435 | /* Function vect_force_dr_alignment_p. | |
1436 | ||
1437 | Returns whether the alignment of a DECL can be forced to be aligned | |
1438 | on ALIGNMENT bit boundary. */ | |
1439 | ||
1440 | static bool | |
1441 | vect_can_force_dr_alignment_p (tree decl, unsigned int alignment) | |
1442 | { | |
1443 | if (TREE_CODE (decl) != VAR_DECL) | |
1444 | return false; | |
1445 | ||
1446 | if (DECL_EXTERNAL (decl)) | |
1447 | return false; | |
1448 | ||
1449 | if (TREE_STATIC (decl)) | |
1450 | return (alignment <= MAX_OFILE_ALIGNMENT); | |
1451 | else | |
7a8554ce DN |
1452 | /* This is not 100% correct. The absolute correct stack alignment |
1453 | is STACK_BOUNDARY. We're supposed to hope, but not assume, that | |
1454 | PREFERRED_STACK_BOUNDARY is honored by all translation units. | |
1455 | However, until someone implements forced stack alignment, SSE | |
1456 | isn't really usable without this. */ | |
1457 | return (alignment <= PREFERRED_STACK_BOUNDARY); | |
79fe1b3b DN |
1458 | } |
1459 | ||
1460 | ||
1461 | /* Function vect_get_new_vect_var. | |
1462 | ||
1463 | Returns a name for a new variable. The current naming scheme appends the | |
1464 | prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to | |
1465 | the name of vectorizer generated variables, and appends that to NAME if | |
1466 | provided. */ | |
1467 | ||
1468 | static tree | |
1469 | vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) | |
1470 | { | |
1471 | const char *prefix; | |
1472 | int prefix_len; | |
1473 | tree new_vect_var; | |
1474 | ||
1475 | if (var_kind == vect_simple_var) | |
1476 | prefix = "vect_"; | |
1477 | else | |
1478 | prefix = "vect_p"; | |
1479 | ||
1480 | prefix_len = strlen (prefix); | |
1481 | ||
1482 | if (name) | |
1483 | new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); | |
1484 | else | |
1485 | new_vect_var = create_tmp_var (type, prefix); | |
1486 | ||
1487 | return new_vect_var; | |
1488 | } | |
1489 | ||
1490 | ||
6775f1f3 | 1491 | /* Function vect_create_index_for_vector_ref. |
79fe1b3b DN |
1492 | |
1493 | Create (and return) an index variable, along with it's update chain in the | |
1494 | loop. This variable will be used to access a memory location in a vector | |
1495 | operation. | |
1496 | ||
1497 | Input: | |
6775f1f3 | 1498 | LOOP: The loop being vectorized. |
79fe1b3b DN |
1499 | BSI: The block_stmt_iterator where STMT is. Any new stmts created by this |
1500 | function can be added here, or in the loop pre-header. | |
1501 | ||
6775f1f3 IR |
1502 | Output: |
1503 | Return an index that will be used to index a vector array. It is expected | |
1504 | that a pointer to the first vector will be used as the base address for the | |
1505 | indexed reference. | |
1506 | ||
1507 | FORNOW: we are not trying to be efficient, just creating a new index each | |
1508 | time from scratch. At this time all vector references could use the same | |
1509 | index. | |
1510 | ||
1511 | TODO: create only one index to be used by all vector references. Record | |
1512 | the index in the LOOP_VINFO the first time this procedure is called and | |
1513 | return it on subsequent calls. The increment of this index must be placed | |
1514 | just before the conditional expression that ends the single block loop. */ | |
79fe1b3b DN |
1515 | |
1516 | static tree | |
6775f1f3 | 1517 | vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi) |
79fe1b3b | 1518 | { |
79fe1b3b | 1519 | tree init, step; |
79fe1b3b | 1520 | tree indx_before_incr, indx_after_incr; |
79fe1b3b | 1521 | |
6775f1f3 IR |
1522 | /* It is assumed that the base pointer used for vectorized access contains |
1523 | the address of the first vector. Therefore the index used for vectorized | |
1524 | access must be initialized to zero and incremented by 1. */ | |
79fe1b3b | 1525 | |
6775f1f3 IR |
1526 | init = integer_zero_node; |
1527 | step = integer_one_node; | |
1528 | ||
1529 | /* Assuming that bsi_insert is used with BSI_NEW_STMT */ | |
1530 | create_iv (init, step, NULL_TREE, loop, bsi, false, | |
1531 | &indx_before_incr, &indx_after_incr); | |
79fe1b3b | 1532 | |
6775f1f3 IR |
1533 | return indx_before_incr; |
1534 | } | |
79fe1b3b | 1535 | |
79fe1b3b | 1536 | |
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is be added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
				      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree data_ref_base_type = TREE_TYPE (data_ref_base);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree access_fn;
  tree init_val, step, init_oval;
  bool ok;
  bool is_ptr_ref, is_array_ref, is_addr_expr;
  tree array_base;
  tree vec_stmt;
  tree new_temp;
  tree array_ref;
  tree addr_base, addr_expr;
  tree dest, new_stmt;

  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
				    true);
  /* If the evolution could not be analyzed, fall back to index 0.  */
  if (!ok)
    init_oval = integer_zero_node;

  /* Classify the base: an SSA pointer, an array object, or an address
     expression; exactly one of these must hold.  */
  is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
	       && TREE_CODE (data_ref_base) == SSA_NAME;
  is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
  is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
		 || TREE_CODE (data_ref_base) == PLUS_EXPR
		 || TREE_CODE (data_ref_base) == MINUS_EXPR;
  gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);

  /** Create: &(base[init_val])

      if data_ref_base is an ARRAY_TYPE:
	 base = data_ref_base

      if data_ref_base is the SSA_NAME of a POINTER_TYPE:
	 base = *((scalar_array *) data_ref_base)
   **/

  if (is_array_ref)
    array_base = data_ref_base;
  else /* is_ptr_ref or is_addr_expr */
    {
      /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      /* Gimplify the base expression into DEST, emitting any needed
	 statements onto NEW_STMT_LIST.  */
      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      data_ref_base =
	force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);

      /* (*array_ptr)  */
      array_base = build_fold_indirect_ref (new_temp);
    }

  /* Gimplify the initial index value.  */
  dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
  add_referenced_tmp_var (dest);
  init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* init_val += OFFSET, materialized as a new SSA name.  */
      tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
      add_referenced_tmp_var (tmp);
      vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
      vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
      init_val = make_ssa_name (tmp, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = init_val;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
    }

  /* &base[init_val]  */
  array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
		      NULL_TREE, NULL_TREE);
  addr_base = build_fold_addr_expr (array_ref);

  /* addr_expr = addr_base */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
				     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  return new_temp;
}
1662 | ||
1663 | ||
1664 | /* Function get_vectype_for_scalar_type. | |
1665 | ||
1666 | Returns the vector type corresponding to SCALAR_TYPE as supported | |
1667 | by the target. */ | |
1668 | ||
1669 | static tree | |
1670 | get_vectype_for_scalar_type (tree scalar_type) | |
1671 | { | |
1672 | enum machine_mode inner_mode = TYPE_MODE (scalar_type); | |
1673 | int nbytes = GET_MODE_SIZE (inner_mode); | |
1674 | int nunits; | |
6775f1f3 | 1675 | tree vectype; |
79fe1b3b DN |
1676 | |
1677 | if (nbytes == 0) | |
1678 | return NULL_TREE; | |
1679 | ||
1680 | /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) | |
1681 | is expected. */ | |
1682 | nunits = UNITS_PER_SIMD_WORD / nbytes; | |
1683 | ||
6775f1f3 | 1684 | vectype = build_vector_type (scalar_type, nunits); |
f0923257 DN |
1685 | if (vect_debug_details (NULL)) |
1686 | { | |
1687 | fprintf (dump_file, "get vectype with %d units of type ", nunits); | |
1688 | print_generic_expr (dump_file, scalar_type, TDF_SLIM); | |
1689 | } | |
1690 | ||
1691 | if (!vectype) | |
6775f1f3 | 1692 | return NULL_TREE; |
f0923257 DN |
1693 | |
1694 | if (vect_debug_details (NULL)) | |
1695 | { | |
1696 | fprintf (dump_file, "vectype: "); | |
1697 | print_generic_expr (dump_file, vectype, TDF_SLIM); | |
1698 | } | |
1699 | ||
1700 | if (!VECTOR_MODE_P (TYPE_MODE (vectype))) | |
1701 | { | |
1702 | /* TODO: tree-complex.c sometimes can parallelize operations | |
1703 | on generic vectors. We can vectorize the loop in that case, | |
1704 | but then we should re-run the lowering pass. */ | |
1705 | if (vect_debug_details (NULL)) | |
1706 | fprintf (dump_file, "mode not supported by target."); | |
1707 | return NULL_TREE; | |
1708 | } | |
1709 | ||
6775f1f3 | 1710 | return vectype; |
79fe1b3b DN |
1711 | } |
1712 | ||
1713 | ||
1714 | /* Function vect_align_data_ref. | |
1715 | ||
   Handle misalignment of memory accesses. |
1717 | ||
1718 | FORNOW: Can't handle misaligned accesses. | |
1719 | Make sure that the dataref is aligned. */ | |
1720 | ||
1721 | static void | |
1722 | vect_align_data_ref (tree stmt) | |
1723 | { | |
1724 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1725 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); | |
1726 | ||
1727 | /* FORNOW: can't handle misaligned accesses; | |
1728 | all accesses expected to be aligned. */ | |
1e128c5f | 1729 | gcc_assert (aligned_access_p (dr)); |
79fe1b3b DN |
1730 | } |
1731 | ||
1732 | ||
7ccf35ed | 1733 | /* Function vect_create_data_ref_ptr. |
79fe1b3b DN |
1734 | |
1735 | Create a memory reference expression for vector access, to be used in a | |
7ccf35ed DN |
1736 | vector load/store stmt. The reference is based on a new pointer to vector |
1737 | type (vp). | |
79fe1b3b DN |
1738 | |
1739 | Input: | |
7ccf35ed DN |
1740 | 1. STMT: a stmt that references memory. Expected to be of the form |
1741 | MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. | |
1742 | 2. BSI: block_stmt_iterator where new stmts can be added. | |
1743 | 3. OFFSET (optional): an offset to be added to the initial address accessed | |
1744 | by the data-ref in STMT. | |
1745 | 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain | |
1746 | pointing to the initial address. | |
79fe1b3b DN |
1747 | |
1748 | Output: | |
7ccf35ed DN |
1749 | 1. Declare a new ptr to vector_type, and have it point to the base of the |
   data reference (initial address accessed by the data reference). |
1751 | For example, for vector of type V8HI, the following code is generated: | |
1752 | ||
1753 | v8hi *vp; | |
1754 | vp = (v8hi *)initial_address; | |
1755 | ||
1756 | if OFFSET is not supplied: | |
1757 | initial_address = &a[init]; | |
1758 | if OFFSET is supplied: | |
1759 | initial_address = &a[init + OFFSET]; | |
1760 | ||
1761 | Return the initial_address in INITIAL_ADDRESS. | |
1762 | ||
1763 | 2. Create a data-reference in the loop based on the new vector pointer vp, | |
1764 | and using a new index variable 'idx' as follows: | |
1765 | ||
1766 | vp' = vp + update | |
1767 | ||
1768 | where if ONLY_INIT is true: | |
1769 | update = zero | |
1770 | and otherwise | |
1771 | update = idx + vector_type_size | |
1772 | ||
1773 | Return the pointer vp'. | |
1774 | ||
79fe1b3b DN |
1775 | |
1776 | FORNOW: handle only aligned and consecutive accesses. */ | |
1777 | ||
static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
			  tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;

  /* Unshare the base so the trees built below do not alias the
     data-ref's own copy.  */
  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (dump_file, "vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (dump_file, "vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (dump_file, "vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (dump_file, "vectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer:  **/

  /* The new pointer inherits the memory tag recorded during analysis,
     so it aliases exactly what the original data-ref aliases.  */
  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_OP (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* The preheader is expected to already exist: inserting on its edge
     must not split it into a new block.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  /* IDX counts vector iterations (0, 1, 2, ...) inside the loop.  */
  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  ptr_update = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
                                GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
1911 | ||
1912 | ||
1913 | /* Function vect_create_destination_var. | |
1914 | ||
1915 | Create a new temporary of type VECTYPE. */ | |
1916 | ||
1917 | static tree | |
1918 | vect_create_destination_var (tree scalar_dest, tree vectype) | |
1919 | { | |
1920 | tree vec_dest; | |
1921 | const char *new_name; | |
1922 | ||
1e128c5f | 1923 | gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); |
79fe1b3b DN |
1924 | |
1925 | new_name = get_name (scalar_dest); | |
1926 | if (!new_name) | |
1927 | new_name = "var_"; | |
1928 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); | |
1929 | add_referenced_tmp_var (vec_dest); | |
1930 | ||
1931 | return vec_dest; | |
1932 | } | |
1933 | ||
1934 | ||
1935 | /* Function vect_init_vector. | |
1936 | ||
1937 | Insert a new stmt (INIT_STMT) that initializes a new vector variable with | |
1938 | the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be | |
1939 | used in the vectorization of STMT. */ | |
1940 | ||
1941 | static tree | |
1942 | vect_init_vector (tree stmt, tree vector_var) | |
1943 | { | |
1944 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
1945 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
1946 | tree new_var; | |
1947 | tree init_stmt; | |
1948 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
1949 | tree vec_oprnd; | |
1950 | edge pe; | |
1951 | tree new_temp; | |
6775f1f3 | 1952 | basic_block new_bb; |
79fe1b3b DN |
1953 | |
1954 | new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); | |
1955 | add_referenced_tmp_var (new_var); | |
1956 | ||
1957 | init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); | |
1958 | new_temp = make_ssa_name (new_var, init_stmt); | |
1959 | TREE_OPERAND (init_stmt, 0) = new_temp; | |
1960 | ||
1961 | pe = loop_preheader_edge (loop); | |
6775f1f3 IR |
1962 | new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); |
1963 | gcc_assert (!new_bb); | |
79fe1b3b DN |
1964 | |
1965 | if (vect_debug_details (NULL)) | |
1966 | { | |
1967 | fprintf (dump_file, "created new init_stmt: "); | |
1968 | print_generic_expr (dump_file, init_stmt, TDF_SLIM); | |
1969 | } | |
1970 | ||
1971 | vec_oprnd = TREE_OPERAND (init_stmt, 0); | |
1972 | return vec_oprnd; | |
1973 | } | |
1974 | ||
1975 | ||
1976 | /* Function vect_get_vec_def_for_operand. | |
1977 | ||
1978 | OP is an operand in STMT. This function returns a (vector) def that will be | |
1979 | used in the vectorized stmt for STMT. | |
1980 | ||
1981 | In the case that OP is an SSA_NAME which is defined in the loop, then | |
1982 | STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. | |
1983 | ||
1984 | In case OP is an invariant or constant, a new stmt that creates a vector def | |
1985 | needs to be introduced. */ | |
1986 | ||
1987 | static tree | |
1988 | vect_get_vec_def_for_operand (tree op, tree stmt) | |
1989 | { | |
1990 | tree vec_oprnd; | |
1991 | tree vec_stmt; | |
1992 | tree def_stmt; | |
1993 | stmt_vec_info def_stmt_info = NULL; | |
1994 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
1995 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
1996 | int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
1997 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
1998 | basic_block bb; | |
1999 | tree vec_inv; | |
2000 | tree t = NULL_TREE; | |
2001 | tree def; | |
2002 | int i; | |
2003 | ||
2004 | if (vect_debug_details (NULL)) | |
2005 | { | |
2006 | fprintf (dump_file, "vect_get_vec_def_for_operand: "); | |
2007 | print_generic_expr (dump_file, op, TDF_SLIM); | |
2008 | } | |
2009 | ||
2010 | /** ===> Case 1: operand is a constant. **/ | |
2011 | ||
2012 | if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST) | |
2013 | { | |
2014 | /* Create 'vect_cst_ = {cst,cst,...,cst}' */ | |
2015 | ||
2016 | tree vec_cst; | |
79fe1b3b DN |
2017 | |
2018 | /* Build a tree with vector elements. */ | |
2019 | if (vect_debug_details (NULL)) | |
2020 | fprintf (dump_file, "Create vector_cst. nunits = %d", nunits); | |
2021 | ||
2022 | for (i = nunits - 1; i >= 0; --i) | |
2023 | { | |
2024 | t = tree_cons (NULL_TREE, op, t); | |
2025 | } | |
2026 | vec_cst = build_vector (vectype, t); | |
2027 | return vect_init_vector (stmt, vec_cst); | |
2028 | } | |
2029 | ||
1e128c5f | 2030 | gcc_assert (TREE_CODE (op) == SSA_NAME); |
79fe1b3b DN |
2031 | |
2032 | /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/ | |
2033 | ||
2034 | def_stmt = SSA_NAME_DEF_STMT (op); | |
2035 | def_stmt_info = vinfo_for_stmt (def_stmt); | |
2036 | ||
2037 | if (vect_debug_details (NULL)) | |
2038 | { | |
2039 | fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: "); | |
2040 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
2041 | } | |
2042 | ||
2043 | ||
2044 | /** ==> Case 2.1: operand is defined inside the loop. **/ | |
2045 | ||
2046 | if (def_stmt_info) | |
2047 | { | |
2048 | /* Get the def from the vectorized stmt. */ | |
2049 | ||
2050 | vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); | |
1e128c5f | 2051 | gcc_assert (vec_stmt); |
79fe1b3b DN |
2052 | vec_oprnd = TREE_OPERAND (vec_stmt, 0); |
2053 | return vec_oprnd; | |
2054 | } | |
2055 | ||
2056 | ||
2057 | /** ==> Case 2.2: operand is defined by the loop-header phi-node - | |
2058 | it is a reduction/induction. **/ | |
2059 | ||
2060 | bb = bb_for_stmt (def_stmt); | |
2061 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
2062 | { | |
2063 | if (vect_debug_details (NULL)) | |
2064 | fprintf (dump_file, "reduction/induction - unsupported."); | |
1e128c5f | 2065 | internal_error ("no support for reduction/induction"); /* FORNOW */ |
79fe1b3b DN |
2066 | } |
2067 | ||
2068 | ||
2069 | /** ==> Case 2.3: operand is defined outside the loop - | |
2070 | it is a loop invariant. */ | |
2071 | ||
2072 | switch (TREE_CODE (def_stmt)) | |
2073 | { | |
2074 | case PHI_NODE: | |
2075 | def = PHI_RESULT (def_stmt); | |
2076 | break; | |
2077 | case MODIFY_EXPR: | |
2078 | def = TREE_OPERAND (def_stmt, 0); | |
2079 | break; | |
2080 | case NOP_EXPR: | |
2081 | def = TREE_OPERAND (def_stmt, 0); | |
1e128c5f | 2082 | gcc_assert (IS_EMPTY_STMT (def_stmt)); |
79fe1b3b DN |
2083 | def = op; |
2084 | break; | |
2085 | default: | |
2086 | if (vect_debug_details (NULL)) | |
2087 | { | |
2088 | fprintf (dump_file, "unsupported defining stmt: "); | |
2089 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
2090 | } | |
1e128c5f | 2091 | internal_error ("unsupported defining stmt"); |
79fe1b3b DN |
2092 | } |
2093 | ||
2094 | /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */ | |
2095 | ||
2096 | if (vect_debug_details (NULL)) | |
2097 | fprintf (dump_file, "Create vector_inv."); | |
2098 | ||
2099 | for (i = nunits - 1; i >= 0; --i) | |
2100 | { | |
2101 | t = tree_cons (NULL_TREE, def, t); | |
2102 | } | |
2103 | ||
2104 | vec_inv = build_constructor (vectype, t); | |
2105 | return vect_init_vector (stmt, vec_inv); | |
2106 | } | |
2107 | ||
2108 | ||
2109 | /* Function vect_finish_stmt_generation. | |
2110 | ||
2111 | Insert a new stmt. */ | |
2112 | ||
2113 | static void | |
2114 | vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) | |
2115 | { | |
2116 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
2117 | ||
2118 | if (vect_debug_details (NULL)) | |
2119 | { | |
2120 | fprintf (dump_file, "add new stmt: "); | |
2121 | print_generic_expr (dump_file, vec_stmt, TDF_SLIM); | |
2122 | } | |
2123 | ||
2124 | /* Make sure bsi points to the stmt that is being vectorized. */ | |
2125 | ||
7ccf35ed DN |
2126 | /* Assumption: any stmts created for the vectorization of stmt S were |
2127 | inserted before S. BSI is expected to point to S or some new stmt before S. */ | |
79fe1b3b DN |
2128 | |
2129 | while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) | |
2130 | bsi_next (bsi); | |
1e128c5f | 2131 | gcc_assert (stmt == bsi_stmt (*bsi)); |
79fe1b3b DN |
2132 | } |
2133 | ||
2134 | ||
2135 | /* Function vectorizable_assignment. | |
2136 | ||
2137 | Check if STMT performs an assignment (copy) that can be vectorized. | |
2138 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
2139 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
2140 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
2141 | ||
2142 | static bool | |
2143 | vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
2144 | { | |
2145 | tree vec_dest; | |
2146 | tree scalar_dest; | |
2147 | tree op; | |
2148 | tree vec_oprnd; | |
2149 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2150 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
2151 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2152 | tree new_temp; | |
2153 | ||
2154 | /* Is vectorizable assignment? */ | |
2155 | ||
2156 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2157 | return false; | |
2158 | ||
2159 | scalar_dest = TREE_OPERAND (stmt, 0); | |
2160 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
2161 | return false; | |
2162 | ||
2163 | op = TREE_OPERAND (stmt, 1); | |
2164 | if (!vect_is_simple_use (op, loop, NULL)) | |
2165 | { | |
2166 | if (vect_debug_details (NULL)) | |
2167 | fprintf (dump_file, "use not simple."); | |
2168 | return false; | |
2169 | } | |
2170 | ||
2171 | if (!vec_stmt) /* transformation not required. */ | |
2172 | { | |
2173 | STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; | |
2174 | return true; | |
2175 | } | |
2176 | ||
2177 | /** Trasform. **/ | |
2178 | if (vect_debug_details (NULL)) | |
2179 | fprintf (dump_file, "transform assignment."); | |
2180 | ||
2181 | /* Handle def. */ | |
2182 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2183 | ||
2184 | /* Handle use. */ | |
2185 | op = TREE_OPERAND (stmt, 1); | |
2186 | vec_oprnd = vect_get_vec_def_for_operand (op, stmt); | |
2187 | ||
2188 | /* Arguments are ready. create the new vector stmt. */ | |
2189 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); | |
2190 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
2191 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
2192 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
2193 | ||
2194 | return true; | |
2195 | } | |
2196 | ||
2197 | ||
2198 | /* Function vectorizable_operation. | |
2199 | ||
2200 | Check if STMT performs a binary or unary operation that can be vectorized. | |
2201 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
2202 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
2203 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
2204 | ||
2205 | static bool | |
2206 | vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
2207 | { | |
2208 | tree vec_dest; | |
2209 | tree scalar_dest; | |
2210 | tree operation; | |
2211 | tree op0, op1 = NULL; | |
2212 | tree vec_oprnd0, vec_oprnd1=NULL; | |
2213 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2214 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
2215 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2216 | int i; | |
2217 | enum tree_code code; | |
2218 | enum machine_mode vec_mode; | |
2219 | tree new_temp; | |
2220 | int op_type; | |
2221 | tree op; | |
2222 | optab optab; | |
2223 | ||
2224 | /* Is STMT a vectorizable binary/unary operation? */ | |
2225 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2226 | return false; | |
2227 | ||
2228 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) | |
2229 | return false; | |
2230 | ||
2231 | operation = TREE_OPERAND (stmt, 1); | |
2232 | code = TREE_CODE (operation); | |
2233 | optab = optab_for_tree_code (code, vectype); | |
2234 | ||
2235 | /* Support only unary or binary operations. */ | |
2236 | op_type = TREE_CODE_LENGTH (code); | |
2237 | if (op_type != unary_op && op_type != binary_op) | |
2238 | { | |
2239 | if (vect_debug_details (NULL)) | |
2240 | fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type); | |
2241 | return false; | |
2242 | } | |
2243 | ||
2244 | for (i = 0; i < op_type; i++) | |
2245 | { | |
2246 | op = TREE_OPERAND (operation, i); | |
2247 | if (!vect_is_simple_use (op, loop, NULL)) | |
2248 | { | |
2249 | if (vect_debug_details (NULL)) | |
2250 | fprintf (dump_file, "use not simple."); | |
2251 | return false; | |
2252 | } | |
2253 | } | |
2254 | ||
2255 | /* Supportable by target? */ | |
2256 | if (!optab) | |
2257 | { | |
2258 | if (vect_debug_details (NULL)) | |
2259 | fprintf (dump_file, "no optab."); | |
2260 | return false; | |
2261 | } | |
2262 | vec_mode = TYPE_MODE (vectype); | |
2263 | if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) | |
2264 | { | |
2265 | if (vect_debug_details (NULL)) | |
2266 | fprintf (dump_file, "op not supported by target."); | |
2267 | return false; | |
2268 | } | |
2269 | ||
2270 | if (!vec_stmt) /* transformation not required. */ | |
2271 | { | |
2272 | STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; | |
2273 | return true; | |
2274 | } | |
2275 | ||
02ca1718 | 2276 | /** Transform. **/ |
79fe1b3b DN |
2277 | |
2278 | if (vect_debug_details (NULL)) | |
2279 | fprintf (dump_file, "transform binary/unary operation."); | |
2280 | ||
2281 | /* Handle def. */ | |
2282 | scalar_dest = TREE_OPERAND (stmt, 0); | |
2283 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2284 | ||
2285 | /* Handle uses. */ | |
2286 | op0 = TREE_OPERAND (operation, 0); | |
2287 | vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); | |
2288 | ||
2289 | if (op_type == binary_op) | |
2290 | { | |
2291 | op1 = TREE_OPERAND (operation, 1); | |
2292 | vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); | |
2293 | } | |
2294 | ||
2295 | /* Arguments are ready. create the new vector stmt. */ | |
2296 | ||
2297 | if (op_type == binary_op) | |
2298 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, | |
2299 | build2 (code, vectype, vec_oprnd0, vec_oprnd1)); | |
2300 | else | |
2301 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, | |
2302 | build1 (code, vectype, vec_oprnd0)); | |
2303 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
2304 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
2305 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
2306 | ||
2307 | return true; | |
2308 | } | |
2309 | ||
2310 | ||
2311 | /* Function vectorizable_store. | |
2312 | ||
2313 | Check if STMT defines a non scalar data-ref (array/pointer/structure) that | |
2314 | can be vectorized. | |
2315 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
2316 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
2317 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
2318 | ||
2319 | static bool | |
2320 | vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
2321 | { | |
2322 | tree scalar_dest; | |
2323 | tree data_ref; | |
2324 | tree op; | |
2325 | tree vec_oprnd1; | |
2326 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2327 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
2328 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2329 | enum machine_mode vec_mode; | |
7ccf35ed | 2330 | tree dummy; |
79fe1b3b DN |
2331 | |
2332 | /* Is vectorizable store? */ | |
2333 | ||
2334 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2335 | return false; | |
2336 | ||
2337 | scalar_dest = TREE_OPERAND (stmt, 0); | |
2338 | if (TREE_CODE (scalar_dest) != ARRAY_REF | |
2339 | && TREE_CODE (scalar_dest) != INDIRECT_REF) | |
2340 | return false; | |
2341 | ||
2342 | op = TREE_OPERAND (stmt, 1); | |
2343 | if (!vect_is_simple_use (op, loop, NULL)) | |
2344 | { | |
2345 | if (vect_debug_details (NULL)) | |
2346 | fprintf (dump_file, "use not simple."); | |
2347 | return false; | |
2348 | } | |
2349 | ||
2350 | vec_mode = TYPE_MODE (vectype); | |
2351 | /* FORNOW. In some cases can vectorize even if data-type not supported | |
2352 | (e.g. - array initialization with 0). */ | |
2353 | if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) | |
2354 | return false; | |
2355 | ||
2356 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
2357 | return false; | |
2358 | ||
7ccf35ed | 2359 | |
79fe1b3b DN |
2360 | if (!vec_stmt) /* transformation not required. */ |
2361 | { | |
2362 | STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; | |
2363 | return true; | |
2364 | } | |
2365 | ||
2366 | /** Trasform. **/ | |
2367 | ||
2368 | if (vect_debug_details (NULL)) | |
2369 | fprintf (dump_file, "transform store"); | |
2370 | ||
2371 | /* Handle use - get the vectorized def from the defining stmt. */ | |
2372 | vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); | |
2373 | ||
2374 | /* Handle def. */ | |
7ccf35ed DN |
2375 | /* FORNOW: make sure the data reference is aligned. */ |
2376 | vect_align_data_ref (stmt); | |
2377 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); | |
2378 | data_ref = build_fold_indirect_ref (data_ref); | |
79fe1b3b DN |
2379 | |
2380 | /* Arguments are ready. create the new vector stmt. */ | |
2381 | *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); | |
2382 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
2383 | ||
2384 | return true; | |
2385 | } | |
2386 | ||
2387 | ||
2388 | /* vectorizable_load. | |
2389 | ||
2390 | Check if STMT reads a non scalar data-ref (array/pointer/structure) that | |
2391 | can be vectorized. | |
2392 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
2393 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
2394 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
2395 | ||
2396 | static bool | |
2397 | vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
2398 | { | |
2399 | tree scalar_dest; | |
2400 | tree vec_dest = NULL; | |
2401 | tree data_ref = NULL; | |
2402 | tree op; | |
2403 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
7ccf35ed | 2404 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
79fe1b3b DN |
2405 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2406 | tree new_temp; | |
7ccf35ed DN |
2407 | int mode; |
2408 | tree init_addr; | |
2409 | tree new_stmt; | |
2410 | tree dummy; | |
2411 | basic_block new_bb; | |
2412 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2413 | edge pe = loop_preheader_edge (loop); | |
2414 | bool software_pipeline_loads_p = false; | |
79fe1b3b DN |
2415 | |
2416 | /* Is vectorizable load? */ | |
2417 | ||
2418 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2419 | return false; | |
2420 | ||
2421 | scalar_dest = TREE_OPERAND (stmt, 0); | |
2422 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
2423 | return false; | |
2424 | ||
2425 | op = TREE_OPERAND (stmt, 1); | |
2426 | if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) | |
2427 | return false; | |
2428 | ||
2429 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
2430 | return false; | |
2431 | ||
7ccf35ed DN |
2432 | mode = (int) TYPE_MODE (vectype); |
2433 | ||
79fe1b3b | 2434 | /* FORNOW. In some cases can vectorize even if data-type not supported |
7ccf35ed DN |
2435 | (e.g. - data copies). */ |
2436 | if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) | |
2437 | { | |
2438 | if (vect_debug_details (loop)) | |
2439 | fprintf (dump_file, "Aligned load, but unsupported type."); | |
2440 | return false; | |
2441 | } | |
2442 | ||
2443 | if (!aligned_access_p (dr)) | |
2444 | { | |
2445 | if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing | |
2446 | && (!targetm.vectorize.builtin_mask_for_load | |
2447 | || targetm.vectorize.builtin_mask_for_load ())) | |
2448 | software_pipeline_loads_p = true; | |
2449 | else if (!targetm.vectorize.misaligned_mem_ok (mode)) | |
2450 | { | |
a023975e OG |
2451 | /* Possibly unaligned access, and can't software pipeline the loads. |
2452 | */ | |
7ccf35ed DN |
2453 | if (vect_debug_details (loop)) |
2454 | fprintf (dump_file, "Arbitrary load not supported."); | |
2455 | return false; | |
2456 | } | |
2457 | } | |
79fe1b3b DN |
2458 | |
2459 | if (!vec_stmt) /* transformation not required. */ | |
2460 | { | |
2461 | STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; | |
2462 | return true; | |
2463 | } | |
2464 | ||
2465 | /** Trasform. **/ | |
2466 | ||
2467 | if (vect_debug_details (NULL)) | |
2468 | fprintf (dump_file, "transform load."); | |
2469 | ||
7ccf35ed DN |
2470 | if (!software_pipeline_loads_p) |
2471 | { | |
2472 | /* Create: | |
2473 | p = initial_addr; | |
2474 | indx = 0; | |
2475 | loop { | |
2476 | vec_dest = *(p); | |
2477 | indx = indx + 1; | |
2478 | } | |
2479 | */ | |
2480 | ||
2481 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2482 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); | |
2483 | if (aligned_access_p (dr)) | |
2484 | data_ref = build_fold_indirect_ref (data_ref); | |
2485 | else | |
2486 | { | |
2487 | int mis = DR_MISALIGNMENT (dr); | |
2488 | tree tmis = (mis == -1 ? | |
2489 | integer_zero_node : | |
2490 | build_int_cst (integer_type_node, mis)); | |
2491 | tmis = int_const_binop (MULT_EXPR, tmis, | |
2492 | build_int_cst (integer_type_node, BITS_PER_UNIT), 1); | |
2493 | data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); | |
2494 | } | |
2495 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
2496 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
2497 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
2498 | vect_finish_stmt_generation (stmt, new_stmt, bsi); | |
2499 | } | |
2500 | else /* software-pipeline the loads */ | |
2501 | { | |
2502 | /* Create: | |
2503 | p1 = initial_addr; | |
2504 | msq_init = *(floor(p1)) | |
2505 | p2 = initial_addr + VS - 1; | |
2506 | magic = have_builtin ? builtin_result : initial_address; | |
2507 | indx = 0; | |
2508 | loop { | |
2509 | p2' = p2 + indx * vectype_size | |
2510 | lsq = *(floor(p2')) | |
2511 | vec_dest = realign_load (msq, lsq, magic) | |
2512 | indx = indx + 1; | |
2513 | msq = lsq; | |
2514 | } | |
2515 | */ | |
2516 | ||
2517 | tree offset; | |
2518 | tree magic; | |
2519 | tree phi_stmt; | |
2520 | tree msq_init; | |
2521 | tree msq, lsq; | |
2522 | tree dataref_ptr; | |
2523 | tree params; | |
2524 | ||
2525 | /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ | |
2526 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2527 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, | |
2528 | &init_addr, true); | |
2529 | data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); | |
2530 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
2531 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
2532 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
2533 | new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); | |
2534 | gcc_assert (!new_bb); | |
2535 | msq_init = TREE_OPERAND (new_stmt, 0); | |
2536 | ||
2537 | ||
2538 | /* <2> Create lsq = *(floor(p2')) in the loop */ | |
2539 | offset = build_int_cst (integer_type_node, | |
2540 | GET_MODE_NUNITS (TYPE_MODE (vectype))); | |
2541 | offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1); | |
2542 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2543 | dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); | |
2544 | data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); | |
2545 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
2546 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
2547 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
2548 | vect_finish_stmt_generation (stmt, new_stmt, bsi); | |
2549 | lsq = TREE_OPERAND (new_stmt, 0); | |
2550 | ||
2551 | ||
2552 | /* <3> */ | |
2553 | if (targetm.vectorize.builtin_mask_for_load) | |
2554 | { | |
2555 | /* Create permutation mask, if required, in loop preheader. */ | |
2556 | tree builtin_decl; | |
2557 | params = build_tree_list (NULL_TREE, init_addr); | |
2558 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2559 | builtin_decl = targetm.vectorize.builtin_mask_for_load (); | |
2560 | new_stmt = build_function_call_expr (builtin_decl, params); | |
2561 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); | |
2562 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
2563 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
2564 | new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); | |
2565 | gcc_assert (!new_bb); | |
2566 | magic = TREE_OPERAND (new_stmt, 0); | |
2567 | } | |
2568 | else | |
2569 | { | |
a023975e OG |
2570 | /* Use current address instead of init_addr for reduced reg pressure. |
2571 | */ | |
7ccf35ed DN |
2572 | magic = dataref_ptr; |
2573 | } | |
79fe1b3b | 2574 | |
79fe1b3b | 2575 | |
7ccf35ed DN |
2576 | /* <4> Create msq = phi <msq_init, lsq> in loop */ |
2577 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2578 | msq = make_ssa_name (vec_dest, NULL_TREE); | |
2579 | phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ | |
2580 | SSA_NAME_DEF_STMT (msq) = phi_stmt; | |
2581 | add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop)); | |
2582 | add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop)); | |
2583 | ||
79fe1b3b | 2584 | |
a023975e OG |
2585 | /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */ |
2586 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2587 | new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); | |
2588 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); | |
2589 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
2590 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
2591 | vect_finish_stmt_generation (stmt, new_stmt, bsi); | |
2592 | } | |
2593 | ||
2594 | *vec_stmt = new_stmt; | |
2595 | return true; | |
2596 | } | |
2597 | ||
2598 | ||
2599 | /* Function vect_transform_stmt. | |
2600 | ||
2601 | Create a vectorized stmt to replace STMT, and insert it at BSI. */ | |
2602 | ||
2603 | static bool | |
2604 | vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) | |
2605 | { | |
2606 | bool is_store = false; | |
2607 | tree vec_stmt = NULL_TREE; | |
2608 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2609 | bool done; | |
2610 | ||
2611 | switch (STMT_VINFO_TYPE (stmt_info)) | |
2612 | { | |
2613 | case op_vec_info_type: | |
2614 | done = vectorizable_operation (stmt, bsi, &vec_stmt); | |
2615 | gcc_assert (done); | |
2616 | break; | |
2617 | ||
2618 | case assignment_vec_info_type: | |
2619 | done = vectorizable_assignment (stmt, bsi, &vec_stmt); | |
2620 | gcc_assert (done); | |
2621 | break; | |
2622 | ||
2623 | case load_vec_info_type: | |
2624 | done = vectorizable_load (stmt, bsi, &vec_stmt); | |
2625 | gcc_assert (done); | |
2626 | break; | |
2627 | ||
2628 | case store_vec_info_type: | |
2629 | done = vectorizable_store (stmt, bsi, &vec_stmt); | |
2630 | gcc_assert (done); | |
2631 | is_store = true; | |
2632 | break; | |
2633 | default: | |
2634 | if (vect_debug_details (NULL)) | |
2635 | fprintf (dump_file, "stmt not supported."); | |
2636 | gcc_unreachable (); | |
2637 | } | |
2638 | ||
2639 | STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | |
2640 | ||
2641 | return is_store; | |
2642 | } | |
2643 | ||
2644 | ||
/* Function vect_build_loop_niters.

   Build ni_name = number of iterations the loop executes, as an SSA name,
   and insert the computation on the loop preheader edge.  Returns the
   SSA name holding the iteration count.  */

static tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni_name, stmt, var;
  edge pe;
  basic_block new_bb;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  /* Unshare: the niters expression is also referenced elsewhere and must
     not be modified in place by gimplification.  */
  tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));

  var = create_tmp_var (TREE_TYPE (ni), "niters");
  add_referenced_tmp_var (var);
  if (TREE_CODE (ni) == INTEGER_CST)
    {
      /* This case is generated when treating a known loop bound
	 indivisible by VF.  Here we cannot use force_gimple_operand,
	 so build the copy assignment and its SSA name by hand.  */
      stmt = build (MODIFY_EXPR, void_type_node, var, ni);
      ni_name = make_ssa_name (var, stmt);
      TREE_OPERAND (stmt, 0) = ni_name;
    }
  else
    /* Symbolic bound: let the gimplifier produce a valid stmt list.  */
    ni_name = force_gimple_operand (ni, &stmt, false, var);

  /* Insert the computation on the preheader edge; if the insertion
     created a new basic block, register it with the enclosing loop.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  return ni_name;
}
2677 | ||
2678 | ||
/* Function vect_generate_tmps_on_preheader.

   Generate the following statements:

     ni_name            = number of iterations the loop executes
     ratio              = ni_name / vf
     ratio_mult_vf_name = ratio * vf

   and place them on the loop preheader edge.  The three values are
   returned through NI_NAME_P, RATIO_MULT_VF_NAME_P and RATIO_P.  */

static void 
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
				 tree *ratio_mult_vf_name_p, tree *ratio_p)
{

  edge pe;
  basic_block new_bb;
  tree stmt, ni_name;
  tree ratio;
  tree ratio_mult_vf_name, ratio_mult_vf;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS(loop_vinfo);

  int vf, i;

  /* Generate temporary variable that contains 
     number of iterations loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo);

  /* ratio = ni / vf.
     vf is a power of 2, so the division is done as ni >> log2 (vf).  */
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  ratio = vect_build_symbol_bound (ni_name, vf, loop);
       
  /* Update initial conditions of loop copy.  */
       
  /* ratio_mult_vf = ratio * vf;
     again vf is a power of 2, so computed as ratio << log2 (vf).  */

  i = exact_log2 (vf);
  ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
  add_referenced_tmp_var (ratio_mult_vf);

  ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);

  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
		 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
			 ratio, build_int_cst (unsigned_type_node,
					       i)));

  /* SSA name was created with a NULL def stmt; link it to the shift.  */
  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;

  /* Insert on the preheader edge; register any newly created block.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  *ni_name_p = ni_name;
  *ratio_mult_vf_name_p = ratio_mult_vf_name;
  *ratio_p = ratio;
    
  return;  
}
2741 | ||
2742 | ||
/* Function vect_build_symbol_bound.

   Generate the statement

     tmp = n / vf;

   and attach it to the preheader of LOOP.  VF must be a power of 2,
   so the division is emitted as a right shift.  Returns the SSA name
   holding the result.  */

static tree
vect_build_symbol_bound (tree n, int vf, struct loop * loop)
{
  tree var, stmt, var_name;
  edge pe;
  basic_block new_bb;
  int i;

  /* create temporary variable */
  var = create_tmp_var (TREE_TYPE (n), "bnd");
  add_referenced_tmp_var (var);

  var_name = make_ssa_name (var, NULL_TREE);

  /* vf is power of 2; then n/vf = n >> log2 (vf).  */

  i = exact_log2 (vf);
  stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
		 build2 (RSHIFT_EXPR, TREE_TYPE (n),
			 n, build_int_cst (unsigned_type_node,i)));

  /* The SSA name was created before the stmt; link them now.  */
  SSA_NAME_DEF_STMT (var_name) = stmt;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
  else  
    if (vect_debug_details (NULL))
      fprintf (dump_file, "New bb on preheader edge was not generated.");

  return var_name;
}
2782 | ||
2783 | ||
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop:  a fresh IV counting
   vector iterations from 0 is created, and the old exit test is
   replaced by a comparison of that IV against the new bound
   (old_niters/vf when the bound is a compile-time constant, otherwise
   the precomputed NITERS).  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  bool symbol_niters;		/* True when niters is not a constant.  */
  tree cond;
  tree lb_type;

  symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);

  if (!symbol_niters)
    old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);

  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* The exit condition recorded at analysis time must still be the
     last stmt before the exit edge.  */
  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
#ifdef ENABLE_CHECKING
  gcc_assert (orig_cond_expr);
#endif
  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));

  /* New IV: starts at 0, steps by 1 each (vector) iteration.  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back 
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);

  /* new loop exit test:  */
  /* Take the type from the second operand of the original comparison
     so the new bound is compatible.  */
  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
  if (!symbol_niters)
    new_loop_bound = fold_convert (lb_type,
				   build_int_cst (unsigned_type_node, 
						  old_N/vf));
  else
    new_loop_bound = niters;

  /* Pick the comparison direction according to which edge exits.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, 
		   indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, 
		   indx_after_incr, new_loop_bound);

  /* Reuse the original COND_EXPR's arms so the CFG edges stay valid.  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);   

  /* remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
2852 | ||
2853 | ||
/* Function vect_update_ivs_after_vectorizer.

   Advance IVs of the loop (to be vectorized later) to correct position.

   When loop is vectorized, its IVs are not always advanced
   correctly since vectorization changes the loop count.  It's ok
   in case epilog loop was not produced after original one before 
   vectorization process (the vectorizer checks that there is no uses 
   of IVs after the loop).  However, in case the epilog loop was peeled, 
   IVs from original loop are used in epilog loop and should be 
   advanced correctly.

   Here we use access functions of IVs and number of
   iteration loop executes (NITERS) in order to bring IVs to correct
   position:  for each IV, new value = init + NITERS * step.

   The function also updates the phis of the basic block at the exit
   from the loop, redirecting arguments that came from the latch to the
   newly computed values.  */

static void
vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
{
  edge exit = loop->exit_edges[0];
  tree phi;
  edge latch = loop_latch_edge (loop);

  /* Generate basic block at the exit from the loop.  The computed
     final IV values will be inserted there.  */
  basic_block new_bb = split_edge (exit);
  add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);

  /* Splitting invalidated the recorded exit edge; update it.  */
  loop->exit_edges[0] = EDGE_PRED (new_bb, 0);

  for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      int i, j, num_elem1, num_elem2;
      tree phi1;
      block_stmt_iterator last_bsi;

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
      
      /* FORNOW: We do not transform initial conditions of IVs 
	 which evolution functions are a polynomial of degree >= 2 or
	 exponential.  */

      step_expr = evolution_part;
      init_expr = initial_condition (access_fn);

      /* ni = init + niters * step  -- the IV's value after the loop.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);
  
      /* Insert stmt into new_bb.  */
      last_bsi = bsi_last (new_bb);
      bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);   

      /* Fix phi expressions in duplicated loop: arguments that flowed
	 in from the latch (i.e. the value live after the loop) are
	 replaced by the freshly computed ni_name.  */
      num_elem1 = PHI_NUM_ARGS (phi);
      for (i = 0; i < num_elem1; i++)
	if (PHI_ARG_EDGE (phi, i) == latch)
	  {
	    tree def = PHI_ARG_DEF (phi, i);

	    for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1; 
		 phi1 = TREE_CHAIN (phi1))
	      {
		num_elem2 = PHI_NUM_ARGS (phi1);
		for (j = 0; j < num_elem2; j++)
		  if (PHI_ARG_DEF (phi1, j) == def)
		    {
		      SET_PHI_ARG_DEF (phi1, j, ni_name);
		      PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);
		      break;
		    }		    
	      }
	    break;
	  }
    }

}
2952 | ||
2953 | ||
ed8b7558 | 2954 | /* This function is the main driver of transformation |
a023975e OG |
2955 | to be done for loop before vectorizing it in case of |
2956 | unknown loop bound. */ | |
2957 | ||
2958 | static void | |
2959 | vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio, | |
2960 | struct loops *loops) | |
2961 | { | |
2962 | ||
2963 | tree ni_name, ratio_mult_vf_name; | |
2964 | #ifdef ENABLE_CHECKING | |
2965 | int loop_num; | |
2966 | #endif | |
2967 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2968 | struct loop *new_loop; | |
2969 | ||
2970 | if (vect_debug_details (NULL)) | |
2971 | fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n"); | |
2972 | ||
2973 | /* Generate the following variables on the preheader of original loop: | |
2974 | ||
2975 | ni_name = number of iteration the original loop executes | |
2976 | ratio = ni_name / vf | |
2977 | ratio_mult_vf_name = ratio * vf */ | |
2978 | vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, | |
2979 | &ratio_mult_vf_name, ratio); | |
2980 | ||
2981 | /* Update loop info. */ | |
2982 | loop->pre_header = loop_preheader_edge (loop)->src; | |
2983 | loop->pre_header_edges[0] = loop_preheader_edge (loop); | |
2984 | ||
2985 | #ifdef ENABLE_CHECKING | |
2986 | loop_num = loop->num; | |
2987 | #endif | |
2988 | new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0], | |
2989 | ratio_mult_vf_name, ni_name, true); | |
2990 | #ifdef ENABLE_CHECKING | |
2991 | gcc_assert (new_loop); | |
2992 | gcc_assert (loop_num == loop->num); | |
2993 | #endif | |
2994 | ||
2995 | /* Update IVs of original loop as if they were advanced | |
2996 | by ratio_mult_vf_name steps. */ | |
2997 | ||
2998 | #ifdef ENABLE_CHECKING | |
2999 | /* Check existence of intermediate bb. */ | |
3000 | gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header); | |
3001 | #endif | |
3002 | vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name); | |
3003 | ||
3004 | return; | |
7ccf35ed | 3005 | |
79fe1b3b DN |
3006 | } |
3007 | ||
3008 | ||
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between NITERS (the original iteration count of the loop)
   and the misalignment of DR - the first data reference in the list
   LOOP_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of this
   loop, the data reference DR will refer to an aligned location.

   Returns an SSA name holding the prolog iteration count, computed as

     min (((vf - (&dr_base % va) / (va/vf)) & (vf - 1)), niters)

   where va is the alignment of the vector type in bytes.  */

static tree 
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
{
  struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree start_addr, byte_miss_align, elem_miss_align;
  /* Required alignment of the vector type, in bytes.  */
  int vec_type_align = 
    GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
						        / BITS_PER_UNIT;
  tree tmp1, tmp2;
  tree new_stmt_list = NULL_TREE;

  /* Compute the address DR starts at; the stmts that compute it are
     collected in new_stmt_list and inserted on the preheader edge.  */
  start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
						     &new_stmt_list, NULL_TREE);

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  /* byte_miss_align = start_addr & (vec_type_align - 1)
     -- the misalignment of the start address in bytes.  */
  byte_miss_align = 
    build (BIT_AND_EXPR, integer_type_node, start_addr, 
	   build (MINUS_EXPR, integer_type_node, 
		  build_int_cst (unsigned_type_node,
				 vec_type_align), integer_one_node));
  /* elem_miss_align = byte_miss_align / element_size, where
     element_size = vec_type_align / vf.  */
  tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
  elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node, 
			   byte_miss_align, tmp1); 
  
  /* tmp2 = (vf - elem_miss_align) & (vf - 1)
     -- number of elements to peel; the mask handles the already-aligned
     case (elem_miss_align == 0 yields 0, not vf).  */
  tmp2 = 
    build (BIT_AND_EXPR, integer_type_node,
	   build (MINUS_EXPR, integer_type_node, 
		  build_int_cst (unsigned_type_node, vf), elem_miss_align),
	   build (MINUS_EXPR, integer_type_node, 
		  build_int_cst (unsigned_type_node, vf), integer_one_node)); 

  /* Never peel more iterations than the loop actually runs.  */
  iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
  var = create_tmp_var (TREE_TYPE (iters), "iters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  return iters_name; 
}
79fe1b3b | 3073 | |
79fe1b3b | 3074 | |
a023975e | 3075 | /* Function vect_update_niters_after_peeling |
79fe1b3b | 3076 | |
a023975e OG |
3077 | NITERS iterations were peeled from the loop represented by LOOP_VINFO. |
3078 | The new number of iterations is therefore original_niters - NITERS. | |
3079 | Record the new number of iterations in LOOP_VINFO. */ | |
79fe1b3b | 3080 | |
a023975e OG |
3081 | static void |
3082 | vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters) | |
3083 | { | |
3084 | tree n_iters = LOOP_VINFO_NITERS (loop_vinfo); | |
3085 | LOOP_VINFO_NITERS (loop_vinfo) = | |
3086 | build (MINUS_EXPR, integer_type_node, n_iters, niters); | |
79fe1b3b DN |
3087 | } |
3088 | ||
3089 | ||
a023975e | 3090 | /* Function vect_update_inits_of_dr |
79fe1b3b | 3091 | |
a023975e OG |
3092 | NITERS iterations were peeled from LOOP. DR represents a data reference |
3093 | in LOOP. This function updates the information recorded in DR to | |
3094 | account for the fact that the first NITERS iterations had already been | |
3095 | executed. Specifically, it updates the initial_condition of the | |
3096 | access_function of DR. */ | |
79fe1b3b DN |
3097 | |
3098 | static void | |
a023975e OG |
3099 | vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop, |
3100 | tree niters) | |
79fe1b3b | 3101 | { |
a023975e OG |
3102 | tree access_fn = DR_ACCESS_FN (dr, 0); |
3103 | tree init, init_new, step; | |
3104 | ||
3105 | step = evolution_part_in_loop_num (access_fn, loop->num); | |
3106 | init = initial_condition (access_fn); | |
3107 | ||
3108 | init_new = build (PLUS_EXPR, TREE_TYPE (init), | |
3109 | build (MULT_EXPR, TREE_TYPE (niters), | |
3110 | niters, step), init); | |
3111 | DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new); | |
3112 | ||
3113 | return; | |
3114 | } | |
79fe1b3b | 3115 | |
79fe1b3b | 3116 | |
a023975e | 3117 | /* Function vect_update_inits_of_drs |
79fe1b3b | 3118 | |
a023975e OG |
3119 | NITERS iterations were peeled from the loop represented by LOOP_VINFO. |
3120 | This function updates the information recorded for the data references in | |
3121 | the loop to account for the fact that the first NITERS iterations had | |
3122 | already been executed. Specifically, it updates the initial_condition of the | |
3123 | access_function of all the data_references in the loop. */ | |
79fe1b3b | 3124 | |
a023975e OG |
3125 | static void |
3126 | vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) | |
3127 | { | |
3128 | unsigned int i; | |
3129 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
3130 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
3131 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
79fe1b3b | 3132 | |
a023975e OG |
3133 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3134 | fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n"); | |
79fe1b3b | 3135 | |
a023975e OG |
3136 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) |
3137 | { | |
3138 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
3139 | vect_update_inits_of_dr (dr, loop, niters); | |
3140 | } | |
79fe1b3b | 3141 | |
a023975e OG |
3142 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) |
3143 | { | |
3144 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
3145 | vect_update_inits_of_dr (dr, loop, niters); | |
3146 | DR_MISALIGNMENT (dr) = -1; | |
3147 | } | |
3148 | } | |
79fe1b3b | 3149 | |
79fe1b3b | 3150 | |
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is chosen from LOOP_UNALIGNED_DR.  */

static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");

  /* Compute the total iteration count and from it the prolog count
     (min of the misalignment-derived count and the total count).  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
  

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop), 
			       niters_of_prolog_loop, ni_name, false); 


  /* Update stmt info of dr according to which we peeled.  DR is now
     aligned by construction.  */
  DR_MISALIGNMENT (dr) = 0;

  /* Update number of times loop executes.  */
  vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);

  /* Update all inits of access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  return;
}
3192 | ||
3193 | ||
3194 | /* Function vect_transform_loop. | |
3195 | ||
3196 | The analysis phase has determined that the loop is vectorizable. | |
3197 | Vectorize the loop - created vectorized stmts to replace the scalar | |
3198 | stmts in the loop, and update the loop exit condition. */ | |
3199 | ||
3200 | static void | |
3201 | vect_transform_loop (loop_vec_info loop_vinfo, | |
3202 | struct loops *loops ATTRIBUTE_UNUSED) | |
3203 | { | |
3204 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
3205 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
3206 | int nbbs = loop->num_nodes; | |
3207 | block_stmt_iterator si; | |
3208 | int i; | |
a023975e | 3209 | tree ratio = NULL; |
79fe1b3b | 3210 | int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
79fe1b3b DN |
3211 | |
3212 | if (vect_debug_details (NULL)) | |
3213 | fprintf (dump_file, "\n<<vec_transform_loop>>\n"); | |
3214 | ||
a023975e OG |
3215 | |
3216 | /* Peel the loop if there are data refs with unknown alignment. | |
3217 | Only one data ref with unknown store is allowed. */ | |
3218 | ||
3219 | ||
3220 | if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo)) | |
3221 | vect_do_peeling_for_alignment (loop_vinfo, loops); | |
3222 | ||
3223 | /* If the loop has a symbolic number of iterations 'n' | |
3224 | (i.e. it's not a compile time constant), | |
3225 | then an epilog loop needs to be created. We therefore duplicate | |
3226 | the initial loop. The original loop will be vectorized, and will compute | |
3227 | the first (n/VF) iterations. The second copy of the loop will remain | |
3228 | serial and will compute the remaining (n%VF) iterations. | |
3229 | (VF is the vectorization factor). */ | |
3230 | ||
3231 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |
3232 | vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops); | |
3233 | ||
3234 | /* FORNOW: we'll treat the case where niters is constant and | |
3235 | ||
3236 | niters % vf != 0 | |
3237 | ||
3238 | in the way similar to one with symbolic niters. | |
3239 | For this we'll generate variable which value is equal to niters. */ | |
3240 | ||
3241 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
3242 | && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)) | |
3243 | vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops); | |
3244 | ||
3245 | ||
79fe1b3b DN |
3246 | /* 1) Make sure the loop header has exactly two entries |
3247 | 2) Make sure we have a preheader basic block. */ | |
3248 | ||
628f6a4e | 3249 | gcc_assert (EDGE_COUNT (loop->header->preds) == 2); |
79fe1b3b DN |
3250 | |
3251 | loop_split_edge_with (loop_preheader_edge (loop), NULL); | |
3252 | ||
3253 | ||
3254 | /* FORNOW: the vectorizer supports only loops which body consist | |
3255 | of one basic block (header + empty latch). When the vectorizer will | |
3256 | support more involved loop forms, the order by which the BBs are | |
3257 | traversed need to be reconsidered. */ | |
3258 | ||
3259 | for (i = 0; i < nbbs; i++) | |
3260 | { | |
3261 | basic_block bb = bbs[i]; | |
3262 | ||
3263 | for (si = bsi_start (bb); !bsi_end_p (si);) | |
3264 | { | |
3265 | tree stmt = bsi_stmt (si); | |
3266 | stmt_vec_info stmt_info; | |
3267 | bool is_store; | |
79fe1b3b DN |
3268 | |
3269 | if (vect_debug_details (NULL)) | |
3270 | { | |
3271 | fprintf (dump_file, "------>vectorizing statement: "); | |
3272 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
3273 | } | |
3274 | stmt_info = vinfo_for_stmt (stmt); | |
1e128c5f | 3275 | gcc_assert (stmt_info); |
79fe1b3b DN |
3276 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
3277 | { | |
3278 | bsi_next (&si); | |
3279 | continue; | |
3280 | } | |
3281 | #ifdef ENABLE_CHECKING | |
3282 | /* FORNOW: Verify that all stmts operate on the same number of | |
3283 | units and no inner unrolling is necessary. */ | |
f0923257 | 3284 | gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info))) |
1e128c5f | 3285 | == vectorization_factor); |
79fe1b3b DN |
3286 | #endif |
3287 | /* -------- vectorize statement ------------ */ | |
3288 | if (vect_debug_details (NULL)) | |
3289 | fprintf (dump_file, "transform statement."); | |
3290 | ||
3291 | is_store = vect_transform_stmt (stmt, &si); | |
3292 | if (is_store) | |
3293 | { | |
3294 | /* free the attached stmt_vec_info and remove the stmt. */ | |
3295 | stmt_ann_t ann = stmt_ann (stmt); | |
3296 | free (stmt_info); | |
3297 | set_stmt_info (ann, NULL); | |
3298 | bsi_remove (&si); | |
3299 | continue; | |
3300 | } | |
3301 | ||
3302 | bsi_next (&si); | |
3303 | } /* stmts in BB */ | |
3304 | } /* BBs in loop */ | |
3305 | ||
a023975e | 3306 | vect_transform_loop_bound (loop_vinfo, ratio); |
79fe1b3b DN |
3307 | |
3308 | if (vect_debug_details (loop)) | |
3309 | fprintf (dump_file,"Success! loop vectorized."); | |
3310 | if (vect_debug_stats (loop)) | |
3311 | fprintf (dump_file, "LOOP VECTORIZED."); | |
3312 | } | |
3313 | ||
3314 | ||
3315 | /* Function vect_is_simple_use. | |
3316 | ||
3317 | Input: | |
3318 | LOOP - the loop that is being vectorized. | |
3319 | OPERAND - operand of a stmt in LOOP. | |
3320 | DEF - the defining stmt in case OPERAND is an SSA_NAME. | |
3321 | ||
3322 | Returns whether a stmt with OPERAND can be vectorized. | |
3323 | Supportable operands are constants, loop invariants, and operands that are | |
6cb38cd4 | 3324 | defined by the current iteration of the loop. Unsupportable operands are |
79fe1b3b DN |
3325 | those that are defined by a previous iteration of the loop (as is the case |
3326 | in reduction/induction computations). */ | |
3327 | ||
3328 | static bool | |
3329 | vect_is_simple_use (tree operand, struct loop *loop, tree *def) | |
3330 | { | |
3331 | tree def_stmt; | |
3332 | basic_block bb; | |
3333 | ||
3334 | if (def) | |
3335 | *def = NULL_TREE; | |
3336 | ||
3337 | if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) | |
3338 | return true; | |
3339 | ||
3340 | if (TREE_CODE (operand) != SSA_NAME) | |
3341 | return false; | |
3342 | ||
3343 | def_stmt = SSA_NAME_DEF_STMT (operand); | |
3344 | if (def_stmt == NULL_TREE ) | |
3345 | { | |
3346 | if (vect_debug_details (NULL)) | |
3347 | fprintf (dump_file, "no def_stmt."); | |
3348 | return false; | |
3349 | } | |
3350 | ||
3351 | /* empty stmt is expected only in case of a function argument. | |
3352 | (Otherwise - we expect a phi_node or a modify_expr). */ | |
3353 | if (IS_EMPTY_STMT (def_stmt)) | |
3354 | { | |
3355 | tree arg = TREE_OPERAND (def_stmt, 0); | |
3356 | if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) | |
3357 | return true; | |
3358 | if (vect_debug_details (NULL)) | |
3359 | { | |
3360 | fprintf (dump_file, "Unexpected empty stmt: "); | |
3361 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
3362 | } | |
3363 | return false; | |
3364 | } | |
3365 | ||
3366 | /* phi_node inside the loop indicates an induction/reduction pattern. | |
3367 | This is not supported yet. */ | |
3368 | bb = bb_for_stmt (def_stmt); | |
3369 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
3370 | { | |
3371 | if (vect_debug_details (NULL)) | |
3372 | fprintf (dump_file, "reduction/induction - unsupported."); | |
3373 | return false; /* FORNOW: not supported yet. */ | |
3374 | } | |
3375 | ||
3376 | /* Expecting a modify_expr or a phi_node. */ | |
3377 | if (TREE_CODE (def_stmt) == MODIFY_EXPR | |
3378 | || TREE_CODE (def_stmt) == PHI_NODE) | |
3379 | { | |
3380 | if (def) | |
3381 | *def = def_stmt; | |
3382 | return true; | |
3383 | } | |
3384 | ||
3385 | return false; | |
3386 | } | |
3387 | ||
3388 | ||
/* Function vect_analyze_operations.

   Scan all the relevant stmts in the loop and verify that each one can be
   vectorized (i.e. has a supported vector form), determine the common
   vectorization factor (VF), and record it in the loop_vec_info.
   Returns false (loop not vectorizable) on the first unsupported stmt.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int vectorization_factor = 0;  /* 0 until the first relevant stmt sets it.  */
  int i;
  bool ok;
  tree scalar_type;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_operations>>\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "==> examining statement: ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }

          /* Every stmt in the loop is expected to have vectorizer info.  */
          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_debug_details (NULL))
                fprintf (dump_file, "irrelevant.");
              continue;
            }

          /* A stmt that already operates on vectors cannot be re-vectorized.  */
          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: vector stmt in loop:");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Determine the scalar type this stmt operates on: the type of
             its data reference if it has one, else the type of its LHS
             (for a modify_expr), else the type of the stmt itself.  */
          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "get vectype for scalar type: ");
              print_generic_expr (dump_file, scalar_type, TDF_SLIM);
            }

          /* The scalar type must map to a supported vector type.  */
          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: unsupported data-type ");
                  print_generic_expr (dump_file, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "vectype: ");
              print_generic_expr (dump_file, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          /* Analysis-only calls (NULL args): check that the stmt matches one
             of the supported stmt classes without generating code.  */
          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: stmt not supported: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          if (vect_debug_details (NULL))
            fprintf (dump_file, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_debug_stats (loop) || vect_debug_details (loop))
                    fprintf (dump_file, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            /* First relevant stmt: its unit count defines the loop's VF.  */
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          /* The vector must exactly fill one SIMD word.  */
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        } /* stmts in BB */
    } /* BBs in loop */

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  /* VF stayed <= 1: no stmt produced a usable vector type.  */
  if (vectorization_factor <= 1)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_debug_details (NULL))
    fprintf (dump_file,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  /* Constant niters not divisible by VF: an epilog loop will be needed,
     which requires a single-entry loop with a preheader.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      /* In this case we have to generate epilog loop, that
         can be done only for loops with one entry edge.  */
      if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
          || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
            fprintf (dump_file, "not vectorized: more than one entry.");
          return false;
        }
    }

  return true;
}
3555 | ||
3556 | ||
3557 | /* Function exist_non_indexing_operands_for_use_p | |
3558 | ||
3559 | USE is one of the uses attached to STMT. Check if USE is | |
3560 | used in STMT for anything other than indexing an array. */ | |
3561 | ||
3562 | static bool | |
3563 | exist_non_indexing_operands_for_use_p (tree use, tree stmt) | |
3564 | { | |
3565 | tree operand; | |
3566 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
3567 | ||
3568 | /* USE corresponds to some operand in STMT. If there is no data | |
3569 | reference in STMT, then any operand that corresponds to USE | |
3570 | is not indexing an array. */ | |
3571 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
3572 | return true; | |
3573 | ||
3574 | /* STMT has a data_ref. FORNOW this means that its of one of | |
3575 | the following forms: | |
3576 | -1- ARRAY_REF = var | |
3577 | -2- var = ARRAY_REF | |
3578 | (This should have been verified in analyze_data_refs). | |
3579 | ||
3580 | 'var' in the second case corresponds to a def, not a use, | |
3581 | so USE cannot correspond to any operands that are not used | |
3582 | for array indexing. | |
3583 | ||
3584 | Therefore, all we need to check is if STMT falls into the | |
3585 | first case, and whether var corresponds to USE. */ | |
3586 | ||
3587 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) | |
3588 | return false; | |
3589 | ||
3590 | operand = TREE_OPERAND (stmt, 1); | |
3591 | ||
3592 | if (TREE_CODE (operand) != SSA_NAME) | |
3593 | return false; | |
3594 | ||
3595 | if (operand == use) | |
3596 | return true; | |
3597 | ||
3598 | return false; | |
3599 | } | |
3600 | ||
3601 | ||
3602 | /* Function vect_is_simple_iv_evolution. | |
3603 | ||
3604 | FORNOW: A simple evolution of an induction variables in the loop is | |
3605 | considered a polynomial evolution with constant step. */ | |
3606 | ||
3607 | static bool | |
3608 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, | |
3609 | tree * step, bool strict) | |
3610 | { | |
3611 | tree init_expr; | |
3612 | tree step_expr; | |
3613 | ||
3614 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); | |
3615 | ||
3616 | /* When there is no evolution in this loop, the evolution function | |
3617 | is not "simple". */ | |
3618 | if (evolution_part == NULL_TREE) | |
3619 | return false; | |
3620 | ||
3621 | /* When the evolution is a polynomial of degree >= 2 | |
3622 | the evolution function is not "simple". */ | |
3623 | if (tree_is_chrec (evolution_part)) | |
3624 | return false; | |
3625 | ||
3626 | step_expr = evolution_part; | |
a023975e | 3627 | init_expr = unshare_expr (initial_condition (access_fn)); |
79fe1b3b DN |
3628 | |
3629 | if (vect_debug_details (NULL)) | |
3630 | { | |
3631 | fprintf (dump_file, "step: "); | |
3632 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
3633 | fprintf (dump_file, ", init: "); | |
3634 | print_generic_expr (dump_file, init_expr, TDF_SLIM); | |
3635 | } | |
3636 | ||
3637 | *init = init_expr; | |
3638 | *step = step_expr; | |
3639 | ||
3640 | if (TREE_CODE (step_expr) != INTEGER_CST) | |
3641 | { | |
3642 | if (vect_debug_details (NULL)) | |
3643 | fprintf (dump_file, "step unknown."); | |
3644 | return false; | |
3645 | } | |
3646 | ||
3647 | if (strict) | |
3648 | if (!integer_onep (step_expr)) | |
3649 | { | |
3650 | if (vect_debug_details (NULL)) | |
3651 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
3652 | return false; | |
3653 | } | |
3654 | ||
3655 | return true; | |
3656 | } | |
3657 | ||
3658 | ||
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
	   The cross-iteration cycle corresponding to variable 'sum' will be
	   considered too complicated and will impede vectorization.

   FORNOW: Induction as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

	   However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

	   In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

	   The evolution of the above cycle is considered simple enough,
	   however, we also check that the cycle does not need to be
	   vectorized, i.e - we check that the variable that this cycle
	   defines is only used for array indexing or in stmts that do not
	   need to be vectorized. This is not the case in loop2, but it
	   *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;  /* Receives init/step values we do not need here.  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");

  /* Walk all PHIs in the loop header; each represents a cross-iteration
     def-use cycle.  */
  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_debug_details (NULL))
	{
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
	}

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet. This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      /* NOTE: the instantiate_parameters wrapper is deliberately commented
         out; only the bare scalar-evolution analysis is performed.  */
      access_fn = /* instantiate_parameters
		     (loop,*/
	analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
	  return false;
	}

      if (vect_debug_details (NULL))
        {
           fprintf (dump_file, "Access function of PHI: ");
           print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      /* Non-strict check (last arg false): any constant step qualifies.  */
      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
					&dummy, false))
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
	  return false;
	}
    }

  return true;
}
3765 | ||
3766 | ||
3767 | /* Function vect_analyze_data_ref_dependence. | |
3768 | ||
3769 | Return TRUE if there (might) exist a dependence between a memory-reference | |
3770 | DRA and a memory-reference DRB. */ | |
3771 | ||
3772 | static bool | |
3773 | vect_analyze_data_ref_dependence (struct data_reference *dra, | |
3774 | struct data_reference *drb, | |
3775 | struct loop *loop) | |
3776 | { | |
6775f1f3 | 3777 | bool differ_p; |
79fe1b3b | 3778 | struct data_dependence_relation *ddr; |
6775f1f3 | 3779 | |
79fe1b3b DN |
3780 | if (!array_base_name_differ_p (dra, drb, &differ_p)) |
3781 | { | |
6775f1f3 | 3782 | if (vect_debug_stats (loop) || vect_debug_details (loop)) |
79fe1b3b | 3783 | { |
6775f1f3 IR |
3784 | fprintf (dump_file, |
3785 | "not vectorized: can't determine dependence between: "); | |
79fe1b3b DN |
3786 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); |
3787 | fprintf (dump_file, " and "); | |
3788 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
3789 | } | |
3790 | return true; | |
3791 | } | |
3792 | ||
3793 | if (differ_p) | |
3794 | return false; | |
3795 | ||
3796 | ddr = initialize_data_dependence_relation (dra, drb); | |
3797 | compute_affine_dependence (ddr); | |
3798 | ||
3799 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
3800 | return false; | |
3801 | ||
3802 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3803 | { | |
3804 | fprintf (dump_file, | |
3805 | "not vectorized: possible dependence between data-refs "); | |
3806 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); | |
3807 | fprintf (dump_file, " and "); | |
3808 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
3809 | } | |
3810 | ||
3811 | return true; | |
3812 | } | |
3813 | ||
3814 | ||
3815 | /* Function vect_analyze_data_ref_dependences. | |
3816 | ||
3817 | Examine all the data references in the loop, and make sure there do not | |
3818 | exist any data dependences between them. | |
3819 | ||
3820 | TODO: dependences which distance is greater than the vectorization factor | |
3821 | can be ignored. */ | |
3822 | ||
3823 | static bool | |
3824 | vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) | |
3825 | { | |
3826 | unsigned int i, j; | |
3827 | varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
3828 | varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
3829 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
3830 | ||
3831 | /* Examine store-store (output) dependences. */ | |
3832 | ||
3833 | if (vect_debug_details (NULL)) | |
3834 | fprintf (dump_file, "\n<<vect_analyze_dependences>>\n"); | |
3835 | ||
3836 | if (vect_debug_details (NULL)) | |
3837 | fprintf (dump_file, "compare all store-store pairs."); | |
3838 | ||
3839 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++) | |
3840 | { | |
3841 | for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
3842 | { | |
3843 | struct data_reference *dra = | |
3844 | VARRAY_GENERIC_PTR (loop_write_refs, i); | |
3845 | struct data_reference *drb = | |
3846 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
3847 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
3848 | return false; | |
3849 | } | |
3850 | } | |
3851 | ||
3852 | /* Examine load-store (true/anti) dependences. */ | |
3853 | ||
3854 | if (vect_debug_details (NULL)) | |
3855 | fprintf (dump_file, "compare all load-store pairs."); | |
3856 | ||
3857 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++) | |
3858 | { | |
3859 | for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
3860 | { | |
3861 | struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i); | |
3862 | struct data_reference *drb = | |
3863 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
3864 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
3865 | return false; | |
3866 | } | |
3867 | } | |
3868 | ||
3869 | return true; | |
3870 | } | |
3871 | ||
3872 | ||
3873 | /* Function vect_get_first_index. | |
3874 | ||
3875 | REF is a data reference. | |
3876 | If it is an ARRAY_REF: if its lower bound is simple enough, | |
3877 | put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE. | |
3878 | If it is not an ARRAY_REF: REF has no "first index"; | |
3879 | ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */ | |
3880 | ||
3881 | static bool | |
3882 | vect_get_first_index (tree ref, tree *array_first_index) | |
3883 | { | |
3884 | tree array_start; | |
3885 | ||
3886 | if (TREE_CODE (ref) != ARRAY_REF) | |
3887 | *array_first_index = size_zero_node; | |
3888 | else | |
3889 | { | |
3890 | array_start = array_ref_low_bound (ref); | |
3891 | if (!host_integerp (array_start,0)) | |
3892 | { | |
3893 | if (vect_debug_details (NULL)) | |
3894 | { | |
3895 | fprintf (dump_file, "array min val not simple integer cst."); | |
3896 | print_generic_expr (dump_file, array_start, TDF_DETAILS); | |
3897 | } | |
3898 | return false; | |
3899 | } | |
3900 | *array_first_index = array_start; | |
3901 | } | |
3902 | ||
3903 | return true; | |
3904 | } | |
3905 | ||
3906 | ||
6775f1f3 IR |
/* Function vect_compute_array_base_alignment.
   A utility function of vect_compute_array_ref_alignment.

   Compute the misalignment of ARRAY in bits.

   Input:
   ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
   VECTYPE - we are interested in the misalignment modulo the size of vectype.
	     if NULL: don't compute misalignment, just return the base of ARRAY.
   PREV_DIMENSIONS - initialized to one.
   MISALIGNMENT - the computed misalignment in bits.

   Output:
   If VECTYPE is not NULL:
     Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
     the base of the array, and put the computed misalignment in MISALIGNMENT.
   If VECTYPE is NULL:
     Return the base of the array.

   For a[idx_N]...[idx_2][idx_1][idx_0], the address of
   a[idx_N]...[idx_2][idx_1] is
   {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
    ... + idx_N * dim_0 * ... * dim_N-1}.
   (The misalignment of &a is not checked here).
   Note, that every term contains dim_0, therefore, if dim_0 is a
   multiple of NUNITS, the whole sum is a multiple of NUNITS.
   Otherwise, if idx_1 is constant, and dim_1 is a multiple of
   NUINTS, we can say that the misalignment of the sum is equal to
   the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
   we can't determine this array misalignment, and we return
   false.
   We proceed recursively in this manner, accumulating total misalignment
   and the multiplication of previous dimensions for correct misalignment
   calculation.  */

static tree
vect_compute_array_base_alignment (tree array,
				   tree vectype,
				   tree *prev_dimensions,
				   tree *misalignment)
{
  tree index;
  tree domain;
  tree dimension_size;
  tree mis;
  tree bits_per_vectype;
  tree bits_per_vectype_unit;

  /* The 'stop condition' of the recursion: reached the base (non-ARRAY_REF)
     object.  */
  if (TREE_CODE (array) != ARRAY_REF)
    return array;

  if (!vectype)
    /* Just get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  /* The accumulated values must stay representable as unsigned host
     integers; otherwise the misalignment cannot be computed.  */
  if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
      !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
    return NULL_TREE;

  /* Number of elements in this dimension: max - min + 1.  */
  domain = TYPE_DOMAIN (TREE_TYPE (array));
  dimension_size =
    int_const_binop (PLUS_EXPR,
		     int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
				      TYPE_MIN_VALUE (domain), 1),
		     size_one_node, 1);

  /* Check if the dimension size is a multiple of NUNITS, the remaining sum
     is a multiple of NUNITS:

     dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
   */
  mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
	 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
  if (integer_zerop (mis))
    /* This array is aligned. Continue just in order to get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  index = TREE_OPERAND (array, 1);
  if (!host_integerp (index, 1))
    /* The current index is not constant.  */
    return NULL_TREE;

  /* Normalize the index to be zero-based within its domain.  */
  index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);

  bits_per_vectype = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
		   GET_MODE_SIZE (TYPE_MODE (vectype))));
  bits_per_vectype_unit = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
		   GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));

  /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
     earlier:

     *misalignment =
       (*misalignment + index_val * dimension_size * *prev_dimensions)
								% vectype_nunits;
   */

  mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
  mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
  mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
  mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
  *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);

  /* Accumulate the product of dimension sizes for the outer recursion
     step.  */
  *prev_dimensions = int_const_binop (MULT_EXPR,
				      *prev_dimensions, dimension_size, 1);

  /* Recurse into the next (outer) dimension.  */
  return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
					    prev_dimensions,
					    misalignment);
}
4023 | ||
4024 | ||
79fe1b3b DN |
4025 | /* Function vect_compute_data_ref_alignment |
4026 | ||
4027 | Compute the misalignment of the data reference DR. | |
4028 | ||
6775f1f3 IR |
4029 | Output: |
4030 | 1. If during the misalignment computation it is found that the data reference | |
4031 | cannot be vectorized then false is returned. | |
4032 | 2. DR_MISALIGNMENT (DR) is defined. | |
4033 | ||
79fe1b3b DN |
4034 | FOR NOW: No analysis is actually performed. Misalignment is calculated |
4035 | only for trivial cases. TODO. */ | |
4036 | ||
6775f1f3 | 4037 | static bool |
79fe1b3b | 4038 | vect_compute_data_ref_alignment (struct data_reference *dr, |
6775f1f3 | 4039 | loop_vec_info loop_vinfo) |
79fe1b3b DN |
4040 | { |
4041 | tree stmt = DR_STMT (dr); | |
6775f1f3 | 4042 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |
79fe1b3b DN |
4043 | tree ref = DR_REF (dr); |
4044 | tree vectype; | |
79fe1b3b | 4045 | tree scalar_type; |
79fe1b3b | 4046 | tree offset = size_zero_node; |
6775f1f3 IR |
4047 | tree base, bit_offset, alignment; |
4048 | tree unit_bits = fold_convert (unsigned_type_node, | |
4049 | build_int_cst (NULL_TREE, BITS_PER_UNIT)); | |
4050 | tree dr_base; | |
4051 | bool base_aligned_p; | |
4052 | ||
79fe1b3b DN |
4053 | if (vect_debug_details (NULL)) |
4054 | fprintf (dump_file, "vect_compute_data_ref_alignment:"); | |
4055 | ||
4056 | /* Initialize misalignment to unknown. */ | |
4057 | DR_MISALIGNMENT (dr) = -1; | |
4058 | ||
4059 | scalar_type = TREE_TYPE (ref); | |
4060 | vectype = get_vectype_for_scalar_type (scalar_type); | |
4061 | if (!vectype) | |
4062 | { | |
4063 | if (vect_debug_details (NULL)) | |
4064 | { | |
4065 | fprintf (dump_file, "no vectype for stmt: "); | |
4066 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
6775f1f3 | 4067 | fprintf (dump_file, " scalar_type: "); |
79fe1b3b DN |
4068 | print_generic_expr (dump_file, scalar_type, TDF_DETAILS); |
4069 | } | |
6775f1f3 IR |
4070 | /* It is not possible to vectorize this data reference. */ |
4071 | return false; | |
79fe1b3b | 4072 | } |
6775f1f3 IR |
4073 | gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF); |
4074 | ||
4075 | if (TREE_CODE (ref) == ARRAY_REF) | |
4076 | dr_base = ref; | |
4077 | else | |
4078 | dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info); | |
79fe1b3b | 4079 | |
6775f1f3 IR |
4080 | base = vect_get_base_and_bit_offset (dr, dr_base, vectype, |
4081 | loop_vinfo, &bit_offset, &base_aligned_p); | |
4082 | if (!base) | |
79fe1b3b | 4083 | { |
6775f1f3 | 4084 | if (vect_debug_details (NULL)) |
79fe1b3b | 4085 | { |
6775f1f3 IR |
4086 | fprintf (dump_file, "Unknown alignment for access: "); |
4087 | print_generic_expr (dump_file, | |
4088 | STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM); | |
79fe1b3b | 4089 | } |
6775f1f3 IR |
4090 | return true; |
4091 | } | |
79fe1b3b | 4092 | |
6775f1f3 IR |
4093 | if (!base_aligned_p) |
4094 | { | |
4095 | if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))) | |
79fe1b3b DN |
4096 | { |
4097 | if (vect_debug_details (NULL)) | |
6775f1f3 IR |
4098 | { |
4099 | fprintf (dump_file, "can't force alignment of ref: "); | |
4100 | print_generic_expr (dump_file, ref, TDF_SLIM); | |
4101 | } | |
4102 | return true; | |
79fe1b3b | 4103 | } |
6775f1f3 IR |
4104 | |
4105 | /* Force the alignment of the decl. | |
4106 | NOTE: This is the only change to the code we make during | |
4107 | the analysis phase, before deciding to vectorize the loop. */ | |
4108 | if (vect_debug_details (NULL)) | |
4109 | fprintf (dump_file, "force alignment"); | |
4110 | DECL_ALIGN (base) = TYPE_ALIGN (vectype); | |
4111 | DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype); | |
4112 | } | |
79fe1b3b | 4113 | |
6775f1f3 IR |
4114 | /* At this point we assume that the base is aligned, and the offset from it |
4115 | (including index, if relevant) has been computed and is in BIT_OFFSET. */ | |
4116 | gcc_assert (base_aligned_p | |
4117 | || (TREE_CODE (base) == VAR_DECL | |
4118 | && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); | |
4119 | ||
4120 | /* Convert into bytes. */ | |
4121 | offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1); | |
4122 | /* Check that there is no remainder in bits. */ | |
4123 | bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1); | |
4124 | if (!integer_zerop (bit_offset)) | |
4125 | { | |
4126 | if (vect_debug_details (NULL)) | |
79fe1b3b | 4127 | { |
6775f1f3 IR |
4128 | fprintf (dump_file, "bit offset alignment: "); |
4129 | print_generic_expr (dump_file, bit_offset, TDF_SLIM); | |
79fe1b3b | 4130 | } |
6775f1f3 IR |
4131 | return false; |
4132 | } | |
4133 | ||
4134 | /* Alignment required, in bytes: */ | |
4135 | alignment = fold_convert (unsigned_type_node, | |
4136 | build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT)); | |
79fe1b3b | 4137 | |
6775f1f3 IR |
4138 | /* Modulo alignment. */ |
4139 | offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0); | |
4140 | if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset)) | |
4141 | { | |
4142 | if (vect_debug_details (NULL)) | |
4143 | fprintf (dump_file, "unexpected misalign value"); | |
4144 | return false; | |
79fe1b3b DN |
4145 | } |
4146 | ||
6775f1f3 | 4147 | DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1); |
79fe1b3b | 4148 | |
6775f1f3 IR |
4149 | if (vect_debug_details (NULL)) |
4150 | fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr)); | |
4151 | ||
4152 | return true; | |
4153 | } | |
4154 | ||
4155 | ||
/* Function vect_compute_array_ref_alignment

   Compute the misalignment of the array-ref DR relative to a
   TYPE_ALIGN (VECTYPE) boundary.

   Input:
   DR - the array-ref data reference.
   LOOP_VINFO - information on the loop being analyzed (used to take the
		initial condition of the innermost access function).
   VECTYPE - the vector type whose alignment boundary is relevant.  If
	     NULL_TREE, no alignment is computed and only the base is
	     returned.

   Output:
   OFFSET - the misalignment in bits (set only on success).
   Return value - the base of the array-ref.  E.g, if the array-ref is
		  a.b[k].c[i][j] the returned base is a.b[k].c.
		  NULL_TREE on failure.  */

static tree
vect_compute_array_ref_alignment (struct data_reference *dr,
                                  loop_vec_info loop_vinfo,
                                  tree vectype,
                                  tree *offset)
{
  tree array_first_index = size_zero_node;
  tree init;
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree oprnd0 = TREE_OPERAND (ref, 0);
  tree dims = size_one_node;
  tree misalign = size_zero_node;
  tree next_ref, this_offset = size_zero_node;
  tree nunits;   /* Size of a scalar element, in bytes.  */
  tree nbits;    /* Size of a scalar element, in bits.  */

  if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
    /* The reference is an array without its last index.  */
    next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
                                                  &misalign);
  else
    next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
                                                  &misalign);
  if (!vectype)
    /* Alignment is not requested.  Just return the base.  */
    return next_ref;

  /* Compute alignment.  Fail if the misalignment of the base is not a
     known nonnegative compile-time constant.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
    return NULL_TREE;
  this_offset = misalign;

  /* Check the first index accessed.  */
  if (!vect_get_first_index (ref, &array_first_index))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "no first_index for array.");
      return NULL_TREE;
    }

  /* Check the index of the array_ref: the initial condition of the
     innermost access function in the analyzed loop.  */
  init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
                                        LOOP_VINFO_LOOP (loop_vinfo)->num);

  /* FORNOW: In order to simplify the handling of alignment, we make sure
     that the first location at which the array is accessed ('init') is on an
     'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
     This is too conservative, since we require that
     both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
     NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
     This should be relaxed in the future.  */

  if (!init || !host_integerp (init, 0))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "non constant init. ");
      return NULL_TREE;
    }

  /* bytes per scalar element: */
  nunits = fold_convert (unsigned_type_node,
        build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
  nbits = int_const_binop (MULT_EXPR, nunits,
                           build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);

  /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte  */
  misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
  misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
  misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);

  /* TODO: allow negative misalign values.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return NULL_TREE;
    }
  *offset = misalign;
  return next_ref;
}
4250 | ||
4251 | ||
4252 | /* Function vect_compute_data_refs_alignment | |
4253 | ||
4254 | Compute the misalignment of data references in the loop. | |
4255 | This pass may take place at function granularity instead of at loop | |
4256 | granularity. | |
4257 | ||
4258 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
4259 | only for trivial cases. TODO. */ | |
4260 | ||
4261 | static void | |
4262 | vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) | |
4263 | { | |
4264 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
4265 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
4266 | unsigned int i; | |
6775f1f3 | 4267 | |
79fe1b3b DN |
4268 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) |
4269 | { | |
4270 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
4271 | vect_compute_data_ref_alignment (dr, loop_vinfo); | |
4272 | } | |
4273 | ||
4274 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
4275 | { | |
4276 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
4277 | vect_compute_data_ref_alignment (dr, loop_vinfo); | |
4278 | } | |
4279 | } | |
4280 | ||
4281 | ||
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   FOR NOW: No transformation is actually performed.  TODO.  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
{
  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two.  For example, the scheme that
     intel uses when given a loop with several memory accesses, is as follows:
     choose one memory access ('p') which alignment you want to force by doing
     peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
     Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
     Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc.  The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here is the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
   */
}
4380 | ||
4381 | ||
4382 | /* Function vect_analyze_data_refs_alignment | |
4383 | ||
4384 | Analyze the alignment of the data-references in the loop. | |
4385 | FOR NOW: Until support for misliagned accesses is in place, only if all | |
4386 | accesses are aligned can the loop be vectorized. This restriction will be | |
4387 | relaxed. */ | |
4388 | ||
4389 | static bool | |
4390 | vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) | |
4391 | { | |
4392 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
a023975e | 4393 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
7ccf35ed DN |
4394 | /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/ |
4395 | ||
79fe1b3b | 4396 | unsigned int i; |
a023975e | 4397 | unsigned int decide_peeling_count = 0; |
79fe1b3b DN |
4398 | |
4399 | if (vect_debug_details (NULL)) | |
4400 | fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n"); | |
4401 | ||
4402 | ||
4403 | /* This pass may take place at function granularity instead of at loop | |
4404 | granularity. */ | |
4405 | ||
4406 | vect_compute_data_refs_alignment (loop_vinfo); | |
4407 | ||
4408 | ||
4409 | /* This pass will use loop versioning and loop peeling in order to enhance | |
4410 | the alignment of data references in the loop. | |
4411 | FOR NOW: we assume that whatever versioning/peeling took place, the | |
4412 | original loop is to be vectorized. Any other loops that were created by | |
4413 | the transformations performed in this pass - are not supposed to be | |
4414 | vectorized. This restriction will be relaxed. */ | |
4415 | ||
4416 | vect_enhance_data_refs_alignment (loop_vinfo); | |
4417 | ||
4418 | ||
4419 | /* Finally, check that loop can be vectorized. | |
a023975e OG |
4420 | FOR NOW: Until support for misaligned stores is in place, only if all |
4421 | stores are aligned can the loop be vectorized. This restriction will be | |
4422 | relaxed. In the meantime, we can force the alignment of on of the | |
4423 | data-references in the loop using peeling. We currently use a heuristic | |
4424 | that peels the first misaligned store, but we plan to develop a | |
4425 | better cost model to guide the decision on which data-access to peel for. | |
4426 | */ | |
79fe1b3b DN |
4427 | |
4428 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
4429 | { | |
4430 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
4431 | if (!aligned_access_p (dr)) | |
4432 | { | |
a023975e OG |
4433 | /* Decide here whether we need peeling for alignment. */ |
4434 | decide_peeling_count++; | |
4435 | if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS) | |
4436 | { | |
4437 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4438 | fprintf (dump_file, | |
4439 | "not vectorized: multiple misaligned stores."); | |
4440 | return false; | |
4441 | } | |
4442 | else | |
4443 | { | |
4444 | LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr; | |
4445 | LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true; | |
4446 | } | |
79fe1b3b DN |
4447 | } |
4448 | } | |
4449 | ||
7ccf35ed DN |
4450 | /* The vectorizer now supports misaligned loads, so we don't fail anymore |
4451 | in the presence of a misaligned read dataref. For some targets however | |
4452 | it may be preferable not to vectorize in such a case as misaligned | |
4453 | accesses are very costly. This should be considered in the future. */ | |
4454 | /* | |
79fe1b3b DN |
4455 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) |
4456 | { | |
4457 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
4458 | if (!aligned_access_p (dr)) | |
4459 | { | |
4460 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
4461 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
4462 | fprintf (dump_file, "not vectorized: unaligned load."); | |
4463 | return false; | |
4464 | } | |
4465 | } | |
7ccf35ed | 4466 | */ |
79fe1b3b DN |
4467 | |
4468 | return true; | |
4469 | } | |
4470 | ||
4471 | ||
4472 | /* Function vect_analyze_data_ref_access. | |
4473 | ||
4474 | Analyze the access pattern of the data-reference DR. For now, a data access | |
4475 | has to consecutive and aligned to be considered vectorizable. */ | |
4476 | ||
4477 | static bool | |
4478 | vect_analyze_data_ref_access (struct data_reference *dr) | |
4479 | { | |
4480 | varray_type access_fns = DR_ACCESS_FNS (dr); | |
4481 | tree access_fn; | |
4482 | tree init, step; | |
6775f1f3 | 4483 | unsigned int dimensions, i; |
79fe1b3b | 4484 | |
6775f1f3 IR |
4485 | /* Check that in case of multidimensional array ref A[i1][i2]..[iN], |
4486 | i1, i2, ..., iN-1 are loop invariant (to make sure that the memory | |
4487 | access is contiguous). */ | |
4488 | dimensions = VARRAY_ACTIVE_SIZE (access_fns); | |
4489 | ||
4490 | for (i = 1; i < dimensions; i++) /* Not including the last dimension. */ | |
79fe1b3b | 4491 | { |
6775f1f3 | 4492 | access_fn = DR_ACCESS_FN (dr, i); |
79fe1b3b | 4493 | |
6775f1f3 IR |
4494 | if (evolution_part_in_loop_num (access_fn, |
4495 | loop_containing_stmt (DR_STMT (dr))->num)) | |
4496 | { | |
a023975e OG |
4497 | /* Evolution part is not NULL in this loop (it is neither constant |
4498 | nor invariant). */ | |
6775f1f3 IR |
4499 | if (vect_debug_details (NULL)) |
4500 | { | |
4501 | fprintf (dump_file, | |
a023975e | 4502 | "not vectorized: complicated multidim. array access."); |
6775f1f3 IR |
4503 | print_generic_expr (dump_file, access_fn, TDF_SLIM); |
4504 | } | |
4505 | return false; | |
4506 | } | |
4507 | } | |
4508 | ||
4509 | access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */ | |
4510 | if (!evolution_function_is_constant_p (access_fn) | |
4511 | && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num, | |
4512 | access_fn, &init, &step, true)) | |
79fe1b3b DN |
4513 | { |
4514 | if (vect_debug_details (NULL)) | |
4515 | { | |
a023975e | 4516 | fprintf (dump_file, "not vectorized: complicated access function."); |
79fe1b3b DN |
4517 | print_generic_expr (dump_file, access_fn, TDF_SLIM); |
4518 | } | |
4519 | return false; | |
4520 | } | |
6775f1f3 | 4521 | |
79fe1b3b DN |
4522 | return true; |
4523 | } | |
4524 | ||
4525 | ||
4526 | /* Function vect_analyze_data_ref_accesses. | |
4527 | ||
4528 | Analyze the access pattern of all the data references in the loop. | |
4529 | ||
4530 | FORNOW: the only access pattern that is considered vectorizable is a | |
4531 | simple step 1 (consecutive) access. | |
4532 | ||
6775f1f3 | 4533 | FORNOW: handle only arrays and pointer accesses. */ |
79fe1b3b DN |
4534 | |
4535 | static bool | |
4536 | vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) | |
4537 | { | |
4538 | unsigned int i; | |
4539 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
4540 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
4541 | ||
4542 | if (vect_debug_details (NULL)) | |
4543 | fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n"); | |
4544 | ||
4545 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
4546 | { | |
4547 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
4548 | bool ok = vect_analyze_data_ref_access (dr); | |
4549 | if (!ok) | |
4550 | { | |
4551 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
4552 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
4553 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
4554 | return false; | |
4555 | } | |
4556 | } | |
4557 | ||
4558 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
4559 | { | |
4560 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
4561 | bool ok = vect_analyze_data_ref_access (dr); | |
4562 | if (!ok) | |
4563 | { | |
4564 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
4565 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
4566 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
4567 | return false; | |
4568 | } | |
4569 | } | |
4570 | ||
4571 | return true; | |
4572 | } | |
4573 | ||
4574 | ||
4575 | /* Function vect_analyze_pointer_ref_access. | |
4576 | ||
4577 | Input: | |
4578 | STMT - a stmt that contains a data-ref | |
4579 | MEMREF - a data-ref in STMT, which is an INDIRECT_REF. | |
4580 | ||
4581 | If the data-ref access is vectorizable, return a data_reference structure | |
4582 | that represents it (DR). Otherwise - return NULL. */ | |
4583 | ||
4584 | static struct data_reference * | |
4585 | vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read) | |
4586 | { | |
4587 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
4588 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
4589 | tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0)); | |
4590 | tree init, step; | |
4591 | int step_val; | |
4592 | tree reftype, innertype; | |
4593 | enum machine_mode innermode; | |
4594 | tree indx_access_fn; | |
4595 | int loopnum = loop->num; | |
4596 | struct data_reference *dr; | |
4597 | ||
4598 | if (!access_fn) | |
4599 | { | |
4600 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4601 | fprintf (dump_file, "not vectorized: complicated pointer access."); | |
4602 | return NULL; | |
4603 | } | |
4604 | ||
4605 | if (vect_debug_details (NULL)) | |
4606 | { | |
4607 | fprintf (dump_file, "Access function of ptr: "); | |
4608 | print_generic_expr (dump_file, access_fn, TDF_SLIM); | |
4609 | } | |
4610 | ||
4611 | if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false)) | |
4612 | { | |
4613 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4614 | fprintf (dump_file, "not vectorized: pointer access is not simple."); | |
4615 | return NULL; | |
4616 | } | |
4617 | ||
6775f1f3 IR |
4618 | STRIP_NOPS (init); |
4619 | ||
4620 | if (!host_integerp (step,0)) | |
79fe1b3b DN |
4621 | { |
4622 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4623 | fprintf (dump_file, | |
6775f1f3 | 4624 | "not vectorized: non constant step for pointer access."); |
79fe1b3b DN |
4625 | return NULL; |
4626 | } | |
4627 | ||
4628 | step_val = TREE_INT_CST_LOW (step); | |
4629 | ||
4630 | reftype = TREE_TYPE (TREE_OPERAND (memref, 0)); | |
4631 | if (TREE_CODE (reftype) != POINTER_TYPE) | |
4632 | { | |
4633 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4634 | fprintf (dump_file, "not vectorized: unexpected pointer access form."); | |
4635 | return NULL; | |
4636 | } | |
4637 | ||
4638 | reftype = TREE_TYPE (init); | |
4639 | if (TREE_CODE (reftype) != POINTER_TYPE) | |
4640 | { | |
4641 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4642 | fprintf (dump_file, "not vectorized: unexpected pointer access form."); | |
4643 | return NULL; | |
4644 | } | |
4645 | ||
4646 | innertype = TREE_TYPE (reftype); | |
4647 | innermode = TYPE_MODE (innertype); | |
4648 | if (GET_MODE_SIZE (innermode) != step_val) | |
4649 | { | |
4650 | /* FORNOW: support only consecutive access */ | |
4651 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4652 | fprintf (dump_file, "not vectorized: non consecutive access."); | |
4653 | return NULL; | |
4654 | } | |
4655 | ||
4656 | indx_access_fn = | |
4657 | build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node); | |
4658 | if (vect_debug_details (NULL)) | |
4659 | { | |
4660 | fprintf (dump_file, "Access function of ptr indx: "); | |
4661 | print_generic_expr (dump_file, indx_access_fn, TDF_SLIM); | |
4662 | } | |
4663 | dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read); | |
4664 | return dr; | |
4665 | } | |
4666 | ||
4667 | ||
/* Function vect_get_symbl_and_dr.

   The function returns SYMBL - the relevant variable for
   memory tag (for aliasing purposes).
   Also data reference structure DR is created.

   Input:
   MEMREF - data reference in STMT
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
   LOOP_VINFO - information on the loop being analyzed

   Output:
   DR - data_reference struct for MEMREF
   return value - the relevant variable for memory tag (for aliasing
   purposes), or NULL_TREE if MEMREF has an unsupported form.

   FORNOW: only INDIRECT_REF and ARRAY_REF forms of MEMREF are handled.  */

static tree
vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
		       loop_vec_info loop_vinfo, struct data_reference **dr)
{
  tree symbl, oprnd0, oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree offset;
  tree array_base, base;
  struct data_reference *new_dr;
  bool base_aligned_p;

  *dr = NULL;
  switch (TREE_CODE (memref))
    {
    case INDIRECT_REF:
      /* Pointer access: delegate the analysis of the access pattern and
	 the creation of the data_reference struct.  */
      new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
      if (! new_dr)
	return NULL_TREE;
      *dr = new_dr;
      symbl = DR_BASE_NAME (new_dr);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;

      switch (TREE_CODE (symbl))
	{
	case PLUS_EXPR:
	case MINUS_EXPR:
	  oprnd0 = TREE_OPERAND (symbl, 0);
	  oprnd1 = TREE_OPERAND (symbl, 1);

	  STRIP_NOPS(oprnd1);
	  /* Only {address_base + offset} expressions are supported,
	     where address_base can be POINTER_TYPE or ARRAY_TYPE and
	     offset can be anything but POINTER_TYPE or ARRAY_TYPE.
	     TODO: swap operands if {offset + address_base}.  */
	  if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
	       && TREE_CODE (oprnd1) != INTEGER_CST)
	      || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
	    return NULL_TREE;

	  if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
	    symbl = oprnd0;
	  else
	    symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
					   loop_vinfo, &new_dr);
	  /* Deliberate fall through: SYMBL is fully resolved at this
	     point; the next cases only break out of the switch.  */

	case SSA_NAME:
	case ADDR_EXPR:
	  /* symbl remains unchanged.  */
	  break;

	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled data ref: ");
	      print_generic_expr (dump_file, memref, TDF_SLIM);
	      fprintf (dump_file, " (symbl ");
	      print_generic_expr (dump_file, symbl, TDF_SLIM);
	      fprintf (dump_file, ") in stmt ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    case ARRAY_REF:
      offset = size_zero_node;

      /* Store the array base in the stmt info.
	 For one dimensional array ref a[i], the base is a,
	 for multidimensional a[i1][i2]..[iN], the base is
	 a[i1][i2]..[iN-1].  */
      array_base = TREE_OPERAND (memref, 0);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;

      new_dr = analyze_array (stmt, memref, is_read);
      *dr = new_dr;

      /* Find the relevant symbol for aliasing purposes.  */
      base = DR_BASE_NAME (new_dr);
      switch (TREE_CODE (base))
	{
	case VAR_DECL:
	  symbl = base;
	  break;

	case INDIRECT_REF:
	  symbl = TREE_OPERAND (base, 0);
	  break;

	case COMPONENT_REF:
	  /* Could have recorded more accurate information -
	     i.e, the actual FIELD_DECL that is being referenced -
	     but later passes expect VAR_DECL as the nmt.  */
	  symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
					loop_vinfo, &offset, &base_aligned_p);
	  if (symbl)
	    break;
	  /* fall through */
	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled struct/class field access ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    default:
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "unhandled data ref: ");
	  print_generic_expr (dump_file, memref, TDF_SLIM);
	  fprintf (dump_file, " in stmt ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}
      return NULL_TREE;
    }
  return symbl;
}
4804 | ||
4805 | ||
79fe1b3b DN |
4806 | /* Function vect_analyze_data_refs. |
4807 | ||
4808 | Find all the data references in the loop. | |
4809 | ||
6775f1f3 | 4810 | FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs |
79fe1b3b DN |
4811 | which base is really an array (not a pointer) and which alignment |
4812 | can be forced. This restriction will be relaxed. */ | |
4813 | ||
4814 | static bool | |
4815 | vect_analyze_data_refs (loop_vec_info loop_vinfo) | |
4816 | { | |
4817 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
4818 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
4819 | int nbbs = loop->num_nodes; | |
4820 | block_stmt_iterator si; | |
4821 | int j; | |
4822 | struct data_reference *dr; | |
6775f1f3 IR |
4823 | tree tag; |
4824 | tree address_base; | |
322ae40b IR |
4825 | bool base_aligned_p; |
4826 | tree offset; | |
79fe1b3b DN |
4827 | |
4828 | if (vect_debug_details (NULL)) | |
4829 | fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n"); | |
4830 | ||
4831 | for (j = 0; j < nbbs; j++) | |
4832 | { | |
4833 | basic_block bb = bbs[j]; | |
4834 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
4835 | { | |
4836 | bool is_read = false; | |
4837 | tree stmt = bsi_stmt (si); | |
4838 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
4839 | v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
4840 | v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
4841 | vuse_optype vuses = STMT_VUSE_OPS (stmt); | |
4842 | varray_type *datarefs = NULL; | |
4843 | int nvuses, nv_may_defs, nv_must_defs; | |
4844 | tree memref = NULL; | |
79fe1b3b DN |
4845 | tree symbl; |
4846 | ||
4847 | /* Assumption: there exists a data-ref in stmt, if and only if | |
4848 | it has vuses/vdefs. */ | |
4849 | ||
4850 | if (!vuses && !v_may_defs && !v_must_defs) | |
4851 | continue; | |
4852 | ||
4853 | nvuses = NUM_VUSES (vuses); | |
4854 | nv_may_defs = NUM_V_MAY_DEFS (v_may_defs); | |
4855 | nv_must_defs = NUM_V_MUST_DEFS (v_must_defs); | |
4856 | ||
4857 | if (nvuses && (nv_may_defs || nv_must_defs)) | |
4858 | { | |
4859 | if (vect_debug_details (NULL)) | |
4860 | { | |
4861 | fprintf (dump_file, "unexpected vdefs and vuses in stmt: "); | |
4862 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
4863 | } | |
4864 | return false; | |
4865 | } | |
4866 | ||
4867 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
4868 | { | |
4869 | if (vect_debug_details (NULL)) | |
4870 | { | |
4871 | fprintf (dump_file, "unexpected vops in stmt: "); | |
4872 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
4873 | } | |
4874 | return false; | |
4875 | } | |
4876 | ||
4877 | if (vuses) | |
4878 | { | |
4879 | memref = TREE_OPERAND (stmt, 1); | |
4880 | datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo)); | |
4881 | is_read = true; | |
4882 | } | |
4883 | else /* vdefs */ | |
4884 | { | |
4885 | memref = TREE_OPERAND (stmt, 0); | |
4886 | datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); | |
4887 | is_read = false; | |
4888 | } | |
4889 | ||
6775f1f3 IR |
4890 | /* Analyze MEMREF. If it is of a supported form, build data_reference |
4891 | struct for it (DR) and find the relevant symbol for aliasing | |
4892 | purposes. */ | |
a023975e OG |
4893 | symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, |
4894 | &dr); | |
6775f1f3 | 4895 | if (!symbl) |
79fe1b3b DN |
4896 | { |
4897 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4898 | { | |
6775f1f3 | 4899 | fprintf (dump_file, "not vectorized: unhandled data ref: "); |
79fe1b3b DN |
4900 | print_generic_expr (dump_file, stmt, TDF_SLIM); |
4901 | } | |
4902 | return false; | |
4903 | } | |
6775f1f3 | 4904 | |
79fe1b3b | 4905 | /* Find and record the memtag assigned to this data-ref. */ |
6775f1f3 | 4906 | switch (TREE_CODE (symbl)) |
79fe1b3b | 4907 | { |
6775f1f3 IR |
4908 | case VAR_DECL: |
4909 | STMT_VINFO_MEMTAG (stmt_info) = symbl; | |
4910 | break; | |
4911 | ||
4912 | case SSA_NAME: | |
79fe1b3b DN |
4913 | symbl = SSA_NAME_VAR (symbl); |
4914 | tag = get_var_ann (symbl)->type_mem_tag; | |
4915 | if (!tag) | |
4916 | { | |
4917 | tree ptr = TREE_OPERAND (memref, 0); | |
4918 | if (TREE_CODE (ptr) == SSA_NAME) | |
4919 | tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag; | |
4920 | } | |
4921 | if (!tag) | |
4922 | { | |
4923 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4924 | fprintf (dump_file, "not vectorized: no memtag for ref."); | |
4925 | return false; | |
4926 | } | |
4927 | STMT_VINFO_MEMTAG (stmt_info) = tag; | |
6775f1f3 IR |
4928 | break; |
4929 | ||
4930 | case ADDR_EXPR: | |
4931 | address_base = TREE_OPERAND (symbl, 0); | |
4932 | ||
4933 | switch (TREE_CODE (address_base)) | |
4934 | { | |
4935 | case ARRAY_REF: | |
a023975e OG |
4936 | dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), |
4937 | DR_IS_READ(dr)); | |
322ae40b IR |
4938 | STMT_VINFO_MEMTAG (stmt_info) = |
4939 | vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE, | |
4940 | loop_vinfo, &offset, | |
4941 | &base_aligned_p); | |
6775f1f3 IR |
4942 | break; |
4943 | ||
4944 | case VAR_DECL: | |
4945 | STMT_VINFO_MEMTAG (stmt_info) = address_base; | |
4946 | break; | |
4947 | ||
4948 | default: | |
4949 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4950 | { | |
a023975e OG |
4951 | fprintf (dump_file, |
4952 | "not vectorized: unhandled address expr: "); | |
6775f1f3 IR |
4953 | print_generic_expr (dump_file, stmt, TDF_SLIM); |
4954 | } | |
4955 | return false; | |
4956 | } | |
4957 | break; | |
4958 | ||
4959 | default: | |
79fe1b3b DN |
4960 | if (vect_debug_stats (loop) || vect_debug_details (loop)) |
4961 | { | |
4962 | fprintf (dump_file, "not vectorized: unsupported data-ref: "); | |
4963 | print_generic_expr (dump_file, memref, TDF_SLIM); | |
4964 | } | |
4965 | return false; | |
6775f1f3 | 4966 | } |
79fe1b3b DN |
4967 | |
4968 | VARRAY_PUSH_GENERIC_PTR (*datarefs, dr); | |
4969 | STMT_VINFO_DATA_REF (stmt_info) = dr; | |
4970 | } | |
4971 | } | |
4972 | ||
4973 | return true; | |
4974 | } | |
4975 | ||
4976 | ||
8c27b7d4 | 4977 | /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ |
79fe1b3b DN |
4978 | |
4979 | /* Function vect_mark_relevant. | |
4980 | ||
4981 | Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ | |
4982 | ||
4983 | static void | |
4984 | vect_mark_relevant (varray_type worklist, tree stmt) | |
4985 | { | |
4986 | stmt_vec_info stmt_info; | |
4987 | ||
4988 | if (vect_debug_details (NULL)) | |
4989 | fprintf (dump_file, "mark relevant."); | |
4990 | ||
4991 | if (TREE_CODE (stmt) == PHI_NODE) | |
4992 | { | |
4993 | VARRAY_PUSH_TREE (worklist, stmt); | |
4994 | return; | |
4995 | } | |
4996 | ||
4997 | stmt_info = vinfo_for_stmt (stmt); | |
4998 | ||
4999 | if (!stmt_info) | |
5000 | { | |
5001 | if (vect_debug_details (NULL)) | |
5002 | { | |
5003 | fprintf (dump_file, "mark relevant: no stmt info!!."); | |
5004 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
5005 | } | |
5006 | return; | |
5007 | } | |
5008 | ||
5009 | if (STMT_VINFO_RELEVANT_P (stmt_info)) | |
5010 | { | |
5011 | if (vect_debug_details (NULL)) | |
5012 | fprintf (dump_file, "already marked relevant."); | |
5013 | return; | |
5014 | } | |
5015 | ||
5016 | STMT_VINFO_RELEVANT_P (stmt_info) = 1; | |
5017 | VARRAY_PUSH_TREE (worklist, stmt); | |
5018 | } | |
5019 | ||
5020 | ||
5021 | /* Function vect_stmt_relevant_p. | |
5022 | ||
5023 | Return true if STMT in loop that is represented by LOOP_VINFO is | |
5024 | "relevant for vectorization". | |
5025 | ||
5026 | A stmt is considered "relevant for vectorization" if: | |
5027 | - it has uses outside the loop. | |
5028 | - it has vdefs (it alters memory). | |
5029 | - control stmts in the loop (except for the exit condition). | |
5030 | ||
5031 | CHECKME: what other side effects would the vectorizer allow? */ | |
5032 | ||
5033 | static bool | |
5034 | vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo) | |
5035 | { | |
5036 | v_may_def_optype v_may_defs; | |
5037 | v_must_def_optype v_must_defs; | |
5038 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
5039 | int i; | |
5040 | dataflow_t df; | |
5041 | int num_uses; | |
5042 | ||
5043 | /* cond stmt other than loop exit cond. */ | |
5044 | if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) | |
5045 | return true; | |
5046 | ||
5047 | /* changing memory. */ | |
5048 | v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
5049 | v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
5050 | if (v_may_defs || v_must_defs) | |
5051 | { | |
5052 | if (vect_debug_details (NULL)) | |
5053 | fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs."); | |
5054 | return true; | |
5055 | } | |
5056 | ||
5057 | /* uses outside the loop. */ | |
5058 | df = get_immediate_uses (stmt); | |
5059 | num_uses = num_immediate_uses (df); | |
5060 | for (i = 0; i < num_uses; i++) | |
5061 | { | |
5062 | tree use = immediate_use (df, i); | |
5063 | basic_block bb = bb_for_stmt (use); | |
5064 | if (!flow_bb_inside_loop_p (loop, bb)) | |
5065 | { | |
5066 | if (vect_debug_details (NULL)) | |
5067 | fprintf (dump_file, "vec_stmt_relevant_p: used out of loop."); | |
5068 | return true; | |
5069 | } | |
5070 | } | |
5071 | ||
5072 | return false; | |
5073 | } | |
5074 | ||
5075 | ||
5076 | /* Function vect_mark_stmts_to_be_vectorized. | |
5077 | ||
5078 | Not all stmts in the loop need to be vectorized. For example: | |
5079 | ||
5080 | for i... | |
5081 | for j... | |
5082 | 1. T0 = i + j | |
5083 | 2. T1 = a[T0] | |
5084 | ||
5085 | 3. j = j + 1 | |
5086 | ||
5087 | Stmt 1 and 3 do not need to be vectorized, because loop control and | |
5088 | addressing of vectorized data-refs are handled differently. | |
5089 | ||
5090 | This pass detects such stmts. */ | |
5091 | ||
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist.  Clear the RELEVANT flag on every stmt in the loop,
     then seed the worklist with the stmts that are relevant by themselves
     (see vect_stmt_relevant_p: vdefs, uses after the loop, control
     stmts).  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  stmt = bsi_stmt (si);

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "init: stmt relevant? ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }

	  stmt_info = vinfo_for_stmt (stmt);
	  STMT_VINFO_RELEVANT_P (stmt_info) = 0;

	  if (vect_stmt_relevant_p (stmt, loop_vinfo))
	    vect_mark_relevant (worklist, stmt);
	}
    }

  /* 2. Process_worklist.  Propagate relevance backwards along use-def
     chains that stay inside the loop, until a fixpoint is reached.
     Returns false (after clearing the worklist) on any use that
     vect_is_simple_use cannot handle.  */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "worklist: examine stmt: ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}

      /* Examine the USES in this statement. Mark all the statements which
	 feed this statement's uses as "relevant", unless the USE is used as
	 an array index.  */

      if (TREE_CODE (stmt) == PHI_NODE)
	{
	  /* follow the def-use chain inside the loop.  */
	  for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
	    {
	      tree arg = PHI_ARG_DEF (stmt, j);
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (arg, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}
	      /* No def_stmt: the arg is defined outside the loop or is
		 an invariant/constant -- nothing to propagate.  */
	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: def_stmt: ");
		  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
		}

	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
	{
	  tree use = USE_OP (use_ops, i);

	  /* We are only interested in uses that need to be vectorized. Uses
	     that are used for address computation are not considered relevant.
	   */
	  if (exist_non_indexing_operands_for_use_p (use, stmt))
	    {
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (use, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}

	      /* Defined outside the loop (or invariant); skip.  */
	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: examine use %d: ", i);
		  print_generic_expr (dump_file, use, TDF_SLIM);
		}

	      /* Only defs inside the loop need to be vectorized too.  */
	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}
    } /* while worklist */

  varray_clear (worklist);
  return true;
}
5224 | ||
5225 | ||
a023975e OG |
5226 | /* Function vect_analyze_loop_with_symbolic_num_of_iters. |
5227 | ||
5228 | In case the number of iterations that LOOP iterates in unknown at compile | |
5229 | time, an epilog loop will be generated, and the loop induction variables | |
5230 | (IVs) will be "advanced" to the value they are supposed to take just before | |
5231 | the epilog loop. Here we check that the access function of the loop IVs | |
5232 | and the expression that represents the loop bound are simple enough. | |
5233 | These restrictions will be relaxed in the future. */ | |
5234 | ||
5235 | static bool | |
5236 | vect_analyze_loop_with_symbolic_num_of_iters (tree niters, | |
5237 | struct loop *loop) | |
5238 | { | |
5239 | basic_block bb = loop->header; | |
5240 | tree phi; | |
5241 | ||
5242 | if (vect_debug_details (NULL)) | |
5243 | fprintf (dump_file, | |
5244 | "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n"); | |
5245 | ||
5246 | if (chrec_contains_undetermined (niters)) | |
5247 | { | |
5248 | if (vect_debug_details (NULL)) | |
5249 | fprintf (dump_file, "Infinite number of iterations."); | |
5250 | return false; | |
5251 | } | |
5252 | ||
5253 | if (!niters) | |
5254 | { | |
5255 | if (vect_debug_details (NULL)) | |
5256 | fprintf (dump_file, "niters is NULL pointer."); | |
5257 | return false; | |
5258 | } | |
5259 | ||
5260 | if (vect_debug_details (NULL)) | |
5261 | { | |
5262 | fprintf (dump_file, "Symbolic number of iterations is "); | |
5263 | print_generic_expr (dump_file, niters, TDF_DETAILS); | |
5264 | } | |
5265 | ||
5266 | /* Analyze phi functions of the loop header. */ | |
5267 | ||
5268 | for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) | |
5269 | { | |
5270 | tree access_fn = NULL; | |
5271 | tree evolution_part; | |
5272 | ||
5273 | if (vect_debug_details (NULL)) | |
5274 | { | |
5275 | fprintf (dump_file, "Analyze phi: "); | |
5276 | print_generic_expr (dump_file, phi, TDF_SLIM); | |
5277 | } | |
5278 | ||
5279 | /* Skip virtual phi's. The data dependences that are associated with | |
5280 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | |
5281 | ||
5282 | if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) | |
5283 | { | |
5284 | if (vect_debug_details (NULL)) | |
5285 | fprintf (dump_file, "virtual phi. skip."); | |
5286 | continue; | |
5287 | } | |
5288 | ||
5289 | /* Analyze the evolution function. */ | |
5290 | ||
5291 | access_fn = instantiate_parameters | |
5292 | (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); | |
5293 | ||
5294 | if (!access_fn) | |
5295 | { | |
5296 | if (vect_debug_details (NULL)) | |
5297 | fprintf (dump_file, "No Access function."); | |
5298 | return false; | |
5299 | } | |
5300 | ||
5301 | if (vect_debug_details (NULL)) | |
5302 | { | |
5303 | fprintf (dump_file, "Access function of PHI: "); | |
5304 | print_generic_expr (dump_file, access_fn, TDF_SLIM); | |
5305 | } | |
5306 | ||
5307 | evolution_part = evolution_part_in_loop_num (access_fn, loop->num); | |
5308 | ||
5309 | if (evolution_part == NULL_TREE) | |
5310 | return false; | |
5311 | ||
5312 | /* FORNOW: We do not transform initial conditions of IVs | |
5313 | which evolution functions are a polynomial of degree >= 2. */ | |
5314 | ||
5315 | if (tree_is_chrec (evolution_part)) | |
5316 | return false; | |
5317 | } | |
5318 | ||
5319 | return true; | |
5320 | } | |
5321 | ||
5322 | ||
79fe1b3b DN |
5323 | /* Function vect_get_loop_niters. |
5324 | ||
5325 | Determine how many iterations the loop is executed. */ | |
5326 | ||
5327 | static tree | |
a023975e | 5328 | vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) |
79fe1b3b DN |
5329 | { |
5330 | tree niters; | |
5331 | ||
5332 | if (vect_debug_details (NULL)) | |
5333 | fprintf (dump_file, "\n<<get_loop_niters>>\n"); | |
5334 | ||
5335 | niters = number_of_iterations_in_loop (loop); | |
5336 | ||
5337 | if (niters != NULL_TREE | |
a023975e | 5338 | && niters != chrec_dont_know) |
79fe1b3b | 5339 | { |
a023975e | 5340 | *number_of_iterations = niters; |
79fe1b3b DN |
5341 | |
5342 | if (vect_debug_details (NULL)) | |
a023975e OG |
5343 | { |
5344 | fprintf (dump_file, "==> get_loop_niters:" ); | |
5345 | print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM); | |
5346 | } | |
79fe1b3b DN |
5347 | } |
5348 | ||
5349 | return get_loop_exit_condition (loop); | |
5350 | } | |
5351 | ||
5352 | ||
5353 | /* Function vect_analyze_loop_form. | |
5354 | ||
5355 | Verify the following restrictions (some may be relaxed in the future): | |
5356 | - it's an inner-most loop | |
5357 | - number of BBs = 2 (which are the loop header and the latch) | |
5358 | - the loop has a pre-header | |
5359 | - the loop has a single entry and exit | |
5360 | - the loop exit condition is simple enough, and the number of iterations | |
5361 | can be analyzed (a countable loop). */ | |
5362 | ||
5363 | static loop_vec_info | |
5364 | vect_analyze_loop_form (struct loop *loop) | |
5365 | { | |
5366 | loop_vec_info loop_vinfo; | |
5367 | tree loop_cond; | |
a023975e | 5368 | tree number_of_iterations = NULL; |
79fe1b3b DN |
5369 | |
5370 | if (vect_debug_details (loop)) | |
5371 | fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n"); | |
5372 | ||
82b85a85 ZD |
5373 | if (loop->inner |
5374 | || !loop->single_exit | |
5375 | || loop->num_nodes != 2) | |
79fe1b3b DN |
5376 | { |
5377 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5378 | { | |
5379 | fprintf (dump_file, "not vectorized: bad loop form. "); | |
82b85a85 | 5380 | if (loop->inner) |
79fe1b3b | 5381 | fprintf (dump_file, "nested loop."); |
82b85a85 ZD |
5382 | else if (!loop->single_exit) |
5383 | fprintf (dump_file, "multiple exits."); | |
5384 | else if (loop->num_nodes != 2) | |
79fe1b3b | 5385 | fprintf (dump_file, "too many BBs in loop."); |
79fe1b3b DN |
5386 | } |
5387 | ||
5388 | return NULL; | |
5389 | } | |
5390 | ||
5391 | /* We assume that the loop exit condition is at the end of the loop. i.e, | |
5392 | that the loop is represented as a do-while (with a proper if-guard | |
5393 | before the loop if needed), where the loop header contains all the | |
5394 | executable statements, and the latch is empty. */ | |
5395 | if (!empty_block_p (loop->latch)) | |
5396 | { | |
5397 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5398 | fprintf (dump_file, "not vectorized: unexpectd loop form."); | |
5399 | return NULL; | |
5400 | } | |
5401 | ||
5402 | if (empty_block_p (loop->header)) | |
5403 | { | |
5404 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5405 | fprintf (dump_file, "not vectorized: empty loop."); | |
5406 | return NULL; | |
5407 | } | |
5408 | ||
5409 | loop_cond = vect_get_loop_niters (loop, &number_of_iterations); | |
5410 | if (!loop_cond) | |
5411 | { | |
5412 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5413 | fprintf (dump_file, "not vectorized: complicated exit condition."); | |
5414 | return NULL; | |
5415 | } | |
a023975e OG |
5416 | |
5417 | if (!number_of_iterations) | |
79fe1b3b DN |
5418 | { |
5419 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
a023975e OG |
5420 | fprintf (dump_file, |
5421 | "not vectorized: number of iterations cannot be computed."); | |
79fe1b3b DN |
5422 | return NULL; |
5423 | } | |
5424 | ||
a023975e OG |
5425 | loop_vinfo = new_loop_vec_info (loop); |
5426 | LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; | |
5427 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |
5428 | { | |
5429 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5430 | fprintf (dump_file, "loop bound unknown."); | |
5431 | ||
5432 | /* Unknown loop bound. */ | |
5433 | if (!vect_analyze_loop_with_symbolic_num_of_iters | |
5434 | (number_of_iterations, loop)) | |
5435 | { | |
5436 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5437 | fprintf (dump_file, | |
5438 | "not vectorized: can't determine loop bound."); | |
5439 | return NULL; | |
5440 | } | |
5441 | else | |
5442 | { | |
5443 | /* We need only one loop entry for unknown loop bound support. */ | |
5444 | if (loop->num_entries != 1 || !loop->pre_header) | |
5445 | { | |
5446 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5447 | fprintf (dump_file, | |
5448 | "not vectorized: more than one loop entry."); | |
5449 | return NULL; | |
5450 | } | |
5451 | } | |
5452 | } | |
5453 | else | |
5454 | if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) | |
79fe1b3b DN |
5455 | { |
5456 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5457 | fprintf (dump_file, "not vectorized: number of iterations = 0."); | |
5458 | return NULL; | |
5459 | } | |
5460 | ||
79fe1b3b | 5461 | LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; |
79fe1b3b DN |
5462 | |
5463 | return loop_vinfo; | |
5464 | } | |
5465 | ||
5466 | ||
5467 | /* Function vect_analyze_loop. | |
5468 | ||
5469 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
5470 | for it. The different analyses will record information in the | |
5471 | loop_vec_info struct. */ | |
5472 | ||
5473 | static loop_vec_info | |
5474 | vect_analyze_loop (struct loop *loop) | |
5475 | { | |
5476 | bool ok; | |
5477 | loop_vec_info loop_vinfo; | |
5478 | ||
5479 | if (vect_debug_details (NULL)) | |
5480 | fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n"); | |
5481 | ||
5482 | /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ | |
5483 | ||
5484 | loop_vinfo = vect_analyze_loop_form (loop); | |
5485 | if (!loop_vinfo) | |
5486 | { | |
5487 | if (vect_debug_details (loop)) | |
5488 | fprintf (dump_file, "bad loop form."); | |
5489 | return NULL; | |
5490 | } | |
5491 | ||
5492 | /* Find all data references in the loop (which correspond to vdefs/vuses) | |
5493 | and analyze their evolution in the loop. | |
5494 | ||
6775f1f3 | 5495 | FORNOW: Handle only simple, array references, which |
79fe1b3b DN |
5496 | alignment can be forced, and aligned pointer-references. */ |
5497 | ||
5498 | ok = vect_analyze_data_refs (loop_vinfo); | |
5499 | if (!ok) | |
5500 | { | |
5501 | if (vect_debug_details (loop)) | |
5502 | fprintf (dump_file, "bad data references."); | |
5503 | destroy_loop_vec_info (loop_vinfo); | |
5504 | return NULL; | |
5505 | } | |
5506 | ||
79fe1b3b DN |
5507 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
5508 | ||
5509 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); | |
5510 | if (!ok) | |
5511 | { | |
5512 | if (vect_debug_details (loop)) | |
5513 | fprintf (dump_file, "unexpected pattern."); | |
5514 | if (vect_debug_details (loop)) | |
5515 | fprintf (dump_file, "not vectorized: unexpected pattern."); | |
5516 | destroy_loop_vec_info (loop_vinfo); | |
5517 | return NULL; | |
5518 | } | |
5519 | ||
79fe1b3b DN |
5520 | /* Check that all cross-iteration scalar data-flow cycles are OK. |
5521 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | |
5522 | ||
5523 | ok = vect_analyze_scalar_cycles (loop_vinfo); | |
5524 | if (!ok) | |
5525 | { | |
5526 | if (vect_debug_details (loop)) | |
5527 | fprintf (dump_file, "bad scalar cycle."); | |
5528 | destroy_loop_vec_info (loop_vinfo); | |
5529 | return NULL; | |
5530 | } | |
5531 | ||
79fe1b3b DN |
5532 | /* Analyze data dependences between the data-refs in the loop. |
5533 | FORNOW: fail at the first data dependence that we encounter. */ | |
5534 | ||
5535 | ok = vect_analyze_data_ref_dependences (loop_vinfo); | |
5536 | if (!ok) | |
5537 | { | |
5538 | if (vect_debug_details (loop)) | |
5539 | fprintf (dump_file, "bad data dependence."); | |
5540 | destroy_loop_vec_info (loop_vinfo); | |
5541 | return NULL; | |
5542 | } | |
5543 | ||
79fe1b3b DN |
5544 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
5545 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | |
5546 | ||
5547 | ok = vect_analyze_data_ref_accesses (loop_vinfo); | |
5548 | if (!ok) | |
5549 | { | |
5550 | if (vect_debug_details (loop)) | |
5551 | fprintf (dump_file, "bad data access."); | |
5552 | destroy_loop_vec_info (loop_vinfo); | |
5553 | return NULL; | |
5554 | } | |
5555 | ||
79fe1b3b DN |
5556 | /* Analyze the alignment of the data-refs in the loop. |
5557 | FORNOW: Only aligned accesses are handled. */ | |
5558 | ||
5559 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | |
5560 | if (!ok) | |
5561 | { | |
5562 | if (vect_debug_details (loop)) | |
5563 | fprintf (dump_file, "bad data alignment."); | |
5564 | destroy_loop_vec_info (loop_vinfo); | |
5565 | return NULL; | |
5566 | } | |
5567 | ||
79fe1b3b DN |
5568 | /* Scan all the operations in the loop and make sure they are |
5569 | vectorizable. */ | |
5570 | ||
5571 | ok = vect_analyze_operations (loop_vinfo); | |
5572 | if (!ok) | |
5573 | { | |
5574 | if (vect_debug_details (loop)) | |
5575 | fprintf (dump_file, "bad operation or unsupported loop bound."); | |
5576 | destroy_loop_vec_info (loop_vinfo); | |
5577 | return NULL; | |
5578 | } | |
5579 | ||
5580 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | |
5581 | ||
5582 | return loop_vinfo; | |
5583 | } | |
5584 | ||
5585 | ||
5586 | /* Function need_imm_uses_for. | |
5587 | ||
5588 | Return whether we ought to include information for 'var' | |
5589 | when calculating immediate uses. For this pass we only want use | |
5590 | information for non-virtual variables. */ | |
5591 | ||
static bool
need_imm_uses_for (tree var)
{
  /* Immediate-use information is needed only for real (GIMPLE register)
     variables; virtual (memory) operands are analyzed through the
     data-reference machinery instead.  */
  return is_gimple_reg (var);
}
5597 | ||
5598 | ||
5599 | /* Function vectorize_loops. | |
5600 | ||
5601 | Entry Point to loop vectorization phase. */ | |
5602 | ||
5603 | void | |
5604 | vectorize_loops (struct loops *loops) | |
5605 | { | |
5606 | unsigned int i, loops_num; | |
5607 | unsigned int num_vectorized_loops = 0; | |
5608 | ||
5609 | /* Does the target support SIMD? */ | |
5610 | /* FORNOW: until more sophisticated machine modelling is in place. */ | |
5611 | if (!UNITS_PER_SIMD_WORD) | |
5612 | { | |
5613 | if (vect_debug_details (NULL)) | |
5614 | fprintf (dump_file, "vectorizer: target vector size is not defined."); | |
5615 | return; | |
5616 | } | |
5617 | ||
5618 | compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for); | |
5619 | ||
5620 | /* ----------- Analyze loops. ----------- */ | |
5621 | ||
5622 | /* If some loop was duplicated, it gets bigger number | |
5623 | than all previously defined loops. This fact allows us to run | |
5624 | only over initial loops skipping newly generated ones. */ | |
5625 | loops_num = loops->num; | |
5626 | for (i = 1; i < loops_num; i++) | |
5627 | { | |
5628 | loop_vec_info loop_vinfo; | |
5629 | struct loop *loop = loops->parray[i]; | |
5630 | ||
5631 | if (!loop) | |
5632 | continue; | |
5633 | ||
79fe1b3b DN |
5634 | loop_vinfo = vect_analyze_loop (loop); |
5635 | loop->aux = loop_vinfo; | |
5636 | ||
5637 | if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) | |
5638 | continue; | |
5639 | ||
5640 | vect_transform_loop (loop_vinfo, loops); | |
5641 | num_vectorized_loops++; | |
5642 | } | |
5643 | ||
5644 | if (vect_debug_stats (NULL) || vect_debug_details (NULL)) | |
5645 | fprintf (dump_file, "\nvectorized %u loops in function.\n", | |
5646 | num_vectorized_loops); | |
5647 | ||
5648 | /* ----------- Finalize. ----------- */ | |
5649 | ||
5650 | free_df (); | |
5651 | for (i = 1; i < loops_num; i++) | |
5652 | { | |
5653 | struct loop *loop = loops->parray[i]; | |
6775f1f3 IR |
5654 | loop_vec_info loop_vinfo; |
5655 | ||
79fe1b3b | 5656 | if (!loop) |
6775f1f3 IR |
5657 | continue; |
5658 | loop_vinfo = loop->aux; | |
79fe1b3b DN |
5659 | destroy_loop_vec_info (loop_vinfo); |
5660 | loop->aux = NULL; | |
5661 | } | |
5662 | ||
79fe1b3b DN |
5663 | rewrite_into_ssa (false); |
5664 | if (bitmap_first_set_bit (vars_to_rename) >= 0) | |
5665 | { | |
5666 | /* The rewrite of ssa names may cause violation of loop closed ssa | |
5667 | form invariants. TODO -- avoid these rewrites completely. | |
5668 | Information in virtual phi nodes is sufficient for it. */ | |
5669 | rewrite_into_loop_closed_ssa (); | |
5670 | } | |
a023975e | 5671 | rewrite_into_loop_closed_ssa (); |
79fe1b3b DN |
5672 | bitmap_clear (vars_to_rename); |
5673 | } |