]>
Commit | Line | Data |
---|---|---|
79fe1b3b DN |
1 | /* Loop Vectorization |
2 | Copyright (C) 2003, 2004 Free Software Foundation, Inc. | |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | /* Loop Vectorization Pass. | |
23 | ||
24 | This pass tries to vectorize loops. This first implementation focuses on | |
25 | simple inner-most loops, with no conditional control flow, and a set of | |
   simple operations whose vector form can be expressed using existing
27 | tree codes (PLUS, MULT etc). | |
28 | ||
29 | For example, the vectorizer transforms the following simple loop: | |
30 | ||
31 | short a[N]; short b[N]; short c[N]; int i; | |
32 | ||
33 | for (i=0; i<N; i++){ | |
34 | a[i] = b[i] + c[i]; | |
35 | } | |
36 | ||
37 | as if it was manually vectorized by rewriting the source code into: | |
38 | ||
39 | typedef int __attribute__((mode(V8HI))) v8hi; | |
40 | short a[N]; short b[N]; short c[N]; int i; | |
41 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; | |
42 | v8hi va, vb, vc; | |
43 | ||
44 | for (i=0; i<N/8; i++){ | |
45 | vb = pb[i]; | |
46 | vc = pc[i]; | |
47 | va = vb + vc; | |
48 | pa[i] = va; | |
49 | } | |
50 | ||
51 | The main entry to this pass is vectorize_loops(), in which | |
52 | the vectorizer applies a set of analyses on a given set of loops, | |
53 | followed by the actual vectorization transformation for the loops that | |
54 | had successfully passed the analysis phase. | |
55 | ||
56 | Throughout this pass we make a distinction between two types of | |
57 | data: scalars (which are represented by SSA_NAMES), and memory references | |
58 | ("data-refs"). These two types of data require different handling both | |
59 | during analysis and transformation. The types of data-refs that the | |
6775f1f3 IR |
60 | vectorizer currently supports are ARRAY_REFS which base is an array DECL |
61 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer | |
62 | accesses are required to have a simple (consecutive) access pattern. | |
79fe1b3b DN |
63 | |
64 | Analysis phase: | |
65 | =============== | |
66 | The driver for the analysis phase is vect_analyze_loop_nest(). | |
67 | It applies a set of analyses, some of which rely on the scalar evolution | |
68 | analyzer (scev) developed by Sebastian Pop. | |
69 | ||
70 | During the analysis phase the vectorizer records some information | |
71 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the | |
72 | loop, as well as general information about the loop as a whole, which is | |
73 | recorded in a "loop_vec_info" struct attached to each loop. | |
74 | ||
75 | Transformation phase: | |
76 | ===================== | |
77 | The loop transformation phase scans all the stmts in the loop, and | |
78 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in | |
   the loop that needs to be vectorized.  It inserts the vector code sequence
80 | just before the scalar stmt S, and records a pointer to the vector code | |
81 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct | |
82 | attached to S). This pointer will be used for the vectorization of following | |
83 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; | |
84 | otherwise, we rely on dead code elimination for removing it. | |
85 | ||
86 | For example, say stmt S1 was vectorized into stmt VS1: | |
87 | ||
88 | VS1: vb = px[i]; | |
89 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
90 | S2: a = b; | |
91 | ||
92 | To vectorize stmt S2, the vectorizer first finds the stmt that defines | |
93 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the | |
94 | vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The | |
95 | resulting sequence would be: | |
96 | ||
97 | VS1: vb = px[i]; | |
98 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
99 | VS2: va = vb; | |
100 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 | |
101 | ||
102 | Operands that are not SSA_NAMEs, are data-refs that appear in | |
103 | load/store operations (like 'x[i]' in S1), and are handled differently. | |
104 | ||
105 | Target modeling: | |
106 | ================= | |
107 | Currently the only target specific information that is used is the | |
108 | size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can | |
109 | support different sizes of vectors, for now will need to specify one value | |
110 | for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future. | |
111 | ||
   Since we only vectorize operations whose vector form can be
113 | expressed using existing tree codes, to verify that an operation is | |
114 | supported, the vectorizer checks the relevant optab at the relevant | |
115 | machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If | |
116 | the value found is CODE_FOR_nothing, then there's no target support, and | |
117 | we can't vectorize the stmt. | |
118 | ||
119 | For additional information on this project see: | |
120 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html | |
121 | */ | |
122 | ||
123 | #include "config.h" | |
124 | #include "system.h" | |
125 | #include "coretypes.h" | |
126 | #include "tm.h" | |
127 | #include "errors.h" | |
128 | #include "ggc.h" | |
129 | #include "tree.h" | |
130 | #include "target.h" | |
131 | ||
132 | #include "rtl.h" | |
133 | #include "basic-block.h" | |
134 | #include "diagnostic.h" | |
135 | #include "tree-flow.h" | |
136 | #include "tree-dump.h" | |
137 | #include "timevar.h" | |
138 | #include "cfgloop.h" | |
139 | #include "cfglayout.h" | |
140 | #include "expr.h" | |
141 | #include "optabs.h" | |
a023975e | 142 | #include "toplev.h" |
79fe1b3b DN |
143 | #include "tree-chrec.h" |
144 | #include "tree-data-ref.h" | |
145 | #include "tree-scalar-evolution.h" | |
146 | #include "tree-vectorizer.h" | |
147 | #include "tree-pass.h" | |
618bb89c | 148 | #include "langhooks.h" |
79fe1b3b | 149 | |
f88a8cfa DN |
150 | |
151 | /************************************************************************* | |
152 | Simple Loop Peeling Utilities | |
153 | *************************************************************************/ | |
154 | ||
155 | /* Entry point for peeling of simple loops. | |
156 | Peel the first/last iterations of a loop. | |
157 | It can be used outside of the vectorizer for loops that are simple enough | |
158 | (see function documentation). In the vectorizer it is used to peel the | |
159 | last few iterations when the loop bound is unknown or does not evenly | |
160 | divide by the vectorization factor, and to peel the first few iterations | |
161 | to force the alignment of data references in the loop. */ | |
162 | struct loop *slpeel_tree_peel_loop_to_edge | |
163 | (struct loop *, struct loops *, edge, tree, tree, bool); | |
164 | static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg | |
165 | (struct loop *, struct loops *, edge); | |
166 | static void slpeel_update_phis_for_duplicate_loop | |
167 | (struct loop *, struct loop *, bool after); | |
63dfe6ff | 168 | static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool); |
335d3d54 | 169 | static void slpeel_make_loop_iterate_ntimes (struct loop *, tree); |
63dfe6ff | 170 | static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block); |
d6901754 | 171 | static bool slpeel_can_duplicate_loop_p (struct loop *, edge); |
f88a8cfa DN |
172 | static void allocate_new_names (bitmap); |
173 | static void rename_use_op (use_operand_p); | |
174 | static void rename_def_op (def_operand_p, tree); | |
175 | static void rename_variables_in_bb (basic_block); | |
176 | static void free_new_names (bitmap); | |
177 | static void rename_variables_in_loop (struct loop *); | |
1d8a9009 AT |
178 | #ifdef ENABLE_CHECKING |
179 | static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *); | |
180 | #endif | |
f88a8cfa DN |
181 | |
182 | ||
183 | /************************************************************************* | |
184 | Vectorization Utilities. | |
185 | *************************************************************************/ | |
186 | ||
79fe1b3b DN |
187 | /* Main analysis functions. */ |
188 | static loop_vec_info vect_analyze_loop (struct loop *); | |
189 | static loop_vec_info vect_analyze_loop_form (struct loop *); | |
190 | static bool vect_analyze_data_refs (loop_vec_info); | |
191 | static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | |
192 | static bool vect_analyze_scalar_cycles (loop_vec_info); | |
193 | static bool vect_analyze_data_ref_accesses (loop_vec_info); | |
194 | static bool vect_analyze_data_refs_alignment (loop_vec_info); | |
0dc0a70b | 195 | static bool vect_compute_data_refs_alignment (loop_vec_info); |
79fe1b3b DN |
196 | static bool vect_analyze_operations (loop_vec_info); |
197 | ||
198 | /* Main code transformation functions. */ | |
199 | static void vect_transform_loop (loop_vec_info, struct loops *); | |
79fe1b3b DN |
200 | static bool vect_transform_stmt (tree, block_stmt_iterator *); |
201 | static bool vectorizable_load (tree, block_stmt_iterator *, tree *); | |
202 | static bool vectorizable_store (tree, block_stmt_iterator *, tree *); | |
203 | static bool vectorizable_operation (tree, block_stmt_iterator *, tree *); | |
204 | static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); | |
0dc0a70b DN |
205 | static enum dr_alignment_support vect_supportable_dr_alignment |
206 | (struct data_reference *); | |
79fe1b3b DN |
207 | static void vect_align_data_ref (tree); |
208 | static void vect_enhance_data_refs_alignment (loop_vec_info); | |
209 | ||
210 | /* Utility functions for the analyses. */ | |
211 | static bool vect_is_simple_use (tree , struct loop *, tree *); | |
212 | static bool exist_non_indexing_operands_for_use_p (tree, tree); | |
213 | static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); | |
2b0729ba | 214 | static void vect_mark_relevant (varray_type *, tree); |
79fe1b3b | 215 | static bool vect_stmt_relevant_p (tree, loop_vec_info); |
a023975e | 216 | static tree vect_get_loop_niters (struct loop *, tree *); |
6775f1f3 | 217 | static bool vect_compute_data_ref_alignment |
79fe1b3b DN |
218 | (struct data_reference *, loop_vec_info); |
219 | static bool vect_analyze_data_ref_access (struct data_reference *); | |
220 | static bool vect_get_first_index (tree, tree *); | |
221 | static bool vect_can_force_dr_alignment_p (tree, unsigned int); | |
7ccf35ed DN |
222 | static struct data_reference * vect_analyze_pointer_ref_access |
223 | (tree, tree, bool); | |
d6901754 | 224 | static bool vect_can_advance_ivs_p (struct loop *); |
6775f1f3 IR |
225 | static tree vect_get_base_and_bit_offset |
226 | (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*); | |
227 | static struct data_reference * vect_analyze_pointer_ref_access | |
228 | (tree, tree, bool); | |
229 | static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *); | |
230 | static tree vect_compute_array_ref_alignment | |
231 | (struct data_reference *, loop_vec_info, tree, tree *); | |
232 | static tree vect_get_ptr_offset (tree, tree, tree *); | |
233 | static tree vect_get_symbl_and_dr | |
234 | (tree, tree, bool, loop_vec_info, struct data_reference **); | |
1de6a873 IR |
235 | static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *, |
236 | tree *, tree *); | |
79fe1b3b DN |
237 | |
238 | /* Utility functions for the code transformation. */ | |
239 | static tree vect_create_destination_var (tree, tree); | |
7ccf35ed DN |
240 | static tree vect_create_data_ref_ptr |
241 | (tree, block_stmt_iterator *, tree, tree *, bool); | |
242 | static tree vect_create_index_for_vector_ref | |
243 | (struct loop *, block_stmt_iterator *); | |
244 | static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); | |
79fe1b3b DN |
245 | static tree get_vectype_for_scalar_type (tree); |
246 | static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | |
247 | static tree vect_get_vec_def_for_operand (tree, tree); | |
248 | static tree vect_init_vector (tree, tree); | |
249 | static void vect_finish_stmt_generation | |
250 | (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); | |
251 | ||
f88a8cfa DN |
252 | /* Utility function dealing with loop peeling (not peeling itself). */ |
253 | static void vect_generate_tmps_on_preheader | |
254 | (loop_vec_info, tree *, tree *, tree *); | |
a023975e | 255 | static tree vect_build_loop_niters (loop_vec_info); |
63dfe6ff | 256 | static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge); |
a023975e | 257 | static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); |
f88a8cfa DN |
258 | static void vect_update_inits_of_dr |
259 | (struct data_reference *, struct loop *, tree niters); | |
a023975e OG |
260 | static void vect_update_inits_of_drs (loop_vec_info, tree); |
261 | static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); | |
335d3d54 | 262 | static void vect_do_peeling_for_loop_bound |
f88a8cfa | 263 | (loop_vec_info, tree *, struct loops *); |
a023975e | 264 | |
79fe1b3b DN |
265 | /* Utilities for creation and deletion of vec_info structs. */ |
266 | loop_vec_info new_loop_vec_info (struct loop *loop); | |
267 | void destroy_loop_vec_info (loop_vec_info); | |
268 | stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop); | |
269 | ||
a023975e OG |
270 | static bool vect_debug_stats (struct loop *loop); |
271 | static bool vect_debug_details (struct loop *loop); | |
272 | ||
273 | \f | |
f88a8cfa DN |
274 | /************************************************************************* |
275 | Simple Loop Peeling Utilities | |
276 | ||
277 | Utilities to support loop peeling for vectorization purposes. | |
278 | *************************************************************************/ | |
a023975e OG |
279 | |
280 | ||
281 | /* For each definition in DEFINITIONS this function allocates | |
282 | new ssa name. */ | |
283 | ||
284 | static void | |
285 | allocate_new_names (bitmap definitions) | |
286 | { | |
287 | unsigned ver; | |
288 | bitmap_iterator bi; | |
289 | ||
290 | EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) | |
291 | { | |
292 | tree def = ssa_name (ver); | |
293 | tree *new_name_ptr = xmalloc (sizeof (tree)); | |
294 | ||
295 | bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def); | |
296 | ||
297 | *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def)); | |
298 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal; | |
299 | ||
300 | SSA_NAME_AUX (def) = new_name_ptr; | |
301 | } | |
302 | } | |
303 | ||
304 | ||
305 | /* Renames the use *OP_P. */ | |
306 | ||
307 | static void | |
308 | rename_use_op (use_operand_p op_p) | |
309 | { | |
310 | tree *new_name_ptr; | |
311 | ||
312 | if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) | |
313 | return; | |
314 | ||
315 | new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p)); | |
316 | ||
317 | /* Something defined outside of the loop. */ | |
318 | if (!new_name_ptr) | |
319 | return; | |
320 | ||
321 | /* An ordinary ssa name defined in the loop. */ | |
322 | ||
323 | SET_USE (op_p, *new_name_ptr); | |
324 | } | |
325 | ||
326 | ||
327 | /* Renames the def *OP_P in statement STMT. */ | |
328 | ||
329 | static void | |
330 | rename_def_op (def_operand_p op_p, tree stmt) | |
331 | { | |
332 | tree *new_name_ptr; | |
333 | ||
334 | if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME) | |
335 | return; | |
336 | ||
337 | new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p)); | |
338 | ||
339 | /* Something defined outside of the loop. */ | |
340 | if (!new_name_ptr) | |
341 | return; | |
342 | ||
343 | /* An ordinary ssa name defined in the loop. */ | |
344 | ||
345 | SET_DEF (op_p, *new_name_ptr); | |
346 | SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt; | |
347 | } | |
348 | ||
349 | ||
/* Rename every definition and use in basic block BB to the new ssa
   names previously allocated by allocate_new_names (found through the
   SSA_NAME_AUX field of each original name).  Also rename the phi
   arguments on BB's successor edges that stay inside BB's loop, since
   those uses live in the successors' phi nodes rather than in BB.  */

static void
rename_variables_in_bb (basic_block bb)
{
  tree phi;
  block_stmt_iterator bsi;
  tree stmt;
  stmt_ann_t ann;
  use_optype uses;
  vuse_optype vuses;
  def_optype defs;
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  unsigned i;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  /* Phi results are definitions; rename them first.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    rename_def_op (PHI_RESULT_PTR (phi), phi);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);
      /* Make sure the operand caches are up to date before walking.  */
      get_stmt_operands (stmt);
      ann = stmt_ann (stmt);

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
	rename_use_op (USE_OP_PTR (uses, i));

      defs = DEF_OPS (ann);
      for (i = 0; i < NUM_DEFS (defs); i++)
	rename_def_op (DEF_OP_PTR (defs, i), stmt);

      vuses = VUSE_OPS (ann);
      for (i = 0; i < NUM_VUSES (vuses); i++)
	rename_use_op (VUSE_OP_PTR (vuses, i));

      /* V_MAY_DEFs carry both a use (the incoming name) and a def.  */
      v_may_defs = V_MAY_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
	{
	  rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
	  rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
	}

      /* Likewise V_MUST_DEFs have a killed operand and a result.  */
      v_must_defs = V_MUST_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
	{
	  rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
	  rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
	}
    }

  /* Rename uses that appear as phi args in successor blocks, but only
     for successors inside the loop; uses outside the loop must keep
     the original names.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
	continue;
      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
    }
}
413 | ||
414 | ||
415 | /* Releases the structures holding the new ssa names. */ | |
416 | ||
417 | static void | |
418 | free_new_names (bitmap definitions) | |
419 | { | |
420 | unsigned ver; | |
421 | bitmap_iterator bi; | |
422 | ||
423 | EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) | |
424 | { | |
425 | tree def = ssa_name (ver); | |
426 | ||
427 | if (SSA_NAME_AUX (def)) | |
428 | { | |
429 | free (SSA_NAME_AUX (def)); | |
430 | SSA_NAME_AUX (def) = NULL; | |
431 | } | |
432 | } | |
433 | } | |
434 | ||
435 | ||
436 | /* Renames variables in new generated LOOP. */ | |
437 | ||
438 | static void | |
439 | rename_variables_in_loop (struct loop *loop) | |
440 | { | |
441 | unsigned i; | |
442 | basic_block *bbs; | |
443 | ||
444 | bbs = get_loop_body (loop); | |
445 | ||
446 | for (i = 0; i < loop->num_nodes; i++) | |
447 | rename_variables_in_bb (bbs[i]); | |
448 | ||
449 | free (bbs); | |
450 | } | |
451 | ||
452 | ||
/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.

   The new names used in NEW_LOOP were allocated by allocate_new_names
   and are found through the SSA_NAME_AUX field of the original
   names.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
				       struct loop *new_loop, bool after)
{
  tree *new_name_ptr, new_ssa_name;
  tree phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = new_loop->exit_edges[0];
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  /* When NEW_LOOP runs after ORIG_LOOP, its entry values are the
     values that leave ORIG_LOOP's latch; otherwise they are the
     values entering ORIG_LOOP's preheader.  */
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
             (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
             (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */


  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (phi_new = phi_nodes (new_loop->header),
       phi_orig = phi_nodes (orig_loop->header);
       phi_new && phi_orig;
       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
    {
      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
	continue;

      new_name_ptr = SSA_NAME_AUX (def);
      if (!new_name_ptr)
	/* Something defined outside of the loop.  */
	continue;

      /* An ordinary ssa name defined in the loop.  */
      new_ssa_name = *new_name_ptr;
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* step 3 (case 1).  NEW_LOOP's exit edge must then be the edge
	 into ORIG_LOOP's preheader, so the original header phi takes
	 NEW_LOOP's result along it.  */
      if (!after)
	{
	  gcc_assert (new_loop_exit_e == orig_entry_e);
	  SET_PHI_ARG_DEF (phi_orig,
			   phi_arg_from_edge (phi_orig, new_loop_exit_e),
			   new_ssa_name);
	}
    }
}
535 | ||
536 | ||
/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because it had a single predecessor (the
        LOOP header) before the guard code was added, and now it became a merge
        point of two paths - the path that ends with the LOOP exit-edge, and
        the path that ends with GUARD_EDGE.

        This function creates and updates the relevant phi nodes to account for
        the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
        1. Create phi nodes at NEW_MERGE_BB.
        2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
           was added:

        ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          if (exit_loop) goto update_bb : LOOP_header_bb
        update_bb:

        ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
        LOOP_header_bb:
          if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
        UPDATE_BB are loop entry phis, like the phis in the LOOP header,
        organized in the same order.
        If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
        loop exit phis.

   - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
        "original" loop).  FALSE if LOOP is an original loop (not a newly
        created copy).  The SSA_NAME_AUX fields of the defs in the original
        loop are the corresponding new ssa-names used in the new duplicated
        loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
        nodes in UPDATE_BB takes the original ssa-name, and which takes the
        new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
        the LOOP-exit-edge takes the new-name, and the phi-arg that is
        associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
        FALSE, it's the other way around.
  */

static void
slpeel_update_phi_nodes_for_guard (edge guard_edge,
				   struct loop *loop,
				   bool entry_phis,
				   bool is_new_loop)
{
  tree orig_phi, new_phi, update_phi;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  /* For entry phis the reference phis live in the loop header; for
     exit phis they are already in UPDATE_BB itself.  */
  basic_block orig_bb = (entry_phis ? loop->header : update_bb);

  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
       orig_phi && update_phi;
       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
    {
      /* 1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      if (entry_phis)
	{
	  loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
					    EDGE_SUCC (loop->latch, 0));
	  guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
	}
      else /* exit phis */
	{
	  tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  tree *new_name_ptr = SSA_NAME_AUX (orig_def);
	  tree new_name;

	  if (new_name_ptr)
	    new_name = *new_name_ptr;
	  else
	    /* Something defined outside of the loop.  */
	    new_name = orig_def;

	  if (is_new_loop)
	    {
	      guard_arg = orig_def;
	      loop_arg = new_name;
	    }
	  else
	    {
	      guard_arg = new_name;
	      loop_arg = orig_def;
	    }
	}
      add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
		  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
		       PHI_RESULT (new_phi));
    }

  /* create_phi_node prepends, so restore source order of the new phis.  */
  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
}
652 | ||
653 | ||
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS, and by
   replacing the loop's existing exit test with a test on that IV.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
  tree orig_cond;
  edge exit_edge = loop->exit_edges[0];
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree begin_label = tree_block_label (loop->latch);
  tree exit_label = tree_block_label (loop->single_exit->dest);
  /* The new IV counts in the same type as NITERS: 0, 1, ... NITERS.  */
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  tree then_label;
  tree else_label;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  create_iv (init, step, NULL_TREE, loop,
             &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
     back to the exit condition statement.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);

  /* Build the new exit test, preserving the branch orientation of the
     original condition (which arm of the COND_EXPR exits the loop).  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    {
      cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
      else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
    }
  else /* 'then' edge loops back.  */
    {
      cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
      else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
    }

  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
		      then_label, else_label);
  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_stats (loop) || vect_debug_details (loop))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);

  /* Record the now-known iteration count on the loop.  */
  loop->nb_iterations = niters;
}
708 | ||
709 | ||
710 | /* Given LOOP this function generates a new copy of it and puts it | |
711 | on E which is either the entry or exit of LOOP. */ | |
712 | ||
713 | static struct loop * | |
f88a8cfa DN |
714 | slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, |
715 | edge e) | |
a023975e OG |
716 | { |
717 | struct loop *new_loop; | |
718 | basic_block *new_bbs, *bbs; | |
719 | bool at_exit; | |
720 | bool was_imm_dom; | |
721 | basic_block exit_dest; | |
722 | tree phi, phi_arg; | |
723 | ||
724 | at_exit = (e == loop->exit_edges[0]); | |
725 | if (!at_exit && e != loop_preheader_edge (loop)) | |
726 | { | |
727 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
63dfe6ff | 728 | fprintf (dump_file, "Edge is not an entry nor an exit edge.\n"); |
a023975e OG |
729 | return NULL; |
730 | } | |
731 | ||
732 | bbs = get_loop_body (loop); | |
733 | ||
734 | /* Check whether duplication is possible. */ | |
735 | if (!can_copy_bbs_p (bbs, loop->num_nodes)) | |
736 | { | |
737 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
63dfe6ff | 738 | fprintf (dump_file, "Cannot copy basic blocks.\n"); |
a023975e OG |
739 | free (bbs); |
740 | return NULL; | |
741 | } | |
742 | ||
743 | /* Generate new loop structure. */ | |
744 | new_loop = duplicate_loop (loops, loop, loop->outer); | |
745 | if (!new_loop) | |
746 | { | |
747 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
63dfe6ff | 748 | fprintf (dump_file, "duplicate_loop returns NULL.\n"); |
a023975e OG |
749 | free (bbs); |
750 | return NULL; | |
751 | } | |
752 | ||
753 | exit_dest = loop->exit_edges[0]->dest; | |
754 | was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, | |
755 | exit_dest) == loop->header ? | |
756 | true : false); | |
757 | ||
758 | new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes); | |
759 | ||
760 | copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL); | |
761 | ||
762 | /* Duplicating phi args at exit bbs as coming | |
763 | also from exit of duplicated loop. */ | |
bb29d951 | 764 | for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi)) |
a023975e OG |
765 | { |
766 | phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]); | |
767 | if (phi_arg) | |
768 | { | |
769 | edge new_loop_exit_edge; | |
770 | ||
771 | if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) | |
772 | new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); | |
773 | else | |
774 | new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); | |
775 | ||
d2e398df | 776 | add_phi_arg (phi, phi_arg, new_loop_exit_edge); |
a023975e OG |
777 | } |
778 | } | |
779 | ||
780 | if (at_exit) /* Add the loop copy at exit. */ | |
781 | { | |
782 | redirect_edge_and_branch_force (e, new_loop->header); | |
783 | set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); | |
784 | if (was_imm_dom) | |
785 | set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); | |
786 | } | |
787 | else /* Add the copy at entry. */ | |
788 | { | |
789 | edge new_exit_e; | |
790 | edge entry_e = loop_preheader_edge (loop); | |
791 | basic_block preheader = entry_e->src; | |
792 | ||
793 | if (!flow_bb_inside_loop_p (new_loop, | |
794 | EDGE_SUCC (new_loop->header, 0)->dest)) | |
795 | new_exit_e = EDGE_SUCC (new_loop->header, 0); | |
796 | else | |
797 | new_exit_e = EDGE_SUCC (new_loop->header, 1); | |
798 | ||
799 | redirect_edge_and_branch_force (new_exit_e, loop->header); | |
800 | set_immediate_dominator (CDI_DOMINATORS, loop->header, | |
801 | new_exit_e->src); | |
802 | ||
803 | /* We have to add phi args to the loop->header here as coming | |
804 | from new_exit_e edge. */ | |
bb29d951 | 805 | for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) |
a023975e OG |
806 | { |
807 | phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); | |
808 | if (phi_arg) | |
d2e398df | 809 | add_phi_arg (phi, phi_arg, new_exit_e); |
a023975e OG |
810 | } |
811 | ||
812 | redirect_edge_and_branch_force (entry_e, new_loop->header); | |
813 | set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); | |
814 | } | |
815 | ||
816 | flow_loop_scan (new_loop, LOOP_ALL); | |
817 | flow_loop_scan (loop, LOOP_ALL); | |
818 | free (new_bbs); | |
819 | free (bbs); | |
820 | ||
821 | return new_loop; | |
822 | } | |
823 | ||
824 | ||
825 | /* Given the condition statement COND, put it as the last statement | |
826 | of GUARD_BB; EXIT_BB is the basic block to skip the loop; | |
827 | Assumes that this is the single exit of the guarded loop. | |
828 | Returns the skip edge. */ | |
829 | ||
830 | static edge | |
63dfe6ff DN |
831 | slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, |
832 | basic_block dom_bb) | |
a023975e OG |
833 | { |
834 | block_stmt_iterator bsi; | |
835 | edge new_e, enter_e; | |
836 | tree cond_stmt, then_label, else_label; | |
837 | ||
838 | enter_e = EDGE_SUCC (guard_bb, 0); | |
839 | enter_e->flags &= ~EDGE_FALLTHRU; | |
840 | enter_e->flags |= EDGE_FALSE_VALUE; | |
841 | bsi = bsi_last (guard_bb); | |
842 | ||
843 | then_label = build1 (GOTO_EXPR, void_type_node, | |
844 | tree_block_label (exit_bb)); | |
845 | else_label = build1 (GOTO_EXPR, void_type_node, | |
846 | tree_block_label (enter_e->dest)); | |
e9c00ceb | 847 | cond_stmt = build3 (COND_EXPR, void_type_node, cond, |
a023975e OG |
848 | then_label, else_label); |
849 | bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); | |
850 | /* Add new edge to connect entry block to the second loop. */ | |
851 | new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); | |
63dfe6ff | 852 | set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); |
a023975e OG |
853 | return new_e; |
854 | } | |
855 | ||
856 | ||
d6901754 DN |
857 | /* This function verifies that the following restrictions apply to LOOP: |
858 | (1) it is innermost | |
859 | (2) it consists of exactly 2 basic blocks - header, and an empty latch. | |
860 | (3) it is single entry, single exit | |
861 | (4) its exit condition is the last stmt in the header | |
862 | (5) E is the entry/exit edge of LOOP. | |
863 | */ | |
a023975e OG |
864 | |
865 | static bool | |
d6901754 | 866 | slpeel_can_duplicate_loop_p (struct loop *loop, edge e) |
a023975e OG |
867 | { |
868 | edge exit_e = loop->exit_edges [0]; | |
869 | edge entry_e = loop_preheader_edge (loop); | |
d6901754 DN |
870 | tree orig_cond = get_loop_exit_condition (loop); |
871 | block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); | |
a023975e | 872 | |
d6901754 DN |
873 | if (any_marked_for_rewrite_p ()) |
874 | return false; | |
a023975e | 875 | |
d6901754 DN |
876 | if (loop->inner |
877 | /* All loops have an outer scope; the only case loop->outer is NULL is for | |
878 | the function itself. */ | |
879 | || !loop->outer | |
880 | || loop->num_nodes != 2 | |
881 | || !empty_block_p (loop->latch) | |
882 | || loop->num_exits != 1 | |
883 | || loop->num_entries != 1 | |
884 | /* Verify that new loop exit condition can be trivially modified. */ | |
885 | || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi)) | |
886 | || (e != exit_e && e != entry_e)) | |
887 | return false; | |
a023975e OG |
888 | |
889 | return true; | |
890 | } | |
891 | ||
1d8a9009 | 892 | #ifdef ENABLE_CHECKING |
63dfe6ff DN |
893 | static void |
894 | slpeel_verify_cfg_after_peeling (struct loop *first_loop, | |
895 | struct loop *second_loop) | |
896 | { | |
897 | basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest; | |
898 | basic_block loop2_entry_bb = second_loop->pre_header; | |
899 | basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src; | |
900 | ||
901 | /* A guard that controls whether the second_loop is to be executed or skipped | |
902 | is placed in first_loop->exit. first_loopt->exit therefore has two | |
903 | successors - one is the preheader of second_loop, and the other is a bb | |
904 | after second_loop. | |
905 | */ | |
906 | gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2); | |
a023975e | 907 | |
a023975e | 908 | |
63dfe6ff DN |
909 | /* 1. Verify that one of the successors of first_loopt->exit is the preheader |
910 | of second_loop. */ | |
a023975e | 911 | |
63dfe6ff DN |
912 | /* The preheader of new_loop is expected to have two predessors: |
913 | first_loop->exit and the block that precedes first_loop. */ | |
914 | ||
915 | gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2 | |
916 | && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb | |
917 | && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb) | |
918 | || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb | |
919 | && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb))); | |
920 | ||
921 | /* Verify that the other successor of first_loopt->exit is after the | |
922 | second_loop. */ | |
923 | /* TODO */ | |
924 | } | |
1d8a9009 | 925 | #endif |
a023975e | 926 | |
63dfe6ff | 927 | /* Function slpeel_tree_peel_loop_to_edge. |
a023975e | 928 | |
63dfe6ff DN |
929 | Peel the first (last) iterations of LOOP into a new prolog (epilog) loop |
930 | that is placed on the entry (exit) edge E of LOOP. After this transformation | |
931 | we have two loops one after the other - first-loop iterates FIRST_NITERS | |
932 | times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. | |
a023975e | 933 | |
63dfe6ff DN |
934 | Input: |
935 | - LOOP: the loop to be peeled. | |
936 | - E: the exit or entry edge of LOOP. | |
937 | If it is the entry edge, we peel the first iterations of LOOP. In this | |
938 | case first-loop is LOOP, and second-loop is the newly created loop. | |
939 | If it is the exit edge, we peel the last iterations of LOOP. In this | |
940 | case, first-loop is the newly created loop, and second-loop is LOOP. | |
941 | - NITERS: the number of iterations that LOOP iterates. | |
942 | - FIRST_NITERS: the number of iterations that the first-loop should iterate. | |
e7a531ae | 943 | - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible |
63dfe6ff DN |
944 | for updating the loop bound of the first-loop to FIRST_NITERS. If it |
945 | is false, the caller of this function may want to take care of this | |
e7a531ae | 946 | (this can be useful if we don't want new stmts added to first-loop). |
a023975e | 947 | |
63dfe6ff DN |
948 | Output: |
949 | The function returns a pointer to the new loop-copy, or NULL if it failed | |
e7a531ae | 950 | to perform the transformation. |
63dfe6ff DN |
951 | |
952 | The function generates two if-then-else guards: one before the first loop, | |
953 | and the other before the second loop: | |
954 | The first guard is: | |
955 | if (FIRST_NITERS == 0) then skip the first loop, | |
956 | and go directly to the second loop. | |
957 | The second guard is: | |
958 | if (FIRST_NITERS == NITERS) then skip the second loop. | |
959 | ||
960 | FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p). | |
961 | FORNOW the resulting code will not be in loop-closed-ssa form. | |
962 | */ | |
a023975e | 963 | |
a023975e | 964 | struct loop* |
f88a8cfa DN |
965 | slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, |
966 | edge e, tree first_niters, | |
967 | tree niters, bool update_first_loop_count) | |
a023975e OG |
968 | { |
969 | struct loop *new_loop = NULL, *first_loop, *second_loop; | |
970 | edge skip_e; | |
971 | tree pre_condition; | |
972 | bitmap definitions; | |
63dfe6ff DN |
973 | basic_block bb_before_second_loop, bb_after_second_loop; |
974 | basic_block bb_before_first_loop; | |
975 | basic_block bb_between_loops; | |
a023975e | 976 | edge exit_e = loop->exit_edges [0]; |
63dfe6ff | 977 | |
d6901754 DN |
978 | if (!slpeel_can_duplicate_loop_p (loop, e)) |
979 | return NULL; | |
63dfe6ff DN |
980 | |
981 | /* We have to initialize cfg_hooks. Then, when calling | |
a023975e | 982 | cfg_hooks->split_edge, the function tree_split_edge |
63dfe6ff | 983 | is actually called and, when calling cfg_hooks->duplicate_block, |
a023975e OG |
984 | the function tree_duplicate_bb is called. */ |
985 | tree_register_cfg_hooks (); | |
986 | ||
63dfe6ff DN |
987 | |
988 | /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP). | |
989 | Resulting CFG would be: | |
990 | ||
991 | first_loop: | |
992 | do { | |
993 | } while ... | |
994 | ||
995 | second_loop: | |
996 | do { | |
997 | } while ... | |
998 | ||
999 | orig_exit_bb: | |
1000 | */ | |
1001 | ||
f88a8cfa | 1002 | if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e))) |
a023975e | 1003 | { |
63dfe6ff DN |
1004 | if (vect_debug_stats (loop) || vect_debug_details (loop)) |
1005 | fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n"); | |
a023975e OG |
1006 | return NULL; |
1007 | } | |
63dfe6ff | 1008 | |
a023975e OG |
1009 | if (e == exit_e) |
1010 | { | |
63dfe6ff | 1011 | /* NEW_LOOP was placed after LOOP. */ |
a023975e OG |
1012 | first_loop = loop; |
1013 | second_loop = new_loop; | |
1014 | } | |
63dfe6ff | 1015 | else |
a023975e | 1016 | { |
63dfe6ff | 1017 | /* NEW_LOOP was placed before LOOP. */ |
a023975e OG |
1018 | first_loop = new_loop; |
1019 | second_loop = loop; | |
1020 | } | |
1021 | ||
63dfe6ff DN |
1022 | definitions = marked_ssa_names (); |
1023 | allocate_new_names (definitions); | |
1024 | slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); | |
1025 | rename_variables_in_loop (new_loop); | |
a023975e | 1026 | |
a023975e | 1027 | |
63dfe6ff DN |
1028 | /* 2. Add the guard that controls whether the first loop is executed. |
1029 | Resulting CFG would be: | |
1030 | ||
1031 | bb_before_first_loop: | |
1032 | if (FIRST_NITERS == 0) GOTO bb_before_second_loop | |
1033 | GOTO first-loop | |
1034 | ||
1035 | first_loop: | |
1036 | do { | |
1037 | } while ... | |
1038 | ||
1039 | bb_before_second_loop: | |
a023975e | 1040 | |
63dfe6ff DN |
1041 | second_loop: |
1042 | do { | |
1043 | } while ... | |
a023975e | 1044 | |
63dfe6ff DN |
1045 | orig_exit_bb: |
1046 | */ | |
a023975e | 1047 | |
63dfe6ff DN |
1048 | bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); |
1049 | add_bb_to_loop (bb_before_first_loop, first_loop->outer); | |
1050 | bb_before_second_loop = split_edge (first_loop->exit_edges[0]); | |
1051 | add_bb_to_loop (bb_before_second_loop, first_loop->outer); | |
a023975e | 1052 | flow_loop_scan (first_loop, LOOP_ALL); |
63dfe6ff DN |
1053 | flow_loop_scan (second_loop, LOOP_ALL); |
1054 | ||
1055 | pre_condition = | |
e9c00ceb | 1056 | build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node); |
63dfe6ff DN |
1057 | skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, |
1058 | bb_before_second_loop, bb_before_first_loop); | |
1059 | slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */, | |
1060 | first_loop == new_loop); | |
1061 | ||
1062 | ||
1063 | /* 3. Add the guard that controls whether the second loop is executed. | |
1064 | Resulting CFG would be: | |
79fe1b3b | 1065 | |
63dfe6ff DN |
1066 | bb_before_first_loop: |
1067 | if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop) | |
1068 | GOTO first-loop | |
a023975e | 1069 | |
63dfe6ff DN |
1070 | first_loop: |
1071 | do { | |
1072 | } while ... | |
a023975e | 1073 | |
63dfe6ff DN |
1074 | bb_between_loops: |
1075 | if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop) | |
1076 | GOTO bb_before_second_loop | |
a023975e | 1077 | |
63dfe6ff | 1078 | bb_before_second_loop: |
a023975e | 1079 | |
63dfe6ff DN |
1080 | second_loop: |
1081 | do { | |
1082 | } while ... | |
a023975e | 1083 | |
63dfe6ff | 1084 | bb_after_second_loop: |
a023975e | 1085 | |
63dfe6ff DN |
1086 | orig_exit_bb: |
1087 | */ | |
1088 | ||
1089 | bb_between_loops = split_edge (first_loop->exit_edges[0]); | |
1090 | add_bb_to_loop (bb_between_loops, first_loop->outer); | |
1091 | bb_after_second_loop = split_edge (second_loop->exit_edges[0]); | |
1092 | add_bb_to_loop (bb_after_second_loop, second_loop->outer); | |
1093 | flow_loop_scan (first_loop, LOOP_ALL); | |
a023975e OG |
1094 | flow_loop_scan (second_loop, LOOP_ALL); |
1095 | ||
e9c00ceb | 1096 | pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters); |
63dfe6ff DN |
1097 | skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, |
1098 | bb_after_second_loop, bb_before_first_loop); | |
1099 | slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */, | |
1100 | second_loop == new_loop); | |
1101 | ||
1102 | /* Flow loop scan does not update loop->single_exit field. */ | |
1103 | first_loop->single_exit = first_loop->exit_edges[0]; | |
1104 | second_loop->single_exit = second_loop->exit_edges[0]; | |
a023975e | 1105 | |
63dfe6ff DN |
1106 | /* 4. Make first-loop iterate FIRST_NITERS times, if requested. |
1107 | */ | |
1108 | if (update_first_loop_count) | |
1109 | slpeel_make_loop_iterate_ntimes (first_loop, first_niters); | |
a023975e | 1110 | |
63dfe6ff | 1111 | free_new_names (definitions); |
a023975e OG |
1112 | BITMAP_XFREE (definitions); |
1113 | unmark_all_for_rewrite (); | |
63dfe6ff | 1114 | |
a023975e OG |
1115 | return new_loop; |
1116 | } | |
1117 | ||
a023975e OG |
1118 | \f |
1119 | /* Here the proper Vectorizer starts. */ | |
79fe1b3b | 1120 | |
f88a8cfa DN |
1121 | /************************************************************************* |
1122 | Vectorization Utilities. | |
1123 | *************************************************************************/ | |
1124 | ||
79fe1b3b DN |
1125 | /* Function new_stmt_vec_info. |
1126 | ||
1127 | Create and initialize a new stmt_vec_info struct for STMT. */ | |
1128 | ||
1129 | stmt_vec_info | |
1130 | new_stmt_vec_info (tree stmt, struct loop *loop) | |
1131 | { | |
1132 | stmt_vec_info res; | |
1133 | res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); | |
1134 | ||
1135 | STMT_VINFO_TYPE (res) = undef_vec_info_type; | |
1136 | STMT_VINFO_STMT (res) = stmt; | |
1137 | STMT_VINFO_LOOP (res) = loop; | |
1138 | STMT_VINFO_RELEVANT_P (res) = 0; | |
1139 | STMT_VINFO_VECTYPE (res) = NULL; | |
1140 | STMT_VINFO_VEC_STMT (res) = NULL; | |
1141 | STMT_VINFO_DATA_REF (res) = NULL; | |
1142 | STMT_VINFO_MEMTAG (res) = NULL; | |
6775f1f3 | 1143 | STMT_VINFO_VECT_DR_BASE (res) = NULL; |
1de6a873 IR |
1144 | STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE; |
1145 | STMT_VINFO_VECT_STEP (res) = NULL_TREE; | |
1146 | STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false; | |
1147 | STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE; | |
79fe1b3b DN |
1148 | |
1149 | return res; | |
1150 | } | |
1151 | ||
1152 | ||
1153 | /* Function new_loop_vec_info. | |
1154 | ||
1155 | Create and initialize a new loop_vec_info struct for LOOP, as well as | |
1156 | stmt_vec_info structs for all the stmts in LOOP. */ | |
1157 | ||
1158 | loop_vec_info | |
1159 | new_loop_vec_info (struct loop *loop) | |
1160 | { | |
1161 | loop_vec_info res; | |
1162 | basic_block *bbs; | |
1163 | block_stmt_iterator si; | |
1164 | unsigned int i; | |
1165 | ||
1166 | res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); | |
1167 | ||
1168 | bbs = get_loop_body (loop); | |
1169 | ||
1170 | /* Create stmt_info for all stmts in the loop. */ | |
1171 | for (i = 0; i < loop->num_nodes; i++) | |
1172 | { | |
1173 | basic_block bb = bbs[i]; | |
1174 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1175 | { | |
1176 | tree stmt = bsi_stmt (si); | |
1177 | stmt_ann_t ann; | |
1178 | ||
1179 | get_stmt_operands (stmt); | |
1180 | ann = stmt_ann (stmt); | |
1181 | set_stmt_info (ann, new_stmt_vec_info (stmt, loop)); | |
1182 | } | |
1183 | } | |
1184 | ||
1185 | LOOP_VINFO_LOOP (res) = loop; | |
1186 | LOOP_VINFO_BBS (res) = bbs; | |
1187 | LOOP_VINFO_EXIT_COND (res) = NULL; | |
a023975e | 1188 | LOOP_VINFO_NITERS (res) = NULL; |
79fe1b3b | 1189 | LOOP_VINFO_VECTORIZABLE_P (res) = 0; |
a023975e | 1190 | LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false; |
79fe1b3b DN |
1191 | LOOP_VINFO_VECT_FACTOR (res) = 0; |
1192 | VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, | |
1193 | "loop_write_datarefs"); | |
1194 | VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, | |
1195 | "loop_read_datarefs"); | |
0dc0a70b | 1196 | LOOP_VINFO_UNALIGNED_DR (res) = NULL; |
a023975e | 1197 | |
79fe1b3b DN |
1198 | return res; |
1199 | } | |
1200 | ||
1201 | ||
1202 | /* Function destroy_loop_vec_info. | |
1203 | ||
1204 | Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the | |
1205 | stmts in the loop. */ | |
1206 | ||
1207 | void | |
1208 | destroy_loop_vec_info (loop_vec_info loop_vinfo) | |
1209 | { | |
1210 | struct loop *loop; | |
1211 | basic_block *bbs; | |
1212 | int nbbs; | |
1213 | block_stmt_iterator si; | |
1214 | int j; | |
1215 | ||
1216 | if (!loop_vinfo) | |
1217 | return; | |
1218 | ||
1219 | loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1220 | ||
1221 | bbs = LOOP_VINFO_BBS (loop_vinfo); | |
1222 | nbbs = loop->num_nodes; | |
1223 | ||
1224 | for (j = 0; j < nbbs; j++) | |
1225 | { | |
1226 | basic_block bb = bbs[j]; | |
1227 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1228 | { | |
1229 | tree stmt = bsi_stmt (si); | |
1230 | stmt_ann_t ann = stmt_ann (stmt); | |
1231 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1232 | free (stmt_info); | |
1233 | set_stmt_info (ann, NULL); | |
1234 | } | |
1235 | } | |
1236 | ||
1237 | free (LOOP_VINFO_BBS (loop_vinfo)); | |
1238 | varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); | |
1239 | varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo)); | |
1240 | ||
1241 | free (loop_vinfo); | |
1242 | } | |
1243 | ||
1244 | ||
1245 | /* Function debug_loop_stats. | |
1246 | ||
1247 | For vectorization statistics dumps. */ | |
1248 | ||
1249 | static bool | |
1250 | vect_debug_stats (struct loop *loop) | |
1251 | { | |
1252 | basic_block bb; | |
1253 | block_stmt_iterator si; | |
1254 | tree node = NULL_TREE; | |
1255 | ||
1256 | if (!dump_file || !(dump_flags & TDF_STATS)) | |
1257 | return false; | |
1258 | ||
1259 | if (!loop) | |
1260 | { | |
1261 | fprintf (dump_file, "\n"); | |
1262 | return true; | |
1263 | } | |
1264 | ||
1265 | if (!loop->header) | |
1266 | return false; | |
1267 | ||
1268 | bb = loop->header; | |
1269 | ||
1270 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1271 | { | |
1272 | node = bsi_stmt (si); | |
1273 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
1274 | break; | |
1275 | } | |
1276 | ||
1277 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
1278 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
1279 | { | |
1280 | fprintf (dump_file, "\nloop at %s:%d: ", | |
1281 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
1282 | return true; | |
1283 | } | |
1284 | ||
1285 | return false; | |
1286 | } | |
1287 | ||
1288 | ||
1289 | /* Function debug_loop_details. | |
1290 | ||
1291 | For vectorization debug dumps. */ | |
1292 | ||
1293 | static bool | |
1294 | vect_debug_details (struct loop *loop) | |
1295 | { | |
1296 | basic_block bb; | |
1297 | block_stmt_iterator si; | |
1298 | tree node = NULL_TREE; | |
1299 | ||
1300 | if (!dump_file || !(dump_flags & TDF_DETAILS)) | |
1301 | return false; | |
1302 | ||
1303 | if (!loop) | |
1304 | { | |
1305 | fprintf (dump_file, "\n"); | |
1306 | return true; | |
1307 | } | |
1308 | ||
1309 | if (!loop->header) | |
1310 | return false; | |
1311 | ||
1312 | bb = loop->header; | |
1313 | ||
1314 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1315 | { | |
1316 | node = bsi_stmt (si); | |
1317 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
1318 | break; | |
1319 | } | |
1320 | ||
1321 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
1322 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
1323 | { | |
1324 | fprintf (dump_file, "\nloop at %s:%d: ", | |
1325 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
1326 | return true; | |
1327 | } | |
1328 | ||
1329 | return false; | |
1330 | } | |
1331 | ||
6775f1f3 IR |
1332 | |
1333 | /* Function vect_get_ptr_offset | |
1334 | ||
1335 | Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */ | |
1336 | ||
1337 | static tree | |
1338 | vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED, | |
1339 | tree vectype ATTRIBUTE_UNUSED, | |
1340 | tree *offset ATTRIBUTE_UNUSED) | |
1341 | { | |
1342 | /* TODO: Use alignment information. */ | |
1343 | return NULL_TREE; | |
1344 | } | |
1345 | ||
1346 | ||
1de6a873 IR |
1347 | /* Function vect_analyze_offset_expr |
1348 | ||
1349 | Given an offset expression EXPR received from get_inner_reference, analyze | |
1350 | it and create an expression for INITIAL_OFFSET by substituting the variables | |
1351 | of EXPR with initial_condition of the corresponding access_fn in the loop. | |
1352 | E.g., | |
1353 | for i | |
1354 | for (j = 3; j < N; j++) | |
1355 | a[j].b[i][j] = 0; | |
1356 | ||
1357 | For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be | |
1358 | subsituted, since its access_fn in the inner loop is i. 'j' will be | |
1359 | substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where | |
1360 | C` = 3 * C_j + C. | |
1361 | ||
1362 | Compute MISALIGN (the misalignment of the data reference initial access from | |
1363 | its base) if possible. Misalignment can be calculated only if all the | |
1364 | variables can be substitued with constants, or if a variable is multiplied | |
1365 | by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot | |
1366 | be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple | |
1367 | of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo | |
1368 | VECTYPE_ALIGNMENT computation in the caller of this function). | |
1369 | ||
1370 | STEP is an evolution of the data reference in this loop in bytes. | |
1371 | In the above example, STEP is C_j. | |
1372 | ||
1373 | Return FALSE, if the analysis fails, e.g., there is no access_fn for a | |
1374 | variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP) | |
1375 | are NULL_TREEs. Otherwise, return TRUE. | |
1376 | ||
1377 | */ | |
1378 | ||
1379 | static bool | |
1380 | vect_analyze_offset_expr (tree expr, | |
1381 | struct loop *loop, | |
1382 | tree vectype_alignment, | |
1383 | tree *initial_offset, | |
1384 | tree *misalign, | |
1385 | tree *step) | |
1386 | { | |
1387 | tree oprnd0; | |
1388 | tree oprnd1; | |
1389 | tree left_offset = size_zero_node; | |
1390 | tree right_offset = size_zero_node; | |
1391 | tree left_misalign = size_zero_node; | |
1392 | tree right_misalign = size_zero_node; | |
1393 | tree left_step = size_zero_node; | |
1394 | tree right_step = size_zero_node; | |
1395 | enum tree_code code; | |
1396 | tree init, evolution, def_stmt; | |
1397 | ||
1398 | STRIP_NOPS (expr); | |
1399 | ||
1400 | *step = NULL_TREE; | |
1401 | *misalign = NULL_TREE; | |
1402 | *initial_offset = NULL_TREE; | |
1403 | ||
1404 | /* Stop conditions: | |
1405 | 1. Constant. */ | |
1406 | if (TREE_CONSTANT (expr)) | |
1407 | { | |
1408 | *initial_offset = fold_convert (sizetype, expr); | |
1409 | *misalign = fold_convert (sizetype, expr); | |
1410 | *step = size_zero_node; | |
1411 | return true; | |
1412 | } | |
1413 | ||
1414 | /* 2. Variable. Try to substitute with initial_condition of the corresponding | |
1415 | access_fn in the current loop. */ | |
1416 | if (SSA_VAR_P (expr)) | |
1417 | { | |
1418 | tree access_fn = analyze_scalar_evolution (loop, expr); | |
1419 | ||
1420 | if (access_fn == chrec_dont_know) | |
1421 | /* No access_fn. */ | |
1422 | return false; | |
1423 | ||
1424 | init = initial_condition_in_loop_num (access_fn, loop->num); | |
1425 | if (init == expr) | |
1426 | { | |
1427 | def_stmt = SSA_NAME_DEF_STMT (init); | |
1428 | if (def_stmt | |
1429 | && !IS_EMPTY_STMT (def_stmt) | |
1430 | && flow_bb_inside_loop_p (loop, bb_for_stmt (def_stmt))) | |
1431 | /* Not enough information: may be not loop invariant. | |
1432 | E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its | |
1433 | initial_condition is D, but it depends on i - loop's induction | |
1434 | variable. */ | |
1435 | return false; | |
1436 | } | |
1437 | ||
1438 | evolution = evolution_part_in_loop_num (access_fn, loop->num); | |
1439 | if (evolution && TREE_CODE (evolution) != INTEGER_CST) | |
1440 | /* Evolution is not constant. */ | |
1441 | return false; | |
1442 | ||
1443 | if (TREE_CONSTANT (init)) | |
1444 | *misalign = fold_convert (sizetype, init); | |
1445 | else | |
1446 | /* Not constant, misalignment cannot be calculated. */ | |
1447 | *misalign = NULL_TREE; | |
1448 | ||
1449 | *initial_offset = fold_convert (sizetype, init); | |
1450 | ||
1451 | *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node; | |
1452 | return true; | |
1453 | } | |
1454 | ||
1455 | /* Recursive computation. */ | |
1456 | oprnd0 = TREE_OPERAND (expr, 0); | |
1457 | oprnd1 = TREE_OPERAND (expr, 1); | |
1458 | ||
1459 | if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset, | |
1460 | &left_misalign, &left_step) | |
1461 | || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment, | |
1462 | &right_offset, &right_misalign, &right_step)) | |
1463 | return false; | |
1464 | ||
1465 | /* The type of the operation: plus, minus or mult. */ | |
1466 | code = TREE_CODE (expr); | |
1467 | switch (code) | |
1468 | { | |
1469 | case MULT_EXPR: | |
1470 | if (!TREE_CONSTANT (right_offset)) | |
1471 | /* RIGHT_OFFSET can be not constant. For example, for arrays of variable | |
1472 | sized types. | |
1473 | FORNOW: We don't support such cases. */ | |
1474 | return false; | |
1475 | ||
1476 | /* Misalignment computation. */ | |
1477 | if (SSA_VAR_P (left_offset)) | |
1478 | { | |
1479 | /* If the left side contains variable that cannot be substituted with | |
1480 | constant, we check if the right side is a multiple of ALIGNMENT. */ | |
1481 | if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset, | |
1482 | vectype_alignment))) | |
1483 | *misalign = size_zero_node; | |
1484 | else | |
1485 | /* If the remainder is not zero or the right side isn't constant, we | |
1486 | can't compute misalignment. */ | |
1487 | *misalign = NULL_TREE; | |
1488 | } | |
1489 | else | |
1490 | { | |
1491 | /* The left operand was successfully substituted with constant. */ | |
1492 | if (left_misalign) | |
1493 | /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is | |
1494 | NULL_TREE. */ | |
1495 | *misalign = size_binop (code, left_misalign, right_misalign); | |
1496 | else | |
1497 | *misalign = NULL_TREE; | |
1498 | } | |
1499 | ||
1500 | /* Step calculation. */ | |
1501 | /* Multiply the step by the right operand. */ | |
1502 | *step = size_binop (MULT_EXPR, left_step, right_offset); | |
1503 | break; | |
1504 | ||
1505 | case PLUS_EXPR: | |
1506 | case MINUS_EXPR: | |
1507 | /* Combine the recursive calculations for step and misalignment. */ | |
1508 | *step = size_binop (code, left_step, right_step); | |
1509 | ||
1510 | if (left_misalign && right_misalign) | |
1511 | *misalign = size_binop (code, left_misalign, right_misalign); | |
1512 | else | |
1513 | *misalign = NULL_TREE; | |
1514 | ||
1515 | break; | |
1516 | ||
1517 | default: | |
1518 | gcc_unreachable (); | |
1519 | } | |
1520 | ||
1521 | /* Compute offset. */ | |
1522 | *initial_offset = fold_convert (sizetype, | |
1523 | fold (build2 (code, TREE_TYPE (left_offset), | |
1524 | left_offset, | |
1525 | right_offset))); | |
1526 | return true; | |
1527 | } | |
1528 | ||
1529 | ||
6775f1f3 IR |
1530 | /* Function vect_get_base_and_bit_offset |
1531 | ||
1532 | Return the BASE of the data reference EXPR. | |
1533 | If VECTYPE is given, also compute the OFFSET from BASE in bits. | |
1534 | E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in | |
1535 | bits of 'a.b[i] + 4B' from a. | |
1536 | ||
1537 | Input: | |
1538 | EXPR - the memory reference that is being analyzed | |
1539 | DR - the data_reference struct of the _original_ memory reference | |
1540 | (Note: DR_REF (DR) is not necessarily EXPR) | |
1541 | VECTYPE - the type that defines the alignment (i.e, we compute | |
1542 | alignment relative to TYPE_ALIGN(VECTYPE)) | |
79fe1b3b | 1543 | |
6775f1f3 IR |
1544 | Output: |
1545 | BASE (returned value) - the base of the data reference EXPR. | |
1546 | E.g, if EXPR is a.b[k].c[i][j] the returned | |
1547 | base is a. | |
1548 | OFFSET - offset of EXPR from BASE in bits | |
1549 | BASE_ALIGNED_P - indicates if BASE is aligned | |
1550 | ||
1551 | If something unexpected is encountered (an unsupported form of data-ref), | |
1552 | or if VECTYPE is given but OFFSET cannot be determined: | |
1553 | then NULL_TREE is returned. */ | |
79fe1b3b DN |
1554 | |
1555 | static tree | |
6775f1f3 IR |
1556 | vect_get_base_and_bit_offset (struct data_reference *dr, |
1557 | tree expr, | |
1558 | tree vectype, | |
1559 | loop_vec_info loop_vinfo, | |
1560 | tree *offset, | |
1561 | bool *base_aligned_p) | |
79fe1b3b | 1562 | { |
6775f1f3 IR |
1563 | tree this_offset = size_zero_node; |
1564 | tree base = NULL_TREE; | |
1565 | tree next_ref; | |
1566 | tree oprnd0, oprnd1; | |
1567 | struct data_reference *array_dr; | |
1568 | enum tree_code code = TREE_CODE (expr); | |
1569 | ||
1570 | *base_aligned_p = false; | |
79fe1b3b | 1571 | |
6775f1f3 | 1572 | switch (code) |
79fe1b3b | 1573 | { |
6775f1f3 IR |
1574 | /* These cases end the recursion: */ |
1575 | case VAR_DECL: | |
1576 | *offset = size_zero_node; | |
1577 | if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype)) | |
1578 | *base_aligned_p = true; | |
1579 | return expr; | |
1580 | ||
1581 | case SSA_NAME: | |
1582 | if (!vectype) | |
1583 | return expr; | |
1584 | ||
1585 | if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE) | |
1586 | return NULL_TREE; | |
1587 | ||
1588 | if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype)) | |
1589 | { | |
1590 | base = vect_get_ptr_offset (expr, vectype, offset); | |
1591 | if (base) | |
1592 | *base_aligned_p = true; | |
1593 | } | |
1594 | else | |
1595 | { | |
1596 | *base_aligned_p = true; | |
1597 | *offset = size_zero_node; | |
1598 | base = expr; | |
1599 | } | |
1600 | return base; | |
1601 | ||
1602 | case INTEGER_CST: | |
1603 | *offset = int_const_binop (MULT_EXPR, expr, | |
1604 | build_int_cst (NULL_TREE, BITS_PER_UNIT), 1); | |
1605 | return expr; | |
1606 | ||
1607 | /* These cases continue the recursion: */ | |
1608 | case COMPONENT_REF: | |
1609 | oprnd0 = TREE_OPERAND (expr, 0); | |
1610 | oprnd1 = TREE_OPERAND (expr, 1); | |
79fe1b3b DN |
1611 | |
1612 | this_offset = bit_position (oprnd1); | |
6775f1f3 IR |
1613 | if (vectype && !host_integerp (this_offset, 1)) |
1614 | return NULL_TREE; | |
1615 | next_ref = oprnd0; | |
1616 | break; | |
1617 | ||
1618 | case ADDR_EXPR: | |
1619 | oprnd0 = TREE_OPERAND (expr, 0); | |
1620 | next_ref = oprnd0; | |
1621 | break; | |
1622 | ||
1623 | case INDIRECT_REF: | |
1624 | oprnd0 = TREE_OPERAND (expr, 0); | |
1625 | next_ref = oprnd0; | |
1626 | break; | |
1627 | ||
1628 | case ARRAY_REF: | |
1629 | if (DR_REF (dr) != expr) | |
1630 | /* Build array data_reference struct if the existing DR_REF | |
1631 | doesn't match EXPR. This happens, for example, when the | |
1632 | EXPR is *T and T is initialized to &arr[indx]. The DR struct | |
1633 | contains information on the access of T, not of arr. In order | |
1634 | to continue the analysis, we create a new DR struct that | |
1635 | describes the access of arr. | |
1636 | */ | |
1637 | array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr)); | |
1638 | else | |
1639 | array_dr = dr; | |
1640 | ||
1641 | next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo, | |
1642 | vectype, &this_offset); | |
1643 | if (!next_ref) | |
79fe1b3b | 1644 | return NULL_TREE; |
79fe1b3b | 1645 | |
6775f1f3 IR |
1646 | if (vectype && |
1647 | TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype)) | |
79fe1b3b | 1648 | { |
6775f1f3 IR |
1649 | *offset = this_offset; |
1650 | *base_aligned_p = true; | |
1651 | return next_ref; | |
1652 | } | |
1653 | break; | |
79fe1b3b | 1654 | |
6775f1f3 IR |
1655 | case PLUS_EXPR: |
1656 | case MINUS_EXPR: | |
1657 | /* In case we have a PLUS_EXPR of the form | |
1658 | (oprnd0 + oprnd1), we assume that only oprnd0 determines the base. | |
1659 | This is verified in vect_get_symbl_and_dr. */ | |
1660 | oprnd0 = TREE_OPERAND (expr, 0); | |
1661 | oprnd1 = TREE_OPERAND (expr, 1); | |
1662 | ||
1663 | base = vect_get_base_and_bit_offset | |
1664 | (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p); | |
1665 | if (vectype && !base) | |
1666 | return NULL_TREE; | |
79fe1b3b | 1667 | |
6775f1f3 IR |
1668 | next_ref = oprnd0; |
1669 | break; | |
79fe1b3b | 1670 | |
6775f1f3 IR |
1671 | default: |
1672 | return NULL_TREE; | |
79fe1b3b DN |
1673 | } |
1674 | ||
6775f1f3 IR |
1675 | base = vect_get_base_and_bit_offset (dr, next_ref, vectype, |
1676 | loop_vinfo, offset, base_aligned_p); | |
1677 | ||
1678 | if (vectype && base) | |
1679 | { | |
1680 | *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1); | |
1681 | if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset)) | |
1682 | return NULL_TREE; | |
1683 | ||
1684 | if (vect_debug_details (NULL)) | |
1685 | { | |
1686 | print_generic_expr (dump_file, expr, TDF_SLIM); | |
1687 | fprintf (dump_file, " --> total offset for ref: "); | |
1688 | print_generic_expr (dump_file, *offset, TDF_SLIM); | |
1689 | } | |
1690 | } | |
1691 | return base; | |
79fe1b3b DN |
1692 | } |
1693 | ||
1694 | ||
1695 | /* Function vect_force_dr_alignment_p. | |
1696 | ||
1697 | Returns whether the alignment of a DECL can be forced to be aligned | |
1698 | on ALIGNMENT bit boundary. */ | |
1699 | ||
1700 | static bool | |
1701 | vect_can_force_dr_alignment_p (tree decl, unsigned int alignment) | |
1702 | { | |
1703 | if (TREE_CODE (decl) != VAR_DECL) | |
1704 | return false; | |
1705 | ||
1706 | if (DECL_EXTERNAL (decl)) | |
1707 | return false; | |
1708 | ||
d75bf0ca DN |
1709 | if (TREE_ASM_WRITTEN (decl)) |
1710 | return false; | |
1711 | ||
79fe1b3b DN |
1712 | if (TREE_STATIC (decl)) |
1713 | return (alignment <= MAX_OFILE_ALIGNMENT); | |
1714 | else | |
7a8554ce DN |
1715 | /* This is not 100% correct. The absolute correct stack alignment |
1716 | is STACK_BOUNDARY. We're supposed to hope, but not assume, that | |
1717 | PREFERRED_STACK_BOUNDARY is honored by all translation units. | |
1718 | However, until someone implements forced stack alignment, SSE | |
1719 | isn't really usable without this. */ | |
1720 | return (alignment <= PREFERRED_STACK_BOUNDARY); | |
79fe1b3b DN |
1721 | } |
1722 | ||
1723 | ||
1724 | /* Function vect_get_new_vect_var. | |
1725 | ||
1726 | Returns a name for a new variable. The current naming scheme appends the | |
1727 | prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to | |
1728 | the name of vectorizer generated variables, and appends that to NAME if | |
1729 | provided. */ | |
1730 | ||
1731 | static tree | |
1732 | vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) | |
1733 | { | |
1734 | const char *prefix; | |
1735 | int prefix_len; | |
1736 | tree new_vect_var; | |
1737 | ||
1738 | if (var_kind == vect_simple_var) | |
1739 | prefix = "vect_"; | |
1740 | else | |
1741 | prefix = "vect_p"; | |
1742 | ||
1743 | prefix_len = strlen (prefix); | |
1744 | ||
1745 | if (name) | |
1746 | new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); | |
1747 | else | |
1748 | new_vect_var = create_tmp_var (type, prefix); | |
1749 | ||
1750 | return new_vect_var; | |
1751 | } | |
1752 | ||
1753 | ||
6775f1f3 | 1754 | /* Function vect_create_index_for_vector_ref. |
79fe1b3b DN |
1755 | |
1756 | Create (and return) an index variable, along with it's update chain in the | |
1757 | loop. This variable will be used to access a memory location in a vector | |
1758 | operation. | |
1759 | ||
1760 | Input: | |
6775f1f3 | 1761 | LOOP: The loop being vectorized. |
79fe1b3b DN |
1762 | BSI: The block_stmt_iterator where STMT is. Any new stmts created by this |
1763 | function can be added here, or in the loop pre-header. | |
1764 | ||
6775f1f3 IR |
1765 | Output: |
1766 | Return an index that will be used to index a vector array. It is expected | |
1767 | that a pointer to the first vector will be used as the base address for the | |
1768 | indexed reference. | |
1769 | ||
1770 | FORNOW: we are not trying to be efficient, just creating a new index each | |
1771 | time from scratch. At this time all vector references could use the same | |
1772 | index. | |
1773 | ||
1774 | TODO: create only one index to be used by all vector references. Record | |
1775 | the index in the LOOP_VINFO the first time this procedure is called and | |
1776 | return it on subsequent calls. The increment of this index must be placed | |
1777 | just before the conditional expression that ends the single block loop. */ | |
79fe1b3b DN |
1778 | |
1779 | static tree | |
6775f1f3 | 1780 | vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi) |
79fe1b3b | 1781 | { |
79fe1b3b | 1782 | tree init, step; |
79fe1b3b | 1783 | tree indx_before_incr, indx_after_incr; |
79fe1b3b | 1784 | |
6775f1f3 IR |
1785 | /* It is assumed that the base pointer used for vectorized access contains |
1786 | the address of the first vector. Therefore the index used for vectorized | |
1787 | access must be initialized to zero and incremented by 1. */ | |
79fe1b3b | 1788 | |
6775f1f3 IR |
1789 | init = integer_zero_node; |
1790 | step = integer_one_node; | |
1791 | ||
1792 | /* Assuming that bsi_insert is used with BSI_NEW_STMT */ | |
1793 | create_iv (init, step, NULL_TREE, loop, bsi, false, | |
1794 | &indx_before_incr, &indx_after_incr); | |
79fe1b3b | 1795 | |
6775f1f3 IR |
1796 | return indx_before_incr; |
1797 | } | |
79fe1b3b | 1798 | |
79fe1b3b | 1799 | |
6775f1f3 | 1800 | /* Function vect_create_addr_base_for_vector_ref. |
79fe1b3b | 1801 | |
6775f1f3 IR |
1802 | Create an expression that computes the address of the first memory location |
1803 | that will be accessed for a data reference. | |
79fe1b3b | 1804 | |
6775f1f3 IR |
1805 | Input: |
1806 | STMT: The statement containing the data reference. | |
7ccf35ed DN |
1807 | NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. |
1808 | OFFSET: Optional. If supplied, it is be added to the initial address. | |
79fe1b3b | 1809 | |
6775f1f3 | 1810 | Output: |
a023975e OG |
1811 | 1. Return an SSA_NAME whose value is the address of the memory location of |
1812 | the first vector of the data reference. | |
6775f1f3 IR |
1813 | 2. If new_stmt_list is not NULL_TREE after return then the caller must insert |
1814 | these statement(s) which define the returned SSA_NAME. | |
79fe1b3b | 1815 | |
6775f1f3 | 1816 | FORNOW: We are only handling array accesses with step 1. */ |
79fe1b3b | 1817 | |
6775f1f3 IR |
1818 | static tree |
1819 | vect_create_addr_base_for_vector_ref (tree stmt, | |
7ccf35ed DN |
1820 | tree *new_stmt_list, |
1821 | tree offset) | |
6775f1f3 IR |
1822 | { |
1823 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1824 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1825 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); | |
1826 | tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info)); | |
1827 | tree base_name = unshare_expr (DR_BASE_NAME (dr)); | |
1828 | tree ref = DR_REF (dr); | |
1829 | tree data_ref_base_type = TREE_TYPE (data_ref_base); | |
1830 | tree scalar_type = TREE_TYPE (ref); | |
1831 | tree scalar_ptr_type = build_pointer_type (scalar_type); | |
1832 | tree access_fn; | |
1833 | tree init_val, step, init_oval; | |
1834 | bool ok; | |
1835 | bool is_ptr_ref, is_array_ref, is_addr_expr; | |
1836 | tree array_base; | |
1837 | tree vec_stmt; | |
1838 | tree new_temp; | |
1839 | tree array_ref; | |
1840 | tree addr_base, addr_expr; | |
1841 | tree dest, new_stmt; | |
79fe1b3b | 1842 | |
6775f1f3 | 1843 | /* Only the access function of the last index is relevant (i_n in |
471854f8 | 1844 | a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */ |
6775f1f3 | 1845 | access_fn = DR_ACCESS_FN (dr, 0); |
a023975e OG |
1846 | ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, |
1847 | true); | |
6775f1f3 IR |
1848 | if (!ok) |
1849 | init_oval = integer_zero_node; | |
1850 | ||
1851 | is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE | |
1852 | && TREE_CODE (data_ref_base) == SSA_NAME; | |
322ae40b | 1853 | is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE; |
6775f1f3 IR |
1854 | is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR |
1855 | || TREE_CODE (data_ref_base) == PLUS_EXPR | |
1856 | || TREE_CODE (data_ref_base) == MINUS_EXPR; | |
1857 | gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr); | |
1858 | ||
1859 | /** Create: &(base[init_val]) | |
1860 | ||
1861 | if data_ref_base is an ARRAY_TYPE: | |
1862 | base = data_ref_base | |
1863 | ||
1864 | if data_ref_base is the SSA_NAME of a POINTER_TYPE: | |
1865 | base = *((scalar_array *) data_ref_base) | |
1866 | **/ | |
1867 | ||
1868 | if (is_array_ref) | |
1869 | array_base = data_ref_base; | |
1870 | else /* is_ptr_ref or is_addr_expr */ | |
1871 | { | |
1872 | /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */ | |
1873 | tree scalar_array_type = build_array_type (scalar_type, 0); | |
1874 | tree scalar_array_ptr_type = build_pointer_type (scalar_array_type); | |
1875 | tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr"); | |
1876 | add_referenced_tmp_var (array_ptr); | |
1877 | ||
1878 | dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref"); | |
1879 | add_referenced_tmp_var (dest); | |
7ccf35ed DN |
1880 | data_ref_base = |
1881 | force_gimple_operand (data_ref_base, &new_stmt, false, dest); | |
6775f1f3 IR |
1882 | append_to_statement_list_force (new_stmt, new_stmt_list); |
1883 | ||
1884 | vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base); | |
1885 | vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt); | |
1886 | new_temp = make_ssa_name (array_ptr, vec_stmt); | |
1887 | TREE_OPERAND (vec_stmt, 0) = new_temp; | |
1888 | append_to_statement_list_force (vec_stmt, new_stmt_list); | |
1889 | ||
1890 | /* (*array_ptr) */ | |
1891 | array_base = build_fold_indirect_ref (new_temp); | |
1892 | } | |
1893 | ||
1894 | dest = create_tmp_var (TREE_TYPE (init_oval), "newinit"); | |
1895 | add_referenced_tmp_var (dest); | |
1896 | init_val = force_gimple_operand (init_oval, &new_stmt, false, dest); | |
1897 | append_to_statement_list_force (new_stmt, new_stmt_list); | |
1898 | ||
7ccf35ed DN |
1899 | if (offset) |
1900 | { | |
1901 | tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset"); | |
1902 | add_referenced_tmp_var (tmp); | |
1903 | vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset); | |
1904 | vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt); | |
1905 | init_val = make_ssa_name (tmp, vec_stmt); | |
1906 | TREE_OPERAND (vec_stmt, 0) = init_val; | |
1907 | append_to_statement_list_force (vec_stmt, new_stmt_list); | |
1908 | } | |
1909 | ||
6775f1f3 IR |
1910 | array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val, |
1911 | NULL_TREE, NULL_TREE); | |
1912 | addr_base = build_fold_addr_expr (array_ref); | |
1913 | ||
1914 | /* addr_expr = addr_base */ | |
1915 | addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var, | |
1916 | get_name (base_name)); | |
1917 | add_referenced_tmp_var (addr_expr); | |
1918 | vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base); | |
1919 | new_temp = make_ssa_name (addr_expr, vec_stmt); | |
1920 | TREE_OPERAND (vec_stmt, 0) = new_temp; | |
1921 | append_to_statement_list_force (vec_stmt, new_stmt_list); | |
7ccf35ed | 1922 | |
6775f1f3 | 1923 | return new_temp; |
79fe1b3b DN |
1924 | } |
1925 | ||
1926 | ||
1927 | /* Function get_vectype_for_scalar_type. | |
1928 | ||
1929 | Returns the vector type corresponding to SCALAR_TYPE as supported | |
1930 | by the target. */ | |
1931 | ||
1932 | static tree | |
1933 | get_vectype_for_scalar_type (tree scalar_type) | |
1934 | { | |
1935 | enum machine_mode inner_mode = TYPE_MODE (scalar_type); | |
1936 | int nbytes = GET_MODE_SIZE (inner_mode); | |
1937 | int nunits; | |
6775f1f3 | 1938 | tree vectype; |
79fe1b3b DN |
1939 | |
1940 | if (nbytes == 0) | |
1941 | return NULL_TREE; | |
1942 | ||
1943 | /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) | |
1944 | is expected. */ | |
1945 | nunits = UNITS_PER_SIMD_WORD / nbytes; | |
1946 | ||
6775f1f3 | 1947 | vectype = build_vector_type (scalar_type, nunits); |
f0923257 DN |
1948 | if (vect_debug_details (NULL)) |
1949 | { | |
1950 | fprintf (dump_file, "get vectype with %d units of type ", nunits); | |
1951 | print_generic_expr (dump_file, scalar_type, TDF_SLIM); | |
1952 | } | |
1953 | ||
1954 | if (!vectype) | |
6775f1f3 | 1955 | return NULL_TREE; |
f0923257 DN |
1956 | |
1957 | if (vect_debug_details (NULL)) | |
1958 | { | |
1959 | fprintf (dump_file, "vectype: "); | |
1960 | print_generic_expr (dump_file, vectype, TDF_SLIM); | |
1961 | } | |
1962 | ||
1963 | if (!VECTOR_MODE_P (TYPE_MODE (vectype))) | |
1964 | { | |
1965 | /* TODO: tree-complex.c sometimes can parallelize operations | |
1966 | on generic vectors. We can vectorize the loop in that case, | |
1967 | but then we should re-run the lowering pass. */ | |
1968 | if (vect_debug_details (NULL)) | |
1969 | fprintf (dump_file, "mode not supported by target."); | |
1970 | return NULL_TREE; | |
1971 | } | |
1972 | ||
6775f1f3 | 1973 | return vectype; |
79fe1b3b DN |
1974 | } |
1975 | ||
1976 | ||
1977 | /* Function vect_align_data_ref. | |
1978 | ||
1979 | Handle mislignment of a memory accesses. | |
1980 | ||
1981 | FORNOW: Can't handle misaligned accesses. | |
1982 | Make sure that the dataref is aligned. */ | |
1983 | ||
1984 | static void | |
1985 | vect_align_data_ref (tree stmt) | |
1986 | { | |
1987 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1988 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); | |
1989 | ||
1990 | /* FORNOW: can't handle misaligned accesses; | |
1991 | all accesses expected to be aligned. */ | |
1e128c5f | 1992 | gcc_assert (aligned_access_p (dr)); |
79fe1b3b DN |
1993 | } |
1994 | ||
1995 | ||
7ccf35ed | 1996 | /* Function vect_create_data_ref_ptr. |
79fe1b3b DN |
1997 | |
1998 | Create a memory reference expression for vector access, to be used in a | |
7ccf35ed DN |
1999 | vector load/store stmt. The reference is based on a new pointer to vector |
2000 | type (vp). | |
79fe1b3b DN |
2001 | |
2002 | Input: | |
7ccf35ed DN |
2003 | 1. STMT: a stmt that references memory. Expected to be of the form |
2004 | MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. | |
2005 | 2. BSI: block_stmt_iterator where new stmts can be added. | |
2006 | 3. OFFSET (optional): an offset to be added to the initial address accessed | |
2007 | by the data-ref in STMT. | |
2008 | 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain | |
2009 | pointing to the initial address. | |
79fe1b3b DN |
2010 | |
2011 | Output: | |
7ccf35ed DN |
2012 | 1. Declare a new ptr to vector_type, and have it point to the base of the |
2013 | data reference (initial addressed accessed by the data reference). | |
2014 | For example, for vector of type V8HI, the following code is generated: | |
2015 | ||
2016 | v8hi *vp; | |
2017 | vp = (v8hi *)initial_address; | |
2018 | ||
2019 | if OFFSET is not supplied: | |
2020 | initial_address = &a[init]; | |
2021 | if OFFSET is supplied: | |
2022 | initial_address = &a[init + OFFSET]; | |
2023 | ||
2024 | Return the initial_address in INITIAL_ADDRESS. | |
2025 | ||
2026 | 2. Create a data-reference in the loop based on the new vector pointer vp, | |
2027 | and using a new index variable 'idx' as follows: | |
2028 | ||
2029 | vp' = vp + update | |
2030 | ||
2031 | where if ONLY_INIT is true: | |
2032 | update = zero | |
2033 | and otherwise | |
2034 | update = idx + vector_type_size | |
2035 | ||
2036 | Return the pointer vp'. | |
2037 | ||
79fe1b3b DN |
2038 | |
2039 | FORNOW: handle only aligned and consecutive accesses. */ | |
2040 | ||
2041 | static tree | |
7ccf35ed DN |
2042 | vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset, |
2043 | tree *initial_address, bool only_init) | |
79fe1b3b | 2044 | { |
7ccf35ed | 2045 | tree base_name; |
79fe1b3b | 2046 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |
6775f1f3 IR |
2047 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
2048 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
79fe1b3b DN |
2049 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2050 | tree vect_ptr_type; | |
2051 | tree vect_ptr; | |
79fe1b3b | 2052 | tree tag; |
6775f1f3 IR |
2053 | v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt); |
2054 | v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
2055 | vuse_optype vuses = STMT_VUSE_OPS (stmt); | |
2056 | int nvuses, nv_may_defs, nv_must_defs; | |
2057 | int i; | |
2058 | tree new_temp; | |
2059 | tree vec_stmt; | |
2060 | tree new_stmt_list = NULL_TREE; | |
2061 | tree idx; | |
7ccf35ed | 2062 | edge pe = loop_preheader_edge (loop); |
6775f1f3 | 2063 | basic_block new_bb; |
7ccf35ed DN |
2064 | tree vect_ptr_init; |
2065 | tree vectype_size; | |
2066 | tree ptr_update; | |
2067 | tree data_ref_ptr; | |
e088c552 | 2068 | tree type, tmp, size; |
79fe1b3b | 2069 | |
6775f1f3 | 2070 | base_name = unshare_expr (DR_BASE_NAME (dr)); |
79fe1b3b DN |
2071 | if (vect_debug_details (NULL)) |
2072 | { | |
7ccf35ed | 2073 | tree data_ref_base = base_name; |
79fe1b3b DN |
2074 | fprintf (dump_file, "create array_ref of type: "); |
2075 | print_generic_expr (dump_file, vectype, TDF_SLIM); | |
6775f1f3 | 2076 | if (TREE_CODE (data_ref_base) == VAR_DECL) |
1de6a873 | 2077 | fprintf (dump_file, "\nvectorizing a one dimensional array ref: "); |
6775f1f3 | 2078 | else if (TREE_CODE (data_ref_base) == ARRAY_REF) |
1de6a873 | 2079 | fprintf (dump_file, "\nvectorizing a multidimensional array ref: "); |
6775f1f3 | 2080 | else if (TREE_CODE (data_ref_base) == COMPONENT_REF) |
1de6a873 | 2081 | fprintf (dump_file, "\nvectorizing a record based array ref: "); |
6775f1f3 | 2082 | else if (TREE_CODE (data_ref_base) == SSA_NAME) |
1de6a873 | 2083 | fprintf (dump_file, "\nvectorizing a pointer ref: "); |
6775f1f3 | 2084 | print_generic_expr (dump_file, base_name, TDF_SLIM); |
79fe1b3b DN |
2085 | } |
2086 | ||
7ccf35ed DN |
2087 | /** (1) Create the new vector-pointer variable: **/ |
2088 | ||
2089 | vect_ptr_type = build_pointer_type (vectype); | |
2090 | vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, | |
2091 | get_name (base_name)); | |
2092 | add_referenced_tmp_var (vect_ptr); | |
2093 | ||
2094 | ||
2095 | /** (2) Handle aliasing information of the new vector-pointer: **/ | |
2096 | ||
79fe1b3b | 2097 | tag = STMT_VINFO_MEMTAG (stmt_info); |
1e128c5f | 2098 | gcc_assert (tag); |
79fe1b3b | 2099 | get_var_ann (vect_ptr)->type_mem_tag = tag; |
7ccf35ed | 2100 | |
79fe1b3b | 2101 | /* Mark for renaming all aliased variables |
6775f1f3 IR |
2102 | (i.e, the may-aliases of the type-mem-tag). */ |
2103 | nvuses = NUM_VUSES (vuses); | |
2104 | nv_may_defs = NUM_V_MAY_DEFS (v_may_defs); | |
2105 | nv_must_defs = NUM_V_MUST_DEFS (v_must_defs); | |
2106 | for (i = 0; i < nvuses; i++) | |
79fe1b3b | 2107 | { |
6775f1f3 | 2108 | tree use = VUSE_OP (vuses, i); |
79fe1b3b DN |
2109 | if (TREE_CODE (use) == SSA_NAME) |
2110 | bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid); | |
2111 | } | |
6775f1f3 IR |
2112 | for (i = 0; i < nv_may_defs; i++) |
2113 | { | |
2114 | tree def = V_MAY_DEF_RESULT (v_may_defs, i); | |
2115 | if (TREE_CODE (def) == SSA_NAME) | |
2116 | bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); | |
2117 | } | |
2118 | for (i = 0; i < nv_must_defs; i++) | |
2119 | { | |
52328bf6 | 2120 | tree def = V_MUST_DEF_RESULT (v_must_defs, i); |
6775f1f3 IR |
2121 | if (TREE_CODE (def) == SSA_NAME) |
2122 | bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); | |
2123 | } | |
79fe1b3b | 2124 | |
79fe1b3b | 2125 | |
7ccf35ed DN |
2126 | /** (3) Calculate the initial address the vector-pointer, and set |
2127 | the vector-pointer to point to it before the loop: **/ | |
79fe1b3b | 2128 | |
7ccf35ed DN |
2129 | /* Create: (&(base[init_val+offset]) in the loop preheader. */ |
2130 | new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, | |
2131 | offset); | |
2132 | pe = loop_preheader_edge (loop); | |
2133 | new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); | |
2134 | gcc_assert (!new_bb); | |
2135 | *initial_address = new_temp; | |
2136 | ||
2137 | /* Create: p = (vectype *) initial_base */ | |
6775f1f3 | 2138 | vec_stmt = fold_convert (vect_ptr_type, new_temp); |
79fe1b3b DN |
2139 | vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); |
2140 | new_temp = make_ssa_name (vect_ptr, vec_stmt); | |
2141 | TREE_OPERAND (vec_stmt, 0) = new_temp; | |
7ccf35ed DN |
2142 | new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); |
2143 | gcc_assert (!new_bb); | |
2144 | vect_ptr_init = TREE_OPERAND (vec_stmt, 0); | |
2145 | ||
2146 | ||
2147 | /** (4) Handle the updating of the vector-pointer inside the loop: **/ | |
2148 | ||
2149 | if (only_init) /* No update in loop is required. */ | |
2150 | return vect_ptr_init; | |
79fe1b3b | 2151 | |
6775f1f3 | 2152 | idx = vect_create_index_for_vector_ref (loop, bsi); |
79fe1b3b | 2153 | |
7ccf35ed | 2154 | /* Create: update = idx * vectype_size */ |
e088c552 DN |
2155 | tmp = create_tmp_var (integer_type_node, "update"); |
2156 | add_referenced_tmp_var (tmp); | |
2157 | size = TYPE_SIZE (vect_ptr_type); | |
2158 | type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); | |
2159 | ptr_update = create_tmp_var (type, "update"); | |
7ccf35ed DN |
2160 | add_referenced_tmp_var (ptr_update); |
2161 | vectype_size = build_int_cst (integer_type_node, | |
2162 | GET_MODE_SIZE (TYPE_MODE (vectype))); | |
2163 | vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size); | |
e088c552 DN |
2164 | vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt); |
2165 | new_temp = make_ssa_name (tmp, vec_stmt); | |
2166 | TREE_OPERAND (vec_stmt, 0) = new_temp; | |
2167 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
2168 | vec_stmt = fold_convert (type, new_temp); | |
7ccf35ed DN |
2169 | vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt); |
2170 | new_temp = make_ssa_name (ptr_update, vec_stmt); | |
2171 | TREE_OPERAND (vec_stmt, 0) = new_temp; | |
2172 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
79fe1b3b | 2173 | |
7ccf35ed DN |
2174 | /* Create: data_ref_ptr = vect_ptr_init + update */ |
2175 | vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp); | |
2176 | vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); | |
2177 | new_temp = make_ssa_name (vect_ptr, vec_stmt); | |
2178 | TREE_OPERAND (vec_stmt, 0) = new_temp; | |
2179 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
2180 | data_ref_ptr = TREE_OPERAND (vec_stmt, 0); | |
2181 | ||
2182 | return data_ref_ptr; | |
79fe1b3b DN |
2183 | } |
2184 | ||
2185 | ||
2186 | /* Function vect_create_destination_var. | |
2187 | ||
2188 | Create a new temporary of type VECTYPE. */ | |
2189 | ||
2190 | static tree | |
2191 | vect_create_destination_var (tree scalar_dest, tree vectype) | |
2192 | { | |
2193 | tree vec_dest; | |
2194 | const char *new_name; | |
2195 | ||
1e128c5f | 2196 | gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); |
79fe1b3b DN |
2197 | |
2198 | new_name = get_name (scalar_dest); | |
2199 | if (!new_name) | |
2200 | new_name = "var_"; | |
2201 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); | |
2202 | add_referenced_tmp_var (vec_dest); | |
2203 | ||
2204 | return vec_dest; | |
2205 | } | |
2206 | ||
2207 | ||
2208 | /* Function vect_init_vector. | |
2209 | ||
2210 | Insert a new stmt (INIT_STMT) that initializes a new vector variable with | |
2211 | the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be | |
2212 | used in the vectorization of STMT. */ | |
2213 | ||
2214 | static tree | |
2215 | vect_init_vector (tree stmt, tree vector_var) | |
2216 | { | |
2217 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
2218 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
2219 | tree new_var; | |
2220 | tree init_stmt; | |
2221 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
2222 | tree vec_oprnd; | |
2223 | edge pe; | |
2224 | tree new_temp; | |
6775f1f3 | 2225 | basic_block new_bb; |
79fe1b3b DN |
2226 | |
2227 | new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); | |
2228 | add_referenced_tmp_var (new_var); | |
2229 | ||
2230 | init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); | |
2231 | new_temp = make_ssa_name (new_var, init_stmt); | |
2232 | TREE_OPERAND (init_stmt, 0) = new_temp; | |
2233 | ||
2234 | pe = loop_preheader_edge (loop); | |
6775f1f3 IR |
2235 | new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); |
2236 | gcc_assert (!new_bb); | |
79fe1b3b DN |
2237 | |
2238 | if (vect_debug_details (NULL)) | |
2239 | { | |
2240 | fprintf (dump_file, "created new init_stmt: "); | |
2241 | print_generic_expr (dump_file, init_stmt, TDF_SLIM); | |
2242 | } | |
2243 | ||
2244 | vec_oprnd = TREE_OPERAND (init_stmt, 0); | |
2245 | return vec_oprnd; | |
2246 | } | |
2247 | ||
2248 | ||
2249 | /* Function vect_get_vec_def_for_operand. | |
2250 | ||
2251 | OP is an operand in STMT. This function returns a (vector) def that will be | |
2252 | used in the vectorized stmt for STMT. | |
2253 | ||
2254 | In the case that OP is an SSA_NAME which is defined in the loop, then | |
2255 | STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. | |
2256 | ||
2257 | In case OP is an invariant or constant, a new stmt that creates a vector def | |
2258 | needs to be introduced. */ | |
2259 | ||
2260 | static tree | |
2261 | vect_get_vec_def_for_operand (tree op, tree stmt) | |
2262 | { | |
2263 | tree vec_oprnd; | |
2264 | tree vec_stmt; | |
2265 | tree def_stmt; | |
2266 | stmt_vec_info def_stmt_info = NULL; | |
2267 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
2268 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
2269 | int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
2270 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
2271 | basic_block bb; | |
2272 | tree vec_inv; | |
2273 | tree t = NULL_TREE; | |
2274 | tree def; | |
2275 | int i; | |
2276 | ||
2277 | if (vect_debug_details (NULL)) | |
2278 | { | |
2279 | fprintf (dump_file, "vect_get_vec_def_for_operand: "); | |
2280 | print_generic_expr (dump_file, op, TDF_SLIM); | |
2281 | } | |
2282 | ||
2283 | /** ===> Case 1: operand is a constant. **/ | |
2284 | ||
2285 | if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST) | |
2286 | { | |
2287 | /* Create 'vect_cst_ = {cst,cst,...,cst}' */ | |
2288 | ||
2289 | tree vec_cst; | |
79fe1b3b DN |
2290 | |
2291 | /* Build a tree with vector elements. */ | |
2292 | if (vect_debug_details (NULL)) | |
2293 | fprintf (dump_file, "Create vector_cst. nunits = %d", nunits); | |
2294 | ||
2295 | for (i = nunits - 1; i >= 0; --i) | |
2296 | { | |
2297 | t = tree_cons (NULL_TREE, op, t); | |
2298 | } | |
2299 | vec_cst = build_vector (vectype, t); | |
2300 | return vect_init_vector (stmt, vec_cst); | |
2301 | } | |
2302 | ||
1e128c5f | 2303 | gcc_assert (TREE_CODE (op) == SSA_NAME); |
79fe1b3b DN |
2304 | |
2305 | /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/ | |
2306 | ||
2307 | def_stmt = SSA_NAME_DEF_STMT (op); | |
2308 | def_stmt_info = vinfo_for_stmt (def_stmt); | |
2309 | ||
2310 | if (vect_debug_details (NULL)) | |
2311 | { | |
2312 | fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: "); | |
2313 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
2314 | } | |
2315 | ||
2316 | ||
2317 | /** ==> Case 2.1: operand is defined inside the loop. **/ | |
2318 | ||
2319 | if (def_stmt_info) | |
2320 | { | |
2321 | /* Get the def from the vectorized stmt. */ | |
2322 | ||
2323 | vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); | |
1e128c5f | 2324 | gcc_assert (vec_stmt); |
79fe1b3b DN |
2325 | vec_oprnd = TREE_OPERAND (vec_stmt, 0); |
2326 | return vec_oprnd; | |
2327 | } | |
2328 | ||
2329 | ||
2330 | /** ==> Case 2.2: operand is defined by the loop-header phi-node - | |
2331 | it is a reduction/induction. **/ | |
2332 | ||
2333 | bb = bb_for_stmt (def_stmt); | |
2334 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
2335 | { | |
2336 | if (vect_debug_details (NULL)) | |
2337 | fprintf (dump_file, "reduction/induction - unsupported."); | |
1e128c5f | 2338 | internal_error ("no support for reduction/induction"); /* FORNOW */ |
79fe1b3b DN |
2339 | } |
2340 | ||
2341 | ||
2342 | /** ==> Case 2.3: operand is defined outside the loop - | |
2343 | it is a loop invariant. */ | |
2344 | ||
2345 | switch (TREE_CODE (def_stmt)) | |
2346 | { | |
2347 | case PHI_NODE: | |
2348 | def = PHI_RESULT (def_stmt); | |
2349 | break; | |
2350 | case MODIFY_EXPR: | |
2351 | def = TREE_OPERAND (def_stmt, 0); | |
2352 | break; | |
2353 | case NOP_EXPR: | |
2354 | def = TREE_OPERAND (def_stmt, 0); | |
1e128c5f | 2355 | gcc_assert (IS_EMPTY_STMT (def_stmt)); |
79fe1b3b DN |
2356 | def = op; |
2357 | break; | |
2358 | default: | |
2359 | if (vect_debug_details (NULL)) | |
2360 | { | |
2361 | fprintf (dump_file, "unsupported defining stmt: "); | |
2362 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
2363 | } | |
1e128c5f | 2364 | internal_error ("unsupported defining stmt"); |
79fe1b3b DN |
2365 | } |
2366 | ||
2367 | /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */ | |
2368 | ||
2369 | if (vect_debug_details (NULL)) | |
2370 | fprintf (dump_file, "Create vector_inv."); | |
2371 | ||
2372 | for (i = nunits - 1; i >= 0; --i) | |
2373 | { | |
2374 | t = tree_cons (NULL_TREE, def, t); | |
2375 | } | |
2376 | ||
2377 | vec_inv = build_constructor (vectype, t); | |
2378 | return vect_init_vector (stmt, vec_inv); | |
2379 | } | |
2380 | ||
2381 | ||
2382 | /* Function vect_finish_stmt_generation. | |
2383 | ||
2384 | Insert a new stmt. */ | |
2385 | ||
2386 | static void | |
2387 | vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) | |
2388 | { | |
2389 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
2390 | ||
2391 | if (vect_debug_details (NULL)) | |
2392 | { | |
2393 | fprintf (dump_file, "add new stmt: "); | |
2394 | print_generic_expr (dump_file, vec_stmt, TDF_SLIM); | |
2395 | } | |
2396 | ||
2397 | /* Make sure bsi points to the stmt that is being vectorized. */ | |
2398 | ||
7ccf35ed | 2399 | /* Assumption: any stmts created for the vectorization of stmt S were |
63dfe6ff DN |
2400 | inserted before S. BSI is expected to point to S or some new stmt before S. |
2401 | */ | |
79fe1b3b DN |
2402 | |
2403 | while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) | |
2404 | bsi_next (bsi); | |
1e128c5f | 2405 | gcc_assert (stmt == bsi_stmt (*bsi)); |
79fe1b3b DN |
2406 | } |
2407 | ||
2408 | ||
2409 | /* Function vectorizable_assignment. | |
2410 | ||
2411 | Check if STMT performs an assignment (copy) that can be vectorized. | |
2412 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
2413 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
2414 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
2415 | ||
2416 | static bool | |
2417 | vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
2418 | { | |
2419 | tree vec_dest; | |
2420 | tree scalar_dest; | |
2421 | tree op; | |
2422 | tree vec_oprnd; | |
2423 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2424 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
2425 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2426 | tree new_temp; | |
2427 | ||
2428 | /* Is vectorizable assignment? */ | |
2429 | ||
2430 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2431 | return false; | |
2432 | ||
2433 | scalar_dest = TREE_OPERAND (stmt, 0); | |
2434 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
2435 | return false; | |
2436 | ||
2437 | op = TREE_OPERAND (stmt, 1); | |
2438 | if (!vect_is_simple_use (op, loop, NULL)) | |
2439 | { | |
2440 | if (vect_debug_details (NULL)) | |
2441 | fprintf (dump_file, "use not simple."); | |
2442 | return false; | |
2443 | } | |
2444 | ||
2445 | if (!vec_stmt) /* transformation not required. */ | |
2446 | { | |
2447 | STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; | |
2448 | return true; | |
2449 | } | |
2450 | ||
2451 | /** Trasform. **/ | |
2452 | if (vect_debug_details (NULL)) | |
2453 | fprintf (dump_file, "transform assignment."); | |
2454 | ||
2455 | /* Handle def. */ | |
2456 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
2457 | ||
2458 | /* Handle use. */ | |
2459 | op = TREE_OPERAND (stmt, 1); | |
2460 | vec_oprnd = vect_get_vec_def_for_operand (op, stmt); | |
2461 | ||
2462 | /* Arguments are ready. create the new vector stmt. */ | |
2463 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); | |
2464 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
2465 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
2466 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
2467 | ||
2468 | return true; | |
2469 | } | |
2470 | ||
2471 | ||
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;

  /* Is STMT a vectorizable binary/unary operation? */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  /* The rhs expression determines both the tree code of the operation and
     (via the optab) whether the target can do it on vectors.  */
  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations. */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must be simple uses (loop-invariant or defined by a stmt
     we can find a vectorized counterpart for).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop, NULL))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target? */
  if (!optab)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "no optab.");
      return false;
    }
  /* Even with an optab, the target may lack an insn for this vector mode.  */
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "op not supported by target.");
      return false;
    }

  if (!vec_stmt) /* transformation not required. */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform. **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform binary/unary operation.");

  /* Handle def. */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses. */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
    }

  /* Arguments are ready.  Create the new vector stmt: the same tree CODE
     applied to the vectorized operands, assigned to a fresh SSA name.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
2583 | ||
2584 | ||
2585 | /* Function vectorizable_store. | |
2586 | ||
2587 | Check if STMT defines a non scalar data-ref (array/pointer/structure) that | |
2588 | can be vectorized. | |
2589 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
2590 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
2591 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
2592 | ||
2593 | static bool | |
2594 | vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
2595 | { | |
2596 | tree scalar_dest; | |
2597 | tree data_ref; | |
2598 | tree op; | |
2599 | tree vec_oprnd1; | |
2600 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
0dc0a70b | 2601 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
79fe1b3b DN |
2602 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2603 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
2604 | enum machine_mode vec_mode; | |
7ccf35ed | 2605 | tree dummy; |
0dc0a70b | 2606 | enum dr_alignment_support alignment_support_cheme; |
79fe1b3b DN |
2607 | |
2608 | /* Is vectorizable store? */ | |
2609 | ||
2610 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
2611 | return false; | |
2612 | ||
2613 | scalar_dest = TREE_OPERAND (stmt, 0); | |
2614 | if (TREE_CODE (scalar_dest) != ARRAY_REF | |
2615 | && TREE_CODE (scalar_dest) != INDIRECT_REF) | |
2616 | return false; | |
2617 | ||
2618 | op = TREE_OPERAND (stmt, 1); | |
2619 | if (!vect_is_simple_use (op, loop, NULL)) | |
2620 | { | |
2621 | if (vect_debug_details (NULL)) | |
2622 | fprintf (dump_file, "use not simple."); | |
2623 | return false; | |
2624 | } | |
2625 | ||
2626 | vec_mode = TYPE_MODE (vectype); | |
2627 | /* FORNOW. In some cases can vectorize even if data-type not supported | |
2628 | (e.g. - array initialization with 0). */ | |
2629 | if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) | |
2630 | return false; | |
2631 | ||
2632 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
2633 | return false; | |
2634 | ||
7ccf35ed | 2635 | |
79fe1b3b DN |
2636 | if (!vec_stmt) /* transformation not required. */ |
2637 | { | |
2638 | STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; | |
2639 | return true; | |
2640 | } | |
2641 | ||
2642 | /** Trasform. **/ | |
2643 | ||
2644 | if (vect_debug_details (NULL)) | |
2645 | fprintf (dump_file, "transform store"); | |
2646 | ||
0dc0a70b DN |
2647 | alignment_support_cheme = vect_supportable_dr_alignment (dr); |
2648 | gcc_assert (alignment_support_cheme); | |
2649 | gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */ | |
2650 | ||
79fe1b3b DN |
2651 | /* Handle use - get the vectorized def from the defining stmt. */ |
2652 | vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); | |
2653 | ||
2654 | /* Handle def. */ | |
7ccf35ed DN |
2655 | /* FORNOW: make sure the data reference is aligned. */ |
2656 | vect_align_data_ref (stmt); | |
2657 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); | |
2658 | data_ref = build_fold_indirect_ref (data_ref); | |
79fe1b3b DN |
2659 | |
2660 | /* Arguments are ready. create the new vector stmt. */ | |
2661 | *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); | |
2662 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
2663 | ||
2664 | return true; | |
2665 | } | |
2666 | ||
2667 | ||
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load? */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  /* The lhs must be an SSA name; the rhs is the memory reference read.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies). */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (loop))
	fprintf (dump_file, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required. */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform. **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* A plain (possibly misaligned-but-supported) vector load.
	 Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Misaligned access: wrap the pointer in a
	     MISALIGNED_INDIRECT_REF carrying the misalignment in bits
	     (0 when the misalignment is unknown, DR_MISALIGNMENT == -1).  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ?
		       integer_zero_node :
		       build_int_cst (integer_type_node, mis));
	  tmis = int_const_binop (MULT_EXPR, tmis,
			build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Software-pipelined realignment: two aligned loads straddling the
	 desired data, combined with a realign_load operation.
	 Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader.
	 The insertion on the preheader edge must not split it.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);


      /* <2> Create lsq = *(floor(p2')) in the loop.  The pointer is
	 offset by VS - 1 elements so the second aligned load covers the
	 tail of the accessed data.  */
      offset = build_int_cst (integer_type_node,
			      GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);


      /* <3> Compute the "magic" third operand of realign_load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader. */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);

	  /* Since we have just created a CALL_EXPR, we may need to
	     rename call-clobbered variables. */
	  mark_call_clobbered_vars_to_rename ();
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg pressure.
	   */
	  magic = dataref_ptr;
	}


      /* <4> Create msq = phi <msq_init, lsq> in loop: carries the previous
	 iteration's low-side load across the backedge.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
2871 | ||
2872 | ||
0dc0a70b DN |
2873 | /* Function vect_supportable_dr_alignment |
2874 | ||
2875 | Return whether the data reference DR is supported with respect to its | |
2876 | alignment. */ | |
2877 | ||
2878 | static enum dr_alignment_support | |
2879 | vect_supportable_dr_alignment (struct data_reference *dr) | |
2880 | { | |
2881 | tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); | |
2882 | enum machine_mode mode = (int) TYPE_MODE (vectype); | |
2883 | ||
2884 | if (aligned_access_p (dr)) | |
2885 | return dr_aligned; | |
2886 | ||
2887 | /* Possibly unaligned access. */ | |
2888 | ||
2889 | if (DR_IS_READ (dr)) | |
2890 | { | |
2891 | if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing | |
2892 | && (!targetm.vectorize.builtin_mask_for_load | |
2893 | || targetm.vectorize.builtin_mask_for_load ())) | |
2894 | return dr_unaligned_software_pipeline; | |
2895 | ||
1e0598e2 RH |
2896 | if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing) |
2897 | /* Can't software pipeline the loads, but can at least do them. */ | |
0dc0a70b DN |
2898 | return dr_unaligned_supported; |
2899 | } | |
2900 | ||
2901 | /* Unsupported. */ | |
2902 | return dr_unaligned_unsupported; | |
2903 | } | |
2904 | ||
2905 | ||
a023975e OG |
2906 | /* Function vect_transform_stmt. |
2907 | ||
2908 | Create a vectorized stmt to replace STMT, and insert it at BSI. */ | |
2909 | ||
2910 | static bool | |
2911 | vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) | |
2912 | { | |
2913 | bool is_store = false; | |
2914 | tree vec_stmt = NULL_TREE; | |
2915 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2916 | bool done; | |
2917 | ||
2918 | switch (STMT_VINFO_TYPE (stmt_info)) | |
2919 | { | |
2920 | case op_vec_info_type: | |
2921 | done = vectorizable_operation (stmt, bsi, &vec_stmt); | |
2922 | gcc_assert (done); | |
2923 | break; | |
2924 | ||
2925 | case assignment_vec_info_type: | |
2926 | done = vectorizable_assignment (stmt, bsi, &vec_stmt); | |
2927 | gcc_assert (done); | |
2928 | break; | |
2929 | ||
2930 | case load_vec_info_type: | |
2931 | done = vectorizable_load (stmt, bsi, &vec_stmt); | |
2932 | gcc_assert (done); | |
2933 | break; | |
2934 | ||
2935 | case store_vec_info_type: | |
2936 | done = vectorizable_store (stmt, bsi, &vec_stmt); | |
2937 | gcc_assert (done); | |
2938 | is_store = true; | |
2939 | break; | |
2940 | default: | |
2941 | if (vect_debug_details (NULL)) | |
2942 | fprintf (dump_file, "stmt not supported."); | |
2943 | gcc_unreachable (); | |
2944 | } | |
2945 | ||
2946 | STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | |
2947 | ||
2948 | return is_store; | |
2949 | } | |
2950 | ||
2951 | ||
2952 | /* This function builds ni_name = number of iterations loop executes | |
2953 | on the loop preheader. */ | |
2954 | ||
2955 | static tree | |
2956 | vect_build_loop_niters (loop_vec_info loop_vinfo) | |
2957 | { | |
2958 | tree ni_name, stmt, var; | |
2959 | edge pe; | |
a023975e | 2960 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
618bb89c | 2961 | tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); |
a023975e OG |
2962 | |
2963 | var = create_tmp_var (TREE_TYPE (ni), "niters"); | |
2964 | add_referenced_tmp_var (var); | |
618bb89c | 2965 | ni_name = force_gimple_operand (ni, &stmt, false, var); |
a023975e OG |
2966 | |
2967 | pe = loop_preheader_edge (loop); | |
4951c3fd | 2968 | if (stmt) |
e9c00ceb DN |
2969 | { |
2970 | basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); | |
2971 | gcc_assert (!new_bb); | |
2972 | } | |
a023975e OG |
2973 | |
2974 | return ni_name; | |
2975 | } | |
2976 | ||
2977 | ||
2978 | /* This function generates the following statements: | |
2979 | ||
2980 | ni_name = number of iterations loop executes | |
2981 | ratio = ni_name / vf | |
2982 | ratio_mult_vf_name = ratio * vf | |
2983 | ||
2984 | and places them at the loop preheader edge. */ | |
2985 | ||
2986 | static void | |
e9c00ceb DN |
2987 | vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, |
2988 | tree *ni_name_ptr, | |
2989 | tree *ratio_mult_vf_name_ptr, | |
2990 | tree *ratio_name_ptr) | |
a023975e OG |
2991 | { |
2992 | ||
2993 | edge pe; | |
2994 | basic_block new_bb; | |
2995 | tree stmt, ni_name; | |
e9c00ceb DN |
2996 | tree var; |
2997 | tree ratio_name; | |
2998 | tree ratio_mult_vf_name; | |
a023975e | 2999 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
e9c00ceb DN |
3000 | tree ni = LOOP_VINFO_NITERS (loop_vinfo); |
3001 | int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
3002 | tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf)); | |
3003 | ||
3004 | pe = loop_preheader_edge (loop); | |
a023975e OG |
3005 | |
3006 | /* Generate temporary variable that contains | |
3007 | number of iterations loop executes. */ | |
3008 | ||
3009 | ni_name = vect_build_loop_niters (loop_vinfo); | |
3010 | ||
e9c00ceb | 3011 | /* Create: ratio = ni >> log2(vf) */ |
a023975e | 3012 | |
e9c00ceb DN |
3013 | var = create_tmp_var (TREE_TYPE (ni), "bnd"); |
3014 | add_referenced_tmp_var (var); | |
3015 | ratio_name = make_ssa_name (var, NULL_TREE); | |
3016 | stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name, | |
3017 | build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf)); | |
3018 | SSA_NAME_DEF_STMT (ratio_name) = stmt; | |
a023975e | 3019 | |
e9c00ceb DN |
3020 | pe = loop_preheader_edge (loop); |
3021 | new_bb = bsi_insert_on_edge_immediate (pe, stmt); | |
3022 | gcc_assert (!new_bb); | |
3023 | ||
3024 | /* Create: ratio_mult_vf = ratio << log2 (vf). */ | |
a023975e | 3025 | |
e9c00ceb DN |
3026 | var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); |
3027 | add_referenced_tmp_var (var); | |
3028 | ratio_mult_vf_name = make_ssa_name (var, NULL_TREE); | |
a023975e | 3029 | stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, |
e9c00ceb | 3030 | build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf)); |
a023975e OG |
3031 | SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; |
3032 | ||
3033 | pe = loop_preheader_edge (loop); | |
3034 | new_bb = bsi_insert_on_edge_immediate (pe, stmt); | |
e9c00ceb | 3035 | gcc_assert (!new_bb); |
a023975e | 3036 | |
e9c00ceb DN |
3037 | *ni_name_ptr = ni_name; |
3038 | *ratio_mult_vf_name_ptr = ratio_mult_vf_name; | |
3039 | *ratio_name_ptr = ratio_name; | |
a023975e OG |
3040 | |
3041 | return; | |
3042 | } | |
3043 | ||
3044 | ||
00803cd5 DN |
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
{
  basic_block exit_bb = loop->exit_edges[0]->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop)); */

  /* Make sure there exists a single-predecessor exit bb: */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);

  /* Walk the header phis and the update_bb phis in lockstep (Assumption 2:
     both chains list the same IVs in the same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      /* Skip virtual phi's. */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential. */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition (access_fn));

      /* Final value of the IV: ni = init + niters * step.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb. */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
	bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb: the arg on UPDATE_E must
	 currently be the value the IV had on the loop latch edge; replace
	 it with the advanced value.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
		  PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
      SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
    }
}
3150 | ||
3151 | ||
63dfe6ff DN |
3152 | /* Function vect_do_peeling_for_loop_bound |
3153 | ||
3154 | Peel the last iterations of the loop represented by LOOP_VINFO. | |
3155 | The peeled iterations form a new epilog loop. Given that the loop now | |
3156 | iterates NITERS times, the new epilog loop iterates | |
3157 | NITERS % VECTORIZATION_FACTOR times. | |
3158 | ||
3159 | The original loop will later be made to iterate | |
3160 | NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */ | |
a023975e OG |
3161 | |
3162 | static void | |
63dfe6ff | 3163 | vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, |
335d3d54 | 3164 | struct loops *loops) |
a023975e OG |
3165 | { |
3166 | ||
3167 | tree ni_name, ratio_mult_vf_name; | |
63dfe6ff DN |
3168 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
3169 | struct loop *new_loop; | |
3170 | edge update_e; | |
a023975e OG |
3171 | #ifdef ENABLE_CHECKING |
3172 | int loop_num; | |
3173 | #endif | |
a023975e OG |
3174 | |
3175 | if (vect_debug_details (NULL)) | |
3176 | fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n"); | |
3177 | ||
3178 | /* Generate the following variables on the preheader of original loop: | |
3179 | ||
3180 | ni_name = number of iteration the original loop executes | |
3181 | ratio = ni_name / vf | |
3182 | ratio_mult_vf_name = ratio * vf */ | |
3183 | vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, | |
3184 | &ratio_mult_vf_name, ratio); | |
3185 | ||
3186 | /* Update loop info. */ | |
3187 | loop->pre_header = loop_preheader_edge (loop)->src; | |
3188 | loop->pre_header_edges[0] = loop_preheader_edge (loop); | |
3189 | ||
3190 | #ifdef ENABLE_CHECKING | |
3191 | loop_num = loop->num; | |
3192 | #endif | |
f88a8cfa | 3193 | new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0], |
63dfe6ff | 3194 | ratio_mult_vf_name, ni_name, false); |
a023975e OG |
3195 | #ifdef ENABLE_CHECKING |
3196 | gcc_assert (new_loop); | |
3197 | gcc_assert (loop_num == loop->num); | |
63dfe6ff | 3198 | slpeel_verify_cfg_after_peeling (loop, new_loop); |
a023975e OG |
3199 | #endif |
3200 | ||
63dfe6ff DN |
3201 | /* A guard that controls whether the new_loop is to be executed or skipped |
3202 | is placed in LOOP->exit. LOOP->exit therefore has two successors - one | |
3203 | is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other | |
3204 | is a bb after NEW_LOOP, where these IVs are not used. Find the edge that | |
3205 | is on the path where the LOOP IVs are used and need to be updated. */ | |
3206 | ||
3207 | if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest) | |
3208 | update_e = EDGE_PRED (new_loop->pre_header, 0); | |
3209 | else | |
3210 | update_e = EDGE_PRED (new_loop->pre_header, 1); | |
3211 | ||
a023975e OG |
3212 | /* Update IVs of original loop as if they were advanced |
3213 | by ratio_mult_vf_name steps. */ | |
63dfe6ff | 3214 | vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e); |
a023975e | 3215 | |
63dfe6ff DN |
3216 | /* After peeling we have to reset scalar evolution analyzer. */ |
3217 | scev_reset (); | |
a023975e OG |
3218 | |
3219 | return; | |
79fe1b3b DN |
3220 | } |
3221 | ||
3222 | ||
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the first data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   compute address misalignment in bytes:
   addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
   whose type is the inner type of the vectype)  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector alignment in bytes; also the vector size used as the mask below.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree elem_misalign;
  tree byte_misalign;
  tree new_stmts = NULL_TREE;
  /* Address of the first access of DR; NEW_STMTS collects the statements
     that compute it.  */
  tree start_addr =
	vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
  tree ptr_type = TREE_TYPE (start_addr);
  tree size = TYPE_SIZE (ptr_type);
  /* Unsigned integer type as wide as the pointer, for the bit-twiddling.  */
  tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
  tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
  tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
  tree niters_type = TREE_TYPE (loop_niters);
  /* log2 of the scalar element size: vectype_align / vf bytes per element.  */
  tree elem_size_log =
    build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
  tree vf_tree = build_int_cst (unsigned_type_node, vf);

  /* Emit the address computation on the preheader edge; it must not have
     required splitting the edge (no new bb).  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
  gcc_assert (!new_bb);

  /* Create:  byte_misalign = addr & (vectype_size - 1)  */
  byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

  /* Create:  elem_misalign = byte_misalign / element_size  */
  elem_misalign =
    build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);

  /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
  iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
  iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
  iters = fold_convert (niters_type, iters);

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (!host_integerp (loop_niters, 0))
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  pe = loop_preheader_edge (loop);
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
79fe1b3b | 3307 | |
79fe1b3b | 3308 | |
a023975e | 3309 | /* Function vect_update_inits_of_dr |
79fe1b3b | 3310 | |
a023975e OG |
3311 | NITERS iterations were peeled from LOOP. DR represents a data reference |
3312 | in LOOP. This function updates the information recorded in DR to | |
3313 | account for the fact that the first NITERS iterations had already been | |
3314 | executed. Specifically, it updates the initial_condition of the | |
3315 | access_function of DR. */ | |
79fe1b3b DN |
3316 | |
3317 | static void | |
a023975e OG |
3318 | vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop, |
3319 | tree niters) | |
79fe1b3b | 3320 | { |
a023975e OG |
3321 | tree access_fn = DR_ACCESS_FN (dr, 0); |
3322 | tree init, init_new, step; | |
3323 | ||
3324 | step = evolution_part_in_loop_num (access_fn, loop->num); | |
3325 | init = initial_condition (access_fn); | |
3326 | ||
e9c00ceb DN |
3327 | init_new = build2 (PLUS_EXPR, TREE_TYPE (init), |
3328 | build2 (MULT_EXPR, TREE_TYPE (niters), | |
a023975e OG |
3329 | niters, step), init); |
3330 | DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new); | |
3331 | ||
3332 | return; | |
3333 | } | |
79fe1b3b | 3334 | |
79fe1b3b | 3335 | |
a023975e | 3336 | /* Function vect_update_inits_of_drs |
79fe1b3b | 3337 | |
a023975e OG |
3338 | NITERS iterations were peeled from the loop represented by LOOP_VINFO. |
3339 | This function updates the information recorded for the data references in | |
3340 | the loop to account for the fact that the first NITERS iterations had | |
3341 | already been executed. Specifically, it updates the initial_condition of the | |
3342 | access_function of all the data_references in the loop. */ | |
79fe1b3b | 3343 | |
a023975e OG |
3344 | static void |
3345 | vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) | |
3346 | { | |
3347 | unsigned int i; | |
3348 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
3349 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
3350 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
79fe1b3b | 3351 | |
a023975e OG |
3352 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3353 | fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n"); | |
79fe1b3b | 3354 | |
a023975e OG |
3355 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) |
3356 | { | |
3357 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
3358 | vect_update_inits_of_dr (dr, loop, niters); | |
3359 | } | |
79fe1b3b | 3360 | |
a023975e OG |
3361 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) |
3362 | { | |
3363 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
3364 | vect_update_inits_of_dr (dr, loop, niters); | |
a023975e OG |
3365 | } |
3366 | } | |
79fe1b3b | 3367 | |
79fe1b3b | 3368 | |
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */

static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");

  /* Compute the runtime iteration count of LOOP, then the number of prolog
     iterations needed to reach an aligned access (min of the two).  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
				       niters_of_prolog_loop, ni_name, true);
#ifdef ENABLE_CHECKING
  gcc_assert (new_loop);
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  The loop to be vectorized now
     runs NITERS - prolog_niters iterations.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) =
    build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  return;
}
3413 | ||
3414 | ||
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

static void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;   /* Iteration count of the vectorized loop (n / VF).  */
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");


  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
			LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Note: SI is advanced manually -- when a store is removed below,
	 bsi_remove already moves the iterator to the next statement.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "------>vectorizing statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
#ifdef ENABLE_CHECKING
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  gcc_assert
		(GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
		 == vectorization_factor);
#endif
	  /* -------- vectorize statement ------------ */
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* free the attached stmt_vec_info and remove the stmt.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  /* Make the vectorized loop iterate RATIO (= n / VF) times.  */
  slpeel_make_loop_iterate_ntimes (loop, ratio);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
  if (vect_debug_stats (loop))
    fprintf (dump_file, "LOOP VECTORIZED.");
}
3527 | ||
3528 | ||
3529 | /* Function vect_is_simple_use. | |
3530 | ||
3531 | Input: | |
3532 | LOOP - the loop that is being vectorized. | |
3533 | OPERAND - operand of a stmt in LOOP. | |
3534 | DEF - the defining stmt in case OPERAND is an SSA_NAME. | |
3535 | ||
3536 | Returns whether a stmt with OPERAND can be vectorized. | |
3537 | Supportable operands are constants, loop invariants, and operands that are | |
6cb38cd4 | 3538 | defined by the current iteration of the loop. Unsupportable operands are |
79fe1b3b DN |
3539 | those that are defined by a previous iteration of the loop (as is the case |
3540 | in reduction/induction computations). */ | |
3541 | ||
3542 | static bool | |
3543 | vect_is_simple_use (tree operand, struct loop *loop, tree *def) | |
3544 | { | |
3545 | tree def_stmt; | |
3546 | basic_block bb; | |
3547 | ||
3548 | if (def) | |
3549 | *def = NULL_TREE; | |
3550 | ||
3551 | if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) | |
3552 | return true; | |
3553 | ||
3554 | if (TREE_CODE (operand) != SSA_NAME) | |
3555 | return false; | |
3556 | ||
3557 | def_stmt = SSA_NAME_DEF_STMT (operand); | |
3558 | if (def_stmt == NULL_TREE ) | |
3559 | { | |
3560 | if (vect_debug_details (NULL)) | |
3561 | fprintf (dump_file, "no def_stmt."); | |
3562 | return false; | |
3563 | } | |
3564 | ||
3565 | /* empty stmt is expected only in case of a function argument. | |
3566 | (Otherwise - we expect a phi_node or a modify_expr). */ | |
3567 | if (IS_EMPTY_STMT (def_stmt)) | |
3568 | { | |
3569 | tree arg = TREE_OPERAND (def_stmt, 0); | |
3570 | if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) | |
3571 | return true; | |
3572 | if (vect_debug_details (NULL)) | |
3573 | { | |
3574 | fprintf (dump_file, "Unexpected empty stmt: "); | |
3575 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
3576 | } | |
3577 | return false; | |
3578 | } | |
3579 | ||
3580 | /* phi_node inside the loop indicates an induction/reduction pattern. | |
3581 | This is not supported yet. */ | |
3582 | bb = bb_for_stmt (def_stmt); | |
3583 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
3584 | { | |
3585 | if (vect_debug_details (NULL)) | |
3586 | fprintf (dump_file, "reduction/induction - unsupported."); | |
3587 | return false; /* FORNOW: not supported yet. */ | |
3588 | } | |
3589 | ||
3590 | /* Expecting a modify_expr or a phi_node. */ | |
3591 | if (TREE_CODE (def_stmt) == MODIFY_EXPR | |
3592 | || TREE_CODE (def_stmt) == PHI_NODE) | |
3593 | { | |
3594 | if (def) | |
3595 | *def = def_stmt; | |
3596 | return true; | |
3597 | } | |
3598 | ||
3599 | return false; | |
3600 | } | |
3601 | ||
3602 | ||
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  /* 0 until the first relevant stmt fixes the number of vector units.  */
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree scalar_type;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_operations>>\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  tree stmt = bsi_stmt (si);
	  unsigned int nunits;
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  tree vectype;

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "==> examining statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }

	  gcc_assert (stmt_info);

	  /* skip stmts which do not need to be vectorized.
	     this is expected to include:
	     - the COND_EXPR which is the loop exit condition
	     - any LABEL_EXPRs in the loop
	     - computations that are used only for array indexing or loop
	     control  */

	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      if (vect_debug_details (NULL))
	        fprintf (dump_file, "irrelevant.");
	      continue;
	    }

	  /* A stmt that already operates in a vector mode cannot be
	     re-vectorized.  */
	  if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
	        {
	          fprintf (dump_file, "not vectorized: vector stmt in loop:");
	          print_generic_expr (dump_file, stmt, TDF_SLIM);
	        }
	      return false;
	    }

	  /* Determine the scalar type the stmt operates on: prefer the type
	     of its data reference, else the type of the lhs, else the stmt's
	     own type.  */
	  if (STMT_VINFO_DATA_REF (stmt_info))
	    scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
	  else if (TREE_CODE (stmt) == MODIFY_EXPR)
	    scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
	  else
	    scalar_type = TREE_TYPE (stmt);

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "get vectype for scalar type: ");
	      print_generic_expr (dump_file, scalar_type, TDF_SLIM);
	    }

	  vectype = get_vectype_for_scalar_type (scalar_type);
	  if (!vectype)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
	        {
	          fprintf (dump_file, "not vectorized: unsupported data-type ");
	          print_generic_expr (dump_file, scalar_type, TDF_SLIM);
	        }
	      return false;
	    }

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "vectype: ");
	      print_generic_expr (dump_file, vectype, TDF_SLIM);
	    }
	  STMT_VINFO_VECTYPE (stmt_info) = vectype;

	  /* The stmt must be supported by at least one of the vectorizable_*
	     analyzers (called with NULL to analyze rather than transform).  */
	  ok = (vectorizable_operation (stmt, NULL, NULL)
		|| vectorizable_assignment (stmt, NULL, NULL)
		|| vectorizable_load (stmt, NULL, NULL)
		|| vectorizable_store (stmt, NULL, NULL));

	  if (!ok)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
	        {
	          fprintf (dump_file, "not vectorized: stmt not supported: ");
	          print_generic_expr (dump_file, stmt, TDF_SLIM);
	        }
	      return false;
	    }

	  nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "nunits = %d", nunits);

	  if (vectorization_factor)
	    {
	      /* FORNOW: don't allow mixed units.
	         This restriction will be relaxed in the future.  */
	      if (nunits != vectorization_factor)
	        {
	          if (vect_debug_stats (loop) || vect_debug_details (loop))
		    fprintf (dump_file, "not vectorized: mixed data-types");
		  return false;
	        }
	    }
	  else
	    vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
	  gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
		      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
	}
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  /* Still 0 or 1 here means no relevant stmt fixed a vector width.  */
  if (vectorization_factor <= 1)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
    fprintf (dump_file,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: iteration count too small.");
      return false;
    }

  /* An epilog loop will be needed; verify we can actually create one.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop))
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
3780 | ||
3781 | ||
3782 | /* Function exist_non_indexing_operands_for_use_p | |
3783 | ||
3784 | USE is one of the uses attached to STMT. Check if USE is | |
3785 | used in STMT for anything other than indexing an array. */ | |
3786 | ||
3787 | static bool | |
3788 | exist_non_indexing_operands_for_use_p (tree use, tree stmt) | |
3789 | { | |
3790 | tree operand; | |
3791 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
3792 | ||
3793 | /* USE corresponds to some operand in STMT. If there is no data | |
3794 | reference in STMT, then any operand that corresponds to USE | |
3795 | is not indexing an array. */ | |
3796 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
3797 | return true; | |
3798 | ||
3799 | /* STMT has a data_ref. FORNOW this means that its of one of | |
3800 | the following forms: | |
3801 | -1- ARRAY_REF = var | |
3802 | -2- var = ARRAY_REF | |
3803 | (This should have been verified in analyze_data_refs). | |
3804 | ||
3805 | 'var' in the second case corresponds to a def, not a use, | |
3806 | so USE cannot correspond to any operands that are not used | |
3807 | for array indexing. | |
3808 | ||
3809 | Therefore, all we need to check is if STMT falls into the | |
3810 | first case, and whether var corresponds to USE. */ | |
3811 | ||
3812 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) | |
3813 | return false; | |
3814 | ||
3815 | operand = TREE_OPERAND (stmt, 1); | |
3816 | ||
3817 | if (TREE_CODE (operand) != SSA_NAME) | |
3818 | return false; | |
3819 | ||
3820 | if (operand == use) | |
3821 | return true; | |
3822 | ||
3823 | return false; | |
3824 | } | |
3825 | ||
3826 | ||
3827 | /* Function vect_is_simple_iv_evolution. | |
3828 | ||
3829 | FORNOW: A simple evolution of an induction variables in the loop is | |
3830 | considered a polynomial evolution with constant step. */ | |
3831 | ||
3832 | static bool | |
3833 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, | |
3834 | tree * step, bool strict) | |
3835 | { | |
3836 | tree init_expr; | |
3837 | tree step_expr; | |
3838 | ||
3839 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); | |
3840 | ||
3841 | /* When there is no evolution in this loop, the evolution function | |
3842 | is not "simple". */ | |
3843 | if (evolution_part == NULL_TREE) | |
3844 | return false; | |
3845 | ||
3846 | /* When the evolution is a polynomial of degree >= 2 | |
3847 | the evolution function is not "simple". */ | |
3848 | if (tree_is_chrec (evolution_part)) | |
3849 | return false; | |
3850 | ||
3851 | step_expr = evolution_part; | |
a023975e | 3852 | init_expr = unshare_expr (initial_condition (access_fn)); |
79fe1b3b DN |
3853 | |
3854 | if (vect_debug_details (NULL)) | |
3855 | { | |
3856 | fprintf (dump_file, "step: "); | |
3857 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
3858 | fprintf (dump_file, ", init: "); | |
3859 | print_generic_expr (dump_file, init_expr, TDF_SLIM); | |
3860 | } | |
3861 | ||
3862 | *init = init_expr; | |
3863 | *step = step_expr; | |
3864 | ||
3865 | if (TREE_CODE (step_expr) != INTEGER_CST) | |
3866 | { | |
3867 | if (vect_debug_details (NULL)) | |
3868 | fprintf (dump_file, "step unknown."); | |
3869 | return false; | |
3870 | } | |
3871 | ||
3872 | if (strict) | |
3873 | if (!integer_onep (step_expr)) | |
3874 | { | |
3875 | if (vect_debug_details (NULL)) | |
3876 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
3877 | return false; | |
3878 | } | |
3879 | ||
3880 | return true; | |
3881 | } | |
3882 | ||
3883 | ||
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   Returns true if every scalar cycle in the loop is a supported (simple)
   induction; false otherwise.

   FORNOW: Reduction as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
           The cross-iteration cycle corresponding to variable 'sum' will be
           considered too complicated and will impede vectorization.

   FORNOW: Induction as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

           However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

           In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

           The evolution of the above cycle is considered simple enough,
           however, we also check that the cycle does not need to be
           vectorized, i.e - we check that the variable that this cycle
           defines is only used for array indexing or in stmts that do not
           need to be vectorized. This is not the case in loop2, but it
           *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;  /* Loop PHIs all live in the header.  */
  tree dummy;  /* init/step results of the evolution check are unused here.  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet. This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      /* NOTE(review): the instantiate_parameters wrapper was deliberately
         left commented out; only the raw scalar evolution is used.  */
      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
            fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Access function of PHI: ");
          print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      /* Require a simple (non-polynomial) evolution; the step need not be 1
         (last argument 'strict' is false).  */
      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
                                        &dummy, false))
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
            fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}
3990 | ||
3991 | ||
3992 | /* Function vect_analyze_data_ref_dependence. | |
3993 | ||
3994 | Return TRUE if there (might) exist a dependence between a memory-reference | |
3995 | DRA and a memory-reference DRB. */ | |
3996 | ||
3997 | static bool | |
3998 | vect_analyze_data_ref_dependence (struct data_reference *dra, | |
3999 | struct data_reference *drb, | |
4000 | struct loop *loop) | |
4001 | { | |
6775f1f3 | 4002 | bool differ_p; |
79fe1b3b | 4003 | struct data_dependence_relation *ddr; |
6775f1f3 | 4004 | |
79fe1b3b DN |
4005 | if (!array_base_name_differ_p (dra, drb, &differ_p)) |
4006 | { | |
6775f1f3 | 4007 | if (vect_debug_stats (loop) || vect_debug_details (loop)) |
79fe1b3b | 4008 | { |
6775f1f3 IR |
4009 | fprintf (dump_file, |
4010 | "not vectorized: can't determine dependence between: "); | |
79fe1b3b DN |
4011 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); |
4012 | fprintf (dump_file, " and "); | |
4013 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
4014 | } | |
4015 | return true; | |
4016 | } | |
4017 | ||
4018 | if (differ_p) | |
4019 | return false; | |
4020 | ||
4021 | ddr = initialize_data_dependence_relation (dra, drb); | |
4022 | compute_affine_dependence (ddr); | |
4023 | ||
4024 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
4025 | return false; | |
4026 | ||
4027 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
4028 | { | |
4029 | fprintf (dump_file, | |
4030 | "not vectorized: possible dependence between data-refs "); | |
4031 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); | |
4032 | fprintf (dump_file, " and "); | |
4033 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
4034 | } | |
4035 | ||
4036 | return true; | |
4037 | } | |
4038 | ||
4039 | ||
4040 | /* Function vect_analyze_data_ref_dependences. | |
4041 | ||
4042 | Examine all the data references in the loop, and make sure there do not | |
4043 | exist any data dependences between them. | |
4044 | ||
4045 | TODO: dependences which distance is greater than the vectorization factor | |
471854f8 | 4046 | can be ignored. */ |
79fe1b3b DN |
4047 | |
4048 | static bool | |
4049 | vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) | |
4050 | { | |
4051 | unsigned int i, j; | |
4052 | varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
4053 | varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
4054 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
4055 | ||
4056 | /* Examine store-store (output) dependences. */ | |
4057 | ||
4058 | if (vect_debug_details (NULL)) | |
4059 | fprintf (dump_file, "\n<<vect_analyze_dependences>>\n"); | |
4060 | ||
4061 | if (vect_debug_details (NULL)) | |
4062 | fprintf (dump_file, "compare all store-store pairs."); | |
4063 | ||
4064 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++) | |
4065 | { | |
4066 | for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
4067 | { | |
4068 | struct data_reference *dra = | |
4069 | VARRAY_GENERIC_PTR (loop_write_refs, i); | |
4070 | struct data_reference *drb = | |
4071 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
4072 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
4073 | return false; | |
4074 | } | |
4075 | } | |
4076 | ||
4077 | /* Examine load-store (true/anti) dependences. */ | |
4078 | ||
4079 | if (vect_debug_details (NULL)) | |
4080 | fprintf (dump_file, "compare all load-store pairs."); | |
4081 | ||
4082 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++) | |
4083 | { | |
4084 | for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
4085 | { | |
4086 | struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i); | |
4087 | struct data_reference *drb = | |
4088 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
4089 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
4090 | return false; | |
4091 | } | |
4092 | } | |
4093 | ||
4094 | return true; | |
4095 | } | |
4096 | ||
4097 | ||
4098 | /* Function vect_get_first_index. | |
4099 | ||
4100 | REF is a data reference. | |
4101 | If it is an ARRAY_REF: if its lower bound is simple enough, | |
4102 | put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE. | |
4103 | If it is not an ARRAY_REF: REF has no "first index"; | |
4104 | ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */ | |
4105 | ||
4106 | static bool | |
4107 | vect_get_first_index (tree ref, tree *array_first_index) | |
4108 | { | |
4109 | tree array_start; | |
4110 | ||
4111 | if (TREE_CODE (ref) != ARRAY_REF) | |
4112 | *array_first_index = size_zero_node; | |
4113 | else | |
4114 | { | |
4115 | array_start = array_ref_low_bound (ref); | |
e9c00ceb | 4116 | if (!host_integerp (array_start, 0)) |
79fe1b3b DN |
4117 | { |
4118 | if (vect_debug_details (NULL)) | |
4119 | { | |
4120 | fprintf (dump_file, "array min val not simple integer cst."); | |
4121 | print_generic_expr (dump_file, array_start, TDF_DETAILS); | |
4122 | } | |
4123 | return false; | |
4124 | } | |
4125 | *array_first_index = array_start; | |
4126 | } | |
4127 | ||
4128 | return true; | |
4129 | } | |
4130 | ||
4131 | ||
6775f1f3 IR |
/* Function vect_compute_array_base_alignment.
   A utility function of vect_compute_array_ref_alignment.

   Compute the misalignment of ARRAY in bits.

   Input:
   ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
   VECTYPE - we are interested in the misalignment modulo the size of vectype.
             if NULL: don't compute misalignment, just return the base of ARRAY.
   PREV_DIMENSIONS - initialized to one.
   MISALIGNMENT - the computed misalignment in bits.

   Output:
   If VECTYPE is not NULL:
     Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
     the base of the array, and put the computed misalignment in MISALIGNMENT.
   If VECTYPE is NULL:
     Return the base of the array.

   For a[idx_N]...[idx_2][idx_1][idx_0], the address of
   a[idx_N]...[idx_2][idx_1] is
   {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
    ... + idx_N * dim_0 * ... * dim_N-1}.
   (The misalignment of &a is not checked here).
   Note, that every term contains dim_0, therefore, if dim_0 is a
   multiple of NUNITS, the whole sum is a multiple of NUNITS.
   Otherwise, if idx_1 is constant, and dim_1 is a multiple of
   NUNITS, we can say that the misalignment of the sum is equal to
   the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
   we can't determine this array misalignment, and we return
   NULL_TREE.
   We proceed recursively in this manner, accumulating total misalignment
   and the multiplication of previous dimensions for correct misalignment
   calculation.  */

static tree
vect_compute_array_base_alignment (tree array,
                                   tree vectype,
                                   tree *prev_dimensions,
                                   tree *misalignment)
{
  tree index;
  tree domain;
  tree dimension_size;
  tree mis;
  tree bits_per_vectype;
  tree bits_per_vectype_unit;

  /* The 'stop condition' of the recursion: reached the base decl.  */
  if (TREE_CODE (array) != ARRAY_REF)
    return array;

  if (!vectype)
    /* Just get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  /* The accumulated values must stay representable host integers.  */
  if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
      !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
    return NULL_TREE;

  /* dimension_size = max - min + 1 (number of elements in this dimension).  */
  domain = TYPE_DOMAIN (TREE_TYPE (array));
  dimension_size =
    int_const_binop (PLUS_EXPR,
                     int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
                                      TYPE_MIN_VALUE (domain), 1),
                     size_one_node, 1);

  /* Check if the dimension size is a multiple of NUNITS, the remaining sum
     is a multiple of NUNITS:

     dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
   */
  mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
         build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
  if (integer_zerop (mis))
    /* This array is aligned. Continue just in order to get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  index = TREE_OPERAND (array, 1);
  if (!host_integerp (index, 1))
    /* The current index is not constant; misalignment is unknowable.  */
    return NULL_TREE;

  /* Normalize the index relative to the dimension's lower bound.  */
  index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);

  bits_per_vectype = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
                   GET_MODE_SIZE (TYPE_MODE (vectype))));
  bits_per_vectype_unit = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
                   GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));

  /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
     earlier:

     *misalignment =
       (*misalignment + index_val * dimension_size * *prev_dimensions)
        % vectype_nunits;
   */

  mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
  mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
  mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
  mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
  *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);

  /* Accumulate the product of the dimensions seen so far, for the next
     (outer) recursion level.  */
  *prev_dimensions = int_const_binop (MULT_EXPR,
                                      *prev_dimensions, dimension_size, 1);

  /* Recurse into the next outer dimension.  */
  return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
                                            prev_dimensions,
                                            misalignment);
}
4248 | ||
4249 | ||
79fe1b3b DN |
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   Side effect: may force the alignment of the base decl (DECL_ALIGN /
   DECL_USER_ALIGN) -- see the NOTE below.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr,
                                 loop_vec_info loop_vinfo)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree scalar_type;
  tree offset = size_zero_node;
  tree base, bit_offset, alignment;
  tree unit_bits = fold_convert (unsigned_type_node,
                                 build_int_cst (NULL_TREE, BITS_PER_UNIT));
  tree dr_base;
  bool base_aligned_p;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  scalar_type = TREE_TYPE (ref);
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "no vectype for stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
          fprintf (dump_file, " scalar_type: ");
          print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
        }
      /* It is not possible to vectorize this data reference.  */
      return false;
    }
  /* Record the vector type for later stages of the vectorizer.  */
  STMT_VINFO_VECTYPE (stmt_info) = vectype;
  gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);

  /* For pointer accesses the base was computed during data-ref analysis.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    dr_base = ref;
  else
    dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);

  base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
                          loop_vinfo, &bit_offset, &base_aligned_p);
  if (!base)
    {
      /* Unknown alignment is not a reason to reject the loop -- the
         misalignment simply stays -1 (unknown).  */
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Unknown alignment for access: ");
          print_generic_expr (dump_file,
                              STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          /* Cannot force alignment; leave misalignment unknown.  */
          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "can't force alignment of ref: ");
              print_generic_expr (dump_file, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_debug_details (NULL))
        fprintf (dump_file, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned, and the offset from it
     (including index, if relevant) has been computed and is in BIT_OFFSET.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Convert into bytes.  */
  offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
  /* Check that there is no remainder in bits.  */
  bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
  if (!integer_zerop (bit_offset))
    {
      /* A sub-byte offset (e.g. bit-field) cannot be handled.  */
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "bit offset alignment: ");
          print_generic_expr (dump_file, bit_offset, TDF_SLIM);
        }
      return false;
    }

  /* Alignment required, in bytes:  */
  alignment = fold_convert (unsigned_type_node,
              build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));

  /* Modulo alignment.  */
  offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
  if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));

  return true;
}
4380 | ||
4381 | ||
/* Function vect_compute_array_ref_alignment

   Compute the alignment of an array-ref.
   The alignment we compute here is relative to
   TYPE_ALIGN(VECTYPE) boundary.

   Output:
   OFFSET - the alignment in bits
   Return value - the base of the array-ref. E.g,
                  if the array-ref is a.b[k].c[i][j] the returned
                  base is a.b[k].c
   Returns NULL_TREE when the alignment cannot be determined.
*/

static tree
vect_compute_array_ref_alignment (struct data_reference *dr,
                                  loop_vec_info loop_vinfo,
                                  tree vectype,
                                  tree *offset)
{
  tree array_first_index = size_zero_node;
  tree init;
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree oprnd0 = TREE_OPERAND (ref, 0);
  tree dims = size_one_node;        /* Product of dimensions seen so far.  */
  tree misalign = size_zero_node;   /* Misalignment accumulated in bits.  */
  tree next_ref, this_offset = size_zero_node;
  tree nunits;
  tree nbits;

  if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
    /* The reference is an array without its last index.  */
    next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
                                                  &misalign);
  else
    next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
                                                  &misalign);
  if (!vectype)
    /* Alignment is not requested. Just return the base.  */
    return next_ref;

  /* Compute alignment.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
    return NULL_TREE;
  this_offset = misalign;

  /* Check the first index accessed.  */
  if (!vect_get_first_index (ref, &array_first_index))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "no first_index for array.");
      return NULL_TREE;
    }

  /* Check the index of the array_ref.  */
  init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
                                        LOOP_VINFO_LOOP (loop_vinfo)->num);

  /* FORNOW: In order to simplify the handling of alignment, we make sure
     that the first location at which the array is accessed ('init') is on an
     'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
     This is too conservative, since we require that
     both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
     NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
     This should be relaxed in the future.  */

  if (!init || !host_integerp (init, 0))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "non constant init. ");
      return NULL_TREE;
    }

  /* bytes per scalar element: */
  nunits = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
  nbits = int_const_binop (MULT_EXPR, nunits,
                           build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);

  /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
  misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
  misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
  misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);

  /* TODO: allow negative misalign values.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return NULL_TREE;
    }
  *offset = misalign;
  return next_ref;
}
4476 | ||
4477 | ||
4478 | /* Function vect_compute_data_refs_alignment | |
4479 | ||
4480 | Compute the misalignment of data references in the loop. | |
4481 | This pass may take place at function granularity instead of at loop | |
4482 | granularity. | |
4483 | ||
4484 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
4485 | only for trivial cases. TODO. */ | |
4486 | ||
0dc0a70b | 4487 | static bool |
79fe1b3b DN |
4488 | vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) |
4489 | { | |
4490 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
4491 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
4492 | unsigned int i; | |
6775f1f3 | 4493 | |
79fe1b3b DN |
4494 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) |
4495 | { | |
4496 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
0dc0a70b DN |
4497 | if (!vect_compute_data_ref_alignment (dr, loop_vinfo)) |
4498 | return false; | |
79fe1b3b DN |
4499 | } |
4500 | ||
4501 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
4502 | { | |
4503 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
0dc0a70b DN |
4504 | if (!vect_compute_data_ref_alignment (dr, loop_vinfo)) |
4505 | return false; | |
79fe1b3b | 4506 | } |
0dc0a70b DN |
4507 | |
4508 | return true; | |
79fe1b3b DN |
4509 | } |
4510 | ||
4511 | ||
4512 | /* Function vect_enhance_data_refs_alignment | |
4513 | ||
4514 | This pass will use loop versioning and loop peeling in order to enhance | |
4515 | the alignment of data references in the loop. | |
4516 | ||
4517 | FOR NOW: we assume that whatever versioning/peeling takes place, only the | |
4518 | original loop is to be vectorized; Any other loops that are created by | |
4519 | the transformations performed in this pass - are not supposed to be | |
d6901754 | 4520 | vectorized. This restriction will be relaxed. */ |
79fe1b3b DN |
4521 | |
4522 | static void | |
0dc0a70b | 4523 | vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) |
79fe1b3b | 4524 | { |
0dc0a70b DN |
4525 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); |
4526 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
4527 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
4528 | unsigned int i; | |
4529 | ||
79fe1b3b DN |
4530 | /* |
4531 | This pass will require a cost model to guide it whether to apply peeling | |
4532 | or versioning or a combination of the two. For example, the scheme that | |
4533 | intel uses when given a loop with several memory accesses, is as follows: | |
4534 | choose one memory access ('p') which alignment you want to force by doing | |
4535 | peeling. Then, either (1) generate a loop in which 'p' is aligned and all | |
4536 | other accesses are not necessarily aligned, or (2) use loop versioning to | |
4537 | generate one loop in which all accesses are aligned, and another loop in | |
4538 | which only 'p' is necessarily aligned. | |
4539 | ||
4540 | ("Automatic Intra-Register Vectorization for the Intel Architecture", | |
4541 | Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International | |
4542 | Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) | |
4543 | ||
4544 | Devising a cost model is the most critical aspect of this work. It will | |
4545 | guide us on which access to peel for, whether to use loop versioning, how | |
4546 | many versions to create, etc. The cost model will probably consist of | |
4547 | generic considerations as well as target specific considerations (on | |
4548 | powerpc for example, misaligned stores are more painful than misaligned | |
4549 | loads). | |
4550 | ||
4551 | Here is the general steps involved in alignment enhancements: | |
4552 | ||
4553 | -- original loop, before alignment analysis: | |
4554 | for (i=0; i<N; i++){ | |
4555 | x = q[i]; # DR_MISALIGNMENT(q) = unknown | |
4556 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
4557 | } | |
4558 | ||
4559 | -- After vect_compute_data_refs_alignment: | |
4560 | for (i=0; i<N; i++){ | |
4561 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
4562 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
4563 | } | |
4564 | ||
4565 | -- Possibility 1: we do loop versioning: | |
4566 | if (p is aligned) { | |
4567 | for (i=0; i<N; i++){ # loop 1A | |
4568 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
4569 | p[i] = y; # DR_MISALIGNMENT(p) = 0 | |
4570 | } | |
4571 | } | |
4572 | else { | |
4573 | for (i=0; i<N; i++){ # loop 1B | |
4574 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
4575 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned | |
4576 | } | |
4577 | } | |
4578 | ||
4579 | -- Possibility 2: we do loop peeling: | |
4580 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). | |
4581 | x = q[i]; | |
4582 | p[i] = y; | |
4583 | } | |
4584 | for (i = 3; i < N; i++){ # loop 2A | |
4585 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
4586 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
4587 | } | |
4588 | ||
4589 | -- Possibility 3: combination of loop peeling and versioning: | |
4590 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). | |
4591 | x = q[i]; | |
4592 | p[i] = y; | |
4593 | } | |
4594 | if (p is aligned) { | |
4595 | for (i = 3; i<N; i++){ # loop 3A | |
4596 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
4597 | p[i] = y; # DR_MISALIGNMENT(p) = 0 | |
4598 | } | |
4599 | } | |
4600 | else { | |
4601 | for (i = 3; i<N; i++){ # loop 3B | |
4602 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
4603 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned | |
4604 | } | |
4605 | } | |
4606 | ||
4607 | These loops are later passed to loop_transform to be vectorized. The | |
4608 | vectorizer will use the alignment information to guide the transformation | |
4609 | (whether to generate regular loads/stores, or with special handling for | |
4610 | misalignment). | |
4611 | */ | |
0dc0a70b DN |
4612 | |
4613 | /* (1) Peeling to force alignment. */ | |
4614 | ||
4615 | /* (1.1) Decide whether to perform peeling, and how many iterations to peel: | |
4616 | Considerations: | |
4617 | + How many accesses will become aligned due to the peeling | |
4618 | - How many accesses will become unaligned due to the peeling, | |
4619 | and the cost of misaligned accesses. | |
4620 | - The cost of peeling (the extra runtime checks, the increase | |
4621 | in code size). | |
4622 | ||
4623 | The scheme we use FORNOW: peel to force the alignment of the first | |
896b242c | 4624 | misaligned store in the loop. |
00803cd5 | 4625 | Rationale: misaligned stores are not yet supported. |
0dc0a70b DN |
4626 | |
4627 | TODO: Use a better cost model. */ | |
4628 | ||
4629 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
4630 | { | |
4631 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
4632 | if (!aligned_access_p (dr)) | |
4633 | { | |
4634 | LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr; | |
4635 | LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true; | |
4636 | break; | |
4637 | } | |
4638 | } | |
4639 | ||
4640 | if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) | |
4641 | { | |
4642 | if (vect_debug_details (loop)) | |
4643 | fprintf (dump_file, "Peeling for alignment will not be applied."); | |
4644 | return; | |
4645 | } | |
4646 | else | |
4647 | if (vect_debug_details (loop)) | |
4648 | fprintf (dump_file, "Peeling for alignment will be applied."); | |
4649 | ||
4650 | ||
4651 | /* (1.2) Update the alignment info according to the peeling factor. | |
4652 | If the misalignment of the DR we peel for is M, then the | |
4653 | peeling factor is VF - M, and the misalignment of each access DR_i | |
4654 | in the loop is DR_MISALIGNMENT (DR_i) + VF - M. | |
4655 | If the misalignment of the DR we peel for is unknown, then the | |
4656 | misalignment of each access DR_i in the loop is also unknown. | |
4657 | ||
4658 | FORNOW: set the misalignment of the accesses to unknown even | |
4659 | if the peeling factor is known at compile time. | |
4660 | ||
4661 | TODO: - if the peeling factor is known at compile time, use that | |
4662 | when updating the misalignment info of the loop DRs. | |
4663 | - consider accesses that are known to have the same | |
4664 | alignment, even if that alignment is unknown. */ | |
4665 | ||
4666 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
4667 | { | |
4668 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
4669 | if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) | |
4670 | DR_MISALIGNMENT (dr) = 0; | |
4671 | else | |
4672 | DR_MISALIGNMENT (dr) = -1; | |
4673 | } | |
4674 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
4675 | { | |
4676 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
4677 | if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) | |
4678 | DR_MISALIGNMENT (dr) = 0; | |
4679 | else | |
4680 | DR_MISALIGNMENT (dr) = -1; | |
4681 | } | |
79fe1b3b DN |
4682 | } |
4683 | ||
4684 | ||
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized. This restriction will be
   relaxed.

   Returns true iff every data reference in the loop is supportable with
   respect to its (possibly enhanced) alignment; emits a "not vectorized"
   diagnostic and returns false otherwise.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");


  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  /* Step 1: compute the misalignment of every data reference.  Failure to
     even compute it makes the loop unvectorizable.  */
  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_debug_details (loop) || vect_debug_stats (loop))
        fprintf (dump_file,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }


  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);


  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  /* Reads are checked first, then writes; either kind being unsupported
     aborts vectorization with its own diagnostic.  */
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_debug_details (loop) || vect_debug_stats (loop))
            fprintf (dump_file, "not vectorized: unsupported unaligned load.");
          return false;
        }
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_debug_details (loop) || vect_debug_stats (loop))
            fprintf (dump_file, "not vectorized: unsupported unaligned store.");
          return false;
        }
    }

  return true;
}
4751 | ||
4752 | ||
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR. For now, a data access
   has to be consecutive and aligned to be considered vectorizable.

   Returns true iff the access pattern of DR is supported:
   - all but the innermost dimension of a multidimensional access are loop
     invariant, and
   - the innermost access function is either constant or a simple IV.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  varray_type access_fns = DR_ACCESS_FNS (dr);
  tree access_fn;
  tree init, step;
  unsigned int dimensions, i;

  /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
     i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
     access is contiguous).  */
  dimensions = VARRAY_ACTIVE_SIZE (access_fns);

  /* NOTE: access function 0 is the innermost (last) dimension; the loop
     below therefore starts at index 1 to skip it.  */
  for (i = 1; i < dimensions; i++) /* Not including the last dimension.  */
    {
      access_fn = DR_ACCESS_FN (dr, i);

      if (evolution_part_in_loop_num (access_fn,
                                      loop_containing_stmt (DR_STMT (dr))->num))
        {
          /* Evolution part is not NULL in this loop (it is neither constant
             nor invariant).  */
          if (vect_debug_details (NULL))
            {
              fprintf (dump_file,
                       "not vectorized: complicated multidim. array access.");
              print_generic_expr (dump_file, access_fn, TDF_SLIM);
            }
          return false;
        }
    }

  access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function.  */
  if (!evolution_function_is_constant_p (access_fn)
      && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
                                       access_fn, &init, &step, true))
    {
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "not vectorized: complicated access function.");
          print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }
      return false;
    }

  return true;
}
4805 | ||
4806 | ||
4807 | /* Function vect_analyze_data_ref_accesses. | |
4808 | ||
4809 | Analyze the access pattern of all the data references in the loop. | |
4810 | ||
4811 | FORNOW: the only access pattern that is considered vectorizable is a | |
4812 | simple step 1 (consecutive) access. | |
4813 | ||
6775f1f3 | 4814 | FORNOW: handle only arrays and pointer accesses. */ |
79fe1b3b DN |
4815 | |
4816 | static bool | |
4817 | vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) | |
4818 | { | |
4819 | unsigned int i; | |
4820 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
4821 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
4822 | ||
4823 | if (vect_debug_details (NULL)) | |
4824 | fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n"); | |
4825 | ||
4826 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
4827 | { | |
4828 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
4829 | bool ok = vect_analyze_data_ref_access (dr); | |
4830 | if (!ok) | |
4831 | { | |
4832 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
4833 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
4834 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
4835 | return false; | |
4836 | } | |
4837 | } | |
4838 | ||
4839 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
4840 | { | |
4841 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
4842 | bool ok = vect_analyze_data_ref_access (dr); | |
4843 | if (!ok) | |
4844 | { | |
4845 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
4846 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
4847 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
4848 | return false; | |
4849 | } | |
4850 | } | |
4851 | ||
4852 | return true; | |
4853 | } | |
4854 | ||
4855 | ||
/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   IS_READ - TRUE if STMT reads through MEMREF, FALSE if it writes.

   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR). Otherwise - return NULL.

   The access is accepted only if the pointer evolves as a simple IV whose
   constant step equals the size of the pointed-to type (i.e. a unit-stride,
   consecutive access).  */

static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
  tree init, step;
  int step_val;
  tree reftype, innertype;
  enum machine_mode innermode;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  /* The scalar-evolution analyzer could not describe the pointer.  */
  if (!access_fn)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: complicated pointer access.");
      return NULL;
    }

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr: ");
      print_generic_expr (dump_file, access_fn, TDF_SLIM);
    }

  /* Decompose the evolution into initial value INIT and step STEP.  */
  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  /* The step must be a compile-time integer constant.  */
  if (!host_integerp (step,0))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  step_val = TREE_INT_CST_LOW (step);

  /* Both the dereferenced operand and the initial value must have pointer
     type; anything else is an unexpected form.  */
  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  reftype = TREE_TYPE (init);
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  /* Unit-stride check: the step must equal the size of the element type.  */
  innertype = TREE_TYPE (reftype);
  innermode = TYPE_MODE (innertype);
  if (GET_MODE_SIZE (innermode) != step_val)
    {
      /* FORNOW: support only consecutive access */
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: non consecutive access.");
      return NULL;
    }

  /* Model the access as a virtual index {0, +, 1} in this loop.  */
  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr indx: ");
      print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
  return dr;
}
4947 | ||
4948 | ||
/* Function vect_get_symbl_and_dr.

   The function returns SYMBL - the relevant variable for
   memory tag (for aliasing purposes).
   Also data reference structure DR is created.

   This function handles three kinds of MEMREF:

   It is called from vect_analyze_data_refs with a MEMREF that is either an
   ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
   It builds a DR for them using vect_get_base_and_bit_offset, and calls itself
   recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
   MEMREF along the way. During the recursive calls, the function may be called
   with a MEMREF for which the recursion has to continue - PLUS_EXPR,
   MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
   and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
   and SSA_NAME (this is category 3 - "recursion stop condition").

   When the MEMREF falls into category 1 there is still no data reference struct
   (DR) available. It is created by this function, and then, along the recursion,
   MEMREF will fall into category 2 or 3, in which case a DR will have already
   been created, but the analysis continues to retrieve the MEMTAG.

   Input:
   MEMREF - data reference in STMT
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF

   Output:
   DR - data_reference struct for MEMREF
   return value - the relevant variable for memory tag (for aliasing purposes),
                  or NULL_TREE if no memtag could be determined (the caller
                  treats this as "not vectorizable").

*/

static tree
vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
                       loop_vec_info loop_vinfo, struct data_reference **dr)
{
  tree symbl, oprnd0, oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree offset;
  tree tag;
  struct data_reference *new_dr;
  bool base_aligned_p;

  if (*dr)
    {
      /* Category 3: recursion stop condition.  */
      /* (1) A DR already exists. We only need to get the relevant memtag for
         MEMREF, the rest of the data was already initialized.  */

      switch (TREE_CODE (memref))
        {
        /* (1.1) Stop condition: find the relevant memtag and return.  */
        case SSA_NAME:
          symbl = SSA_NAME_VAR (memref);
          tag = get_var_ann (symbl)->type_mem_tag;
          if (!tag)
            {
              /* Fallback: try the memtag of the pointer that the DR itself
                 dereferences.  */
              tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
              if (TREE_CODE (ptr) == SSA_NAME)
                tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
            }
          if (!tag)
            {
              if (vect_debug_details (NULL))
                fprintf (dump_file, "not vectorized: no memtag for ref.");
              return NULL_TREE;
            }
          return tag;

        case VAR_DECL:
        case PARM_DECL:
          /* A declaration is its own memtag.  */
          return memref;

        /* Category 2: recursion continues.  */
        /* (1.2) A recursive call to find the relevant memtag is required.  */
        case INDIRECT_REF:
          symbl = TREE_OPERAND (memref, 0);
          break; /* For recursive call.  */

        case COMPONENT_REF:
          /* Could have recorded more accurate information -
             i.e, the actual FIELD_DECL that is being referenced -
             but later passes expect VAR_DECL as the nmt.  */
          /* Fall through.  */

        case ADDR_EXPR:
          symbl = vect_get_base_and_bit_offset ((*dr), memref, NULL_TREE,
                                        loop_vinfo, &offset, &base_aligned_p);
          break; /* For recursive call.  */

        case PLUS_EXPR:
        case MINUS_EXPR:
          /* Although DR exists, we have to call the function recursively to
             build MEMTAG for such expression. This is handled below.  */
          oprnd0 = TREE_OPERAND (memref, 0);
          oprnd1 = TREE_OPERAND (memref, 1);

          STRIP_NOPS (oprnd1);
          /* Supported plus/minus expressions are of the form
             {address_base + offset}, such that address_base is of type
             POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
             or it's not of type POINTER/ARRAY.
             TODO: swap operands if {offset + address_base}.  */
          if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
               && TREE_CODE (oprnd1) != INTEGER_CST)
              || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
            return NULL_TREE;

          symbl = oprnd0;
          break; /* For recursive call.  */

        default:
          return NULL_TREE;
        }
    }
  else
    {
      /* Category 1: recursion begins.  */
      /* (2) A DR does not exist yet and must be built, followed by a
         recursive call to get the relevant memtag for MEMREF.  */

      switch (TREE_CODE (memref))
        {
        case INDIRECT_REF:
          new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
          if (!new_dr)
            return NULL_TREE;
          *dr = new_dr;
          symbl = DR_BASE_NAME (new_dr);
          STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
          break;

        case ARRAY_REF:
          new_dr = analyze_array (stmt, memref, is_read);
          *dr = new_dr;
          symbl = DR_BASE_NAME (new_dr);
          STMT_VINFO_VECT_DR_BASE (stmt_info) = TREE_OPERAND (memref, 0);
          break;

        default:
          /* TODO: Support data-refs of form a[i].p for unions and single
             field structures.  */
          return NULL_TREE;
        }
    }

  if (!symbl)
    return NULL_TREE;
  /* Recursive call to retrieve the relevant memtag.  */
  tag = vect_get_symbl_and_dr (symbl, stmt, is_read, loop_vinfo, dr);
  return tag;
}
5102 | ||
5103 | ||
79fe1b3b DN |
/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
           which base is really an array (not a pointer) and which alignment
           can be forced. This restriction will be relaxed.

   For every stmt with virtual operands, classify it as a read (vuses) or a
   write (vdefs), build a data_reference for the accessed MEMREF, record it
   in the corresponding LOOP_VINFO list and in the stmt's stmt_vec_info.
   Returns false (loop not vectorizable) on any unhandled form.  */

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int j;
  struct data_reference *dr;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          bool is_read = false;
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
          v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
          vuse_optype vuses = STMT_VUSE_OPS (stmt);
          varray_type *datarefs = NULL;
          int nvuses, nv_may_defs, nv_must_defs;
          tree memref = NULL;
          tree symbl;

          /* Assumption: there exists a data-ref in stmt, if and only if
             it has vuses/vdefs.  */

          if (!vuses && !v_may_defs && !v_must_defs)
            continue;

          nvuses = NUM_VUSES (vuses);
          nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
          nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);

          /* A stmt that both reads and writes memory is not handled.  */
          if (nvuses && (nv_may_defs || nv_must_defs))
            {
              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Only plain assignments may carry a data reference here.  */
          if (TREE_CODE (stmt) != MODIFY_EXPR)
            {
              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "unexpected vops in stmt: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Reads come from the RHS (operand 1); writes go to the LHS
             (operand 0).  */
          if (vuses)
            {
              memref = TREE_OPERAND (stmt, 1);
              datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
              is_read = true;
            }
          else /* vdefs */
            {
              memref = TREE_OPERAND (stmt, 0);
              datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
              is_read = false;
            }

          /* Analyze MEMREF. If it is of a supported form, build data_reference
             struct for it (DR) and find the relevant symbol for aliasing
             purposes.  */
          dr = NULL;
          symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
                                         &dr);
          if (!symbl)
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: unhandled data ref: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }
          STMT_VINFO_MEMTAG (stmt_info) = symbl;
          VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
          STMT_VINFO_DATA_REF (stmt_info) = dr;
        }
    }

  return true;
}
5207 | ||
5208 | ||
8c27b7d4 | 5209 | /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ |
79fe1b3b DN |
5210 | |
5211 | /* Function vect_mark_relevant. | |
5212 | ||
5213 | Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ | |
5214 | ||
5215 | static void | |
2b0729ba | 5216 | vect_mark_relevant (varray_type *worklist, tree stmt) |
79fe1b3b DN |
5217 | { |
5218 | stmt_vec_info stmt_info; | |
5219 | ||
5220 | if (vect_debug_details (NULL)) | |
5221 | fprintf (dump_file, "mark relevant."); | |
5222 | ||
5223 | if (TREE_CODE (stmt) == PHI_NODE) | |
5224 | { | |
2b0729ba | 5225 | VARRAY_PUSH_TREE (*worklist, stmt); |
79fe1b3b DN |
5226 | return; |
5227 | } | |
5228 | ||
5229 | stmt_info = vinfo_for_stmt (stmt); | |
5230 | ||
5231 | if (!stmt_info) | |
5232 | { | |
5233 | if (vect_debug_details (NULL)) | |
5234 | { | |
5235 | fprintf (dump_file, "mark relevant: no stmt info!!."); | |
5236 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
5237 | } | |
5238 | return; | |
5239 | } | |
5240 | ||
5241 | if (STMT_VINFO_RELEVANT_P (stmt_info)) | |
5242 | { | |
5243 | if (vect_debug_details (NULL)) | |
5244 | fprintf (dump_file, "already marked relevant."); | |
5245 | return; | |
5246 | } | |
5247 | ||
5248 | STMT_VINFO_RELEVANT_P (stmt_info) = 1; | |
2b0729ba | 5249 | VARRAY_PUSH_TREE (*worklist, stmt); |
79fe1b3b DN |
5250 | } |
5251 | ||
5252 | ||
5253 | /* Function vect_stmt_relevant_p. | |
5254 | ||
5255 | Return true if STMT in loop that is represented by LOOP_VINFO is | |
5256 | "relevant for vectorization". | |
5257 | ||
5258 | A stmt is considered "relevant for vectorization" if: | |
5259 | - it has uses outside the loop. | |
5260 | - it has vdefs (it alters memory). | |
5261 | - control stmts in the loop (except for the exit condition). | |
5262 | ||
5263 | CHECKME: what other side effects would the vectorizer allow? */ | |
5264 | ||
5265 | static bool | |
5266 | vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo) | |
5267 | { | |
5268 | v_may_def_optype v_may_defs; | |
5269 | v_must_def_optype v_must_defs; | |
5270 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
5271 | int i; | |
5272 | dataflow_t df; | |
5273 | int num_uses; | |
5274 | ||
5275 | /* cond stmt other than loop exit cond. */ | |
5276 | if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) | |
5277 | return true; | |
5278 | ||
5279 | /* changing memory. */ | |
5280 | v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
5281 | v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
5282 | if (v_may_defs || v_must_defs) | |
5283 | { | |
5284 | if (vect_debug_details (NULL)) | |
5285 | fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs."); | |
5286 | return true; | |
5287 | } | |
5288 | ||
5289 | /* uses outside the loop. */ | |
5290 | df = get_immediate_uses (stmt); | |
5291 | num_uses = num_immediate_uses (df); | |
5292 | for (i = 0; i < num_uses; i++) | |
5293 | { | |
5294 | tree use = immediate_use (df, i); | |
5295 | basic_block bb = bb_for_stmt (use); | |
5296 | if (!flow_bb_inside_loop_p (loop, bb)) | |
5297 | { | |
5298 | if (vect_debug_details (NULL)) | |
5299 | fprintf (dump_file, "vec_stmt_relevant_p: used out of loop."); | |
5300 | return true; | |
5301 | } | |
5302 | } | |
5303 | ||
5304 | return false; | |
5305 | } | |
5306 | ||
5307 | ||
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.

   Algorithm: seed a worklist with all directly-relevant stmts (see
   vect_stmt_relevant_p), then propagate relevance backwards through
   def-use chains inside the loop until a fixpoint is reached.
   Returns false if any use is not "simple" (unsupported).  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "init: stmt relevant? ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }

          /* Clear the flag first so propagation starts from a clean state.  */
          stmt_info = vinfo_for_stmt (stmt);
          STMT_VINFO_RELEVANT_P (stmt_info) = 0;

          if (vect_stmt_relevant_p (stmt, loop_vinfo))
            vect_mark_relevant (&worklist, stmt);
        }
    }


  /* 2. Process_worklist */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "worklist: examine stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
        }

      /* Examine the USES in this statement. Mark all the statements which
         feed this statement's uses as "relevant", unless the USE is used as
         an array index.  */

      if (TREE_CODE (stmt) == PHI_NODE)
        {
          /* follow the def-use chain inside the loop.  */
          for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
            {
              tree arg = PHI_ARG_DEF (stmt, j);
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (arg, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  /* Release the worklist before the early exit.  */
                  varray_clear (worklist);
                  return false;
                }
              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: def_stmt: ");
                  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
                }

              /* Only defs inside the loop participate in propagation.  */
              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (&worklist, def_stmt);
            }
        }

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
        {
          tree use = USE_OP (use_ops, i);

          /* We are only interested in uses that need to be vectorized. Uses
             that are used for address computation are not considered relevant.
           */
          if (exist_non_indexing_operands_for_use_p (use, stmt))
            {
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (use, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }

              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: examine use %d: ", i);
                  print_generic_expr (dump_file, use, TDF_SLIM);
                }

              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (&worklist, def_stmt);
            }
        }
    } /* while worklist */

  varray_clear (worklist);
  return true;
}
5456 | ||
5457 | ||
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop. Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.

   Returns true iff every (non-virtual) PHI in the loop header has an
   analyzable access function whose evolution part in this loop is a
   non-chrec (i.e. the IV is at most a degree-1 polynomial).  */

static bool
vect_can_advance_ivs_p (struct loop *loop)
{
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "No Access function.");
          return false;
        }

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Access function of PHI: ");
          print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      /* No evolution in this loop means we cannot advance this IV.  */
      if (evolution_part == NULL_TREE)
        return false;

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}
5528 | ||
5529 | ||
79fe1b3b DN |
5530 | /* Function vect_get_loop_niters. |
5531 | ||
d6901754 DN |
5532 | Determine how many iterations the loop is executed. |
5533 | If an expression that represents the number of iterations | |
5534 | can be constructed, place it in NUMBER_OF_ITERATIONS. | |
5535 | Return the loop exit condition. */ | |
79fe1b3b DN |
5536 | |
5537 | static tree | |
a023975e | 5538 | vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) |
79fe1b3b DN |
5539 | { |
5540 | tree niters; | |
5541 | ||
5542 | if (vect_debug_details (NULL)) | |
5543 | fprintf (dump_file, "\n<<get_loop_niters>>\n"); | |
5544 | ||
5545 | niters = number_of_iterations_in_loop (loop); | |
5546 | ||
5547 | if (niters != NULL_TREE | |
a023975e | 5548 | && niters != chrec_dont_know) |
79fe1b3b | 5549 | { |
a023975e | 5550 | *number_of_iterations = niters; |
79fe1b3b DN |
5551 | |
5552 | if (vect_debug_details (NULL)) | |
a023975e OG |
5553 | { |
5554 | fprintf (dump_file, "==> get_loop_niters:" ); | |
5555 | print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM); | |
5556 | } | |
79fe1b3b DN |
5557 | } |
5558 | ||
5559 | return get_loop_exit_condition (loop); | |
5560 | } | |
5561 | ||
5562 | ||
5563 | /* Function vect_analyze_loop_form. | |
5564 | ||
5565 | Verify the following restrictions (some may be relaxed in the future): | |
5566 | - it's an inner-most loop | |
5567 | - number of BBs = 2 (which are the loop header and the latch) | |
5568 | - the loop has a pre-header | |
5569 | - the loop has a single entry and exit | |
5570 | - the loop exit condition is simple enough, and the number of iterations | |
5571 | can be analyzed (a countable loop). */ | |
5572 | ||
5573 | static loop_vec_info | |
5574 | vect_analyze_loop_form (struct loop *loop) | |
5575 | { | |
5576 | loop_vec_info loop_vinfo; | |
5577 | tree loop_cond; | |
a023975e | 5578 | tree number_of_iterations = NULL; |
d6901754 | 5579 | bool rescan = false; |
79fe1b3b DN |
5580 | |
5581 | if (vect_debug_details (loop)) | |
5582 | fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n"); | |
5583 | ||
82b85a85 ZD |
5584 | if (loop->inner |
5585 | || !loop->single_exit | |
d6901754 DN |
5586 | || loop->num_nodes != 2 |
5587 | || EDGE_COUNT (loop->header->preds) != 2 | |
5588 | || loop->num_entries != 1) | |
79fe1b3b DN |
5589 | { |
5590 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5591 | { | |
5592 | fprintf (dump_file, "not vectorized: bad loop form. "); | |
82b85a85 | 5593 | if (loop->inner) |
79fe1b3b | 5594 | fprintf (dump_file, "nested loop."); |
82b85a85 ZD |
5595 | else if (!loop->single_exit) |
5596 | fprintf (dump_file, "multiple exits."); | |
5597 | else if (loop->num_nodes != 2) | |
79fe1b3b | 5598 | fprintf (dump_file, "too many BBs in loop."); |
d6901754 DN |
5599 | else if (EDGE_COUNT (loop->header->preds) != 2) |
5600 | fprintf (dump_file, "too many incoming edges."); | |
5601 | else if (loop->num_entries != 1) | |
5602 | fprintf (dump_file, "too many entries."); | |
79fe1b3b DN |
5603 | } |
5604 | ||
5605 | return NULL; | |
5606 | } | |
5607 | ||
5608 | /* We assume that the loop exit condition is at the end of the loop. i.e, | |
5609 | that the loop is represented as a do-while (with a proper if-guard | |
5610 | before the loop if needed), where the loop header contains all the | |
5611 | executable statements, and the latch is empty. */ | |
5612 | if (!empty_block_p (loop->latch)) | |
5613 | { | |
5614 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5615 | fprintf (dump_file, "not vectorized: unexpectd loop form."); | |
5616 | return NULL; | |
5617 | } | |
5618 | ||
d6901754 DN |
5619 | /* Make sure we have a preheader basic block. */ |
5620 | if (!loop->pre_header) | |
5621 | { | |
5622 | rescan = true; | |
5623 | loop_split_edge_with (loop_preheader_edge (loop), NULL); | |
5624 | } | |
5625 | ||
5626 | /* Make sure there exists a single-predecessor exit bb: */ | |
5627 | if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1) | |
5628 | { | |
5629 | rescan = true; | |
5630 | loop_split_edge_with (loop->exit_edges[0], NULL); | |
5631 | } | |
5632 | ||
5633 | if (rescan) | |
5634 | { | |
5635 | flow_loop_scan (loop, LOOP_ALL); | |
5636 | /* Flow loop scan does not update loop->single_exit field. */ | |
5637 | loop->single_exit = loop->exit_edges[0]; | |
5638 | } | |
5639 | ||
79fe1b3b DN |
5640 | if (empty_block_p (loop->header)) |
5641 | { | |
5642 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5643 | fprintf (dump_file, "not vectorized: empty loop."); | |
5644 | return NULL; | |
5645 | } | |
5646 | ||
5647 | loop_cond = vect_get_loop_niters (loop, &number_of_iterations); | |
5648 | if (!loop_cond) | |
5649 | { | |
5650 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5651 | fprintf (dump_file, "not vectorized: complicated exit condition."); | |
5652 | return NULL; | |
5653 | } | |
a023975e OG |
5654 | |
5655 | if (!number_of_iterations) | |
79fe1b3b DN |
5656 | { |
5657 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
a023975e OG |
5658 | fprintf (dump_file, |
5659 | "not vectorized: number of iterations cannot be computed."); | |
79fe1b3b DN |
5660 | return NULL; |
5661 | } | |
5662 | ||
d6901754 DN |
5663 | if (chrec_contains_undetermined (number_of_iterations)) |
5664 | { | |
5665 | if (vect_debug_details (NULL)) | |
5666 | fprintf (dump_file, "Infinite number of iterations."); | |
5667 | return false; | |
5668 | } | |
5669 | ||
a023975e OG |
5670 | loop_vinfo = new_loop_vec_info (loop); |
5671 | LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; | |
d6901754 | 5672 | |
a023975e OG |
5673 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
5674 | { | |
d6901754 DN |
5675 | if (vect_debug_details (loop)) |
5676 | { | |
5677 | fprintf (dump_file, "loop bound unknown.\n"); | |
5678 | fprintf (dump_file, "Symbolic number of iterations is "); | |
5679 | print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS); | |
5680 | } | |
a023975e OG |
5681 | } |
5682 | else | |
5683 | if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) | |
79fe1b3b DN |
5684 | { |
5685 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
5686 | fprintf (dump_file, "not vectorized: number of iterations = 0."); | |
5687 | return NULL; | |
5688 | } | |
5689 | ||
79fe1b3b | 5690 | LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; |
79fe1b3b DN |
5691 | |
5692 | return loop_vinfo; | |
5693 | } | |
5694 | ||
5695 | ||
5696 | /* Function vect_analyze_loop. | |
5697 | ||
5698 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
5699 | for it. The different analyses will record information in the | |
5700 | loop_vec_info struct. */ | |
5701 | ||
5702 | static loop_vec_info | |
5703 | vect_analyze_loop (struct loop *loop) | |
5704 | { | |
5705 | bool ok; | |
5706 | loop_vec_info loop_vinfo; | |
5707 | ||
5708 | if (vect_debug_details (NULL)) | |
5709 | fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n"); | |
5710 | ||
5711 | /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ | |
5712 | ||
5713 | loop_vinfo = vect_analyze_loop_form (loop); | |
5714 | if (!loop_vinfo) | |
5715 | { | |
5716 | if (vect_debug_details (loop)) | |
5717 | fprintf (dump_file, "bad loop form."); | |
5718 | return NULL; | |
5719 | } | |
5720 | ||
5721 | /* Find all data references in the loop (which correspond to vdefs/vuses) | |
5722 | and analyze their evolution in the loop. | |
5723 | ||
6775f1f3 | 5724 | FORNOW: Handle only simple, array references, which |
79fe1b3b DN |
5725 | alignment can be forced, and aligned pointer-references. */ |
5726 | ||
5727 | ok = vect_analyze_data_refs (loop_vinfo); | |
5728 | if (!ok) | |
5729 | { | |
5730 | if (vect_debug_details (loop)) | |
5731 | fprintf (dump_file, "bad data references."); | |
5732 | destroy_loop_vec_info (loop_vinfo); | |
5733 | return NULL; | |
5734 | } | |
5735 | ||
79fe1b3b DN |
5736 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
5737 | ||
5738 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); | |
5739 | if (!ok) | |
5740 | { | |
5741 | if (vect_debug_details (loop)) | |
5742 | fprintf (dump_file, "unexpected pattern."); | |
5743 | if (vect_debug_details (loop)) | |
5744 | fprintf (dump_file, "not vectorized: unexpected pattern."); | |
5745 | destroy_loop_vec_info (loop_vinfo); | |
5746 | return NULL; | |
5747 | } | |
5748 | ||
79fe1b3b DN |
5749 | /* Check that all cross-iteration scalar data-flow cycles are OK. |
5750 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | |
5751 | ||
5752 | ok = vect_analyze_scalar_cycles (loop_vinfo); | |
5753 | if (!ok) | |
5754 | { | |
5755 | if (vect_debug_details (loop)) | |
5756 | fprintf (dump_file, "bad scalar cycle."); | |
5757 | destroy_loop_vec_info (loop_vinfo); | |
5758 | return NULL; | |
5759 | } | |
5760 | ||
79fe1b3b DN |
5761 | /* Analyze data dependences between the data-refs in the loop. |
5762 | FORNOW: fail at the first data dependence that we encounter. */ | |
5763 | ||
5764 | ok = vect_analyze_data_ref_dependences (loop_vinfo); | |
5765 | if (!ok) | |
5766 | { | |
5767 | if (vect_debug_details (loop)) | |
5768 | fprintf (dump_file, "bad data dependence."); | |
5769 | destroy_loop_vec_info (loop_vinfo); | |
5770 | return NULL; | |
5771 | } | |
5772 | ||
79fe1b3b DN |
5773 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
5774 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | |
5775 | ||
5776 | ok = vect_analyze_data_ref_accesses (loop_vinfo); | |
5777 | if (!ok) | |
5778 | { | |
5779 | if (vect_debug_details (loop)) | |
5780 | fprintf (dump_file, "bad data access."); | |
5781 | destroy_loop_vec_info (loop_vinfo); | |
5782 | return NULL; | |
5783 | } | |
5784 | ||
79fe1b3b DN |
5785 | /* Analyze the alignment of the data-refs in the loop. |
5786 | FORNOW: Only aligned accesses are handled. */ | |
5787 | ||
5788 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | |
5789 | if (!ok) | |
5790 | { | |
5791 | if (vect_debug_details (loop)) | |
5792 | fprintf (dump_file, "bad data alignment."); | |
5793 | destroy_loop_vec_info (loop_vinfo); | |
5794 | return NULL; | |
5795 | } | |
5796 | ||
79fe1b3b DN |
5797 | /* Scan all the operations in the loop and make sure they are |
5798 | vectorizable. */ | |
5799 | ||
5800 | ok = vect_analyze_operations (loop_vinfo); | |
5801 | if (!ok) | |
5802 | { | |
5803 | if (vect_debug_details (loop)) | |
5804 | fprintf (dump_file, "bad operation or unsupported loop bound."); | |
5805 | destroy_loop_vec_info (loop_vinfo); | |
5806 | return NULL; | |
5807 | } | |
5808 | ||
5809 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | |
5810 | ||
5811 | return loop_vinfo; | |
5812 | } | |
5813 | ||
5814 | ||
/* Function need_imm_uses_for.

   Return whether we ought to include information for 'var'
   when calculating immediate uses.  For this pass we only want use
   information for non-virtual variables.  */

static bool
need_imm_uses_for (tree var)
{
  /* Virtual operands (memory) are excluded; their dependences are
     handled by the data-reference analysis instead.  */
  return is_gimple_reg (var);
}
5826 | ||
5827 | ||
/* Function vectorize_loops.

   Entry Point to loop vectorization phase.

   Analyzes every original loop in LOOPS, transforms the vectorizable
   ones, then releases per-loop analysis data and rewrites the function
   back into (loop-closed) SSA form.  */

void
vectorize_loops (struct loops *loops)
{
  unsigned int i, loops_num;
  unsigned int num_vectorized_loops = 0;

  /* Does the target support SIMD?  */
  /* FORNOW: until more sophisticated machine modelling is in place.  */
  if (!UNITS_PER_SIMD_WORD)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "vectorizer: target vector size is not defined.");
      return;
    }

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
#endif

  /* Build immediate-use information, restricted (via need_imm_uses_for)
     to non-virtual variables.  */
  compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);

  /* ----------- Analyze loops. ----------- */

  /* If some loop was duplicated, it gets bigger number
     than all previously defined loops. This fact allows us to run
     only over initial loops skipping newly generated ones.  */
  loops_num = loops->num;
  for (i = 1; i < loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      struct loop *loop = loops->parray[i];

      /* parray may contain holes for removed loops.  */
      if (!loop)
	continue;

      /* The analysis result is stashed in loop->aux so the finalize
	 pass below can free it even for loops that were not
	 transformed.  */
      loop_vinfo = vect_analyze_loop (loop);
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	continue;

      vect_transform_loop (loop_vinfo, loops);
      num_vectorized_loops++;
    }

  if (vect_debug_stats (NULL) || vect_debug_details (NULL))
    fprintf (dump_file, "\nvectorized %u loops in function.\n",
	     num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  free_df ();
  for (i = 1; i < loops_num; i++)
    {
      struct loop *loop = loops->parray[i];
      loop_vec_info loop_vinfo;

      if (!loop)
	continue;
      /* NOTE(review): loop_vinfo may be NULL here when the analysis
	 above failed for this loop — assumes destroy_loop_vec_info
	 tolerates NULL; verify in its definition.  */
      loop_vinfo = loop->aux;
      destroy_loop_vec_info (loop_vinfo);
      loop->aux = NULL;
    }

  /* The transformation invalidated SSA form; rebuild it, and restore
     loop-closed SSA for later loop passes.  */
  rewrite_into_ssa (false);
  rewrite_into_loop_closed_ssa (); /* FORNOW */
  bitmap_clear (vars_to_rename);
}