/* Vectorizer
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 | #ifndef GCC_TREE_VECTORIZER_H | |
22 | #define GCC_TREE_VECTORIZER_H | |
23 | ||
24 | typedef class _stmt_vec_info *stmt_vec_info; | |
25 | typedef struct _slp_tree *slp_tree; | |
26 | ||
27 | #include "tree-data-ref.h" | |
28 | #include "tree-hash-traits.h" | |
29 | #include "target.h" | |
30 | #include "internal-fn.h" | |
31 | #include "tree-ssa-operands.h" | |
32 | #include "gimple-match.h" | |
33 | ||
/* Used for naming of new temporaries.  */
enum vect_var_kind {
  vect_simple_var,
  vect_pointer_var,
  vect_scalar_var,
  vect_mask_var
};
41 | ||
/* Defines type of operation.  The explicit "= 1" makes each value
   equal to the operand count implied by its name.  */
enum operation_type {
  unary_op = 1,
  binary_op,
  ternary_op
};
48 | ||
/* Define type of available alignment support.  */
enum dr_alignment_support {
  dr_unaligned_unsupported,
  dr_unaligned_supported,
  dr_explicit_realign,
  dr_explicit_realign_optimized,
  dr_aligned
};
57 | ||
/* Define type of def-use cross-iteration cycle.  */
enum vect_def_type {
  vect_uninitialized_def = 0,
  vect_constant_def = 1,
  vect_external_def,
  vect_internal_def,
  vect_induction_def,
  vect_reduction_def,
  vect_double_reduction_def,
  vect_nested_cycle,
  vect_first_order_recurrence,
  vect_condition_def,
  vect_unknown_def_type
};
72 | ||
/* Define operation type of linear/non-linear induction variable.
   vect_step_op_add is the classical linear IV; the others describe
   non-linear step operations (negate, multiply, shift left/right).  */
enum vect_induction_op_type {
  vect_step_op_add = 0,
  vect_step_op_neg,
  vect_step_op_mul,
  vect_step_op_shl,
  vect_step_op_shr
};
81 | ||
/* Define type of reduction.  */
enum vect_reduction_type {
  TREE_CODE_REDUCTION,
  COND_REDUCTION,
  INTEGER_INDUC_COND_REDUCTION,
  CONST_COND_REDUCTION,

  /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
     to implement:

       for (int i = 0; i < VF; ++i)
         res = cond[i] ? val[i] : res;  */
  EXTRACT_LAST_REDUCTION,

  /* Use a folding reduction within the loop to implement:

       for (int i = 0; i < VF; ++i)
         res = res OP val[i];

     (with no reassociation).  */
  FOLD_LEFT_REDUCTION
};
104 | ||
/* Nonzero if def type D is a cross-iteration cycle the vectorizer
   can handle (see vect_def_type above).  */
#define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
				   || ((D) == vect_double_reduction_def) \
				   || ((D) == vect_nested_cycle))
108 | ||
/* Structure to encapsulate information about a group of like
   instructions to be presented to the target cost model.  */
struct stmt_info_for_cost {
  /* Number of instructions in the group.  */
  int count;
  /* The kind of cost being recorded.  */
  enum vect_cost_for_stmt kind;
  /* Cost model location the cost applies to.  */
  enum vect_cost_model_location where;
  /* The statement (or SLP node) the cost is for; either may be null.  */
  stmt_vec_info stmt_info;
  slp_tree node;
  tree vectype;
  int misalign;
};

typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
122 | ||
/* Maps base addresses to an innermost_loop_behavior and the stmt it was
   derived from that gives the maximum known alignment for that base.  */
typedef hash_map<tree_operand_hash,
		 std::pair<stmt_vec_info, innermost_loop_behavior *> >
	  vec_base_alignments;
128 | ||
/* Represents elements [START, START + LENGTH) of cyclical array OPS
   (i.e. OPS repeated to give at least START + LENGTH elements).  */
struct vect_scalar_ops_slice
{
  /* Return element I of the slice (indexing wraps around *OPS).  */
  tree op (unsigned int i) const;
  bool all_same_p () const;

  /* The underlying operand array; the slice indexes into it cyclically.  */
  vec<tree> *ops;
  /* First element of the slice.  */
  unsigned int start;
  /* Number of elements in the slice.  */
  unsigned int length;
};
140 | ||
141 | /* Return element I of the slice. */ | |
142 | inline tree | |
143 | vect_scalar_ops_slice::op (unsigned int i) const | |
144 | { | |
145 | return (*ops)[(i + start) % ops->length ()]; | |
146 | } | |
147 | ||
/* Hash traits for vect_scalar_ops_slice.  */
struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice>
{
  typedef vect_scalar_ops_slice value_type;
  typedef vect_scalar_ops_slice compare_type;

  /* Zero-initialized memory (length == 0) is a valid empty entry.  */
  static const bool empty_zero_p = true;

  /* LENGTH doubles as the occupancy marker: ~0U means deleted, 0 empty.  */
  static void mark_deleted (value_type &s) { s.length = ~0U; }
  static void mark_empty (value_type &s) { s.length = 0; }
  static bool is_deleted (const value_type &s) { return s.length == ~0U; }
  static bool is_empty (const value_type &s) { return s.length == 0; }
  /* Defined out of line.  */
  static hashval_t hash (const value_type &);
  static bool equal (const value_type &, const compare_type &);
};
163 | ||
/************************************************************************
  SLP
 ************************************************************************/
/* A lane permutation: pairs of { operand number, lane number }
   (see _slp_tree::lane_permutation below).  */
typedef vec<std::pair<unsigned, unsigned> > lane_permutation_t;
typedef auto_vec<std::pair<unsigned, unsigned>, 16> auto_lane_permutation_t;
/* A load permutation relative to the stores.  */
typedef vec<unsigned> load_permutation_t;
typedef auto_vec<unsigned, 16> auto_load_permutation_t;
171 | ||
/* A computation tree of an SLP instance.  Each node corresponds to a group of
   stmts to be packed in a SIMD stmt.  */
struct _slp_tree {
  _slp_tree ();
  ~_slp_tree ();

  /* Record a vectorized def for this node; the gimple * overload is
     defined out of line.  */
  void push_vec_def (gimple *def);
  void push_vec_def (tree def) { vec_defs.quick_push (def); }

  /* Nodes that contain def-stmts of this node statements operands.  */
  vec<slp_tree> children;

  /* A group of scalar stmts to be vectorized together.  */
  vec<stmt_vec_info> stmts;
  /* A group of scalar operands to be vectorized together.  */
  vec<tree> ops;
  /* The representative that should be used for analysis and
     code generation.  */
  stmt_vec_info representative;

  /* Load permutation relative to the stores, NULL if there is no
     permutation.  */
  load_permutation_t load_permutation;
  /* Lane permutation of the operands scalar lanes encoded as pairs
     of { operand number, lane number }.  The number of elements
     denotes the number of output lanes.  */
  lane_permutation_t lane_permutation;

  /* Selected SIMD clone's function info.  First vector element
     is SIMD clone's function decl, followed by a pair of trees (base + step)
     for linear arguments (pair of NULLs for other arguments).  */
  vec<tree> simd_clone_info;

  tree vectype;
  /* Vectorized defs.  */
  vec<tree> vec_defs;
  /* Number of vector stmts that are created to replace the group of scalar
     stmts.  It is calculated during the transformation phase as the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
     divided by vector size.  */
  unsigned int vec_stmts_size;

  /* Reference count in the SLP graph.  */
  unsigned int refcnt;
  /* The maximum number of vector elements for the subtree rooted
     at this node.  */
  poly_uint64 max_nunits;
  /* The DEF type of this node.  */
  enum vect_def_type def_type;
  /* The number of scalar lanes produced by this node.  */
  unsigned int lanes;
  /* The operation of this node.  */
  enum tree_code code;

  /* NOTE(review): presumably the node's index when the SLP graph is
     treated as a plain graph; assigned elsewhere — confirm.  */
  int vertex;

  /* If not NULL this is a cached failed SLP discovery attempt with
     the lanes that failed during SLP discovery as 'false'.  This is
     a copy of the matches array.  */
  bool *failed;

  /* Allocate from slp_tree_pool.  */
  static void *operator new (size_t);

  /* Return memory to slp_tree_pool.  */
  static void operator delete (void *, size_t);

  /* Linked list of nodes to release when we free the slp_tree_pool.  */
  slp_tree next_node;
  slp_tree prev_node;
};
243 | ||
/* The enum describes the type of operations that an SLP instance
   can perform.  */

enum slp_instance_kind {
    slp_inst_kind_store,
    slp_inst_kind_reduc_group,
    slp_inst_kind_reduc_chain,
    slp_inst_kind_bb_reduc,
    slp_inst_kind_ctor
};
254 | ||
/* SLP instance is a sequence of stmts in a loop that can be packed into
   SIMD stmts.  */
typedef class _slp_instance {
public:
  /* The root of SLP tree.  */
  slp_tree root;

  /* For vector constructors, the constructor stmt that the SLP tree is built
     from, NULL otherwise.  */
  vec<stmt_vec_info> root_stmts;

  /* For slp_inst_kind_bb_reduc the defs that were not vectorized, NULL
     otherwise.  */
  vec<tree> remain_defs;

  /* The unrolling factor required to vectorize this SLP instance.  */
  poly_uint64 unrolling_factor;

  /* The group of nodes that contain loads of this SLP instance.  */
  vec<slp_tree> loads;

  /* The SLP node containing the reduction PHIs.  */
  slp_tree reduc_phis;

  /* Vector cost of this entry to the SLP graph.  */
  stmt_vector_for_cost cost_vec;

  /* If this instance is the main entry of a subgraph the set of
     entries into the same subgraph, including itself.  */
  vec<_slp_instance *> subgraph_entries;

  /* The type of operation the SLP instance is performing.  */
  slp_instance_kind kind;

  /* Defined out of line; reports a location for dump messages.  */
  dump_user_location_t location () const;
} *slp_instance;
291 | ||
292 | ||
/* Access Functions.  Field accessors for _slp_instance and _slp_tree;
   see the corresponding member comments for semantics.  */
#define SLP_INSTANCE_TREE(S) (S)->root
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
#define SLP_INSTANCE_LOADS(S) (S)->loads
#define SLP_INSTANCE_ROOT_STMTS(S) (S)->root_stmts
#define SLP_INSTANCE_REMAIN_DEFS(S) (S)->remain_defs
#define SLP_INSTANCE_KIND(S) (S)->kind

#define SLP_TREE_CHILDREN(S) (S)->children
#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
#define SLP_TREE_SCALAR_OPS(S) (S)->ops
#define SLP_TREE_REF_COUNT(S) (S)->refcnt
#define SLP_TREE_VEC_DEFS(S) (S)->vec_defs
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
#define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation
#define SLP_TREE_SIMD_CLONE_INFO(S) (S)->simd_clone_info
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
#define SLP_TREE_VECTYPE(S) (S)->vectype
#define SLP_TREE_REPRESENTATIVE(S) (S)->representative
#define SLP_TREE_LANES(S) (S)->lanes
#define SLP_TREE_CODE(S) (S)->code
315 | ||
/* The style used for implementing partial-vector loops (masks,
   AVX512-style masks, or lengths), or none.  */
enum vect_partial_vector_style {
    vect_partial_vectors_none,
    vect_partial_vectors_while_ult,
    vect_partial_vectors_avx512,
    vect_partial_vectors_len
};
322 | ||
/* Key for map that records association between
   scalar conditions and corresponding loop mask, and
   is populated by vect_record_loop_mask.  */

struct scalar_cond_masked_key
{
  scalar_cond_masked_key (tree t, unsigned ncopies_)
    : ncopies (ncopies_)
  {
    get_cond_ops_from_tree (t);
  }

  /* Decompose condition T into the fields below; defined out of line.
     NOTE(review): the constructor itself only sets ncopies, so this
     helper is presumably expected to initialize code/op0/op1 and
     inverted_p — confirm at its definition.  */
  void get_cond_ops_from_tree (tree);

  unsigned ncopies;
  bool inverted_p;
  tree_code code;
  tree op0;
  tree op1;
};
343 | ||
/* Hash traits for scalar_cond_masked_key so it can be used directly
   in hash_set/hash_map.  */
template<>
struct default_hash_traits<scalar_cond_masked_key>
{
  typedef scalar_cond_masked_key compare_type;
  typedef scalar_cond_masked_key value_type;

  /* Hash exactly the fields compared in equal below.  */
  static inline hashval_t
  hash (value_type v)
  {
    inchash::hash h;
    h.add_int (v.code);
    inchash::add_expr (v.op0, h, 0);
    inchash::add_expr (v.op1, h, 0);
    h.add_int (v.ncopies);
    h.add_flag (v.inverted_p);
    return h.end ();
  }

  static inline bool
  equal (value_type existing, value_type candidate)
  {
    return (existing.ncopies == candidate.ncopies
	    && existing.code == candidate.code
	    && existing.inverted_p == candidate.inverted_p
	    && operand_equal_p (existing.op0, candidate.op0, 0)
	    && operand_equal_p (existing.op1, candidate.op1, 0));
  }

  /* Zero-initialized memory (ncopies == 0) is a valid empty entry.  */
  static const bool empty_zero_p = true;

  static inline void
  mark_empty (value_type &v)
  {
    v.ncopies = 0;
    v.inverted_p = false;
  }

  static inline bool
  is_empty (value_type v)
  {
    return v.ncopies == 0;
  }

  /* Deleted entries are not supported.  */
  static inline void mark_deleted (value_type &) {}

  static inline bool is_deleted (const value_type &)
  {
    return false;
  }

  static inline void remove (value_type &) {}
};
396 | ||
typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;

/* Key and map that records association between vector conditions and
   corresponding loop mask, and is populated by prepare_vec_mask.  */

typedef pair_hash<tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;

/* Describes two objects whose addresses must be unequal for the vectorized
   loop to be valid.  */
typedef std::pair<tree, tree> vec_object_pair;
408 | ||
/* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
   UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR.  */
class vec_lower_bound {
public:
  /* NOTE(review): the default constructor leaves every field
     uninitialized; callers must assign before use.  */
  vec_lower_bound () {}
  vec_lower_bound (tree e, bool u, poly_uint64 m)
    : expr (e), unsigned_p (u), min_value (m) {}

  tree expr;
  bool unsigned_p;
  poly_uint64 min_value;
};
421 | ||
/* Vectorizer state shared between different analyses like vector sizes
   of the same CFG region.  */
class vec_info_shared {
public:
  vec_info_shared();
  ~vec_info_shared();

  /* Defined out of line; snapshot and re-check the datarefs below.  */
  void save_datarefs();
  void check_datarefs();

  /* The number of scalar stmts.  */
  unsigned n_stmts;

  /* All data references.  Freed by free_data_refs, so not an auto_vec.  */
  vec<data_reference_p> datarefs;
  vec<data_reference> datarefs_copy;

  /* The loop nest in which the data dependences are computed.  */
  auto_vec<loop_p> loop_nest;

  /* All data dependences.  Freed by free_dependence_relations, so not
     an auto_vec.  */
  vec<ddr_p> ddrs;
};
446 | ||
/* Vectorizer state common between loop and basic-block vectorization.  */
class vec_info {
public:
  typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
  enum vec_kind { bb, loop };

  vec_info (vec_kind, vec_info_shared *);
  ~vec_info ();

  /* Stmt bookkeeping; all defined out of line.  */
  stmt_vec_info add_stmt (gimple *);
  stmt_vec_info add_pattern_stmt (gimple *, stmt_vec_info);
  stmt_vec_info lookup_stmt (gimple *);
  stmt_vec_info lookup_def (tree);
  stmt_vec_info lookup_single_use (tree);
  class dr_vec_info *lookup_dr (data_reference *);
  void move_dr (stmt_vec_info, stmt_vec_info);
  void remove_stmt (stmt_vec_info);
  void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);
  void insert_on_entry (stmt_vec_info, gimple *);
  void insert_seq_on_entry (stmt_vec_info, gimple_seq);

  /* The type of vectorization.  */
  vec_kind kind;

  /* Shared vectorizer state.  */
  vec_info_shared *shared;

  /* The mapping of GIMPLE UID to stmt_vec_info.  */
  vec<stmt_vec_info> stmt_vec_infos;
  /* Whether the above mapping is complete.  */
  bool stmt_vec_info_ro;

  /* Whether we've done a transform we think OK to not update virtual
     SSA form.  */
  bool any_known_not_updated_vssa;

  /* The SLP graph.  */
  auto_vec<slp_instance> slp_instances;

  /* Maps base addresses to an innermost_loop_behavior that gives the maximum
     known alignment for that base.  */
  vec_base_alignments base_alignments;

  /* All interleaving chains of stores, represented by the first
     stmt in the chain.  */
  auto_vec<stmt_vec_info> grouped_stores;

  /* The set of vector modes used in the vectorized region.  */
  mode_set used_vector_modes;

  /* The argument we should pass to related_vector_mode when looking up
     the vector mode for a scalar mode, or VOIDmode if we haven't yet
     made any decisions about which vector modes to use.  */
  machine_mode vector_mode;

  /* The basic blocks in the vectorization region.  For _loop_vec_info,
     the memory is internally managed, while for _bb_vec_info, it points
     to element space of an external auto_vec<>.  This inconsistency is
     not a good class design pattern.  TODO: improve it with an unified
     auto_vec<> whose lifetime is confined to vec_info object.  */
  basic_block *bbs;

  /* The count of the basic blocks in the vectorization region.  */
  unsigned int nbbs;

private:
  stmt_vec_info new_stmt_vec_info (gimple *stmt);
  void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
  void free_stmt_vec_infos ();
  void free_stmt_vec_info (stmt_vec_info);
};
518 | ||
class _loop_vec_info;
class _bb_vec_info;

/* is_a <> / dyn_cast <> support: a vec_info is a _loop_vec_info iff it
   was created for loop vectorization.  */
template<>
template<>
inline bool
is_a_helper <_loop_vec_info *>::test (vec_info *i)
{
  return i->kind == vec_info::loop;
}

/* Likewise for _bb_vec_info and basic-block vectorization.  */
template<>
template<>
inline bool
is_a_helper <_bb_vec_info *>::test (vec_info *i)
{
  return i->kind == vec_info::bb;
}
537 | ||
/* In general, we can divide the vector statements in a vectorized loop
   into related groups ("rgroups") and say that for each rgroup there is
   some nS such that the rgroup operates on nS values from one scalar
   iteration followed by nS values from the next.  That is, if VF is the
   vectorization factor of the loop, the rgroup operates on a sequence:

     (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)

   where (i,j) represents a scalar value with index j in a scalar
   iteration with index i.

   [ We use the term "rgroup" to emphasise that this grouping isn't
     necessarily the same as the grouping of statements used elsewhere.
     For example, if we implement a group of scalar loads using gather
     loads, we'll use a separate gather load for each scalar load, and
     thus each gather load will belong to its own rgroup. ]

   In general this sequence will occupy nV vectors concatenated
   together.  If these vectors have nL lanes each, the total number
   of scalar values N is given by:

       N = nS * VF = nV * nL

   None of nS, VF, nV and nL are required to be a power of 2.  nS and nV
   are compile-time constants but VF and nL can be variable (if the target
   supports variable-length vectors).

   In classical vectorization, each iteration of the vector loop would
   handle exactly VF iterations of the original scalar loop.  However,
   in vector loops that are able to operate on partial vectors, a
   particular iteration of the vector loop might handle fewer than VF
   iterations of the scalar loop.  The vector lanes that correspond to
   iterations of the scalar loop are said to be "active" and the other
   lanes are said to be "inactive".

   In such vector loops, many rgroups need to be controlled to ensure
   that they have no effect for the inactive lanes.  Conceptually, each
   such rgroup needs a sequence of booleans in the same order as above,
   but with each (i,j) replaced by a boolean that indicates whether
   iteration i is active.  This sequence occupies nV vector controls
   that again have nL lanes each.  Thus the control sequence as a whole
   consists of VF independent booleans that are each repeated nS times.

   Taking mask-based approach as a partially-populated vectors example.
   We make the simplifying assumption that if a sequence of nV masks is
   suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
   VIEW_CONVERTing it.  This holds for all current targets that support
   fully-masked loops.  For example, suppose the scalar loop is:

     float *f;
     double *d;
     for (int i = 0; i < n; ++i)
       {
	 f[i * 2 + 0] += 1.0f;
	 f[i * 2 + 1] += 2.0f;
	 d[i] += 3.0;
       }

   and suppose that vectors have 256 bits.  The vectorized f accesses
   will belong to one rgroup and the vectorized d access to another:

     f rgroup: nS = 2, nV = 1, nL = 8
     d rgroup: nS = 1, nV = 1, nL = 4
	       VF = 4

     [ In this simple example the rgroups do correspond to the normal
       SLP grouping scheme. ]

   If only the first three lanes are active, the masks we need are:

     f rgroup: 1 1 | 1 1 | 1 1 | 0 0
     d rgroup:  1  |  1  |  1  |  0

   Here we can use a mask calculated for f's rgroup for d's, but not
   vice versa.

   Thus for each value of nV, it is enough to provide nV masks, with the
   mask being calculated based on the highest nL (or, equivalently, based
   on the highest nS) required by any rgroup with that nV.  We therefore
   represent the entire collection of masks as a two-level table, with the
   first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
   the second being indexed by the mask index 0 <= i < nV.  */
620 | ||
/* The controls (like masks or lengths) needed by rgroups with nV vectors,
   according to the description above.  */
struct rgroup_controls {
  /* The largest nS for all rgroups that use these controls.
     For vect_partial_vectors_avx512 this is the constant nscalars_per_iter
     for all members of the group.  */
  unsigned int max_nscalars_per_iter;

  /* For the largest nS recorded above, the loop controls divide each scalar
     into FACTOR equal-sized pieces.  This is useful if we need to split
     element-based accesses into byte-based accesses.
     For vect_partial_vectors_avx512 this records nV instead.  */
  unsigned int factor;

  /* This is a vector type with MAX_NSCALARS_PER_ITER * VF / nV elements.
     For mask-based controls, it is the type of the masks in CONTROLS.
     For length-based controls, it can be any vector type that has the
     specified number of elements; the type of the elements doesn't matter.  */
  tree type;

  /* When there is no uniformly used LOOP_VINFO_RGROUP_COMPARE_TYPE this
     is the rgroup specific type used.  */
  tree compare_type;

  /* A vector of nV controls, in iteration order.  */
  vec<tree> controls;

  /* In case of len_load and len_store with a bias there is only one
     rgroup.  This holds the adjusted loop length for this rgroup.  */
  tree bias_adjusted_ctrl;
};
652 | ||
/* The masks recorded for a loop using partial vectors.  */
struct vec_loop_masks
{
  /* Return true if no masks have been recorded yet.  */
  bool is_empty () const { return mask_set.is_empty (); }

  /* Set to record vectype, nvector pairs.  */
  hash_set<pair_hash <nofree_ptr_hash <tree_node>,
		      int_hash<unsigned, 0>>> mask_set;

  /* rgroup_controls used for the partial vector scheme.  */
  auto_vec<rgroup_controls> rgc_vec;
};
664 | ||
/* The lengths recorded for a loop using length-based partial vectors,
   one rgroup_controls entry per nV.  */
typedef auto_vec<rgroup_controls> vec_loop_lens;

/* Pairs of a data reference and a tree; NOTE(review): semantics of the
   tree element are not visible here — confirm at use sites.  */
typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;

/* Information about a reduction accumulator from the main loop that could
   conceivably be reused as the input to a reduction in an epilogue loop.  */
struct vect_reusable_accumulator {
  /* The final value of the accumulator, which forms the input to the
     reduction operation.  */
  tree reduc_input;

  /* The stmt_vec_info that describes the reduction (i.e. the one for
     which is_reduc_info is true).  */
  stmt_vec_info reduc_info;
};
680 | ||
681 | /*-----------------------------------------------------------------*/ | |
682 | /* Info on vectorized loops. */ | |
683 | /*-----------------------------------------------------------------*/ | |
684 | typedef class _loop_vec_info : public vec_info { | |
685 | public: | |
686 | _loop_vec_info (class loop *, vec_info_shared *); | |
687 | ~_loop_vec_info (); | |
688 | ||
689 | /* The loop to which this info struct refers to. */ | |
690 | class loop *loop; | |
691 | ||
692 | /* Number of latch executions. */ | |
693 | tree num_itersm1; | |
694 | /* Number of iterations. */ | |
695 | tree num_iters; | |
696 | /* Number of iterations of the original loop. */ | |
697 | tree num_iters_unchanged; | |
698 | /* Condition under which this loop is analyzed and versioned. */ | |
699 | tree num_iters_assumptions; | |
700 | ||
701 | /* The cost of the vector code. */ | |
702 | class vector_costs *vector_costs; | |
703 | ||
704 | /* The cost of the scalar code. */ | |
705 | class vector_costs *scalar_costs; | |
706 | ||
707 | /* Threshold of number of iterations below which vectorization will not be | |
708 | performed. It is calculated from MIN_PROFITABLE_ITERS and | |
709 | param_min_vect_loop_bound. */ | |
710 | unsigned int th; | |
711 | ||
712 | /* When applying loop versioning, the vector form should only be used | |
713 | if the number of scalar iterations is >= this value, on top of all | |
714 | the other requirements. Ignored when loop versioning is not being | |
715 | used. */ | |
716 | poly_uint64 versioning_threshold; | |
717 | ||
718 | /* Unrolling factor */ | |
719 | poly_uint64 vectorization_factor; | |
720 | ||
721 | /* If this loop is an epilogue loop whose main loop can be skipped, | |
722 | MAIN_LOOP_EDGE is the edge from the main loop to this loop's | |
723 | preheader. SKIP_MAIN_LOOP_EDGE is then the edge that skips the | |
724 | main loop and goes straight to this loop's preheader. | |
725 | ||
726 | Both fields are null otherwise. */ | |
727 | edge main_loop_edge; | |
728 | edge skip_main_loop_edge; | |
729 | ||
730 | /* If this loop is an epilogue loop that might be skipped after executing | |
731 | the main loop, this edge is the one that skips the epilogue. */ | |
732 | edge skip_this_loop_edge; | |
733 | ||
734 | /* The vectorized form of a standard reduction replaces the original | |
735 | scalar code's final result (a loop-closed SSA PHI) with the result | |
736 | of a vector-to-scalar reduction operation. After vectorization, | |
737 | this variable maps these vector-to-scalar results to information | |
738 | about the reductions that generated them. */ | |
739 | hash_map<tree, vect_reusable_accumulator> reusable_accumulators; | |
740 | ||
741 | /* The number of times that the target suggested we unroll the vector loop | |
742 | in order to promote more ILP. This value will be used to re-analyze the | |
743 | loop for vectorization and if successful the value will be folded into | |
744 | vectorization_factor (and therefore exactly divides | |
745 | vectorization_factor). */ | |
746 | unsigned int suggested_unroll_factor; | |
747 | ||
748 | /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR | |
749 | if there is no particular limit. */ | |
750 | unsigned HOST_WIDE_INT max_vectorization_factor; | |
751 | ||
752 | /* The masks that a fully-masked loop should use to avoid operating | |
753 | on inactive scalars. */ | |
754 | vec_loop_masks masks; | |
755 | ||
756 | /* The lengths that a loop with length should use to avoid operating | |
757 | on inactive scalars. */ | |
758 | vec_loop_lens lens; | |
759 | ||
760 | /* Set of scalar conditions that have loop mask applied. */ | |
761 | scalar_cond_masked_set_type scalar_cond_masked_set; | |
762 | ||
763 | /* Set of vector conditions that have loop mask applied. */ | |
764 | vec_cond_masked_set_type vec_cond_masked_set; | |
765 | ||
766 | /* If we are using a loop mask to align memory addresses, this variable | |
767 | contains the number of vector elements that we should skip in the | |
768 | first iteration of the vector loop (i.e. the number of leading | |
769 | elements that should be false in the first mask). */ | |
770 | tree mask_skip_niters; | |
771 | ||
772 | /* The type that the loop control IV should be converted to before | |
773 | testing which of the VF scalars are active and inactive. | |
774 | Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ | |
775 | tree rgroup_compare_type; | |
776 | ||
777 | /* For #pragma omp simd if (x) loops the x expression. If constant 0, | |
778 | the loop should not be vectorized, if constant non-zero, simd_if_cond | |
779 | shouldn't be set and loop vectorized normally, if SSA_NAME, the loop | |
780 | should be versioned on that condition, using scalar loop if the condition | |
781 | is false and vectorized loop otherwise. */ | |
782 | tree simd_if_cond; | |
783 | ||
784 | /* The type that the vector loop control IV should have when | |
785 | LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */ | |
786 | tree rgroup_iv_type; | |
787 | ||
788 | /* The style used for implementing partial vectors when | |
789 | LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */ | |
790 | vect_partial_vector_style partial_vector_style; | |
791 | ||
792 | /* Unknown DRs according to which loop was peeled. */ | |
793 | class dr_vec_info *unaligned_dr; | |
794 | ||
795 | /* peeling_for_alignment indicates whether peeling for alignment will take | |
796 | place, and what the peeling factor should be: | |
797 | peeling_for_alignment = X means: | |
798 | If X=0: Peeling for alignment will not be applied. | |
799 | If X>0: Peel first X iterations. | |
800 | If X=-1: Generate a runtime test to calculate the number of iterations | |
801 | to be peeled, using the dataref recorded in the field | |
802 | unaligned_dr. */ | |
803 | int peeling_for_alignment; | |
804 | ||
805 | /* The mask used to check the alignment of pointers or arrays. */ | |
806 | int ptr_mask; | |
807 | ||
808 | /* Data Dependence Relations defining address ranges that are candidates | |
809 | for a run-time aliasing check. */ | |
810 | auto_vec<ddr_p> may_alias_ddrs; | |
811 | ||
812 | /* Data Dependence Relations defining address ranges together with segment | |
813 | lengths from which the run-time aliasing check is built. */ | |
814 | auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; | |
815 | ||
816 | /* Check that the addresses of each pair of objects is unequal. */ | |
817 | auto_vec<vec_object_pair> check_unequal_addrs; | |
818 | ||
819 | /* List of values that are required to be nonzero. This is used to check | |
820 | whether things like "x[i * n] += 1;" are safe and eventually gets added | |
821 | to the checks for lower bounds below. */ | |
822 | auto_vec<tree> check_nonzero; | |
823 | ||
824 | /* List of values that need to be checked for a minimum value. */ | |
825 | auto_vec<vec_lower_bound> lower_bounds; | |
826 | ||
827 | /* Statements in the loop that have data references that are candidates for a | |
828 | runtime (loop versioning) misalignment check. */ | |
829 | auto_vec<stmt_vec_info> may_misalign_stmts; | |
830 | ||
831 | /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ | |
832 | auto_vec<stmt_vec_info> reductions; | |
833 | ||
834 | /* All reduction chains in the loop, represented by the first | |
835 | stmt in the chain. */ | |
836 | auto_vec<stmt_vec_info> reduction_chains; | |
837 | ||
838 | /* Cost vector for a single scalar iteration. */ | |
839 | auto_vec<stmt_info_for_cost> scalar_cost_vec; | |
840 | ||
841 | /* Map of IV base/step expressions to inserted name in the preheader. */ | |
842 | hash_map<tree_operand_hash, tree> *ivexpr_map; | |
843 | ||
844 | /* Map of OpenMP "omp simd array" scan variables to corresponding | |
845 | rhs of the store of the initializer. */ | |
846 | hash_map<tree, tree> *scan_map; | |
847 | ||
848 | /* The unrolling factor needed to SLP the loop. In case of that pure SLP is | |
849 | applied to the loop, i.e., no unrolling is needed, this is 1. */ | |
850 | poly_uint64 slp_unrolling_factor; | |
851 | ||
852 | /* The factor used to over weight those statements in an inner loop | |
853 | relative to the loop being vectorized. */ | |
854 | unsigned int inner_loop_cost_factor; | |
855 | ||
856 | /* Is the loop vectorizable? */ | |
857 | bool vectorizable; | |
858 | ||
859 | /* Records whether we still have the option of vectorizing this loop | |
860 | using partially-populated vectors; in other words, whether it is | |
861 | still possible for one iteration of the vector loop to handle | |
862 | fewer than VF scalars. */ | |
863 | bool can_use_partial_vectors_p; | |
864 | ||
865 | /* True if we've decided to use partially-populated vectors, so that | |
866 | the vector loop can handle fewer than VF scalars. */ | |
867 | bool using_partial_vectors_p; | |
868 | ||
869 | /* True if we've decided to use a decrementing loop control IV that counts | |
870 | scalars. This can be done for any loop that: | |
871 | ||
872 | (a) uses length "controls"; and | |
873 | (b) can iterate more than once. */ | |
874 | bool using_decrementing_iv_p; | |
875 | ||
876 | /* True if we've decided to use output of select_vl to adjust IV of | |
877 | both loop control and data reference pointer. This is only true | |
878 | for single-rgroup control. */ | |
879 | bool using_select_vl_p; | |
880 | ||
881 | /* True if we've decided to use partially-populated vectors for the | |
882 | epilogue of loop. */ | |
883 | bool epil_using_partial_vectors_p; | |
884 | ||
885 | /* The bias for len_load and len_store. For now, only 0 and -1 are | |
886 | supported. -1 must be used when a backend does not support | |
887 | len_load/len_store with a length of zero. */ | |
888 | signed char partial_load_store_bias; | |
889 | ||
890 | /* When we have grouped data accesses with gaps, we may introduce invalid | |
891 | memory accesses. We peel the last iteration of the loop to prevent | |
892 | this. */ | |
893 | bool peeling_for_gaps; | |
894 | ||
895 | /* When the number of iterations is not a multiple of the vector size | |
896 | we need to peel off iterations at the end to form an epilogue loop. */ | |
897 | bool peeling_for_niter; | |
898 | ||
899 | /* When the loop has early breaks that we can vectorize we need to peel | |
900 | the loop for the break finding loop. */ | |
901 | bool early_breaks; | |
902 | ||
903 | /* List of loop additional IV conditionals found in the loop. */ | |
904 | auto_vec<gcond *> conds; | |
905 | ||
906 | /* Main loop IV cond. */ | |
907 | gcond* loop_iv_cond; | |
908 | ||
909 | /* True if there are no loop carried data dependencies in the loop. | |
910 | If loop->safelen <= 1, then this is always true, either the loop | |
911 | didn't have any loop carried data dependencies, or the loop is being | |
912 | vectorized guarded with some runtime alias checks, or couldn't | |
913 | be vectorized at all, but then this field shouldn't be used. | |
914 | For loop->safelen >= 2, the user has asserted that there are no | |
915 | backward dependencies, but there still could be loop carried forward | |
916 | dependencies in such loops. This flag will be false if normal | |
917 | vectorizer data dependency analysis would fail or require versioning | |
918 | for alias, but because of loop->safelen >= 2 it has been vectorized | |
919 | even without versioning for alias. E.g. in: | |
920 | #pragma omp simd | |
921 | for (int i = 0; i < m; i++) | |
922 | a[i] = a[i + k] * c; | |
923 | (or #pragma simd or #pragma ivdep) we can vectorize this and it will | |
924 | DTRT even for k > 0 && k < m, but without safelen we would not | |
925 | vectorize this, so this field would be false. */ | |
926 | bool no_data_dependencies; | |
927 | ||
928 | /* Mark loops having masked stores. */ | |
929 | bool has_mask_store; | |
930 | ||
931 | /* Queued scaling factor for the scalar loop. */ | |
932 | profile_probability scalar_loop_scaling; | |
933 | ||
934 | /* If if-conversion versioned this loop before conversion, this is the | |
935 | loop version without if-conversion. */ | |
936 | class loop *scalar_loop; | |
937 | ||
938 | /* For loops being epilogues of already vectorized loops | |
939 | this points to the original vectorized loop. Otherwise NULL. */ | |
940 | _loop_vec_info *orig_loop_info; | |
941 | ||
942 | /* Used to store loop_vec_infos of epilogues of this loop during | |
943 | analysis. */ | |
944 | vec<_loop_vec_info *> epilogue_vinfos; | |
945 | ||
946 | /* The controlling loop IV for the current loop when vectorizing. This IV | |
947 | controls the natural exits of the loop. */ | |
948 | edge vec_loop_iv_exit; | |
949 | ||
950 | /* The controlling loop IV for the epilogue loop when vectorizing. This IV | |
951 | controls the natural exits of the loop. */ | |
952 | edge vec_epilogue_loop_iv_exit; | |
953 | ||
954 | /* The controlling loop IV for the scalar loop being vectorized. This IV | |
955 | controls the natural exits of the loop. */ | |
956 | edge scalar_loop_iv_exit; | |
957 | ||
958 | /* Used to store the list of stores needing to be moved if doing early | |
959 | break vectorization as they would violate the scalar loop semantics if | |
960 | vectorized in their current location. These are stored in order that they | |
961 | need to be moved. */ | |
962 | auto_vec<gimple *> early_break_stores; | |
963 | ||
964 | /* The final basic block where to move statements to. In the case of | |
965 | multiple exits this could be pretty far away. */ | |
966 | basic_block early_break_dest_bb; | |
967 | ||
968 | /* Statements whose VUSES need updating if early break vectorization is to | |
969 | happen. */ | |
970 | auto_vec<gimple*> early_break_vuses; | |
971 | } *loop_vec_info; | |
972 | ||
/* Access Functions.  */
#define LOOP_VINFO_LOOP(L) (L)->loop
#define LOOP_VINFO_IV_EXIT(L) (L)->vec_loop_iv_exit
#define LOOP_VINFO_EPILOGUE_IV_EXIT(L) (L)->vec_epilogue_loop_iv_exit
#define LOOP_VINFO_SCALAR_IV_EXIT(L) (L)->scalar_loop_iv_exit
#define LOOP_VINFO_BBS(L) (L)->bbs
#define LOOP_VINFO_NBBS(L) (L)->nbbs
#define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1
#define LOOP_VINFO_NITERS(L) (L)->num_iters
/* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
   prologue peeling retain total unchanged scalar loop iterations for
   cost model.  */
#define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged
#define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions
#define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
#define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
#define LOOP_VINFO_USING_SELECT_VL_P(L) (L)->using_select_vl_p
#define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \
  (L)->epil_using_partial_vectors_p
#define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
#define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
#define LOOP_VINFO_MASKS(L) (L)->masks
#define LOOP_VINFO_LENS(L) (L)->lens
#define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
#define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
#define LOOP_VINFO_PARTIAL_VECTORS_STYLE(L) (L)->partial_vector_style
#define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
#define LOOP_VINFO_N_STMTS(L) (L)->shared->n_stmts
#define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
#define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
#define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
#define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs
#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
#define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
#define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
#define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
#define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
#define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores
#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
  (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
#define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop
#define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor

/* True if the loop implements partial vectors with masks: it is using
   partial vectors and at least one rgroup of masks has been recorded.  */
#define LOOP_VINFO_FULLY_MASKED_P(L)		\
  (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)	\
   && !LOOP_VINFO_MASKS (L).is_empty ())

/* Likewise, but implementing partial vectors with lengths rather than
   with masks.  */
#define LOOP_VINFO_FULLY_WITH_LENGTH_P(L)	\
  (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)	\
   && !LOOP_VINFO_LENS (L).is_empty ())

/* Predicates for the individual reasons that force loop versioning;
   LOOP_REQUIRES_VERSIONING below is their disjunction.  */
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)	\
  ((L)->may_misalign_stmts.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)		\
  ((L)->comp_alias_ddrs.length () > 0			\
   || (L)->check_unequal_addrs.length () > 0		\
   || (L)->lower_bounds.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)		\
  (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L)	\
  (LOOP_VINFO_SIMD_IF_COND (L))
#define LOOP_REQUIRES_VERSIONING(L)			\
  (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L)		\
   || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L)		\
   || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)		\
   || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))

/* True if the number of scalar iterations is a (positive) compile-time
   constant.  */
#define LOOP_VINFO_NITERS_KNOWN_P(L)		\
  (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)

/* True if this loop_vec_info describes the epilogue of an already
   vectorized main loop (see the orig_loop_info field).  */
#define LOOP_VINFO_EPILOGUE_P(L) \
  (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)

/* The maximum vectorization factor of the main loop this epilogue
   loop was created from.  */
#define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
  (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))

/* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
   value signifies success, and a NULL value signifies failure, supporting
   propagating an opt_problem * describing the failure back up the call
   stack.  */
typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;
1081 | ||
1082 | inline loop_vec_info | |
1083 | loop_vec_info_for_loop (class loop *loop) | |
1084 | { | |
1085 | return (loop_vec_info) loop->aux; | |
1086 | } | |
1087 | ||
1088 | struct slp_root | |
1089 | { | |
1090 | slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_, | |
1091 | vec<stmt_vec_info> roots_, vec<tree> remain_ = vNULL) | |
1092 | : kind(kind_), stmts(stmts_), roots(roots_), remain(remain_) {} | |
1093 | slp_instance_kind kind; | |
1094 | vec<stmt_vec_info> stmts; | |
1095 | vec<stmt_vec_info> roots; | |
1096 | vec<tree> remain; | |
1097 | }; | |
1098 | ||
/* Vectorization information for a basic-block vectorization region;
   extends vec_info with the SLP roots found in the region.  */
typedef class _bb_vec_info : public vec_info
{
public:
  /* Construct info for the region consisting of the basic blocks BBS;
     defined in tree-vect-slp.cc.  */
  _bb_vec_info (vec<basic_block> bbs, vec_info_shared *);
  ~_bb_vec_info ();

  /* The roots of the SLP graphs discovered in the region.  */
  vec<slp_root> roots;
} *bb_vec_info;
1107 | ||
/* Access functions for _bb_vec_info; the shared->* accessors reach the
   vec_info_shared data common with any enclosing analysis.  */
#define BB_VINFO_BBS(B) (B)->bbs
#define BB_VINFO_NBBS(B) (B)->nbbs
#define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores
#define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
#define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
#define BB_VINFO_DDRS(B) (B)->shared->ddrs
1114 | ||
1115 | /*-----------------------------------------------------------------*/ | |
1116 | /* Info on vectorized defs. */ | |
1117 | /*-----------------------------------------------------------------*/ | |
/* The kind of vectorizable operation a statement is classified as.  */
enum stmt_vec_info_type {
  /* Not (yet) classified.  */
  undef_vec_info_type = 0,
  load_vec_info_type,
  store_vec_info_type,
  shift_vec_info_type,
  op_vec_info_type,
  call_vec_info_type,
  call_simd_clone_vec_info_type,
  assignment_vec_info_type,
  condition_vec_info_type,
  comparison_vec_info_type,
  reduc_vec_info_type,
  induc_vec_info_type,
  type_promotion_vec_info_type,
  type_demotion_vec_info_type,
  type_conversion_vec_info_type,
  cycle_phi_info_type,
  lc_phi_info_type,
  phi_info_type,
  recurr_info_type,
  loop_exit_ctrl_vec_info_type
};
1140 | ||
/* Indicates whether/how a variable is used in the scope of loop/basic
   block.  */
enum vect_relevant {
  /* The def is not used inside the loop/basic block at all.  */
  vect_unused_in_scope = 0,

  /* The def is only used outside the loop.  */
  vect_used_only_live,
  /* The def is in the inner loop, and the use is in the outer loop, and the
     use is a reduction stmt.  */
  vect_used_in_outer_by_reduction,
  /* The def is in the inner loop, and the use is in the outer loop (and is
     not part of reduction).  */
  vect_used_in_outer,

  /* defs that feed computations that end up (only) in a reduction. These
     defs may be used by non-reduction stmts, but eventually, any
     computations/values that are affected by these defs are used to compute
     a reduction (i.e. don't get stored to memory, for example). We use this
     to identify computations that we can change the order in which they are
     computed.  */
  vect_used_by_reduction,

  /* The def is used inside the loop/basic block.  */
  vect_used_in_scope
};
1165 | ||
/* The type of vectorization that can be applied to the stmt: regular loop-based
   vectorization; pure SLP - the stmt is a part of SLP instances and does not
   have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is
   a part of SLP instance and also must be loop-based vectorized, since it has
   uses outside SLP sequences.

   In the loop context the meanings of pure and hybrid SLP are slightly
   different. By saying that pure SLP is applied to the loop, we mean that we
   exploit only intra-iteration parallelism in the loop; i.e., the loop can be
   vectorized without doing any conceptual unrolling, because we don't pack
   together stmts from different iterations, only within a single iteration.
   Loop hybrid SLP means that we exploit both intra-iteration and
   inter-iteration parallelism (e.g., number of elements in the vector is 4
   and the slp-group-size is 2, in which case we don't have enough parallelism
   within an iteration, so we obtain the rest of the parallelism from subsequent
   iterations by unrolling the loop by 2).  */
enum slp_vect_type {
  /* Pure loop-based vectorization.  */
  loop_vect = 0,
  /* The stmt is used only inside SLP instances.  */
  pure_slp,
  /* Both SLP and loop-based vectorization apply to the stmt.  */
  hybrid
};
1187 | ||
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  /* A load.  */
  VLS_LOAD,
  /* A store of a vectorized statement result.  */
  VLS_STORE,
  /* A store of a loop-invariant value.  */
  VLS_STORE_INVARIANT
};
1195 | ||
/* Describes how we're going to vectorize an individual load or store,
   or a group of loads or stores.  */
enum vect_memory_access_type {
  /* An access to an invariant address.  This is used only for loads.  */
  VMAT_INVARIANT,

  /* A simple contiguous access.  */
  VMAT_CONTIGUOUS,

  /* A contiguous access that goes down in memory rather than up,
     with no additional permutation.  This is used only for stores
     of invariants.  */
  VMAT_CONTIGUOUS_DOWN,

  /* A simple contiguous access in which the elements need to be permuted
     after loading or before storing.  Only used for loop vectorization;
     SLP uses separate permutes.  */
  VMAT_CONTIGUOUS_PERMUTE,

  /* A simple contiguous access in which the elements need to be reversed
     after loading or before storing.  */
  VMAT_CONTIGUOUS_REVERSE,

  /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
  VMAT_LOAD_STORE_LANES,

  /* An access in which each scalar element is loaded or stored
     individually.  */
  VMAT_ELEMENTWISE,

  /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
     SLP accesses.  Each unrolled iteration uses a contiguous load
     or store for the whole group, but the groups from separate iterations
     are combined in the same way as for VMAT_ELEMENTWISE.  */
  VMAT_STRIDED_SLP,

  /* The access uses gather loads or scatter stores.  */
  VMAT_GATHER_SCATTER
};
1235 | ||
/* Vectorizer-specific information attached to a data reference.  */
class dr_vec_info {
public:
  /* The data reference itself.  */
  data_reference *dr;
  /* The statement that contains the data reference.  */
  stmt_vec_info stmt;
  /* The analysis group this DR belongs to when doing BB vectorization.
     DRs of the same group belong to the same conditional execution context.  */
  unsigned group;
  /* The misalignment in bytes of the reference, or -1 if not known.  */
  int misalignment;
  /* The byte alignment that we'd ideally like the reference to have,
     and the value that misalignment is measured against.  */
  poly_uint64 target_alignment;
  /* If true the alignment of base_decl needs to be increased.  */
  bool base_misaligned;
  /* The decl whose alignment should be increased when BASE_MISALIGNED.  */
  tree base_decl;

  /* Stores current vectorized loop's offset.  To be added to the DR's
     offset to calculate current offset of data reference.  */
  tree offset;
};
1258 | ||
/* Shorthand for a pointer to a data reference.  */
typedef struct data_reference *dr_p;
1260 | ||
/* Vectorizer-specific information attached to a statement
   (see stmt_vec_info).  */
class _stmt_vec_info {
public:

  /* The kind of vectorizable operation this statement performs.  */
  enum stmt_vec_info_type type;

  /* Indicates whether this stmt is part of a computation whose result is
     used outside the loop.  */
  bool live;

  /* Stmt is part of some pattern (computation idiom).  */
  bool in_pattern_p;

  /* True if the statement was created during pattern recognition as
     part of the replacement for RELATED_STMT.  This implies that the
     statement isn't part of any basic block, although for convenience
     its gimple_bb is the same as for RELATED_STMT.  */
  bool pattern_stmt_p;

  /* Is this statement vectorizable or should it be skipped in (partial)
     vectorization.  */
  bool vectorizable;

  /* The stmt to which this info struct refers to.  */
  gimple *stmt;

  /* The vector type to be used for the LHS of this statement.  */
  tree vectype;

  /* The vectorized stmts.  */
  vec<gimple *> vec_stmts;

  /* The following is relevant only for stmts that contain a non-scalar
     data-ref (array/pointer/struct access).  A GIMPLE stmt is expected to have
     at most one such data-ref.  */

  dr_vec_info dr_aux;

  /* Information about the data-ref relative to this loop
     nest (the loop that is being considered for vectorization).  */
  innermost_loop_behavior dr_wrt_vec_loop;

  /* For loop PHI nodes, the base and evolution part of it.  This makes sure
     this information is still available in vect_update_ivs_after_vectorizer
     where we may not be able to re-analyze the PHI nodes evolution as
     peeling for the prologue loop can make it unanalyzable.  The evolution
     part is still correct after peeling, but the base may have changed from
     the version here.  */
  tree loop_phi_evolution_base_unchanged;
  tree loop_phi_evolution_part;
  enum vect_induction_op_type loop_phi_evolution_type;

  /* Used for various bookkeeping purposes, generally holding a pointer to
     some other stmt S that is in some way "related" to this stmt.
     Current use of this field is:
        If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
        true): S is the "pattern stmt" that represents (and replaces) the
        sequence of stmts that constitutes the pattern.  Similarly, the
        related_stmt of the "pattern stmt" points back to this stmt (which is
        the last stmt in the original sequence of stmts that constitutes the
        pattern).  */
  stmt_vec_info related_stmt;

  /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
     The sequence is attached to the original statement rather than the
     pattern statement.  */
  gimple_seq pattern_def_seq;

  /* Selected SIMD clone's function info.  First vector element
     is SIMD clone's function decl, followed by a pair of trees (base + step)
     for linear arguments (pair of NULLs for other arguments).  */
  vec<tree> simd_clone_info;

  /* Classify the def of this stmt.  */
  enum vect_def_type def_type;

  /* Whether the stmt is SLPed, loop-based vectorized, or both.  */
  enum slp_vect_type slp_type;

  /* Interleaving and reduction chains info.  */
  /* First element in the group.  */
  stmt_vec_info first_element;
  /* Pointer to the next element in the group.  */
  stmt_vec_info next_element;
  /* The size of the group.  */
  unsigned int size;
  /* For stores, number of stores from this group seen.  We vectorize the last
     one.  */
  unsigned int store_count;
  /* For loads only, the gap from the previous load.  For consecutive loads, GAP
     is 1.  */
  unsigned int gap;

  /* The minimum negative dependence distance this stmt participates in
     or zero if none.  */
  unsigned int min_neg_dist;

  /* Not all stmts in the loop need to be vectorized.  e.g, the increment
     of the loop induction variable and computation of array indexes.  relevant
     indicates whether the stmt needs to be vectorized.  */
  enum vect_relevant relevant;

  /* For loads if this is a gather, for stores if this is a scatter.  */
  bool gather_scatter_p;

  /* True if this is an access with loop-invariant stride.  */
  bool strided_p;

  /* For both loads and stores.  */
  unsigned simd_lane_access_p : 3;

  /* Classifies how the load or store is going to be implemented
     for loop vectorization.  */
  vect_memory_access_type memory_access_type;

  /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used.  */
  tree induc_cond_initial_val;

  /* If not NULL the value to be added to compute final reduction value.  */
  tree reduc_epilogue_adjustment;

  /* On a reduction PHI the reduction type as detected by
     vect_is_simple_reduction and vectorizable_reduction.  */
  enum vect_reduction_type reduc_type;

  /* The original reduction code, to be used in the epilogue.  */
  code_helper reduc_code;
  /* An internal function we should use in the epilogue.  */
  internal_fn reduc_fn;

  /* On a stmt participating in the reduction the index of the operand
     on the reduction SSA cycle.  */
  int reduc_idx;

  /* On a reduction PHI the def returned by vect_force_simple_reduction.
     On the def returned by vect_force_simple_reduction the
     corresponding PHI.  */
  stmt_vec_info reduc_def;

  /* The vector input type relevant for reduction vectorization.  */
  tree reduc_vectype_in;

  /* The vector type for performing the actual reduction.  */
  tree reduc_vectype;

  /* If IS_REDUC_INFO is true and if the vector code is performing
     N scalar reductions in parallel, this variable gives the initial
     scalar values of those N reductions.  */
  vec<tree> reduc_initial_values;

  /* If IS_REDUC_INFO is true and if the vector code is performing
     N scalar reductions in parallel, this variable gives the vectorized code's
     final (scalar) result for each of those N reductions.  In other words,
     REDUC_SCALAR_RESULTS[I] replaces the original scalar code's loop-closed
     SSA PHI for reduction number I.  */
  vec<tree> reduc_scalar_results;

  /* Only meaningful if IS_REDUC_INFO.  If non-null, the reduction is
     being performed by an epilogue loop and we have decided to reuse
     this accumulator from the main loop.  */
  vect_reusable_accumulator *reused_accumulator;

  /* Whether we force a single cycle PHI during reduction vectorization.  */
  bool force_single_cycle;

  /* Whether on this stmt reduction meta is recorded.  */
  bool is_reduc_info;

  /* If nonzero, the lhs of the statement could be truncated to this
     many bits without affecting any users of the result.  */
  unsigned int min_output_precision;

  /* If nonzero, all non-boolean input operands have the same precision,
     and they could each be truncated to this many bits without changing
     the result.  */
  unsigned int min_input_precision;

  /* If OPERATION_BITS is nonzero, the statement could be performed on
     an integer with the sign and number of bits given by OPERATION_SIGN
     and OPERATION_BITS without changing the result.  */
  unsigned int operation_precision;
  signop operation_sign;

  /* If the statement produces a boolean result, this value describes
     how we should choose the associated vector type.  The possible
     values are:

     - an integer precision N if we should use the vector mask type
       associated with N-bit integers.  This is only used if all relevant
       input booleans also want the vector mask type for N-bit integers,
       or if we can convert them into that form by pattern-matching.

     - ~0U if we considered choosing a vector mask type but decided
       to treat the boolean as a normal integer type instead.

     - 0 otherwise.  This means either that the operation isn't one that
       could have a vector mask type (and so should have a normal vector
       type instead) or that we simply haven't made a choice either way.  */
  unsigned int mask_precision;

  /* True if this is only suitable for SLP vectorization.  */
  bool slp_vect_only_p;

  /* True if this is a pattern that can only be handled by SLP
     vectorization.  */
  bool slp_vect_pattern_only_p;
};
1467 | ||
/* Information about a gather/scatter call.  The vectorized access loads
   or stores elements at addresses of the form BASE + OFFSET[i] * SCALE
   (see the individual field comments below).  */
struct gather_scatter_info {
  /* The internal function to use for the gather/scatter operation,
     or IFN_LAST if a built-in function should be used instead.  */
  internal_fn ifn;

  /* The FUNCTION_DECL for the built-in gather/scatter function,
     or null if an internal function should be used instead.  */
  tree decl;

  /* The loop-invariant base value.  */
  tree base;

  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
  tree offset;

  /* Each offset element should be multiplied by this amount before
     being added to the base.  */
  int scale;

  /* The definition type for the vectorized offset.  */
  enum vect_def_type offset_dt;

  /* The type of the vectorized offset.  */
  tree offset_vectype;

  /* The type of the scalar elements after loading or before storing.  */
  tree element_type;

  /* The type of the scalar elements being loaded or stored.
     NOTE(review): this reads almost identically to ELEMENT_TYPE above;
     presumably MEMORY_TYPE is the in-memory type while ELEMENT_TYPE is
     the value type after conversion — confirm against the uses.  */
  tree memory_type;
};
1500 | ||
/* Access Functions.  Field accessors for a stmt_vec_info S.  */
#define STMT_VINFO_TYPE(S) (S)->type
#define STMT_VINFO_STMT(S) (S)->stmt
#define STMT_VINFO_RELEVANT(S) (S)->relevant
#define STMT_VINFO_LIVE_P(S) (S)->live
#define STMT_VINFO_VECTYPE(S) (S)->vectype
#define STMT_VINFO_VEC_STMTS(S) (S)->vec_stmts
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
/* The "+ 0" makes the expansion an rvalue, so the data reference cannot
   be assigned to through this accessor.  */
#define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0)
#define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
#define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val
#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment
#define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx
#define STMT_VINFO_FORCE_SINGLE_CYCLE(S) (S)->force_single_cycle

/* Accessors for the data-reference description relative to the
   vectorized loop (the dr_wrt_vec_loop innermost_loop_behavior).  */
#define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address
#define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step
#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.offset_alignment
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.step_alignment

/* Checked access to the embedded dr_vec_info: only valid when the
   dr_aux was recorded for this statement (its back-pointer matches).  */
#define STMT_VINFO_DR_INFO(S) \
  (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)

#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
#define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
/* A grouped access requires both a data reference and a recorded first
   element of the group.  */
#define STMT_VINFO_GROUPED_ACCESS(S) \
  ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
#define STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE(S) (S)->loop_phi_evolution_type
#define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist
#define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
#define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code
#define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn
#define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype
#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in
#define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p
#define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p
1554 | ||
/* Accessors for interleaved-access groups.  These require that the
   statement has an associated data reference (dr_aux.dr non-null).  */
#define DR_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
#define DR_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
#define DR_GROUP_SIZE(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
#define DR_GROUP_STORE_COUNT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
#define DR_GROUP_GAP(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)

/* The REDUC_GROUP_* accessors reuse the same group fields for reduction
   chains; the asserts enforce that such statements have no data
   reference, which is what distinguishes the two uses.  */
#define REDUC_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
#define REDUC_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element)
#define REDUC_GROUP_SIZE(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size)

/* True if the statement is used somewhere in the vectorized region.  */
#define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)

/* Classification of a statement with respect to SLP vectorization.  */
#define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid)
#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
#define STMT_SLP_TYPE(S) (S)->slp_type
/* Contains the scalar or vector costs for a vec_info.  Targets derive
   from this class to implement their own cost models.  */
class vector_costs
{
public:
  vector_costs (vec_info *, bool);
  virtual ~vector_costs () {}

  /* Update the costs in response to adding COUNT copies of a statement.

     - WHERE specifies whether the cost occurs in the loop prologue,
       the loop body, or the loop epilogue.
     - KIND is the kind of statement, which is always meaningful.
     - STMT_INFO or NODE, if nonnull, describe the statement that will be
       vectorized.
     - VECTYPE, if nonnull, is the vector type that the vectorized
       statement will operate on.  Note that this should be used in
       preference to STMT_VINFO_VECTYPE (STMT_INFO) since the latter
       is not correct for SLP.
     - for unaligned_load and unaligned_store statements, MISALIGN is
       the byte misalignment of the load or store relative to the target's
       preferred alignment for VECTYPE, or DR_MISALIGNMENT_UNKNOWN
       if the misalignment is not known.

     Return the calculated cost as well as recording it.  The return
     value is used for dumping purposes.  */
  virtual unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
				      stmt_vec_info stmt_info,
				      slp_tree node,
				      tree vectype, int misalign,
				      vect_cost_model_location where);

  /* Finish calculating the cost of the code.  The results can be
     read back using the functions below.

     If the costs describe vector code, SCALAR_COSTS gives the costs
     of the corresponding scalar code, otherwise it is null.  */
  virtual void finish_cost (const vector_costs *scalar_costs);

  /* The costs in THIS and OTHER both describe ways of vectorizing
     a main loop.  Return true if the costs described by THIS are
     cheaper than the costs described by OTHER.  Return false if any
     of the following are true:

     - THIS and OTHER are of equal cost
     - OTHER is better than THIS
     - we can't be sure about the relative costs of THIS and OTHER.  */
  virtual bool better_main_loop_than_p (const vector_costs *other) const;

  /* Likewise, but the costs in THIS and OTHER both describe ways of
     vectorizing an epilogue loop of MAIN_LOOP.  */
  virtual bool better_epilogue_loop_than_p (const vector_costs *other,
					    loop_vec_info main_loop) const;

  /* Read-back accessors; only valid once finish_cost has been called
     (checked via m_finished).  */
  unsigned int prologue_cost () const;
  unsigned int body_cost () const;
  unsigned int epilogue_cost () const;
  unsigned int outside_cost () const;
  unsigned int total_cost () const;
  unsigned int suggested_unroll_factor () const;

protected:
  unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
				 unsigned int);
  unsigned int adjust_cost_for_freq (stmt_vec_info, vect_cost_model_location,
				     unsigned int);
  int compare_inside_loop_cost (const vector_costs *) const;
  int compare_outside_loop_cost (const vector_costs *) const;

  /* The region of code that we're considering vectorizing.  */
  vec_info *m_vinfo;

  /* True if we're costing the scalar code, false if we're costing
     the vector code.  */
  bool m_costing_for_scalar;

  /* The costs of the three regions, indexed by vect_cost_model_location
     (prologue, body, epilogue).  */
  unsigned int m_costs[3];

  /* The suggested unrolling factor determined at finish_cost.  */
  unsigned int m_suggested_unroll_factor;

  /* True if finish_cost has been called.  */
  bool m_finished;
};
1663 | ||
1664 | /* Create costs for VINFO. COSTING_FOR_SCALAR is true if the costs | |
1665 | are for scalar code, false if they are for vector code. */ | |
1666 | ||
1667 | inline | |
1668 | vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar) | |
1669 | : m_vinfo (vinfo), | |
1670 | m_costing_for_scalar (costing_for_scalar), | |
1671 | m_costs (), | |
1672 | m_suggested_unroll_factor(1), | |
1673 | m_finished (false) | |
1674 | { | |
1675 | } | |
1676 | ||
1677 | /* Return the cost of the prologue code (in abstract units). */ | |
1678 | ||
1679 | inline unsigned int | |
1680 | vector_costs::prologue_cost () const | |
1681 | { | |
1682 | gcc_checking_assert (m_finished); | |
1683 | return m_costs[vect_prologue]; | |
1684 | } | |
1685 | ||
1686 | /* Return the cost of the body code (in abstract units). */ | |
1687 | ||
1688 | inline unsigned int | |
1689 | vector_costs::body_cost () const | |
1690 | { | |
1691 | gcc_checking_assert (m_finished); | |
1692 | return m_costs[vect_body]; | |
1693 | } | |
1694 | ||
1695 | /* Return the cost of the epilogue code (in abstract units). */ | |
1696 | ||
1697 | inline unsigned int | |
1698 | vector_costs::epilogue_cost () const | |
1699 | { | |
1700 | gcc_checking_assert (m_finished); | |
1701 | return m_costs[vect_epilogue]; | |
1702 | } | |
1703 | ||
1704 | /* Return the cost of the prologue and epilogue code (in abstract units). */ | |
1705 | ||
1706 | inline unsigned int | |
1707 | vector_costs::outside_cost () const | |
1708 | { | |
1709 | return prologue_cost () + epilogue_cost (); | |
1710 | } | |
1711 | ||
1712 | /* Return the cost of the prologue, body and epilogue code | |
1713 | (in abstract units). */ | |
1714 | ||
1715 | inline unsigned int | |
1716 | vector_costs::total_cost () const | |
1717 | { | |
1718 | return body_cost () + outside_cost (); | |
1719 | } | |
1720 | ||
1721 | /* Return the suggested unroll factor. */ | |
1722 | ||
1723 | inline unsigned int | |
1724 | vector_costs::suggested_unroll_factor () const | |
1725 | { | |
1726 | gcc_checking_assert (m_finished); | |
1727 | return m_suggested_unroll_factor; | |
1728 | } | |
1729 | ||
#define VECT_MAX_COST 1000

/* The maximum number of intermediate steps required in multi-step type
   conversion.  */
#define MAX_INTERM_CVT_STEPS 3

/* Sentinel returned by vect_max_vf when the vectorization factor has no
   compile-time bound.  */
#define MAX_VECTORIZATION_FACTOR INT_MAX

/* Nonzero if TYPE represents a (scalar) boolean type or type
   in the middle-end compatible with it (unsigned precision 1 integral
   types).  Used to determine which types should be vectorized as
   VECTOR_BOOLEAN_TYPE_P.  */

#define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \
  (TREE_CODE (TYPE) == BOOLEAN_TYPE \
   || ((TREE_CODE (TYPE) == INTEGER_TYPE \
	|| TREE_CODE (TYPE) == ENUMERAL_TYPE) \
       && TYPE_PRECISION (TYPE) == 1 \
       && TYPE_UNSIGNED (TYPE)))
1749 | ||
1750 | inline bool | |
1751 | nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info) | |
1752 | { | |
1753 | return (loop->inner | |
1754 | && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); | |
1755 | } | |
1756 | ||
1757 | /* PHI is either a scalar reduction phi or a scalar induction phi. | |
1758 | Return the initial value of the variable on entry to the containing | |
1759 | loop. */ | |
1760 | ||
1761 | inline tree | |
1762 | vect_phi_initial_value (gphi *phi) | |
1763 | { | |
1764 | basic_block bb = gimple_bb (phi); | |
1765 | edge pe = loop_preheader_edge (bb->loop_father); | |
1766 | gcc_assert (pe->dest == bb); | |
1767 | return PHI_ARG_DEF_FROM_EDGE (phi, pe); | |
1768 | } | |
1769 | ||
1770 | /* Return true if STMT_INFO should produce a vector mask type rather than | |
1771 | a normal nonmask type. */ | |
1772 | ||
1773 | inline bool | |
1774 | vect_use_mask_type_p (stmt_vec_info stmt_info) | |
1775 | { | |
1776 | return stmt_info->mask_precision && stmt_info->mask_precision != ~0U; | |
1777 | } | |
1778 | ||
/* Return TRUE if a statement represented by STMT_INFO is a part of a
   pattern (i.e. a statement created by the vectorizer rather than one
   taken from the original IL; see STMT_VINFO_RELATED_STMT).  */

inline bool
is_pattern_stmt_p (stmt_vec_info stmt_info)
{
  return stmt_info->pattern_stmt_p;
}
1787 | ||
1788 | /* If STMT_INFO is a pattern statement, return the statement that it | |
1789 | replaces, otherwise return STMT_INFO itself. */ | |
1790 | ||
1791 | inline stmt_vec_info | |
1792 | vect_orig_stmt (stmt_vec_info stmt_info) | |
1793 | { | |
1794 | if (is_pattern_stmt_p (stmt_info)) | |
1795 | return STMT_VINFO_RELATED_STMT (stmt_info); | |
1796 | return stmt_info; | |
1797 | } | |
1798 | ||
1799 | /* Return the later statement between STMT1_INFO and STMT2_INFO. */ | |
1800 | ||
1801 | inline stmt_vec_info | |
1802 | get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) | |
1803 | { | |
1804 | if (gimple_uid (vect_orig_stmt (stmt1_info)->stmt) | |
1805 | > gimple_uid (vect_orig_stmt (stmt2_info)->stmt)) | |
1806 | return stmt1_info; | |
1807 | else | |
1808 | return stmt2_info; | |
1809 | } | |
1810 | ||
1811 | /* If STMT_INFO has been replaced by a pattern statement, return the | |
1812 | replacement statement, otherwise return STMT_INFO itself. */ | |
1813 | ||
1814 | inline stmt_vec_info | |
1815 | vect_stmt_to_vectorize (stmt_vec_info stmt_info) | |
1816 | { | |
1817 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | |
1818 | return STMT_VINFO_RELATED_STMT (stmt_info); | |
1819 | return stmt_info; | |
1820 | } | |
1821 | ||
1822 | /* Return true if BB is a loop header. */ | |
1823 | ||
1824 | inline bool | |
1825 | is_loop_header_bb_p (basic_block bb) | |
1826 | { | |
1827 | if (bb == (bb->loop_father)->header) | |
1828 | return true; | |
1829 | ||
1830 | return false; | |
1831 | } | |
1832 | ||
/* Return pow2 (X), i.e. 2 raised to the power X (1 when X <= 0).  */

inline int
vect_pow2 (int x)
{
  int result = 1;
  while (x-- > 0)
    result *= 2;
  return result;
}
1845 | ||
1846 | /* Alias targetm.vectorize.builtin_vectorization_cost. */ | |
1847 | ||
1848 | inline int | |
1849 | builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, | |
1850 | tree vectype, int misalign) | |
1851 | { | |
1852 | return targetm.vectorize.builtin_vectorization_cost (type_of_cost, | |
1853 | vectype, misalign); | |
1854 | } | |
1855 | ||
1856 | /* Get cost by calling cost target builtin. */ | |
1857 | ||
1858 | inline | |
1859 | int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost) | |
1860 | { | |
1861 | return builtin_vectorization_cost (type_of_cost, NULL, 0); | |
1862 | } | |
1863 | ||
/* Alias targetm.vectorize.init_cost.  Create a target cost-model object
   for VINFO; COSTING_FOR_SCALAR is true when costing the scalar version
   of the code, false when costing the vector version.  */

inline vector_costs *
init_cost (vec_info *vinfo, bool costing_for_scalar)
{
  return targetm.vectorize.create_costs (vinfo, costing_for_scalar);
}
1871 | ||
1872 | extern void dump_stmt_cost (FILE *, int, enum vect_cost_for_stmt, | |
1873 | stmt_vec_info, slp_tree, tree, int, unsigned, | |
1874 | enum vect_cost_model_location); | |
1875 | ||
1876 | /* Alias targetm.vectorize.add_stmt_cost. */ | |
1877 | ||
1878 | inline unsigned | |
1879 | add_stmt_cost (vector_costs *costs, int count, | |
1880 | enum vect_cost_for_stmt kind, | |
1881 | stmt_vec_info stmt_info, slp_tree node, | |
1882 | tree vectype, int misalign, | |
1883 | enum vect_cost_model_location where) | |
1884 | { | |
1885 | unsigned cost = costs->add_stmt_cost (count, kind, stmt_info, node, vectype, | |
1886 | misalign, where); | |
1887 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1888 | dump_stmt_cost (dump_file, count, kind, stmt_info, node, vectype, misalign, | |
1889 | cost, where); | |
1890 | return cost; | |
1891 | } | |
1892 | ||
1893 | inline unsigned | |
1894 | add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind, | |
1895 | enum vect_cost_model_location where) | |
1896 | { | |
1897 | gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken | |
1898 | || kind == scalar_stmt); | |
1899 | return add_stmt_cost (costs, count, kind, NULL, NULL, NULL_TREE, 0, where); | |
1900 | } | |
1901 | ||
1902 | /* Alias targetm.vectorize.add_stmt_cost. */ | |
1903 | ||
1904 | inline unsigned | |
1905 | add_stmt_cost (vector_costs *costs, stmt_info_for_cost *i) | |
1906 | { | |
1907 | return add_stmt_cost (costs, i->count, i->kind, i->stmt_info, i->node, | |
1908 | i->vectype, i->misalign, i->where); | |
1909 | } | |
1910 | ||
/* Alias targetm.vectorize.finish_cost.  Finalize COSTS (passing the
   scalar baseline SCALAR_COSTS, which may be null) and copy the
   resulting region costs into *PROLOGUE_COST, *BODY_COST and
   *EPILOGUE_COST.  If SUGGESTED_UNROLL_FACTOR is nonnull, also report
   the unroll factor the cost model suggests.  */

inline void
finish_cost (vector_costs *costs, const vector_costs *scalar_costs,
	     unsigned *prologue_cost, unsigned *body_cost,
	     unsigned *epilogue_cost, unsigned *suggested_unroll_factor = NULL)
{
  /* finish_cost must run before the accessors below are valid.  */
  costs->finish_cost (scalar_costs);
  *prologue_cost = costs->prologue_cost ();
  *body_cost = costs->body_cost ();
  *epilogue_cost = costs->epilogue_cost ();
  if (suggested_unroll_factor)
    *suggested_unroll_factor = costs->suggested_unroll_factor ();
}
1925 | ||
1926 | inline void | |
1927 | add_stmt_costs (vector_costs *costs, stmt_vector_for_cost *cost_vec) | |
1928 | { | |
1929 | stmt_info_for_cost *cost; | |
1930 | unsigned i; | |
1931 | FOR_EACH_VEC_ELT (*cost_vec, i, cost) | |
1932 | add_stmt_cost (costs, cost->count, cost->kind, cost->stmt_info, | |
1933 | cost->node, cost->vectype, cost->misalign, cost->where); | |
1934 | } | |
1935 | ||
/*-----------------------------------------------------------------*/
/* Info on data references alignment.                              */
/*-----------------------------------------------------------------*/
/* Sentinel values for dr_vec_info::misalignment; nonnegative values
   are byte misalignments.  */
#define DR_MISALIGNMENT_UNKNOWN (-1)
#define DR_MISALIGNMENT_UNINITIALIZED (-2)

/* Record VAL (a byte misalignment or one of the sentinels above) as
   DR_INFO's misalignment.  */
inline void
set_dr_misalignment (dr_vec_info *dr_info, int val)
{
  dr_info->misalignment = val;
}

/* Compute DR_INFO's misalignment with respect to the target alignment
   for VECTYPE; OFFSET is presumably an extra byte offset applied to the
   access — confirm at the out-of-line definition.  */
extern int dr_misalignment (dr_vec_info *dr_info, tree vectype,
			    poly_int64 offset = 0);

#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
1952 | ||
1953 | /* Only defined once DR_MISALIGNMENT is defined. */ | |
1954 | inline const poly_uint64 | |
1955 | dr_target_alignment (dr_vec_info *dr_info) | |
1956 | { | |
1957 | if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)) | |
1958 | dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt)); | |
1959 | return dr_info->target_alignment; | |
1960 | } | |
1961 | #define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR) | |
1962 | ||
/* Set DR_INFO's target_alignment field to VAL.  */
inline void
set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
{
  dr_info->target_alignment = val;
}
#define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
1969 | ||
1970 | /* Return true if data access DR_INFO is aligned to the targets | |
1971 | preferred alignment for VECTYPE (which may be less than a full vector). */ | |
1972 | ||
1973 | inline bool | |
1974 | aligned_access_p (dr_vec_info *dr_info, tree vectype) | |
1975 | { | |
1976 | return (dr_misalignment (dr_info, vectype) == 0); | |
1977 | } | |
1978 | ||
1979 | /* Return TRUE if the (mis-)alignment of the data access is known with | |
1980 | respect to the targets preferred alignment for VECTYPE, and FALSE | |
1981 | otherwise. */ | |
1982 | ||
1983 | inline bool | |
1984 | known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype) | |
1985 | { | |
1986 | return (dr_misalignment (dr_info, vectype) != DR_MISALIGNMENT_UNKNOWN); | |
1987 | } | |
1988 | ||
1989 | /* Return the minimum alignment in bytes that the vectorized version | |
1990 | of DR_INFO is guaranteed to have. */ | |
1991 | ||
1992 | inline unsigned int | |
1993 | vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype) | |
1994 | { | |
1995 | int misalignment = dr_misalignment (dr_info, vectype); | |
1996 | if (misalignment == DR_MISALIGNMENT_UNKNOWN) | |
1997 | return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr))); | |
1998 | else if (misalignment == 0) | |
1999 | return known_alignment (DR_TARGET_ALIGNMENT (dr_info)); | |
2000 | return misalignment & -misalignment; | |
2001 | } | |
2002 | ||
2003 | /* Return the behavior of DR_INFO with respect to the vectorization context | |
2004 | (which for outer loop vectorization might not be the behavior recorded | |
2005 | in DR_INFO itself). */ | |
2006 | ||
2007 | inline innermost_loop_behavior * | |
2008 | vect_dr_behavior (vec_info *vinfo, dr_vec_info *dr_info) | |
2009 | { | |
2010 | stmt_vec_info stmt_info = dr_info->stmt; | |
2011 | loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo); | |
2012 | if (loop_vinfo == NULL | |
2013 | || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)) | |
2014 | return &DR_INNERMOST (dr_info->dr); | |
2015 | else | |
2016 | return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); | |
2017 | } | |
2018 | ||
2019 | /* Return the offset calculated by adding the offset of this DR_INFO to the | |
2020 | corresponding data_reference's offset. If CHECK_OUTER then use | |
2021 | vect_dr_behavior to select the appropriate data_reference to use. */ | |
2022 | ||
2023 | inline tree | |
2024 | get_dr_vinfo_offset (vec_info *vinfo, | |
2025 | dr_vec_info *dr_info, bool check_outer = false) | |
2026 | { | |
2027 | innermost_loop_behavior *base; | |
2028 | if (check_outer) | |
2029 | base = vect_dr_behavior (vinfo, dr_info); | |
2030 | else | |
2031 | base = &dr_info->dr->innermost; | |
2032 | ||
2033 | tree offset = base->offset; | |
2034 | ||
2035 | if (!dr_info->offset) | |
2036 | return offset; | |
2037 | ||
2038 | offset = fold_convert (sizetype, offset); | |
2039 | return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset, | |
2040 | dr_info->offset); | |
2041 | } | |
2042 | ||
2043 | ||
2044 | /* Return the vect cost model for LOOP. */ | |
2045 | inline enum vect_cost_model | |
2046 | loop_cost_model (loop_p loop) | |
2047 | { | |
2048 | if (loop != NULL | |
2049 | && loop->force_vectorize | |
2050 | && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) | |
2051 | return flag_simd_cost_model; | |
2052 | return flag_vect_cost_model; | |
2053 | } | |
2054 | ||
2055 | /* Return true if the vect cost model is unlimited. */ | |
2056 | inline bool | |
2057 | unlimited_cost_model (loop_p loop) | |
2058 | { | |
2059 | return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; | |
2060 | } | |
2061 | ||
/* Return true if the loop described by LOOP_VINFO is fully-masked and
   if the first iteration should use a partial mask in order to achieve
   alignment.  */

inline bool
vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
{
  return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
}

/* Return the number of vectors of type VECTYPE that are needed to get
   NUNITS elements.  NUNITS should be based on the vectorization factor,
   so it is always a known multiple of the number of elements in VECTYPE.  */

inline unsigned int
vect_get_num_vectors (poly_uint64 nunits, tree vectype)
{
  /* The division is expected to be exact (see the comment above).  */
  return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
}

/* Return the number of copies needed for loop vectorization when
   a statement operates on vectors of type VECTYPE.  This is the
   vectorization factor divided by the number of elements in
   VECTYPE and is always known at compile time.  */

inline unsigned int
vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
{
  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
}
2093 | ||
/* Update maximum unit count *MAX_NUNITS so that it accounts for
   NUNITS.  *MAX_NUNITS can be 1 if we haven't yet recorded anything.  */

inline void
vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
{
  /* All unit counts have the form vec_info::vector_size * X for some
     rational X, so two unit sizes must have a common multiple.
     Everything is a multiple of the initial value of 1.  */
  *max_nunits = force_common_multiple (*max_nunits, nunits);
}

/* Update maximum unit count *MAX_NUNITS so that it accounts for
   the number of units in vector type VECTYPE.  *MAX_NUNITS can be 1
   if we haven't yet recorded any vector types.  */

inline void
vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
{
  /* Delegate to the poly_uint64 overload above.  */
  vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
}
2115 | ||
/* Return the vectorization factor that should be used for costing
   purposes while vectorizing the loop described by LOOP_VINFO.
   Pick a reasonable estimate if the vectorization factor isn't
   known at compile time.  */

inline unsigned int
vect_vf_for_cost (loop_vec_info loop_vinfo)
{
  return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
}

/* Estimate the number of elements in VEC_TYPE for costing purposes.
   Pick a reasonable estimate if the exact number isn't known at
   compile time.  */

inline unsigned int
vect_nunits_for_cost (tree vec_type)
{
  return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
}
2136 | ||
2137 | /* Return the maximum possible vectorization factor for LOOP_VINFO. */ | |
2138 | ||
2139 | inline unsigned HOST_WIDE_INT | |
2140 | vect_max_vf (loop_vec_info loop_vinfo) | |
2141 | { | |
2142 | unsigned HOST_WIDE_INT vf; | |
2143 | if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) | |
2144 | return vf; | |
2145 | return MAX_VECTORIZATION_FACTOR; | |
2146 | } | |
2147 | ||
/* Return the size of the value accessed by unvectorized data reference
   DR_INFO.  This is only valid once STMT_VINFO_VECTYPE has been calculated
   for the associated gimple statement, since that guarantees that DR_INFO
   accesses either a scalar or a scalar equivalent.  ("Scalar equivalent"
   here includes things like V1SI, which can be vectorized in the same way
   as a plain SI.)  */

inline unsigned int
vect_get_scalar_dr_size (dr_vec_info *dr_info)
{
  return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
}

/* Return true if LOOP_VINFO requires a runtime check for whether the
   vector loop is profitable.  */

inline bool
vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
{
  /* A runtime check is needed when the iteration count is unknown at
     compile time and the cost-model threshold is at least the estimated
     vectorization factor.  */
  unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && th >= vect_vf_for_cost (loop_vinfo));
}
2171 | ||
2172 | /* Return true if CODE is a lane-reducing opcode. */ | |
2173 | ||
2174 | inline bool | |
2175 | lane_reducing_op_p (code_helper code) | |
2176 | { | |
2177 | return code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR; | |
2178 | } | |
2179 | ||
2180 | /* Return true if STMT is a lane-reducing statement. */ | |
2181 | ||
2182 | inline bool | |
2183 | lane_reducing_stmt_p (gimple *stmt) | |
2184 | { | |
2185 | if (auto *assign = dyn_cast <gassign *> (stmt)) | |
2186 | return lane_reducing_op_p (gimple_assign_rhs_code (assign)); | |
2187 | return false; | |
2188 | } | |
2189 | ||
/* Source location + hotness information.  */
extern dump_user_location_t vect_location;

/* A macro for calling:
     dump_begin_scope (MSG, vect_location);
   via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
   and then calling
     dump_end_scope ();
   once the object goes out of scope, thus capturing the nesting of
   the scopes.

   These scopes affect dump messages within them: dump messages at the
   top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
   in a nested scope implicitly default to MSG_PRIORITY_INTERNALS.  */

#define DUMP_VECT_SCOPE(MSG) \
  AUTO_DUMP_SCOPE (MSG, vect_location)

/* A sentinel class for ensuring that the "vect_location" global gets
   reset at the end of a scope.

   The "vect_location" global is used during dumping and contains a
   location_t, which could contain references to a tree block via the
   ad-hoc data.  This data is used for tracking inlining information,
   but it's not a GC root; it's simply assumed that such locations never
   get accessed if the blocks are optimized away.

   Hence we need to ensure that such locations are purged at the end
   of any operations using them (e.g. via this class).  */

class auto_purge_vect_location
{
public:
  /* Defined out of line; resets vect_location on scope exit.  */
  ~auto_purge_vect_location ();
};
2225 | ||
2226 | /*-----------------------------------------------------------------*/ | |
2227 | /* Function prototypes. */ | |
2228 | /*-----------------------------------------------------------------*/ | |
2229 | ||
2230 | /* Simple loop peeling and versioning utilities for vectorizer's purposes - | |
2231 | in tree-vect-loop-manip.cc. */ | |
2232 | extern void vect_set_loop_condition (class loop *, edge, loop_vec_info, | |
2233 | tree, tree, tree, bool); | |
2234 | extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge, | |
2235 | const_edge); | |
2236 | class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge, | |
2237 | class loop *, edge, | |
2238 | edge, edge *, bool = true, | |
2239 | vec<basic_block> * = NULL); | |
2240 | class loop *vect_loop_versioning (loop_vec_info, gimple *); | |
2241 | extern class loop *vect_do_peeling (loop_vec_info, tree, tree, | |
2242 | tree *, tree *, tree *, int, bool, bool, | |
2243 | tree *); | |
2244 | extern tree vect_get_main_loop_result (loop_vec_info, tree, tree); | |
2245 | extern void vect_prepare_for_masked_peels (loop_vec_info); | |
2246 | extern dump_user_location_t find_loop_location (class loop *); | |
2247 | extern bool vect_can_advance_ivs_p (loop_vec_info); | |
2248 | extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); | |
2249 | extern edge vec_init_loop_exit_info (class loop *); | |
2250 | extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool *); | |
2251 | ||
2252 | /* In tree-vect-stmts.cc. */ | |
2253 | extern tree get_related_vectype_for_scalar_type (machine_mode, tree, | |
2254 | poly_uint64 = 0); | |
2255 | extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0); | |
2256 | extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree); | |
2257 | extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0); | |
2258 | extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree); | |
2259 | extern tree get_same_sized_vectype (tree, tree); | |
2260 | extern bool vect_chooses_same_modes_p (vec_info *, machine_mode); | |
2261 | extern bool vect_get_loop_mask_type (loop_vec_info); | |
2262 | extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, | |
2263 | stmt_vec_info * = NULL, gimple ** = NULL); | |
2264 | extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, | |
2265 | tree *, stmt_vec_info * = NULL, | |
2266 | gimple ** = NULL); | |
2267 | extern bool vect_is_simple_use (vec_info *, stmt_vec_info, slp_tree, | |
2268 | unsigned, tree *, slp_tree *, | |
2269 | enum vect_def_type *, | |
2270 | tree *, stmt_vec_info * = NULL); | |
2271 | extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree); | |
2272 | extern tree perm_mask_for_reverse (tree); | |
2273 | extern bool supportable_widening_operation (vec_info*, code_helper, | |
2274 | stmt_vec_info, tree, tree, | |
2275 | code_helper*, code_helper*, | |
2276 | int*, vec<tree> *); | |
2277 | extern bool supportable_narrowing_operation (code_helper, tree, tree, | |
2278 | code_helper *, int *, | |
2279 | vec<tree> *); | |
2280 | extern bool supportable_indirect_convert_operation (code_helper, | |
2281 | tree, tree, | |
2282 | vec<std::pair<tree, tree_code> > *, | |
2283 | tree = NULL_TREE); | |
2284 | ||
2285 | extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, | |
2286 | enum vect_cost_for_stmt, stmt_vec_info, | |
2287 | tree, int, enum vect_cost_model_location); | |
2288 | extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, | |
2289 | enum vect_cost_for_stmt, slp_tree, | |
2290 | tree, int, enum vect_cost_model_location); | |
2291 | extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, | |
2292 | enum vect_cost_for_stmt, | |
2293 | enum vect_cost_model_location); | |
2294 | ||
2295 | /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */ | |
2296 | ||
2297 | inline unsigned | |
2298 | record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, | |
2299 | enum vect_cost_for_stmt kind, stmt_vec_info stmt_info, | |
2300 | int misalign, enum vect_cost_model_location where) | |
2301 | { | |
2302 | return record_stmt_cost (body_cost_vec, count, kind, stmt_info, | |
2303 | STMT_VINFO_VECTYPE (stmt_info), misalign, where); | |
2304 | } | |
2305 | ||
2306 | extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *); | |
2307 | extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *, | |
2308 | gimple_stmt_iterator *); | |
2309 | extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *); | |
2310 | extern tree vect_get_store_rhs (stmt_vec_info); | |
2311 | void vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info, unsigned, | |
2312 | tree op, vec<tree> *, tree = NULL); | |
2313 | void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned, | |
2314 | tree, vec<tree> *, | |
2315 | tree = NULL, vec<tree> * = NULL, | |
2316 | tree = NULL, vec<tree> * = NULL, | |
2317 | tree = NULL, vec<tree> * = NULL); | |
2318 | void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned, | |
2319 | tree, tree, vec<tree> *, | |
2320 | tree = NULL, tree = NULL, vec<tree> * = NULL, | |
2321 | tree = NULL, tree = NULL, vec<tree> * = NULL, | |
2322 | tree = NULL, tree = NULL, vec<tree> * = NULL); | |
2323 | extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree, | |
2324 | gimple_stmt_iterator *); | |
2325 | extern tree vect_get_slp_vect_def (slp_tree, unsigned); | |
2326 | extern bool vect_transform_stmt (vec_info *, stmt_vec_info, | |
2327 | gimple_stmt_iterator *, | |
2328 | slp_tree, slp_instance); | |
2329 | extern void vect_remove_stores (vec_info *, stmt_vec_info); | |
2330 | extern bool vect_nop_conversion_p (stmt_vec_info); | |
2331 | extern opt_result vect_analyze_stmt (vec_info *, stmt_vec_info, bool *, | |
2332 | slp_tree, | |
2333 | slp_instance, stmt_vector_for_cost *); | |
2334 | extern void vect_get_load_cost (vec_info *, stmt_vec_info, int, | |
2335 | dr_alignment_support, int, bool, | |
2336 | unsigned int *, unsigned int *, | |
2337 | stmt_vector_for_cost *, | |
2338 | stmt_vector_for_cost *, bool); | |
2339 | extern void vect_get_store_cost (vec_info *, stmt_vec_info, int, | |
2340 | dr_alignment_support, int, | |
2341 | unsigned int *, stmt_vector_for_cost *); | |
2342 | extern bool vect_supportable_shift (vec_info *, enum tree_code, tree); | |
2343 | extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); | |
2344 | extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); | |
2345 | extern void optimize_mask_stores (class loop*); | |
2346 | extern tree vect_gen_while (gimple_seq *, tree, tree, tree, | |
2347 | const char * = nullptr); | |
2348 | extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); | |
2349 | extern opt_result vect_get_vector_types_for_stmt (vec_info *, | |
2350 | stmt_vec_info, tree *, | |
2351 | tree *, unsigned int = 0); | |
2352 | extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0); | |
2353 | ||
2354 | /* In tree-if-conv.cc. */ | |
2355 | extern bool ref_within_array_bound (gimple *, tree); | |
2356 | ||
2357 | /* In tree-vect-data-refs.cc. */ | |
2358 | extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); | |
2359 | extern enum dr_alignment_support vect_supportable_dr_alignment | |
2360 | (vec_info *, dr_vec_info *, tree, int); | |
2361 | extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree); | |
2362 | extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *); | |
2363 | extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance); | |
2364 | extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); | |
2365 | extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); | |
2366 | extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance); | |
2367 | extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); | |
2368 | extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); | |
2369 | extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, | |
2370 | tree, int, internal_fn *, tree *); | |
2371 | extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, | |
2372 | gather_scatter_info *); | |
2373 | extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, | |
2374 | vec<data_reference_p> *, | |
2375 | vec<int> *, int); | |
2376 | extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); | |
2377 | extern void vect_record_base_alignments (vec_info *); | |
2378 | extern tree vect_create_data_ref_ptr (vec_info *, | |
2379 | stmt_vec_info, tree, class loop *, tree, | |
2380 | tree *, gimple_stmt_iterator *, | |
2381 | gimple **, bool, | |
2382 | tree = NULL_TREE); | |
2383 | extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *, | |
2384 | stmt_vec_info, tree); | |
2385 | extern void vect_copy_ref_info (tree, tree); | |
2386 | extern tree vect_create_destination_var (tree, tree); | |
2387 | extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); | |
2388 | extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); | |
2389 | extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); | |
2390 | extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); | |
2391 | extern void vect_permute_store_chain (vec_info *, vec<tree> &, | |
2392 | unsigned int, stmt_vec_info, | |
2393 | gimple_stmt_iterator *, vec<tree> *); | |
2394 | extern tree vect_setup_realignment (vec_info *, | |
2395 | stmt_vec_info, gimple_stmt_iterator *, | |
2396 | tree *, enum dr_alignment_support, tree, | |
2397 | class loop **); | |
2398 | extern void vect_transform_grouped_load (vec_info *, stmt_vec_info, vec<tree>, | |
2399 | int, gimple_stmt_iterator *); | |
2400 | extern void vect_record_grouped_load_vectors (vec_info *, | |
2401 | stmt_vec_info, vec<tree>); | |
2402 | extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | |
2403 | extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, | |
2404 | const char * = NULL); | |
2405 | extern tree vect_create_addr_base_for_vector_ref (vec_info *, | |
2406 | stmt_vec_info, gimple_seq *, | |
2407 | tree); | |
2408 | ||
2409 | /* In tree-vect-loop.cc. */ | |
2410 | extern tree neutral_op_for_reduction (tree, code_helper, tree, bool = true); | |
2411 | extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo); | |
2412 | bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *); | |
2413 | /* Used in tree-vect-loop-manip.cc */ | |
2414 | extern opt_result vect_determine_partial_vectors_and_peeling (loop_vec_info); | |
2415 | /* Used in gimple-loop-interchange.c and tree-parloops.cc. */ | |
2416 | extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, | |
2417 | enum tree_code); | |
2418 | extern bool needs_fold_left_reduction_p (tree, code_helper); | |
2419 | /* Drive for loop analysis stage. */ | |
2420 | extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); | |
2421 | extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); | |
2422 | extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, | |
2423 | tree *, bool); | |
2424 | extern tree vect_halve_mask_nunits (tree, machine_mode); | |
2425 | extern tree vect_double_mask_nunits (tree, machine_mode); | |
2426 | extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *, | |
2427 | unsigned int, tree, tree); | |
2428 | extern tree vect_get_loop_mask (loop_vec_info, gimple_stmt_iterator *, | |
2429 | vec_loop_masks *, | |
2430 | unsigned int, tree, unsigned int); | |
2431 | extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int, | |
2432 | tree, unsigned int); | |
2433 | extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, | |
2434 | vec_loop_lens *, unsigned int, tree, | |
2435 | unsigned int, unsigned int); | |
2436 | extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, | |
2437 | gimple_stmt_iterator *, vec_loop_lens *, | |
2438 | unsigned int, tree, tree, unsigned int, | |
2439 | unsigned int); | |
2440 | extern gimple_seq vect_gen_len (tree, tree, tree, tree); | |
2441 | extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); | |
2442 | extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); | |
2443 | ||
2444 | /* Drive for loop transformation stage. */ | |
2445 | extern class loop *vect_transform_loop (loop_vec_info, gimple *); | |
2446 | struct vect_loop_form_info | |
2447 | { | |
2448 | tree number_of_iterations; | |
2449 | tree number_of_iterationsm1; | |
2450 | tree assumptions; | |
2451 | auto_vec<gcond *> conds; | |
2452 | gcond *inner_loop_cond; | |
2453 | edge loop_exit; | |
2454 | }; | |
2455 | extern opt_result vect_analyze_loop_form (class loop *, vect_loop_form_info *); | |
2456 | extern loop_vec_info vect_create_loop_vinfo (class loop *, vec_info_shared *, | |
2457 | const vect_loop_form_info *, | |
2458 | loop_vec_info = nullptr); | |
2459 | extern bool vectorizable_live_operation (vec_info *, stmt_vec_info, | |
2460 | slp_tree, slp_instance, int, | |
2461 | bool, stmt_vector_for_cost *); | |
2462 | extern bool vectorizable_reduction (loop_vec_info, stmt_vec_info, | |
2463 | slp_tree, slp_instance, | |
2464 | stmt_vector_for_cost *); | |
2465 | extern bool vectorizable_induction (loop_vec_info, stmt_vec_info, | |
2466 | gimple **, slp_tree, | |
2467 | stmt_vector_for_cost *); | |
2468 | extern bool vect_transform_reduction (loop_vec_info, stmt_vec_info, | |
2469 | gimple_stmt_iterator *, | |
2470 | gimple **, slp_tree); | |
2471 | extern bool vect_transform_cycle_phi (loop_vec_info, stmt_vec_info, | |
2472 | gimple **, | |
2473 | slp_tree, slp_instance); | |
2474 | extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info, | |
2475 | gimple **, slp_tree); | |
2476 | extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree, | |
2477 | stmt_vector_for_cost *); | |
2478 | extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info, | |
2479 | gimple **, slp_tree, stmt_vector_for_cost *); | |
2480 | extern bool vect_emulated_vector_p (tree); | |
2481 | extern bool vect_can_vectorize_without_simd_p (tree_code); | |
2482 | extern bool vect_can_vectorize_without_simd_p (code_helper); | |
2483 | extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, | |
2484 | stmt_vector_for_cost *, | |
2485 | stmt_vector_for_cost *, | |
2486 | stmt_vector_for_cost *); | |
2487 | extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); | |
2488 | ||
2489 | /* Nonlinear induction. */ | |
2490 | extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree, | |
2491 | tree, enum vect_induction_op_type); | |
2492 | ||
2493 | /* In tree-vect-slp.cc. */ | |
2494 | extern void vect_slp_init (void); | |
2495 | extern void vect_slp_fini (void); | |
2496 | extern void vect_free_slp_instance (slp_instance); | |
2497 | extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &, | |
2498 | gimple_stmt_iterator *, poly_uint64, | |
2499 | bool, unsigned *, | |
2500 | unsigned * = nullptr, bool = false); | |
2501 | extern bool vect_slp_analyze_operations (vec_info *); | |
2502 | extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &); | |
2503 | extern opt_result vect_analyze_slp (vec_info *, unsigned); | |
2504 | extern bool vect_make_slp_decision (loop_vec_info); | |
2505 | extern void vect_detect_hybrid_slp (loop_vec_info); | |
2506 | extern void vect_optimize_slp (vec_info *); | |
2507 | extern void vect_gather_slp_loads (vec_info *); | |
2508 | extern void vect_get_slp_defs (slp_tree, vec<tree> *); | |
2509 | extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *, | |
2510 | unsigned n = -1U); | |
2511 | extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop); | |
2512 | extern bool vect_slp_function (function *); | |
2513 | extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); | |
2514 | extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree); | |
2515 | extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); | |
2516 | extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, | |
2517 | unsigned int * = NULL, | |
2518 | tree * = NULL, tree * = NULL); | |
2519 | extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, | |
2520 | const vec<tree> &, unsigned int, vec<tree> &); | |
2521 | extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); | |
2522 | extern slp_tree vect_create_new_slp_node (unsigned, tree_code); | |
2523 | extern void vect_free_slp_tree (slp_tree); | |
2524 | extern bool compatible_calls_p (gcall *, gcall *); | |
2525 | extern int vect_slp_child_index_for_operand (const gimple *, int op, bool); | |
2526 | ||
2527 | extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, | |
2528 | gimple_stmt_iterator *); | |
2529 | ||
2530 | /* In tree-vect-patterns.cc. */ | |
2531 | extern void | |
2532 | vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree); | |
2533 | extern bool vect_get_range_info (tree, wide_int*, wide_int*); | |
2534 | ||
2535 | /* Pattern recognition functions. | |
2536 | Additional pattern recognition functions can (and will) be added | |
2537 | in the future. */ | |
2538 | void vect_pattern_recog (vec_info *); | |
2539 | ||
2540 | /* In tree-vectorizer.cc. */ | |
2541 | unsigned vectorize_loops (void); | |
2542 | void vect_free_loop_info_assumptions (class loop *); | |
2543 | gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL); | |
2544 | bool vect_stmt_dominates_stmt_p (gimple *, gimple *); | |
2545 | ||
2546 | /* SLP Pattern matcher types, tree-vect-slp-patterns.cc. */ | |
2547 | ||
2548 | /* Forward declaration of possible two operands operation that can be matched | |
2549 | by the complex numbers pattern matchers. */ | |
2550 | enum _complex_operation : unsigned; | |
2551 | ||
/* All possible load permute values that could result from the partial
   data-flow analysis.  Used as the mapped value of
   slp_tree_to_load_perm_map_t below by the complex-number SLP pattern
   matchers.  */
typedef enum _complex_perm_kinds {
  PERM_UNKNOWN,
  PERM_EVENODD,
  PERM_ODDEVEN,
  PERM_ODDODD,
  PERM_EVENEVEN,
  /* Can be combined with any other PERM values.  */
  PERM_TOP
} complex_perm_kinds_t;
2563 | ||
2564 | /* Cache from nodes to the load permutation they represent. */ | |
2565 | typedef hash_map <slp_tree, complex_perm_kinds_t> | |
2566 | slp_tree_to_load_perm_map_t; | |
2567 | ||
2568 | /* Cache from nodes pair to being compatible or not. */ | |
2569 | typedef pair_hash <nofree_ptr_hash <_slp_tree>, | |
2570 | nofree_ptr_hash <_slp_tree>> slp_node_hash; | |
2571 | typedef hash_map <slp_node_hash, bool> slp_compat_nodes_map_t; | |
2572 | ||
2573 | ||
2574 | /* Vector pattern matcher base class. All SLP pattern matchers must inherit | |
2575 | from this type. */ | |
2576 | ||
2577 | class vect_pattern | |
2578 | { | |
2579 | protected: | |
2580 | /* The number of arguments that the IFN requires. */ | |
2581 | unsigned m_num_args; | |
2582 | ||
2583 | /* The internal function that will be used when a pattern is created. */ | |
2584 | internal_fn m_ifn; | |
2585 | ||
2586 | /* The current node being inspected. */ | |
2587 | slp_tree *m_node; | |
2588 | ||
2589 | /* The list of operands to be the children for the node produced when the | |
2590 | internal function is created. */ | |
2591 | vec<slp_tree> m_ops; | |
2592 | ||
2593 | /* Default constructor where NODE is the root of the tree to inspect. */ | |
2594 | vect_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
2595 | { | |
2596 | this->m_ifn = ifn; | |
2597 | this->m_node = node; | |
2598 | this->m_ops.create (0); | |
2599 | if (m_ops) | |
2600 | this->m_ops.safe_splice (*m_ops); | |
2601 | } | |
2602 | ||
2603 | public: | |
2604 | ||
2605 | /* Create a new instance of the pattern matcher class of the given type. */ | |
2606 | static vect_pattern* recognize (slp_tree_to_load_perm_map_t *, | |
2607 | slp_compat_nodes_map_t *, slp_tree *); | |
2608 | ||
2609 | /* Build the pattern from the data collected so far. */ | |
2610 | virtual void build (vec_info *) = 0; | |
2611 | ||
2612 | /* Default destructor. */ | |
2613 | virtual ~vect_pattern () | |
2614 | { | |
2615 | this->m_ops.release (); | |
2616 | } | |
2617 | }; | |
2618 | ||
2619 | /* Function pointer to create a new pattern matcher from a generic type. */ | |
2620 | typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree_to_load_perm_map_t *, | |
2621 | slp_compat_nodes_map_t *, | |
2622 | slp_tree *); | |
2623 | ||
2624 | /* List of supported pattern matchers. */ | |
2625 | extern vect_pattern_decl_t slp_patterns[]; | |
2626 | ||
2627 | /* Number of supported pattern matchers. */ | |
2628 | extern size_t num__slp_patterns; | |
2629 | ||
2630 | /* ---------------------------------------------------------------------- | |
2631 | Target support routines | |
2632 | ----------------------------------------------------------------------- | |
2633 | The following routines are provided to simplify costing decisions in | |
2634 | target code. Please add more as needed. */ | |
2635 | ||
2636 | /* Return true if an operaton of kind KIND for STMT_INFO represents | |
2637 | the extraction of an element from a vector in preparation for | |
2638 | storing the element to memory. */ | |
2639 | inline bool | |
2640 | vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info) | |
2641 | { | |
2642 | return (kind == vec_to_scalar | |
2643 | && STMT_VINFO_DATA_REF (stmt_info) | |
2644 | && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); | |
2645 | } | |
2646 | ||
2647 | /* Return true if STMT_INFO represents part of a reduction. */ | |
2648 | inline bool | |
2649 | vect_is_reduction (stmt_vec_info stmt_info) | |
2650 | { | |
2651 | return STMT_VINFO_REDUC_IDX (stmt_info) >= 0; | |
2652 | } | |
2653 | ||
2654 | /* If STMT_INFO describes a reduction, return the vect_reduction_type | |
2655 | of the reduction it describes, otherwise return -1. */ | |
2656 | inline int | |
2657 | vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) | |
2658 | { | |
2659 | if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) | |
2660 | if (STMT_VINFO_REDUC_DEF (stmt_info)) | |
2661 | { | |
2662 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); | |
2663 | return int (STMT_VINFO_REDUC_TYPE (reduc_info)); | |
2664 | } | |
2665 | return -1; | |
2666 | } | |
2667 | ||
2668 | /* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the | |
2669 | scalar type of the values being compared. Return null otherwise. */ | |
2670 | inline tree | |
2671 | vect_embedded_comparison_type (stmt_vec_info stmt_info) | |
2672 | { | |
2673 | if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) | |
2674 | if (gimple_assign_rhs_code (assign) == COND_EXPR) | |
2675 | { | |
2676 | tree cond = gimple_assign_rhs1 (assign); | |
2677 | if (COMPARISON_CLASS_P (cond)) | |
2678 | return TREE_TYPE (TREE_OPERAND (cond, 0)); | |
2679 | } | |
2680 | return NULL_TREE; | |
2681 | } | |
2682 | ||
2683 | /* If STMT_INFO is a comparison or contains an embedded comparison, return the | |
2684 | scalar type of the values being compared. Return null otherwise. */ | |
2685 | inline tree | |
2686 | vect_comparison_type (stmt_vec_info stmt_info) | |
2687 | { | |
2688 | if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) | |
2689 | if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) | |
2690 | return TREE_TYPE (gimple_assign_rhs1 (assign)); | |
2691 | return vect_embedded_comparison_type (stmt_info); | |
2692 | } | |
2693 | ||
2694 | /* Return true if STMT_INFO extends the result of a load. */ | |
2695 | inline bool | |
2696 | vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info) | |
2697 | { | |
2698 | /* Although this is quite large for an inline function, this part | |
2699 | at least should be inline. */ | |
2700 | gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); | |
2701 | if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) | |
2702 | return false; | |
2703 | ||
2704 | tree rhs = gimple_assign_rhs1 (stmt_info->stmt); | |
2705 | tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); | |
2706 | tree rhs_type = TREE_TYPE (rhs); | |
2707 | if (!INTEGRAL_TYPE_P (lhs_type) | |
2708 | || !INTEGRAL_TYPE_P (rhs_type) | |
2709 | || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) | |
2710 | return false; | |
2711 | ||
2712 | stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); | |
2713 | return (def_stmt_info | |
2714 | && STMT_VINFO_DATA_REF (def_stmt_info) | |
2715 | && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); | |
2716 | } | |
2717 | ||
2718 | /* Return true if STMT_INFO is an integer truncation. */ | |
2719 | inline bool | |
2720 | vect_is_integer_truncation (stmt_vec_info stmt_info) | |
2721 | { | |
2722 | gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); | |
2723 | if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) | |
2724 | return false; | |
2725 | ||
2726 | tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); | |
2727 | tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); | |
2728 | return (INTEGRAL_TYPE_P (lhs_type) | |
2729 | && INTEGRAL_TYPE_P (rhs_type) | |
2730 | && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)); | |
2731 | } | |
2732 | ||
2733 | /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code, | |
2734 | or internal_fn contained in ch, respectively. */ | |
2735 | gimple * vect_gimple_build (tree, code_helper, tree, tree = NULL_TREE); | |
2736 | #endif /* GCC_TREE_VECTORIZER_H */ |