]> gcc.gnu.org Git - gcc.git/blame - gcc/tree-vect-slp.c
re PR tree-optimization/42732 ([graphite] ICE: verify_ssa failed)
[gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a
IR
1/* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
3 Foundation, Inc.
b8698a0f 4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "cfgloop.h"
35#include "cfglayout.h"
36#include "expr.h"
37#include "recog.h"
38#include "optabs.h"
39#include "tree-vectorizer.h"
40
a70d6342
IR
41/* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
43
44LOC
45find_bb_location (basic_block bb)
46{
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61}
62
63
ebfd146a
IR
64/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66static void
67vect_free_slp_tree (slp_tree node)
68{
69 if (!node)
70 return;
71
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
b8698a0f 74
ebfd146a
IR
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
b8698a0f 77
ebfd146a 78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
b8698a0f 79
ebfd146a
IR
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
82
83 free (node);
84}
85
86
87/* Free the memory allocated for the SLP instance. */
88
89void
90vect_free_slp_instance (slp_instance instance)
91{
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
95}
96
97
98/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
101
102static bool
a70d6342 103vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
b8698a0f 104 slp_tree slp_node, gimple stmt,
a70d6342 105 VEC (gimple, heap) **def_stmts0,
ebfd146a
IR
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
b8698a0f 109 tree *first_stmt_def0_type,
ebfd146a
IR
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
114{
115 tree oprnd;
116 unsigned int i, number_of_oprnds;
117 tree def;
118 gimple def_stmt;
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
b8698a0f 120 stmt_vec_info stmt_info =
ebfd146a
IR
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
a70d6342 123 struct loop *loop = NULL;
b8698a0f 124
a70d6342
IR
125 if (loop_vinfo)
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
127
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
130
131 for (i = 0; i < number_of_oprnds; i++)
132 {
133 oprnd = gimple_op (stmt, i + 1);
134
b8698a0f 135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 136 &dt[i])
ebfd146a
IR
137 || (!def_stmt && dt[i] != vect_constant_def))
138 {
b8698a0f 139 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
140 {
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
143 }
144
145 return false;
146 }
147
a70d6342
IR
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
ebfd146a 150 pattern. */
a70d6342 151 if (loop && def_stmt && gimple_bb (def_stmt)
ebfd146a
IR
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
155 {
156 if (!*first_stmt_dt0)
157 *pattern0 = true;
158 else
159 {
160 if (i == 1 && !*first_stmt_dt1)
161 *pattern1 = true;
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
163 {
164 if (vect_print_dump_info (REPORT_DETAILS))
165 {
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
169 }
170
171 return false;
172 }
173 }
174
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
177
178 if (*dt == vect_unknown_def_type)
179 {
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
182 return false;
183 }
184
185 switch (gimple_code (def_stmt))
186 {
187 case GIMPLE_PHI:
188 def = gimple_phi_result (def_stmt);
189 break;
190
191 case GIMPLE_ASSIGN:
192 def = gimple_assign_lhs (def_stmt);
193 break;
194
195 default:
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
198 return false;
199 }
200 }
201
202 if (!*first_stmt_dt0)
203 {
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
206 if (def)
207 *first_stmt_def0_type = TREE_TYPE (def);
208 else
209 *first_stmt_const_oprnd = oprnd;
210
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215 else
216 /* Store. */
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
218 }
b8698a0f 219
ebfd146a
IR
220 else
221 {
222 if (!*first_stmt_dt1 && i == 1)
223 {
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
226 if (def)
227 *first_stmt_def1_type = TREE_TYPE (def);
228 else
229 {
b8698a0f 230 /* We assume that the stmt contains only one constant
ebfd146a
IR
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
233 {
b8698a0f 234 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 235 fprintf (vect_dump, "Build SLP failed: two constant "
b8698a0f 236 "oprnds in stmt");
ebfd146a
IR
237 return false;
238 }
239 *first_stmt_const_oprnd = oprnd;
240 }
241 }
242 else
243 {
b8698a0f 244 /* Not first stmt of the group, check that the def-stmt/s match
ebfd146a 245 the def-stmt/s of the first stmt. */
b8698a0f 246 if ((i == 0
ebfd146a
IR
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && *first_stmt_def0_type != TREE_TYPE (def))))
b8698a0f 250 || (i == 1
ebfd146a
IR
251 && (*first_stmt_dt1 != dt[i]
252 || (*first_stmt_def1_type && def
b8698a0f
L
253 && *first_stmt_def1_type != TREE_TYPE (def))))
254 || (!def
255 && TREE_TYPE (*first_stmt_const_oprnd)
ebfd146a 256 != TREE_TYPE (oprnd)))
b8698a0f
L
257 {
258 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 259 fprintf (vect_dump, "Build SLP failed: different types ");
b8698a0f 260
ebfd146a
IR
261 return false;
262 }
263 }
264 }
265
266 /* Check the types of the definitions. */
267 switch (dt[i])
268 {
269 case vect_constant_def:
8644a673 270 case vect_external_def:
ebfd146a 271 break;
b8698a0f 272
8644a673 273 case vect_internal_def:
ebfd146a
IR
274 if (i == 0)
275 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
276 else
277 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
278 break;
279
280 default:
281 /* FORNOW: Not supported. */
b8698a0f 282 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
283 {
284 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
285 print_generic_expr (vect_dump, def, TDF_SLIM);
286 }
287
288 return false;
289 }
290 }
291
292 return true;
293}
294
295
296/* Recursively build an SLP tree starting from NODE.
b8698a0f
L
297 Fail (and return FALSE) if def-stmts are not isomorphic, require data
298 permutation or are of unsupported types of operation. Otherwise, return
ebfd146a
IR
299 TRUE. */
300
301static bool
b8698a0f 302vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
a70d6342
IR
303 slp_tree *node, unsigned int group_size,
304 int *inside_cost, int *outside_cost,
305 int ncopies_for_cost, unsigned int *max_nunits,
ebfd146a 306 VEC (int, heap) **load_permutation,
a70d6342
IR
307 VEC (slp_tree, heap) **loads,
308 unsigned int vectorization_factor)
ebfd146a
IR
309{
310 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
311 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
312 unsigned int i;
313 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
314 gimple stmt = VEC_index (gimple, stmts, 0);
81f40b79
ILT
315 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
316 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
317 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
ebfd146a
IR
318 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
319 tree lhs;
320 bool stop_recursion = false, need_same_oprnds = false;
321 tree vectype, scalar_type, first_op1 = NULL_TREE;
a70d6342 322 unsigned int ncopies;
ebfd146a
IR
323 optab optab;
324 int icode;
325 enum machine_mode optab_op2_mode;
326 enum machine_mode vec_mode;
327 tree first_stmt_const_oprnd = NULL_TREE;
328 struct data_reference *first_dr;
329 bool pattern0 = false, pattern1 = false;
330 HOST_WIDE_INT dummy;
331 bool permutation = false;
332 unsigned int load_place;
333 gimple first_load;
334
335 /* For every stmt in NODE find its def stmt/s. */
336 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
337 {
b8698a0f 338 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
339 {
340 fprintf (vect_dump, "Build SLP for ");
341 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
342 }
343
344 lhs = gimple_get_lhs (stmt);
345 if (lhs == NULL_TREE)
346 {
b8698a0f 347 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
348 {
349 fprintf (vect_dump,
350 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
351 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
352 }
b8698a0f 353
ebfd146a
IR
354 return false;
355 }
356
b8698a0f 357 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
358 vectype = get_vectype_for_scalar_type (scalar_type);
359 if (!vectype)
360 {
361 if (vect_print_dump_info (REPORT_SLP))
362 {
363 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
364 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
365 }
366 return false;
367 }
b8698a0f 368
ebfd146a 369 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
370 if (ncopies != 1)
371 {
372 if (vect_print_dump_info (REPORT_SLP))
373 fprintf (vect_dump, "SLP with multiple types ");
ebfd146a 374
a70d6342
IR
375 /* FORNOW: multiple types are unsupported in BB SLP. */
376 if (bb_vinfo)
377 return false;
378 }
b8698a0f 379
ebfd146a
IR
380 /* In case of multiple types we need to detect the smallest type. */
381 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
382 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
b8698a0f 383
ebfd146a
IR
384 if (is_gimple_call (stmt))
385 rhs_code = CALL_EXPR;
386 else
387 rhs_code = gimple_assign_rhs_code (stmt);
388
389 /* Check the operation. */
390 if (i == 0)
391 {
392 first_stmt_code = rhs_code;
393
b8698a0f 394 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
395 vector shift with scalar shift operand. */
396 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
397 || rhs_code == LROTATE_EXPR
398 || rhs_code == RROTATE_EXPR)
399 {
400 vec_mode = TYPE_MODE (vectype);
401
402 /* First see if we have a vector/vector shift. */
403 optab = optab_for_tree_code (rhs_code, vectype,
404 optab_vector);
405
406 if (!optab
407 || (optab->handlers[(int) vec_mode].insn_code
408 == CODE_FOR_nothing))
409 {
410 /* No vector/vector shift, try for a vector/scalar shift. */
411 optab = optab_for_tree_code (rhs_code, vectype,
412 optab_scalar);
413
414 if (!optab)
415 {
416 if (vect_print_dump_info (REPORT_SLP))
417 fprintf (vect_dump, "Build SLP failed: no optab.");
418 return false;
419 }
420 icode = (int) optab->handlers[(int) vec_mode].insn_code;
421 if (icode == CODE_FOR_nothing)
422 {
423 if (vect_print_dump_info (REPORT_SLP))
424 fprintf (vect_dump, "Build SLP failed: "
425 "op not supported by target.");
426 return false;
427 }
428 optab_op2_mode = insn_data[icode].operand[2].mode;
429 if (!VECTOR_MODE_P (optab_op2_mode))
430 {
431 need_same_oprnds = true;
432 first_op1 = gimple_assign_rhs2 (stmt);
433 }
434 }
435 }
436 }
437 else
438 {
439 if (first_stmt_code != rhs_code
440 && (first_stmt_code != IMAGPART_EXPR
441 || rhs_code != REALPART_EXPR)
442 && (first_stmt_code != REALPART_EXPR
443 || rhs_code != IMAGPART_EXPR))
444 {
b8698a0f 445 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 446 {
b8698a0f 447 fprintf (vect_dump,
ebfd146a
IR
448 "Build SLP failed: different operation in stmt ");
449 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
450 }
b8698a0f 451
ebfd146a
IR
452 return false;
453 }
b8698a0f
L
454
455 if (need_same_oprnds
ebfd146a
IR
456 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
457 {
b8698a0f 458 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 459 {
b8698a0f 460 fprintf (vect_dump,
ebfd146a
IR
461 "Build SLP failed: different shift arguments in ");
462 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
463 }
b8698a0f 464
ebfd146a
IR
465 return false;
466 }
467 }
468
469 /* Strided store or load. */
470 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
471 {
472 if (REFERENCE_CLASS_P (lhs))
473 {
474 /* Store. */
b8698a0f
L
475 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
476 stmt, &def_stmts0, &def_stmts1,
477 &first_stmt_dt0,
478 &first_stmt_dt1,
479 &first_stmt_def0_type,
ebfd146a
IR
480 &first_stmt_def1_type,
481 &first_stmt_const_oprnd,
482 ncopies_for_cost,
483 &pattern0, &pattern1))
484 return false;
485 }
486 else
487 {
488 /* Load. */
489 /* FORNOW: Check that there is no gap between the loads. */
490 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
491 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
492 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
494 {
495 if (vect_print_dump_info (REPORT_SLP))
496 {
497 fprintf (vect_dump, "Build SLP failed: strided "
498 "loads have gaps ");
499 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
500 }
b8698a0f 501
ebfd146a
IR
502 return false;
503 }
2f0fa28e
IR
504
505 /* Check that the size of interleaved loads group is not
506 greater than the SLP group size. */
507 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
508 > ncopies * group_size)
509 {
510 if (vect_print_dump_info (REPORT_SLP))
511 {
512 fprintf (vect_dump, "Build SLP failed: the number of "
513 "interleaved loads is greater than"
514 " the SLP group size ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
516 }
517
518 return false;
519 }
b8698a0f 520
ebfd146a 521 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
b8698a0f 522
ebfd146a
IR
523 if (first_load == stmt)
524 {
525 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
526 if (vect_supportable_dr_alignment (first_dr)
527 == dr_unaligned_unsupported)
528 {
529 if (vect_print_dump_info (REPORT_SLP))
530 {
531 fprintf (vect_dump, "Build SLP failed: unsupported "
532 "unaligned load ");
533 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
534 }
b8698a0f 535
ebfd146a
IR
536 return false;
537 }
b8698a0f 538
ebfd146a
IR
539 /* Analyze costs (for the first stmt in the group). */
540 vect_model_load_cost (vinfo_for_stmt (stmt),
541 ncopies_for_cost, *node);
542 }
b8698a0f 543
ebfd146a
IR
544 /* Store the place of this load in the interleaving chain. In
545 case that permutation is needed we later decide if a specific
546 permutation is supported. */
547 load_place = vect_get_place_in_interleaving_chain (stmt,
548 first_load);
549 if (load_place != i)
550 permutation = true;
b8698a0f 551
ebfd146a 552 VEC_safe_push (int, heap, *load_permutation, load_place);
b8698a0f 553
ebfd146a
IR
554 /* We stop the tree when we reach a group of loads. */
555 stop_recursion = true;
556 continue;
557 }
558 } /* Strided access. */
559 else
560 {
561 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
562 {
563 /* Not strided load. */
b8698a0f 564 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
565 {
566 fprintf (vect_dump, "Build SLP failed: not strided load ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
568 }
569
570 /* FORNOW: Not strided loads are not supported. */
571 return false;
572 }
573
574 /* Not memory operation. */
575 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
576 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
577 {
b8698a0f 578 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
579 {
580 fprintf (vect_dump, "Build SLP failed: operation");
581 fprintf (vect_dump, " unsupported ");
582 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
583 }
584
585 return false;
586 }
587
b8698a0f 588 /* Find the def-stmts. */
a70d6342 589 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
ebfd146a 590 &def_stmts0, &def_stmts1,
b8698a0f
L
591 &first_stmt_dt0, &first_stmt_dt1,
592 &first_stmt_def0_type,
ebfd146a
IR
593 &first_stmt_def1_type,
594 &first_stmt_const_oprnd,
595 ncopies_for_cost,
596 &pattern0, &pattern1))
597 return false;
598 }
599 }
600
601 /* Add the costs of the node to the overall instance costs. */
b8698a0f 602 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
ebfd146a
IR
603 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
604
605 /* Strided loads were reached - stop the recursion. */
606 if (stop_recursion)
607 {
608 if (permutation)
609 {
b8698a0f
L
610 VEC_safe_push (slp_tree, heap, *loads, *node);
611 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
ebfd146a
IR
612 }
613
614 return true;
615 }
616
b8698a0f 617 /* Create SLP_TREE nodes for the definition node/s. */
8644a673 618 if (first_stmt_dt0 == vect_internal_def)
ebfd146a
IR
619 {
620 slp_tree left_node = XNEW (struct _slp_tree);
621 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
622 SLP_TREE_VEC_STMTS (left_node) = NULL;
623 SLP_TREE_LEFT (left_node) = NULL;
624 SLP_TREE_RIGHT (left_node) = NULL;
625 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
626 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
b8698a0f
L
627 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
628 inside_cost, outside_cost, ncopies_for_cost,
a70d6342
IR
629 max_nunits, load_permutation, loads,
630 vectorization_factor))
ebfd146a 631 return false;
b8698a0f 632
ebfd146a
IR
633 SLP_TREE_LEFT (*node) = left_node;
634 }
635
8644a673 636 if (first_stmt_dt1 == vect_internal_def)
ebfd146a
IR
637 {
638 slp_tree right_node = XNEW (struct _slp_tree);
639 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
640 SLP_TREE_VEC_STMTS (right_node) = NULL;
641 SLP_TREE_LEFT (right_node) = NULL;
642 SLP_TREE_RIGHT (right_node) = NULL;
643 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
644 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
a70d6342 645 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
ebfd146a 646 inside_cost, outside_cost, ncopies_for_cost,
a70d6342
IR
647 max_nunits, load_permutation, loads,
648 vectorization_factor))
ebfd146a 649 return false;
b8698a0f 650
ebfd146a
IR
651 SLP_TREE_RIGHT (*node) = right_node;
652 }
653
654 return true;
655}
656
657
658static void
659vect_print_slp_tree (slp_tree node)
660{
661 int i;
662 gimple stmt;
663
664 if (!node)
665 return;
666
667 fprintf (vect_dump, "node ");
668 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
669 {
670 fprintf (vect_dump, "\n\tstmt %d ", i);
b8698a0f 671 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
ebfd146a
IR
672 }
673 fprintf (vect_dump, "\n");
674
675 vect_print_slp_tree (SLP_TREE_LEFT (node));
676 vect_print_slp_tree (SLP_TREE_RIGHT (node));
677}
678
679
b8698a0f
L
680/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
681 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
682 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
683 stmts in NODE are to be marked. */
684
685static void
686vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
687{
688 int i;
689 gimple stmt;
690
691 if (!node)
692 return;
693
694 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
695 if (j < 0 || i == j)
696 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
697
698 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
699 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
700}
701
702
a70d6342
IR
703/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
704
705static void
706vect_mark_slp_stmts_relevant (slp_tree node)
707{
708 int i;
709 gimple stmt;
710 stmt_vec_info stmt_info;
711
712 if (!node)
713 return;
714
715 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
716 {
717 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 718 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
719 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
720 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
721 }
722
723 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
724 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
725}
726
727
b8698a0f 728/* Check if the permutation required by the SLP INSTANCE is supported.
ebfd146a
IR
729 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
730
731static bool
732vect_supported_slp_permutation_p (slp_instance instance)
733{
734 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
735 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
736 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
737 VEC (slp_tree, heap) *sorted_loads = NULL;
738 int index;
739 slp_tree *tmp_loads = NULL;
b8698a0f 740 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
ebfd146a 741 slp_tree load;
b8698a0f
L
742
743 /* FORNOW: The only supported loads permutation is loads from the same
ebfd146a 744 location in all the loads in the node, when the data-refs in
b8698a0f 745 nodes of LOADS constitute an interleaving chain.
ebfd146a
IR
746 Sort the nodes according to the order of accesses in the chain. */
747 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
b8698a0f
L
748 for (i = 0, j = 0;
749 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
750 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
ebfd146a
IR
751 i += group_size, j++)
752 {
753 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
754 /* Check that the loads are all in the same interleaving chain. */
755 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
756 {
757 if (vect_print_dump_info (REPORT_DETAILS))
758 {
759 fprintf (vect_dump, "Build SLP failed: unsupported data "
760 "permutation ");
761 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
762 }
b8698a0f 763
ebfd146a 764 free (tmp_loads);
b8698a0f 765 return false;
ebfd146a
IR
766 }
767
768 tmp_loads[index] = load;
769 }
b8698a0f 770
ebfd146a
IR
771 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
772 for (i = 0; i < group_size; i++)
773 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
774
775 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
776 SLP_INSTANCE_LOADS (instance) = sorted_loads;
777 free (tmp_loads);
778
779 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
780 SLP_INSTANCE_UNROLLING_FACTOR (instance),
781 instance, true))
782 return false;
783
784 return true;
785}
786
787
788/* Check if the required load permutation is supported.
789 LOAD_PERMUTATION contains a list of indices of the loads.
790 In SLP this permutation is relative to the order of strided stores that are
791 the base of the SLP instance. */
792
793static bool
794vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
795 VEC (int, heap) *load_permutation)
796{
797 int i = 0, j, prev = -1, next, k;
798 bool supported;
7417f6c0 799 sbitmap load_index;
ebfd146a 800
a70d6342 801 /* FORNOW: permutations are only supported in SLP. */
ebfd146a
IR
802 if (!slp_instn)
803 return false;
804
805 if (vect_print_dump_info (REPORT_SLP))
806 {
807 fprintf (vect_dump, "Load permutation ");
808 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
809 fprintf (vect_dump, "%d ", next);
810 }
811
b8698a0f
L
812 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
813 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
ebfd146a
IR
814 well. */
815 if (VEC_length (int, load_permutation)
816 != (unsigned int) (group_size * group_size))
817 return false;
818
819 supported = true;
7417f6c0
IR
820 load_index = sbitmap_alloc (group_size);
821 sbitmap_zero (load_index);
ebfd146a
IR
822 for (j = 0; j < group_size; j++)
823 {
824 for (i = j * group_size, k = 0;
825 VEC_iterate (int, load_permutation, i, next) && k < group_size;
826 i++, k++)
827 {
828 if (i != j * group_size && next != prev)
829 {
830 supported = false;
831 break;
832 }
833
834 prev = next;
b8698a0f 835 }
7417f6c0
IR
836
837 if (TEST_BIT (load_index, prev))
838 {
839 supported = false;
840 break;
841 }
842
843 SET_BIT (load_index, prev);
ebfd146a 844 }
7417f6c0
IR
845
846 sbitmap_free (load_index);
ebfd146a
IR
847
848 if (supported && i == group_size * group_size
849 && vect_supported_slp_permutation_p (slp_instn))
850 return true;
851
b8698a0f 852 return false;
ebfd146a
IR
853}
854
855
b8698a0f 856/* Find the first load in the loop that belongs to INSTANCE.
ebfd146a 857 When loads are in several SLP nodes, there can be a case in which the first
b8698a0f 858 load does not appear in the first SLP node to be transformed, causing
ebfd146a
IR
859 incorrect order of statements. Since we generate all the loads together,
860 they must be inserted before the first load of the SLP instance and not
861 before the first load of the first node of the instance. */
b8698a0f
L
862static gimple
863vect_find_first_load_in_slp_instance (slp_instance instance)
ebfd146a
IR
864{
865 int i, j;
866 slp_tree load_node;
867 gimple first_load = NULL, load;
868
b8698a0f
L
869 for (i = 0;
870 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
ebfd146a 871 i++)
b8698a0f 872 for (j = 0;
ebfd146a
IR
873 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
874 j++)
875 first_load = get_earlier_stmt (load, first_load);
b8698a0f 876
ebfd146a
IR
877 return first_load;
878}
879
880
881/* Analyze an SLP instance starting from a group of strided stores. Call
b8698a0f 882 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
883 Return FALSE if it's impossible to SLP any stmt in the loop. */
884
885static bool
a70d6342
IR
886vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
887 gimple stmt)
ebfd146a
IR
888{
889 slp_instance new_instance;
890 slp_tree node = XNEW (struct _slp_tree);
891 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
892 unsigned int unrolling_factor = 1, nunits;
893 tree vectype, scalar_type;
894 gimple next;
0f900dfa 895 unsigned int vectorization_factor = 0;
ebfd146a
IR
896 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
897 unsigned int max_nunits = 0;
898 VEC (int, heap) *load_permutation;
899 VEC (slp_tree, heap) *loads;
b8698a0f 900
ebfd146a
IR
901 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
902 vinfo_for_stmt (stmt))));
903 vectype = get_vectype_for_scalar_type (scalar_type);
904 if (!vectype)
905 {
906 if (vect_print_dump_info (REPORT_SLP))
907 {
908 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
909 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
910 }
911 return false;
912 }
913
914 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
915 if (loop_vinfo)
916 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
917 else
918 /* No multitypes in BB SLP. */
919 vectorization_factor = nunits;
920
a70d6342
IR
921 /* Calculate the unrolling factor. */
922 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
923 if (unrolling_factor != 1 && !loop_vinfo)
924 {
925 if (vect_print_dump_info (REPORT_SLP))
e9dbe7bb
IR
926 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
927 " block SLP");
b8698a0f 928
a70d6342
IR
929 return false;
930 }
931
b8698a0f 932 /* Create a node (a root of the SLP tree) for the packed strided stores. */
ebfd146a
IR
933 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
934 next = stmt;
935 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
936 while (next)
937 {
938 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
939 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
940 }
941
942 SLP_TREE_VEC_STMTS (node) = NULL;
943 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
944 SLP_TREE_LEFT (node) = NULL;
945 SLP_TREE_RIGHT (node) = NULL;
946 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
947 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
948
ebfd146a
IR
949 /* Calculate the number of vector stmts to create based on the unrolling
950 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
951 GROUP_SIZE / NUNITS otherwise. */
952 ncopies_for_cost = unrolling_factor * group_size / nunits;
b8698a0f
L
953
954 load_permutation = VEC_alloc (int, heap, group_size * group_size);
955 loads = VEC_alloc (slp_tree, heap, group_size);
ebfd146a
IR
956
957 /* Build the tree for the SLP instance. */
b8698a0f
L
958 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
959 &inside_cost, &outside_cost, ncopies_for_cost,
960 &max_nunits, &load_permutation, &loads,
a70d6342 961 vectorization_factor))
ebfd146a 962 {
b8698a0f 963 /* Create a new SLP instance. */
ebfd146a
IR
964 new_instance = XNEW (struct _slp_instance);
965 SLP_INSTANCE_TREE (new_instance) = node;
966 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
967 /* Calculate the unrolling factor based on the smallest type in the
968 loop. */
969 if (max_nunits > nunits)
970 unrolling_factor = least_common_multiple (max_nunits, group_size)
971 / group_size;
b8698a0f 972
ebfd146a
IR
973 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
974 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
975 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
976 SLP_INSTANCE_LOADS (new_instance) = loads;
977 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
978 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
979 if (VEC_length (slp_tree, loads))
980 {
981 if (!vect_supported_load_permutation_p (new_instance, group_size,
b8698a0f 982 load_permutation))
ebfd146a
IR
983 {
984 if (vect_print_dump_info (REPORT_SLP))
985 {
986 fprintf (vect_dump, "Build SLP failed: unsupported load "
987 "permutation ");
988 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
989 }
990
991 vect_free_slp_instance (new_instance);
992 return false;
993 }
994
995 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
996 = vect_find_first_load_in_slp_instance (new_instance);
997 }
998 else
999 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1000
a70d6342 1001 if (loop_vinfo)
b8698a0f
L
1002 VEC_safe_push (slp_instance, heap,
1003 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
a70d6342
IR
1004 new_instance);
1005 else
1006 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1007 new_instance);
b8698a0f 1008
ebfd146a
IR
1009 if (vect_print_dump_info (REPORT_SLP))
1010 vect_print_slp_tree (node);
1011
1012 return true;
1013 }
1014
1015 /* Failed to SLP. */
1016 /* Free the allocated memory. */
1017 vect_free_slp_tree (node);
1018 VEC_free (int, heap, load_permutation);
1019 VEC_free (slp_tree, heap, loads);
b8698a0f 1020
a70d6342 1021 return false;
ebfd146a
IR
1022}
1023
1024
1025/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1026 trees of packed scalar stmts if SLP is possible. */
1027
1028bool
a70d6342 1029vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
1030{
1031 unsigned int i;
a70d6342 1032 VEC (gimple, heap) *strided_stores;
ebfd146a 1033 gimple store;
a70d6342 1034 bool ok = false;
ebfd146a
IR
1035
1036 if (vect_print_dump_info (REPORT_SLP))
1037 fprintf (vect_dump, "=== vect_analyze_slp ===");
1038
a70d6342
IR
1039 if (loop_vinfo)
1040 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1041 else
1042 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
b8698a0f 1043
ebfd146a 1044 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
a70d6342
IR
1045 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1046 ok = true;
ebfd146a 1047
b8698a0f 1048 if (bb_vinfo && !ok)
a70d6342
IR
1049 {
1050 if (vect_print_dump_info (REPORT_SLP))
1051 fprintf (vect_dump, "Failed to SLP the basic block.");
1052
1053 return false;
1054 }
ebfd146a
IR
1055
1056 return true;
1057}
1058
1059
1060/* For each possible SLP instance decide whether to SLP it and calculate overall
1061 unrolling factor needed to SLP the loop. */
1062
1063void
1064vect_make_slp_decision (loop_vec_info loop_vinfo)
1065{
1066 unsigned int i, unrolling_factor = 1;
1067 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1068 slp_instance instance;
1069 int decided_to_slp = 0;
1070
1071 if (vect_print_dump_info (REPORT_SLP))
1072 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1073
1074 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1075 {
1076 /* FORNOW: SLP if you can. */
1077 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1078 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1079
b8698a0f
L
1080 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1081 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ebfd146a
IR
1082 loop-based vectorization. Such stmts will be marked as HYBRID. */
1083 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1084 decided_to_slp++;
1085 }
1086
1087 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1088
b8698a0f
L
1089 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1090 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
ebfd146a
IR
1091 decided_to_slp, unrolling_factor);
1092}
1093
1094
1095/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1096 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1097
1098static void
1099vect_detect_hybrid_slp_stmts (slp_tree node)
1100{
1101 int i;
1102 gimple stmt;
1103 imm_use_iterator imm_iter;
1104 gimple use_stmt;
1105
1106 if (!node)
1107 return;
1108
1109 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1110 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1111 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1112 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1113 if (vinfo_for_stmt (use_stmt)
1114 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
1115 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
1116 vect_mark_slp_stmts (node, hybrid, i);
1117
1118 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1119 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1120}
1121
1122
1123/* Find stmts that must be both vectorized and SLPed. */
1124
1125void
1126vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1127{
1128 unsigned int i;
1129 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1130 slp_instance instance;
1131
1132 if (vect_print_dump_info (REPORT_SLP))
1133 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1134
1135 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1136 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1137}
1138
a70d6342
IR
1139
1140/* Create and initialize a new bb_vec_info struct for BB, as well as
1141 stmt_vec_info structs for all the stmts in it. */
b8698a0f 1142
a70d6342
IR
1143static bb_vec_info
1144new_bb_vec_info (basic_block bb)
1145{
1146 bb_vec_info res = NULL;
1147 gimple_stmt_iterator gsi;
1148
1149 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1150 BB_VINFO_BB (res) = bb;
1151
1152 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1153 {
1154 gimple stmt = gsi_stmt (gsi);
1155 gimple_set_uid (stmt, 0);
1156 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1157 }
1158
1159 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1160 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1161
1162 bb->aux = res;
1163 return res;
1164}
1165
1166
1167/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1168 stmts in the basic block. */
1169
1170static void
1171destroy_bb_vec_info (bb_vec_info bb_vinfo)
1172{
1173 basic_block bb;
1174 gimple_stmt_iterator si;
1175
1176 if (!bb_vinfo)
1177 return;
1178
1179 bb = BB_VINFO_BB (bb_vinfo);
1180
1181 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1182 {
1183 gimple stmt = gsi_stmt (si);
1184 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1185
1186 if (stmt_info)
1187 /* Free stmt_vec_info. */
1188 free_stmt_vec_info (stmt);
1189 }
1190
1191 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1192 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1193 free (bb_vinfo);
1194 bb->aux = NULL;
1195}
1196
1197
1198/* Analyze statements contained in SLP tree node after recursively analyzing
1199 the subtree. Return TRUE if the operations are supported. */
1200
1201static bool
1202vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1203{
1204 bool dummy;
1205 int i;
1206 gimple stmt;
1207
1208 if (!node)
1209 return true;
1210
1211 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1212 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1213 return false;
1214
1215 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1216 {
1217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218 gcc_assert (stmt_info);
1219 gcc_assert (PURE_SLP_STMT (stmt_info));
1220
1221 if (!vect_analyze_stmt (stmt, &dummy, node))
1222 return false;
1223 }
1224
1225 return true;
1226}
1227
1228
1229/* Analyze statements in SLP instances of the basic block. Return TRUE if the
1230 operations are supported. */
1231
1232static bool
1233vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1234{
1235 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1236 slp_instance instance;
1237 int i;
1238
1239 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1240 {
b8698a0f 1241 if (!vect_slp_analyze_node_operations (bb_vinfo,
a70d6342
IR
1242 SLP_INSTANCE_TREE (instance)))
1243 {
1244 vect_free_slp_instance (instance);
1245 VEC_ordered_remove (slp_instance, slp_instances, i);
1246 }
1247 else
1248 i++;
b8698a0f
L
1249 }
1250
a70d6342
IR
1251 if (!VEC_length (slp_instance, slp_instances))
1252 return false;
1253
1254 return true;
1255}
1256
1257
1258/* Cheick if the basic block can be vectorized. */
1259
1260bb_vec_info
1261vect_slp_analyze_bb (basic_block bb)
1262{
1263 bb_vec_info bb_vinfo;
1264 VEC (ddr_p, heap) *ddrs;
1265 VEC (slp_instance, heap) *slp_instances;
1266 slp_instance instance;
1267 int i, insns = 0;
1268 gimple_stmt_iterator gsi;
1269
1270 if (vect_print_dump_info (REPORT_DETAILS))
1271 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1272
1273 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1274 insns++;
1275
1276 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1277 {
1278 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1279 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1280 "block.\n");
1281
1282 return NULL;
1283 }
1284
1285 bb_vinfo = new_bb_vec_info (bb);
1286 if (!bb_vinfo)
1287 return NULL;
1288
1289 if (!vect_analyze_data_refs (NULL, bb_vinfo))
1290 {
1291 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1292 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1293 "block.\n");
b8698a0f 1294
a70d6342
IR
1295 destroy_bb_vec_info (bb_vinfo);
1296 return NULL;
1297 }
1298
1299 ddrs = BB_VINFO_DDRS (bb_vinfo);
b8698a0f 1300 if (!VEC_length (ddr_p, ddrs))
a70d6342
IR
1301 {
1302 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1303 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1304 "block.\n");
1305
1306 destroy_bb_vec_info (bb_vinfo);
1307 return NULL;
1308 }
1309
1310 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1311 {
1312 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1313 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1314 "block.\n");
b8698a0f 1315
a70d6342
IR
1316 destroy_bb_vec_info (bb_vinfo);
1317 return NULL;
1318 }
b8698a0f 1319
a70d6342
IR
1320 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
1321 {
1322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1323 fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
1324 " block.\n");
b8698a0f 1325
a70d6342
IR
1326 destroy_bb_vec_info (bb_vinfo);
1327 return NULL;
1328 }
1329
1330 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1331 {
1332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1333 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1334 "block.\n");
b8698a0f 1335
a70d6342
IR
1336 destroy_bb_vec_info (bb_vinfo);
1337 return NULL;
1338 }
1339
1340 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1341 {
1342 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1343 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1344 "block.\n");
1345
1346 destroy_bb_vec_info (bb_vinfo);
1347 return NULL;
1348 }
1349
1350 /* Check the SLP opportunities in the basic block, analyze and build SLP
1351 trees. */
1352 if (!vect_analyze_slp (NULL, bb_vinfo))
1353 {
1354 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1355 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1356 "in basic block.\n");
1357
1358 destroy_bb_vec_info (bb_vinfo);
1359 return NULL;
1360 }
b8698a0f 1361
a70d6342
IR
1362 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1363
1364 /* Mark all the statements that we want to vectorize as pure SLP and
1365 relevant. */
1366 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1367 {
1368 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1369 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 1370 }
a70d6342
IR
1371
1372 if (!vect_slp_analyze_operations (bb_vinfo))
1373 {
1374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1375 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1376
1377 destroy_bb_vec_info (bb_vinfo);
1378 return NULL;
1379 }
1380
1381 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 1382 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
a70d6342
IR
1383
1384 return bb_vinfo;
1385}
1386
1387
b8698a0f 1388/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
ebfd146a
IR
1389 the number of created vector stmts depends on the unrolling factor). However,
1390 the actual number of vector stmts for every SLP node depends on VF which is
1391 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
b8698a0f 1392 In this function we assume that the inside costs calculated in
ebfd146a
IR
1393 vect_model_xxx_cost are linear in ncopies. */
1394
1395void
1396vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1397{
1398 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1399 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1400 slp_instance instance;
1401
1402 if (vect_print_dump_info (REPORT_SLP))
1403 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1404
1405 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1406 /* We assume that costs are linear in ncopies. */
b8698a0f
L
1407 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1408 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
ebfd146a
IR
1409}
1410
a70d6342 1411
b8698a0f
L
1412/* For constant and loop invariant defs of SLP_NODE this function returns
1413 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
ebfd146a
IR
1414 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1415 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1416
1417static void
1418vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1419 unsigned int op_num, unsigned int number_of_vectors)
1420{
1421 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1422 gimple stmt = VEC_index (gimple, stmts, 0);
1423 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1424 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1425 int nunits;
1426 tree vec_cst;
1427 tree t = NULL_TREE;
1428 int j, number_of_places_left_in_vector;
1429 tree vector_type;
1430 tree op, vop;
1431 int group_size = VEC_length (gimple, stmts);
1432 unsigned int vec_num, i;
1433 int number_of_copies = 1;
1434 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1435 bool constant_p, is_store;
1436
1437 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1438 {
1439 is_store = true;
1440 op = gimple_assign_rhs1 (stmt);
1441 }
1442 else
1443 {
1444 is_store = false;
1445 op = gimple_op (stmt, op_num + 1);
1446 }
1447
1448 if (CONSTANT_CLASS_P (op))
1449 {
1450 vector_type = vectype;
1451 constant_p = true;
1452 }
1453 else
1454 {
b8698a0f 1455 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
ebfd146a
IR
1456 gcc_assert (vector_type);
1457 constant_p = false;
1458 }
1459
1460 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1461
1462 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 1463 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
1464
1465 For example, we have two scalar operands, s1 and s2 (e.g., group of
1466 strided accesses of size two), while NUNITS is four (i.e., four scalars
1467 of this type can be packed in a vector). The output vector will contain
1468 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1469 will be 2).
1470
b8698a0f 1471 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
1472 containing the operands.
1473
1474 For example, NUNITS is four as before, and the group size is 8
1475 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1476 {s5, s6, s7, s8}. */
b8698a0f 1477
ebfd146a
IR
1478 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1479
1480 number_of_places_left_in_vector = nunits;
1481 for (j = 0; j < number_of_copies; j++)
1482 {
1483 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1484 {
1485 if (is_store)
1486 op = gimple_assign_rhs1 (stmt);
1487 else
1488 op = gimple_op (stmt, op_num + 1);
b8698a0f 1489
ebfd146a
IR
1490 /* Create 'vect_ = {op0,op1,...,opn}'. */
1491 t = tree_cons (NULL_TREE, op, t);
1492
1493 number_of_places_left_in_vector--;
1494
1495 if (number_of_places_left_in_vector == 0)
1496 {
1497 number_of_places_left_in_vector = nunits;
1498
1499 if (constant_p)
1500 vec_cst = build_vector (vector_type, t);
1501 else
1502 vec_cst = build_constructor_from_list (vector_type, t);
1503 VEC_quick_push (tree, voprnds,
1504 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1505 t = NULL_TREE;
1506 }
1507 }
1508 }
1509
b8698a0f 1510 /* Since the vectors are created in the reverse order, we should invert
ebfd146a
IR
1511 them. */
1512 vec_num = VEC_length (tree, voprnds);
1513 for (j = vec_num - 1; j >= 0; j--)
1514 {
1515 vop = VEC_index (tree, voprnds, j);
1516 VEC_quick_push (tree, *vec_oprnds, vop);
1517 }
1518
1519 VEC_free (tree, heap, voprnds);
1520
1521 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
1522 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1523 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a
IR
1524 to replicate the vectors. */
1525 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1526 {
1527 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1528 VEC_quick_push (tree, *vec_oprnds, vop);
1529 }
1530}
1531
1532
1533/* Get vectorized definitions from SLP_NODE that contains corresponding
1534 vectorized def-stmts. */
1535
1536static void
1537vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1538{
1539 tree vec_oprnd;
1540 gimple vec_def_stmt;
1541 unsigned int i;
1542
1543 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1544
1545 for (i = 0;
1546 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1547 i++)
1548 {
1549 gcc_assert (vec_def_stmt);
1550 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1551 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1552 }
1553}
1554
1555
b8698a0f
L
1556/* Get vectorized definitions for SLP_NODE.
1557 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
1558 call vect_get_constant_vectors() to create vector stmts.
1559 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1560 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
b8698a0f 1561 vect_get_slp_vect_defs() to retrieve them.
ebfd146a 1562 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
b8698a0f
L
1563 the right node. This is used when the second operand must remain scalar. */
1564
ebfd146a
IR
1565void
1566vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1567 VEC (tree,heap) **vec_oprnds1)
1568{
1569 gimple first_stmt;
1570 enum tree_code code;
1571 int number_of_vects;
b8698a0f 1572 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
ebfd146a
IR
1573
1574 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1575 /* The number of vector defs is determined by the number of vector statements
1576 in the node from which we get those statements. */
b8698a0f 1577 if (SLP_TREE_LEFT (slp_node))
ebfd146a
IR
1578 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1579 else
1580 {
1581 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1582 /* Number of vector stmts was calculated according to LHS in
1583 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1584 necessary. See vect_get_smallest_scalar_type() for details. */
1585 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1586 &rhs_size_unit);
1587 if (rhs_size_unit != lhs_size_unit)
1588 {
1589 number_of_vects *= rhs_size_unit;
1590 number_of_vects /= lhs_size_unit;
1591 }
1592 }
1593
1594 /* Allocate memory for vectorized defs. */
1595 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1596
1597 /* SLP_NODE corresponds either to a group of stores or to a group of
1598 unary/binary operations. We don't call this function for loads. */
1599 if (SLP_TREE_LEFT (slp_node))
1600 /* The defs are already vectorized. */
1601 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1602 else
1603 /* Build vectors from scalar defs. */
1604 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1605
1606 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1607 /* Since we don't call this function with loads, this is a group of
1608 stores. */
1609 return;
1610
1611 code = gimple_assign_rhs_code (first_stmt);
1612 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1613 return;
1614
1615 /* The number of vector defs is determined by the number of vector statements
1616 in the node from which we get those statements. */
1617 if (SLP_TREE_RIGHT (slp_node))
1618 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1619 else
1620 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1621
1622 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1623
1624 if (SLP_TREE_RIGHT (slp_node))
1625 /* The defs are already vectorized. */
1626 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1627 else
1628 /* Build vectors from scalar defs. */
1629 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1630}
1631
a70d6342 1632
b8698a0f 1633/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
1634 building a vector of type MASK_TYPE from it) and two input vectors placed in
1635 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1636 shifting by STRIDE elements of DR_CHAIN for every copy.
1637 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 1638 copies).
ebfd146a
IR
1639 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1640 the created stmts must be inserted. */
1641
1642static inline void
b8698a0f 1643vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
faf63e39 1644 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f
L
1645 gimple_stmt_iterator *gsi, slp_tree node,
1646 tree builtin_decl, tree vectype,
ebfd146a
IR
1647 VEC(tree,heap) *dr_chain,
1648 int ncopies, int vect_stmts_counter)
1649{
faf63e39 1650 tree perm_dest;
ebfd146a
IR
1651 gimple perm_stmt = NULL;
1652 stmt_vec_info next_stmt_info;
0f900dfa 1653 int i, stride;
ebfd146a 1654 tree first_vec, second_vec, data_ref;
ebfd146a
IR
1655 VEC (tree, heap) *params = NULL;
1656
ebfd146a 1657 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 1658
b8698a0f 1659 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 1660 stmts later. */
b8698a0f 1661 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
ebfd146a
IR
1662 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1663 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1664
1665 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1666 for (i = 0; i < ncopies; i++)
1667 {
1668 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1669 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1670
1671 /* Build argument list for the vectorized call. */
1672 VEC_free (tree, heap, params);
1673 params = VEC_alloc (tree, heap, 3);
1674 VEC_quick_push (tree, params, first_vec);
1675 VEC_quick_push (tree, params, second_vec);
1676 VEC_quick_push (tree, params, mask);
1677
1678 /* Generate the permute statement. */
1679 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1680 data_ref = make_ssa_name (perm_dest, perm_stmt);
1681 gimple_call_set_lhs (perm_stmt, data_ref);
1682 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 1683
b8698a0f
L
1684 /* Store the vector statement in NODE. */
1685 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
ebfd146a
IR
1686 stride * i + vect_stmts_counter, perm_stmt);
1687
1688 first_vec_indx += stride;
1689 second_vec_indx += stride;
1690 }
1691
1692 /* Mark the scalar stmt as vectorized. */
1693 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1694 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1695}
1696
1697
b8698a0f 1698/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 1699 return in CURRENT_MASK_ELEMENT its equivalent in target specific
b8698a0f 1700 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
1701 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1702 the next vector, i.e., the current first vector is not needed. */
b8698a0f 1703
ebfd146a 1704static bool
b8698a0f 1705vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 1706 int mask_nunits, bool only_one_vec, int index,
b8698a0f 1707 int *mask, int *current_mask_element,
ebfd146a
IR
1708 bool *need_next_vector)
1709{
1710 int i;
1711 static int number_of_mask_fixes = 1;
1712 static bool mask_fixed = false;
1713 static bool needs_first_vector = false;
1714
1715 /* Convert to target specific representation. */
1716 *current_mask_element = first_mask_element + m;
1717 /* Adjust the value in case it's a mask for second and third vectors. */
1718 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1719
1720 if (*current_mask_element < mask_nunits)
1721 needs_first_vector = true;
1722
1723 /* We have only one input vector to permute but the mask accesses values in
1724 the next vector as well. */
1725 if (only_one_vec && *current_mask_element >= mask_nunits)
1726 {
1727 if (vect_print_dump_info (REPORT_DETAILS))
1728 {
1729 fprintf (vect_dump, "permutation requires at least two vectors ");
1730 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1731 }
1732
1733 return false;
1734 }
1735
1736 /* The mask requires the next vector. */
1737 if (*current_mask_element >= mask_nunits * 2)
1738 {
1739 if (needs_first_vector || mask_fixed)
1740 {
1741 /* We either need the first vector too or have already moved to the
b8698a0f 1742 next vector. In both cases, this permutation needs three
ebfd146a
IR
1743 vectors. */
1744 if (vect_print_dump_info (REPORT_DETAILS))
1745 {
1746 fprintf (vect_dump, "permutation requires at "
1747 "least three vectors ");
1748 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1749 }
1750
1751 return false;
1752 }
1753
1754 /* We move to the next vector, dropping the first one and working with
1755 the second and the third - we need to adjust the values of the mask
1756 accordingly. */
1757 *current_mask_element -= mask_nunits * number_of_mask_fixes;
1758
1759 for (i = 0; i < index; i++)
1760 mask[i] -= mask_nunits * number_of_mask_fixes;
1761
1762 (number_of_mask_fixes)++;
1763 mask_fixed = true;
1764 }
1765
1766 *need_next_vector = mask_fixed;
1767
1768 /* This was the last element of this mask. Start a new one. */
1769 if (index == mask_nunits - 1)
1770 {
1771 number_of_mask_fixes = 1;
1772 mask_fixed = false;
1773 needs_first_vector = false;
1774 }
1775
1776 return true;
1777}
1778
1779
1780/* Generate vector permute statements from a list of loads in DR_CHAIN.
1781 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1782 permute statements for SLP_NODE_INSTANCE. */
1783bool
1784vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1785 gimple_stmt_iterator *gsi, int vf,
1786 slp_instance slp_node_instance, bool analyze_only)
1787{
1788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1789 tree mask_element_type = NULL_TREE, mask_type;
1790 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1791 slp_tree node;
1792 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1793 gimple next_scalar_stmt;
1794 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1795 int first_mask_element;
1796 int index, unroll_factor, *mask, current_mask_element, ncopies;
1797 bool only_one_vec = false, need_next_vector = false;
1798 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1799
1800 if (!targetm.vectorize.builtin_vec_perm)
1801 {
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 {
1804 fprintf (vect_dump, "no builtin for vect permute for ");
1805 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1806 }
1807
1808 return false;
1809 }
1810
1811 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1812 &mask_element_type);
1813 if (!builtin_decl || !mask_element_type)
1814 {
1815 if (vect_print_dump_info (REPORT_DETAILS))
1816 {
1817 fprintf (vect_dump, "no builtin for vect permute for ");
1818 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1819 }
1820
1821 return false;
1822 }
1823
1824 mask_type = get_vectype_for_scalar_type (mask_element_type);
1825 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1826 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1827 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1828 scale = mask_nunits / nunits;
1829 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1830
1831 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1832 unrolling factor. */
b8698a0f 1833 orig_vec_stmts_num = group_size *
ebfd146a
IR
1834 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1835 if (orig_vec_stmts_num == 1)
1836 only_one_vec = true;
1837
b8698a0f 1838 /* Number of copies is determined by the final vectorization factor
ebfd146a 1839 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 1840 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 1841
b8698a0f
L
1842 /* Generate permutation masks for every NODE. Number of masks for each NODE
1843 is equal to GROUP_SIZE.
1844 E.g., we have a group of three nodes with three loads from the same
1845 location in each node, and the vector size is 4. I.e., we have a
1846 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
1847 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1848 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1849 ...
1850
1851 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1852 scpecific type, e.g., in bytes for Altivec.
b8698a0f 1853 The last mask is illegal since we assume two operands for permute
ebfd146a
IR
1854 operation, and the mask element values can't be outside that range. Hence,
1855 the last mask must be converted into {2,5,5,5}.
b8698a0f 1856 For the first two permutations we need the first and the second input
ebfd146a 1857 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 1858 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
1859 {c2,a3,b3,c3}. */
1860
1861 for (i = 0;
1862 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1863 i, node);
1864 i++)
1865 {
1866 scalar_index = 0;
1867 index = 0;
1868 vect_stmts_counter = 0;
1869 vec_index = 0;
1870 first_vec_index = vec_index++;
1871 if (only_one_vec)
1872 second_vec_index = first_vec_index;
1873 else
1874 second_vec_index = vec_index++;
1875
1876 for (j = 0; j < unroll_factor; j++)
1877 {
1878 for (k = 0; k < group_size; k++)
1879 {
1880 first_mask_element = (i + j * group_size) * scale;
1881 for (m = 0; m < scale; m++)
1882 {
b8698a0f 1883 if (!vect_get_mask_element (stmt, first_mask_element, m,
ebfd146a
IR
1884 mask_nunits, only_one_vec, index, mask,
1885 &current_mask_element, &need_next_vector))
1886 return false;
1887
1888 mask[index++] = current_mask_element;
b8698a0f 1889 }
ebfd146a
IR
1890
1891 if (index == mask_nunits)
1892 {
faf63e39
RH
1893 tree mask_vec = NULL;
1894
1895 while (--index >= 0)
1896 {
1897 tree t = build_int_cst (mask_element_type, mask[index]);
1898 mask_vec = tree_cons (NULL, t, mask_vec);
1899 }
1900 mask_vec = build_vector (mask_type, mask_vec);
1901 index = 0;
1902
1903 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1904 mask_vec))
1905 {
1906 if (vect_print_dump_info (REPORT_DETAILS))
1907 {
1908 fprintf (vect_dump, "unsupported vect permute ");
1909 print_generic_expr (vect_dump, mask_vec, 0);
1910 }
1911 free (mask);
1912 return false;
1913 }
1914
ebfd146a
IR
1915 if (!analyze_only)
1916 {
1917 if (need_next_vector)
1918 {
1919 first_vec_index = second_vec_index;
1920 second_vec_index = vec_index;
1921 }
1922
1923 next_scalar_stmt = VEC_index (gimple,
1924 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1925
1926 vect_create_mask_and_perm (stmt, next_scalar_stmt,
faf63e39
RH
1927 mask_vec, first_vec_index, second_vec_index,
1928 gsi, node, builtin_decl, vectype, dr_chain,
1929 ncopies, vect_stmts_counter++);
ebfd146a 1930 }
b8698a0f
L
1931 }
1932 }
1933 }
1934 }
ebfd146a
IR
1935
1936 free (mask);
1937 return true;
1938}
1939
1940
1941
1942/* Vectorize SLP instance tree in postorder. */
1943
1944static bool
1945vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 1946 unsigned int vectorization_factor)
ebfd146a
IR
1947{
1948 gimple stmt;
1949 bool strided_store, is_store;
1950 gimple_stmt_iterator si;
1951 stmt_vec_info stmt_info;
1952 unsigned int vec_stmts_size, nunits, group_size;
1953 tree vectype;
1954 int i;
1955 slp_tree loads_node;
1956
1957 if (!node)
1958 return false;
1959
1960 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1961 vectorization_factor);
1962 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1963 vectorization_factor);
b8698a0f 1964
ebfd146a
IR
1965 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1966 stmt_info = vinfo_for_stmt (stmt);
1967
1968 /* VECTYPE is the type of the destination. */
1969 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1970 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1971 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1972
1973 /* For each SLP instance calculate number of vector stmts to be created
1974 for the scalar stmts in each node of the SLP tree. Number of vector
1975 elements in one vector iteration is the number of scalar elements in
1976 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1977 size. */
1978 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1979
1980 /* In case of load permutation we have to allocate vectorized statements for
1981 all the nodes that participate in that permutation. */
1982 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1983 {
1984 for (i = 0;
1985 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1986 i++)
1987 {
1988 if (!SLP_TREE_VEC_STMTS (loads_node))
1989 {
1990 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
1991 vec_stmts_size);
1992 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
1993 }
1994 }
1995 }
1996
1997 if (!SLP_TREE_VEC_STMTS (node))
1998 {
1999 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2000 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2001 }
2002
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 {
2005 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2006 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
b8698a0f 2007 }
ebfd146a
IR
2008
2009 /* Loads should be inserted before the first load. */
2010 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2011 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2012 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2013 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2014 else
2015 si = gsi_for_stmt (stmt);
b8698a0f 2016
ebfd146a
IR
2017 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2018 if (is_store)
2019 {
2020 if (DR_GROUP_FIRST_DR (stmt_info))
2021 /* If IS_STORE is TRUE, the vectorization of the
2022 interleaving chain was completed - free all the stores in
2023 the chain. */
2024 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2025 else
2026 /* FORNOW: SLP originates only from strided stores. */
2027 gcc_unreachable ();
2028
2029 return true;
2030 }
2031
2032 /* FORNOW: SLP originates only from strided stores. */
2033 return false;
2034}
2035
2036
2037bool
a70d6342 2038vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 2039{
a70d6342 2040 VEC (slp_instance, heap) *slp_instances;
ebfd146a 2041 slp_instance instance;
a70d6342 2042 unsigned int i, vf;
ebfd146a
IR
2043 bool is_store = false;
2044
a70d6342
IR
2045 if (loop_vinfo)
2046 {
2047 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2048 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 2049 }
a70d6342
IR
2050 else
2051 {
2052 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2053 vf = 1;
b8698a0f 2054 }
a70d6342 2055
ebfd146a
IR
2056 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2057 {
2058 /* Schedule the tree of INSTANCE. */
2059 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 2060 instance, vf);
8644a673
IR
2061 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2062 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
ebfd146a
IR
2063 fprintf (vect_dump, "vectorizing stmts using SLP.");
2064 }
2065
2066 return is_store;
2067}
a70d6342
IR
2068
2069
2070/* Vectorize the basic block. */
2071
2072void
2073vect_slp_transform_bb (basic_block bb)
2074{
2075 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2076 gimple_stmt_iterator si;
2077
2078 gcc_assert (bb_vinfo);
2079
2080 if (vect_print_dump_info (REPORT_DETAILS))
2081 fprintf (vect_dump, "SLPing BB\n");
2082
2083 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2084 {
2085 gimple stmt = gsi_stmt (si);
2086 stmt_vec_info stmt_info;
2087
2088 if (vect_print_dump_info (REPORT_DETAILS))
2089 {
2090 fprintf (vect_dump, "------>SLPing statement: ");
2091 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2092 }
2093
2094 stmt_info = vinfo_for_stmt (stmt);
2095 gcc_assert (stmt_info);
2096
2097 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2098 if (STMT_SLP_TYPE (stmt_info))
2099 {
2100 vect_schedule_slp (NULL, bb_vinfo);
2101 break;
2102 }
2103 }
2104
2105 mark_sym_for_renaming (gimple_vop (cfun));
2106 /* The memory tags and pointers in vectorized statements need to
2107 have their SSA forms updated. FIXME, why can't this be delayed
2108 until all the loops have been transformed? */
2109 update_ssa (TODO_update_ssa);
2110
2111 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 2112 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
a70d6342 2113
12aaf609
IR
2114 destroy_bb_vec_info (bb_vinfo);
2115}
a70d6342 2116
This page took 0.780551 seconds and 5 git commands to generate.