/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
cf835838
JM
31#include "tree-pretty-print.h"
32#include "gimple-pretty-print.h"
ebfd146a
IR
33#include "tree-flow.h"
34#include "tree-dump.h"
35#include "cfgloop.h"
36#include "cfglayout.h"
37#include "expr.h"
38#include "recog.h"
39#include "optabs.h"
40#include "tree-vectorizer.h"
41
/* Extract the location of the basic block in the source code.
   Return the basic block location if found and UNKNOWN_LOC otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}


/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }

      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }
                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group, check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported operation types.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
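      /* A sketch of the arithmetic here (illustrative numbers only): with a
         vectorization factor of 8 and a vectype holding 4 elements, NCOPIES
         is 2, i.e., each scalar stmt would need several vector stmts of
         this type ("multiple types"), which the check below rejects for
         basic-block SLP.  */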
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
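          /* For example (hypothetical stmts), "a[0] = b[0] << c" and
             "a[1] = b[1] << c" can use a vector/scalar shift, but only if
             every packed stmt shifts by the same C; differing shift amounts
             need a vector/vector shift, which is looked for first below.  */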
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of the interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we keep them in
             LOADS for further check in vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}


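/* Print the SLP tree rooted at NODE (and, recursively, its children) to
   the vectorizer dump file.  */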
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported load permutation is loading from the same
     location in all the loads in the node, when the data-refs in the
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
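  /* For example (illustrative values): with two load nodes and
     LOAD_PERMUTATION {1, 1, 0, 0}, the first index of each node's
     subsequence places the first node at position 1 and the second node at
     position 0 in TMP_LOADS, i.e., the nodes get reordered to follow the
     order of accesses in the interleaving chain.  */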
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
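/* For example, with PERMUTATION {1, 0} the loop below moves stmt 0 to
   index 1 and stmt 1 to index 0 of SLP_TREE_SCALAR_STMTS (illustrative
   values).  */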

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
       real_c = real_b + real_a;
       imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's a reduction).  */
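  /* For instance (illustrative values), for GROUP_SIZE 3 a supported
     LOAD_PERMUTATION is {1 1 1 0 0 0 2 2 2}: each subsequence of GROUP_SIZE
     entries repeats a single index, and each index in 0..GROUP_SIZE-1 is
     used by exactly one subsequence (verified via LOAD_INDEX below).  */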
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (dr)
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (dr)
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "pushing reduction into node: ");
              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
            }
        }
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
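  /* For instance (illustrative numbers): with GROUP_SIZE 8 and NUNITS 4 the
     unrolling factor is 1 and NCOPIES_FOR_COST is 2; with GROUP_SIZE 2 and
     NUNITS 4 the unrolling factor is 2 and NCOPIES_FOR_COST is 1.  */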
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
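/* An illustrative scenario: if an SLPed stmt defines an SSA name that is
   also used by a stmt handled by loop-based vectorization (e.g., one that
   participates in a reduction cycle), the def stmt is needed in both forms
   and is therefore marked HYBRID by the walk below.  */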

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if loads and stores are mixed in the basic block (in that
   case if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE if there is a statement marked as not
   vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow unanalyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
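  /* E.g. (illustrative numbers), with vec_inside_cost 3, vec_outside_cost 0
     and scalar_cost 4 the block is considered profitable; equal costs count
     as unprofitable because of the >= below.  */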
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

1631/* Check if the basic block can be vectorized. */
a70d6342
IR
1632
1633bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
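
/* For illustration only: a driver would pair the analysis above with the
   transformation entry point defined at the end of this file, roughly
   (hypothetical caller, not the actual pass code):

     if (vect_slp_analyze_bb (bb))
       vect_slp_transform_bb (bb);

   vect_slp_transform_bb () asserts that a bb_vec_info was recorded for BB,
   so it must only be run after a successful analysis.  */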


/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
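
/* For illustration (hypothetical numbers): an instance built with unrolling
   factor 2 whose inside-of-loop cost was computed as 10 needs, once the
   loop's VF is known to be 8, 8 / 2 == 4 times as many vector stmts per
   node, so the updated cost is 10 * 4 == 40, which is exactly the linearity
   assumption documented above.  */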


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements,
   unless it is -1.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      op_num = reduc_index - 1;
      op = gimple_op (stmt, op_num + 1);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction () for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2.)

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
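  /* For illustration (using the examples above): with GROUP_SIZE == 2 and
     NUNITS == 4, least_common_multiple (4, 2) == 4, so NUMBER_OF_COPIES
     == 4 / 2 == 2, giving {s1, s2, s1, s2}.  With GROUP_SIZE == 8 and
     NUNITS == 4, the lcm is 8 and NUMBER_OF_COPIES == 1: each scalar is
     used once, spread over two vectors.  */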

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
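  /* For illustration (hypothetical numbers): if NUMBER_OF_VECTORS is 4 but
     only 2 vectors were built above, a reduction with a known NEUTRAL_OP
     (e.g., 0 for PLUS_EXPR) is padded with vectors of the neutral value;
     otherwise the 2 existing vectors are pushed again in order.  */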
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}


/* Get the vectorized definitions from SLP_NODE, which already contains the
   corresponding vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized
   stmts must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain
   scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* The number of vector stmts was calculated according to the LHS in
         vect_schedule_slp_instance (); fix it by replacing the LHS with the
         RHS, if necessary.  See vect_get_smallest_scalar_type () for
         details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }
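  /* For illustration (hypothetical sizes): if the RHS scalars are half the
     size of the LHS scalars (say, 2 bytes vs. 4 bytes), twice as many RHS
     scalars fit in one vector, so the NUMBER_OF_VECTS just computed from
     the LHS is scaled by 2 / 4, i.e., halved.  */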

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors (), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}


/* Create NCOPIES permutation statements using the vector mask MASK (a
   vector of type MASK_TYPE built from the mask element values) and two
   input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
   for the first copy, shifting by STRIDE elements of DR_CHAIN for every
   further copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
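
/* For illustration (hypothetical numbers): with 4 vector stmts for NODE and
   NCOPIES == 2, STRIDE is 4 / 2 == 2, so copy 0 is stored at slot
   VECT_STMTS_COUNTER, copy 1 at slot VECT_STMTS_COUNTER + 2, and the
   DR_CHAIN indices of the two input vectors advance by 2 between copies.  */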


/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
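
/* For illustration (following the example in the next function): with
   MASK_NUNITS == 4, the raw mask {6,9,9,9} starts with 6, which is below
   MASK_NUNITS * 2, but the next element 9 is not, so the window shifts to
   the next pair of input vectors: 9 - 4 == 5, the already emitted element
   6 is lowered to 2, and NEED_NEXT_VECTOR is reported to the caller.  This
   yields the converted mask {2,5,5,5} described below.  */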


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
    SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE.  The number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in a target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
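  /* For illustration (following the example above): with GROUP_SIZE == 3,
     NUNITS == 4 and unrolling factor 4, ORIG_VEC_STMTS_NUM is
     3 * 4 / 4 == 3, so the three input vectors {a0,b0,c0,a1}, {b1,c1,a2,b2}
     and {c2,a3,b3,c3} feed the permutations of every load node.  */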

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                              mask_nunits, only_one_vec,
                                              index, mask,
                                              &current_mask_element,
                                              &need_next_vector))
                    {
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, builtin_decl, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  The number of vector
     elements in one vector iteration is the number of scalar elements in one
     scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
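  /* For illustration (hypothetical numbers): with VF == 4, GROUP_SIZE == 2
     and NUNITS == 4, one vector iteration covers 4 * 2 == 8 scalars,
     i.e., 8 / 4 == 2 vector stmts per SLP node.  */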

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}


/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}
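
/* For illustration: the basic-block SLP path below invokes this as
   vect_schedule_slp (NULL, bb_vinfo), taking VF as 1, while the loop
   vectorizer is presumably the caller that passes a LOOP_VINFO and a
   NULL BB_VINFO.  */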


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}