1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44
45
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
47
48 static void
49 vect_free_slp_tree (slp_tree node)
50 {
51 int i;
52 slp_tree child;
53
54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
55 vect_free_slp_tree (child);
56
57 gimple *stmt;
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
59 /* After transform some stmts are removed and thus their vinfo is gone. */
60 if (vinfo_for_stmt (stmt))
61 {
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
64 }
65
66 SLP_TREE_CHILDREN (node).release ();
67 SLP_TREE_SCALAR_STMTS (node).release ();
68 SLP_TREE_VEC_STMTS (node).release ();
69 SLP_TREE_LOAD_PERMUTATION (node).release ();
70
71 free (node);
72 }
73
74
75 /* Free the memory allocated for the SLP instance. */
76
77 void
78 vect_free_slp_instance (slp_instance instance)
79 {
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
81 SLP_INSTANCE_LOADS (instance).release ();
82 free (instance);
83 }
84
85
86 /* Create an SLP node for SCALAR_STMTS. */
87
88 static slp_tree
89 vect_create_new_slp_node (vec<gimple *> scalar_stmts)
90 {
91 slp_tree node;
92 gimple *stmt = scalar_stmts[0];
93 unsigned int nops;
94
95 if (is_gimple_call (stmt))
96 nops = gimple_call_num_args (stmt);
97 else if (is_gimple_assign (stmt))
98 {
99 nops = gimple_num_ops (stmt) - 1;
100 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
101 nops++;
102 }
103 else if (gimple_code (stmt) == GIMPLE_PHI)
104 nops = 0;
105 else
106 return NULL;
107
108 node = XNEW (struct _slp_tree);
109 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
110 SLP_TREE_VEC_STMTS (node).create (0);
111 SLP_TREE_CHILDREN (node).create (nops);
112 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
113 SLP_TREE_TWO_OPERATORS (node) = false;
114 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
115
116 unsigned i;
117 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
119
120 return node;
121 }
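/* A worked illustration of the NOPS computation above: for a call
   x = f (a, b) the node gets two operand slots, one per call argument;
   for an assignment x_1 = a_2 + b_3, gimple_num_ops is 3 (the lhs plus
   two rhs operands) so NOPS is 2; for a COND_EXPR assignment
   x_1 = p_2 < q_3 ? a_4 : b_5, NOPS is bumped by one so the two
   operands of the embedded comparison get separate slots; PHIs get no
   operand children here.  */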
122
123
124 /* This structure is used in creation of an SLP tree. Each instance
125 corresponds to the same operand in a group of scalar stmts in an SLP
126 node. */
127 typedef struct _slp_oprnd_info
128 {
129 /* Def-stmts for the operands. */
130 vec<gimple *> def_stmts;
131 /* Information about the first statement: its vector def-type, the type
132 of the operand, and an indication whether the first (or second) stmt
133 of the group is a pattern stmt. */
134 tree first_op_type;
135 enum vect_def_type first_dt;
136 bool first_pattern;
137 bool second_pattern;
138 } *slp_oprnd_info;
139
140
141 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
142 operand. */
143 static vec<slp_oprnd_info>
144 vect_create_oprnd_info (int nops, int group_size)
145 {
146 int i;
147 slp_oprnd_info oprnd_info;
148 vec<slp_oprnd_info> oprnds_info;
149
150 oprnds_info.create (nops);
151 for (i = 0; i < nops; i++)
152 {
153 oprnd_info = XNEW (struct _slp_oprnd_info);
154 oprnd_info->def_stmts.create (group_size);
155 oprnd_info->first_dt = vect_uninitialized_def;
156 oprnd_info->first_op_type = NULL_TREE;
157 oprnd_info->first_pattern = false;
158 oprnd_info->second_pattern = false;
159 oprnds_info.quick_push (oprnd_info);
160 }
161
162 return oprnds_info;
163 }
164
165
166 /* Free operands info. */
167
168 static void
169 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
170 {
171 int i;
172 slp_oprnd_info oprnd_info;
173
174 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
175 {
176 oprnd_info->def_stmts.release ();
177 XDELETE (oprnd_info);
178 }
179
180 oprnds_info.release ();
181 }
182
183
184 /* Find the place of the data-ref in STMT in the interleaving chain that starts
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
186
187 static int
188 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
189 {
190 gimple *next_stmt = first_stmt;
191 int result = 0;
192
193 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
194 return -1;
195
196 do
197 {
198 if (next_stmt == stmt)
199 return result;
200 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
201 if (next_stmt)
202 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
203 }
204 while (next_stmt);
205
206 return -1;
207 }
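/* For example, for a chain S0 -> S1 -> S2 with GROUP_GAP (S1) == 1 and
   GROUP_GAP (S2) == 2, this returns 0 for S0, 1 for S1 and 3 for S2;
   a stmt whose chain does not start at FIRST_STMT yields -1.  */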
208
209
210 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
211 they are of a valid type and that they match the defs of the first stmt of
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
213 by swapping operands of STMT when possible. On entry, a non-zero *SWAP
214 indicates that a swap is required for a cond_expr stmt: *SWAP is 1 if STMT
215 is a cond and the operands of its comparison need to be swapped; *SWAP is
216 2 if STMT is a cond and the code of its comparison needs to be inverted.
217 If this function swaps any operands, it sets *SWAP to a non-zero value.
218 Return -1 on a fatal error; return 1 if the error could be corrected by
219 swapping operands of the parent node of this one; return 0 if everything
220 is OK. */
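/* For example, with STMT x = a >= b ? c : d, *SWAP == 1 matches it
   against x = b <= a ? c : d (comparison operands swapped), while
   *SWAP == 2 matches it against x = a < b ? d : c (comparison code
   inverted and the arms exchanged).  */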
221
222 static int
223 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
224 gimple *stmt, unsigned stmt_num,
225 vec<slp_oprnd_info> *oprnds_info)
226 {
227 tree oprnd;
228 unsigned int i, number_of_oprnds;
229 gimple *def_stmt;
230 enum vect_def_type dt = vect_uninitialized_def;
231 bool pattern = false;
232 slp_oprnd_info oprnd_info;
233 int first_op_idx = 1;
234 bool commutative = false;
235 bool first_op_cond = false;
236 bool first = stmt_num == 0;
237 bool second = stmt_num == 1;
238
239 if (is_gimple_call (stmt))
240 {
241 number_of_oprnds = gimple_call_num_args (stmt);
242 first_op_idx = 3;
243 }
244 else if (is_gimple_assign (stmt))
245 {
246 enum tree_code code = gimple_assign_rhs_code (stmt);
247 number_of_oprnds = gimple_num_ops (stmt) - 1;
248 /* Swapping can only be done for cond_expr when asked to; otherwise we
249 could end up with a comparison code different from the first stmt's. */
250 if (gimple_assign_rhs_code (stmt) == COND_EXPR
251 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
252 {
253 first_op_cond = true;
254 number_of_oprnds++;
255 }
256 else
257 commutative = commutative_tree_code (code);
258 }
259 else
260 return -1;
261
262 bool swapped = (*swap != 0);
263 gcc_assert (!swapped || first_op_cond);
264 for (i = 0; i < number_of_oprnds; i++)
265 {
266 again:
267 if (first_op_cond)
268 {
269 /* Map indicating how operands of cond_expr should be swapped. */
270 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
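/* maps[0] is the identity; maps[1] swaps the two operands of the
   comparison (*swap == 1); maps[2] swaps the then/else arms of the
   COND_EXPR (*swap == 2).  */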
271 int *map = maps[*swap];
272
273 if (i < 2)
274 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
275 else
276 oprnd = gimple_op (stmt, map[i]);
277 }
278 else
279 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
280
281 oprnd_info = (*oprnds_info)[i];
282
283 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
284 {
285 if (dump_enabled_p ())
286 {
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
288 "Build SLP failed: can't analyze def for ");
289 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
290 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
291 }
292
293 return -1;
294 }
295
296 /* Check if DEF_STMT is a part of a pattern in the vectorized region
297 and get the def stmt from the pattern. Check that all the stmts of
298 the node are in the pattern. */
299 if (def_stmt && gimple_bb (def_stmt)
300 && vect_stmt_in_region_p (vinfo, def_stmt)
301 && vinfo_for_stmt (def_stmt)
302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
305 {
306 pattern = true;
307 if (!first && !oprnd_info->first_pattern
308 /* Allow different pattern state for the defs of the
309 first stmt in reduction chains. */
310 && (oprnd_info->first_dt != vect_reduction_def
311 || (!second && !oprnd_info->second_pattern)))
312 {
313 if (i == 0
314 && !swapped
315 && commutative)
316 {
317 swapped = true;
318 goto again;
319 }
320
321 if (dump_enabled_p ())
322 {
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
324 "Build SLP failed: some of the stmts"
325 " are in a pattern, and others are not ");
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
327 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
328 }
329
330 return 1;
331 }
332
333 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
334 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
335
336 if (dt == vect_unknown_def_type)
337 {
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
340 "Unsupported pattern.\n");
341 return -1;
342 }
343
344 switch (gimple_code (def_stmt))
345 {
346 case GIMPLE_PHI:
347 case GIMPLE_ASSIGN:
348 break;
349
350 default:
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
353 "unsupported defining stmt:\n");
354 return -1;
355 }
356 }
357
358 if (second)
359 oprnd_info->second_pattern = pattern;
360
361 if (first)
362 {
363 oprnd_info->first_dt = dt;
364 oprnd_info->first_pattern = pattern;
365 oprnd_info->first_op_type = TREE_TYPE (oprnd);
366 }
367 else
368 {
369 /* Not first stmt of the group, check that the def-stmt/s match
370 the def-stmt/s of the first stmt. Allow different definition
371 types for reduction chains: the first stmt must be a
372 vect_reduction_def (a phi node), and the rest
373 vect_internal_def. */
374 if (((oprnd_info->first_dt != dt
375 && !(oprnd_info->first_dt == vect_reduction_def
376 && dt == vect_internal_def)
377 && !((oprnd_info->first_dt == vect_external_def
378 || oprnd_info->first_dt == vect_constant_def)
379 && (dt == vect_external_def
380 || dt == vect_constant_def)))
381 || !types_compatible_p (oprnd_info->first_op_type,
382 TREE_TYPE (oprnd))))
383 {
384 /* Try swapping operands if we got a mismatch. */
385 if (i == 0
386 && !swapped
387 && commutative)
388 {
389 swapped = true;
390 goto again;
391 }
392
393 if (dump_enabled_p ())
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
395 "Build SLP failed: different types\n");
396
397 return 1;
398 }
399 }
400
401 /* Check the types of the definitions. */
402 switch (dt)
403 {
404 case vect_constant_def:
405 case vect_external_def:
406 break;
407
408 case vect_reduction_def:
409 case vect_induction_def:
410 case vect_internal_def:
411 oprnd_info->def_stmts.quick_push (def_stmt);
412 break;
413
414 default:
415 /* FORNOW: Not supported. */
416 if (dump_enabled_p ())
417 {
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
419 "Build SLP failed: illegal type of def ");
420 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
421 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
422 }
423
424 return -1;
425 }
426 }
427
428 /* Swap operands. */
429 if (swapped)
430 {
431 /* If there are already uses of this stmt in a SLP instance then
432 we've committed to the operand order and can't swap it. */
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
434 {
435 if (dump_enabled_p ())
436 {
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
438 "Build SLP failed: cannot swap operands of "
439 "shared stmt ");
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
441 }
442 return -1;
443 }
444
445 if (first_op_cond)
446 {
447 tree cond = gimple_assign_rhs1 (stmt);
448 enum tree_code code = TREE_CODE (cond);
449
450 /* Swap. */
451 if (*swap == 1)
452 {
453 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
454 &TREE_OPERAND (cond, 1));
455 TREE_SET_CODE (cond, swap_tree_comparison (code));
456 }
457 /* Invert. */
458 else
459 {
460 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
461 gimple_assign_rhs3_ptr (stmt));
462 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
463 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
464 gcc_assert (code != ERROR_MARK);
465 TREE_SET_CODE (cond, code);
466 }
467 }
468 else
469 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
470 gimple_assign_rhs2_ptr (stmt));
471 if (dump_enabled_p ())
472 {
473 dump_printf_loc (MSG_NOTE, vect_location,
474 "swapped operands to match def types in ");
475 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
476 }
477 }
478
479 *swap = swapped;
480 return 0;
481 }
482
483
484 /* Verify that the scalar stmts STMTS are isomorphic and use supported
485 types of operation. Return true if they are, otherwise return false
486 and indicate in *MATCHES which stmts are not isomorphic to the first
487 one. If MATCHES[0] is false then this indicates that the comparison
488 could not be carried out or that the stmts will never be vectorized
489 by SLP.
490
491 Note a COND_EXPR may be isomorphic to another one after swapping its
492 operands. Set SWAP[i] to 1 if stmt I is a COND_EXPR isomorphic to
493 the first stmt after swapping the two operands of its comparison; set
494 SWAP[i] to 2 if stmt I is isomorphic to the first stmt after inverting
495 the code of the comparison. Take A1 >= B1 ? X1 : Y1 as an example: it
496 can be swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1. */
497
498 static bool
499 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
500 vec<gimple *> stmts, unsigned int group_size,
501 unsigned nops, unsigned int *max_nunits,
502 bool *matches, bool *two_operators)
503 {
504 unsigned int i;
505 gimple *first_stmt = stmts[0], *stmt = stmts[0];
506 enum tree_code first_stmt_code = ERROR_MARK;
507 enum tree_code alt_stmt_code = ERROR_MARK;
508 enum tree_code rhs_code = ERROR_MARK;
509 enum tree_code first_cond_code = ERROR_MARK;
510 tree lhs;
511 bool need_same_oprnds = false;
512 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
513 optab optab;
514 int icode;
515 machine_mode optab_op2_mode;
516 machine_mode vec_mode;
517 HOST_WIDE_INT dummy;
518 gimple *first_load = NULL, *prev_first_load = NULL;
519
520 /* For every stmt in NODE find its def stmt/s. */
521 FOR_EACH_VEC_ELT (stmts, i, stmt)
522 {
523 swap[i] = 0;
524 matches[i] = false;
525
526 if (dump_enabled_p ())
527 {
528 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
529 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
530 }
531
532 /* Fail to vectorize statements marked as unvectorizable. */
533 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
534 {
535 if (dump_enabled_p ())
536 {
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
538 "Build SLP failed: unvectorizable statement ");
539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
540 }
541 /* Fatal mismatch. */
542 matches[0] = false;
543 return false;
544 }
545
546 lhs = gimple_get_lhs (stmt);
547 if (lhs == NULL_TREE)
548 {
549 if (dump_enabled_p ())
550 {
551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
552 "Build SLP failed: not GIMPLE_ASSIGN nor "
553 "GIMPLE_CALL ");
554 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
555 }
556 /* Fatal mismatch. */
557 matches[0] = false;
558 return false;
559 }
560
561 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
562 vectype = get_vectype_for_scalar_type (scalar_type);
563 if (!vectype)
564 {
565 if (dump_enabled_p ())
566 {
567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
568 "Build SLP failed: unsupported data-type ");
569 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
570 scalar_type);
571 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
572 }
573 /* Fatal mismatch. */
574 matches[0] = false;
575 return false;
576 }
577
578 /* If populating the vector type requires unrolling then fail
579 before adjusting *max_nunits for basic-block vectorization. */
580 if (is_a <bb_vec_info> (vinfo)
581 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
582 {
583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
584 "Build SLP failed: unrolling required "
585 "in basic block SLP\n");
586 /* Fatal mismatch. */
587 matches[0] = false;
588 return false;
589 }
590
591 /* In case of multiple types we need to detect the smallest type. */
592 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
593 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
594
595 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
596 {
597 rhs_code = CALL_EXPR;
598 if (gimple_call_internal_p (call_stmt)
599 || gimple_call_tail_p (call_stmt)
600 || gimple_call_noreturn_p (call_stmt)
601 || !gimple_call_nothrow_p (call_stmt)
602 || gimple_call_chain (call_stmt))
603 {
604 if (dump_enabled_p ())
605 {
606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
607 "Build SLP failed: unsupported call type ");
608 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
609 call_stmt, 0);
610 }
611 /* Fatal mismatch. */
612 matches[0] = false;
613 return false;
614 }
615 }
616 else
617 rhs_code = gimple_assign_rhs_code (stmt);
618
619 /* Check the operation. */
620 if (i == 0)
621 {
622 first_stmt_code = rhs_code;
623
624 /* Shift arguments should be equal in all the packed stmts for a
625 vector shift with a scalar shift operand. */
626 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
627 || rhs_code == LROTATE_EXPR
628 || rhs_code == RROTATE_EXPR)
629 {
630 vec_mode = TYPE_MODE (vectype);
631
632 /* First see if we have a vector/vector shift. */
633 optab = optab_for_tree_code (rhs_code, vectype,
634 optab_vector);
635
636 if (!optab
637 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
638 {
639 /* No vector/vector shift, try for a vector/scalar shift. */
640 optab = optab_for_tree_code (rhs_code, vectype,
641 optab_scalar);
642
643 if (!optab)
644 {
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
647 "Build SLP failed: no optab.\n");
648 /* Fatal mismatch. */
649 matches[0] = false;
650 return false;
651 }
652 icode = (int) optab_handler (optab, vec_mode);
653 if (icode == CODE_FOR_nothing)
654 {
655 if (dump_enabled_p ())
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
657 "Build SLP failed: "
658 "op not supported by target.\n");
659 /* Fatal mismatch. */
660 matches[0] = false;
661 return false;
662 }
663 optab_op2_mode = insn_data[icode].operand[2].mode;
664 if (!VECTOR_MODE_P (optab_op2_mode))
665 {
666 need_same_oprnds = true;
667 first_op1 = gimple_assign_rhs2 (stmt);
668 }
669 }
670 }
671 else if (rhs_code == WIDEN_LSHIFT_EXPR)
672 {
673 need_same_oprnds = true;
674 first_op1 = gimple_assign_rhs2 (stmt);
675 }
676 }
677 else
678 {
679 if (first_stmt_code != rhs_code
680 && alt_stmt_code == ERROR_MARK)
681 alt_stmt_code = rhs_code;
682 if (first_stmt_code != rhs_code
683 && (first_stmt_code != IMAGPART_EXPR
684 || rhs_code != REALPART_EXPR)
685 && (first_stmt_code != REALPART_EXPR
686 || rhs_code != IMAGPART_EXPR)
687 /* Handle mismatches in plus/minus by computing both
688 and merging the results. */
689 && !((first_stmt_code == PLUS_EXPR
690 || first_stmt_code == MINUS_EXPR)
691 && (alt_stmt_code == PLUS_EXPR
692 || alt_stmt_code == MINUS_EXPR)
693 && rhs_code == alt_stmt_code)
694 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
695 && (first_stmt_code == ARRAY_REF
696 || first_stmt_code == BIT_FIELD_REF
697 || first_stmt_code == INDIRECT_REF
698 || first_stmt_code == COMPONENT_REF
699 || first_stmt_code == MEM_REF)))
700 {
701 if (dump_enabled_p ())
702 {
703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
704 "Build SLP failed: different operation "
705 "in stmt ");
706 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "original stmt ");
709 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
710 first_stmt, 0);
711 }
712 /* Mismatch. */
713 continue;
714 }
715
716 if (need_same_oprnds
717 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
718 {
719 if (dump_enabled_p ())
720 {
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "Build SLP failed: different shift "
723 "arguments in ");
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
725 }
726 /* Mismatch. */
727 continue;
728 }
729
730 if (rhs_code == CALL_EXPR)
731 {
732 gimple *first_stmt = stmts[0];
733 if (gimple_call_num_args (stmt) != nops
734 || !operand_equal_p (gimple_call_fn (first_stmt),
735 gimple_call_fn (stmt), 0)
736 || gimple_call_fntype (first_stmt)
737 != gimple_call_fntype (stmt))
738 {
739 if (dump_enabled_p ())
740 {
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "Build SLP failed: different calls in ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
744 stmt, 0);
745 }
746 /* Mismatch. */
747 continue;
748 }
749 }
750 }
751
752 /* Grouped store or load. */
753 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
754 {
755 if (REFERENCE_CLASS_P (lhs))
756 {
757 /* Store. */
758 ;
759 }
760 else
761 {
762 /* Load. */
763 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
764 if (prev_first_load)
765 {
766 /* Check that there are no loads from different interleaving
767 chains in the same node. */
768 if (prev_first_load != first_load)
769 {
770 if (dump_enabled_p ())
771 {
772 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
773 vect_location,
774 "Build SLP failed: different "
775 "interleaving chains in one node ");
776 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
777 stmt, 0);
778 }
779 /* Mismatch. */
780 continue;
781 }
782 }
783 else
784 prev_first_load = first_load;
785 }
786 } /* Grouped access. */
787 else
788 {
789 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
790 {
791 /* Non-grouped load. */
792 if (dump_enabled_p ())
793 {
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
795 "Build SLP failed: not grouped load ");
796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
797 }
798
799 /* FORNOW: non-grouped loads are not supported. */
800 /* Fatal mismatch. */
801 matches[0] = false;
802 return false;
803 }
804
805 /* Not a memory operation. */
806 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
807 && TREE_CODE_CLASS (rhs_code) != tcc_unary
808 && TREE_CODE_CLASS (rhs_code) != tcc_expression
809 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
810 && rhs_code != CALL_EXPR)
811 {
812 if (dump_enabled_p ())
813 {
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
815 "Build SLP failed: operation");
816 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
818 }
819 /* Fatal mismatch. */
820 matches[0] = false;
821 return false;
822 }
823
824 if (rhs_code == COND_EXPR)
825 {
826 tree cond_expr = gimple_assign_rhs1 (stmt);
827 enum tree_code cond_code = TREE_CODE (cond_expr);
828 enum tree_code swap_code = ERROR_MARK;
829 enum tree_code invert_code = ERROR_MARK;
830
831 if (i == 0)
832 first_cond_code = TREE_CODE (cond_expr);
833 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
834 {
835 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
836 swap_code = swap_tree_comparison (cond_code);
837 invert_code = invert_tree_comparison (cond_code, honor_nans);
838 }
839
840 if (first_cond_code == cond_code)
841 ;
842 /* Isomorphism can be achieved by swapping the comparison operands. */
843 else if (first_cond_code == swap_code)
844 swap[i] = 1;
845 /* Isomorphism can be achieved by inverting the comparison code. */
846 else if (first_cond_code == invert_code)
847 swap[i] = 2;
848 else
849 {
850 if (dump_enabled_p ())
851 {
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
853 "Build SLP failed: different"
854 " operation");
855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
856 stmt, 0);
857 }
858 /* Mismatch. */
859 continue;
860 }
861 }
862 }
863
864 matches[i] = true;
865 }
866
867 for (i = 0; i < group_size; ++i)
868 if (!matches[i])
869 return false;
870
871 /* If we allowed a two-operator SLP node, verify the target can cope
872 with the permute we are going to use. */
873 if (alt_stmt_code != ERROR_MARK
874 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
875 {
876 unsigned char *sel
877 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
878 for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
879 {
880 sel[i] = i;
881 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
882 sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
883 }
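/* E.g. for the group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } with a
   four-element vector type this builds SEL = { 0, 5, 2, 7 }: even
   lanes are taken from the PLUS vector and odd lanes from the
   MINUS vector.  */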
884 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
885 {
886 for (i = 0; i < group_size; ++i)
887 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
888 {
889 matches[i] = false;
890 if (dump_enabled_p ())
891 {
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
893 "Build SLP failed: different operation "
894 "in stmt ");
895 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
896 stmts[i], 0);
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
898 "original stmt ");
899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
900 first_stmt, 0);
901 }
902 }
903 return false;
904 }
905 *two_operators = true;
906 }
907
908 return true;
909 }
910
911 /* Recursively build an SLP tree for the group of scalar stmts STMTS.
912 Fail (and return NULL) if the def-stmts are not isomorphic, require a
913 data permutation, or use unsupported types of operation. Otherwise
914 return the built SLP tree. *MATCHES records which stmts failed to
915 match the first one; MATCHES[0] == false indicates a fatal mismatch
916 for which no operand swap will be retried. */
917
918 static slp_tree
919 vect_build_slp_tree (vec_info *vinfo,
920 vec<gimple *> stmts, unsigned int group_size,
921 unsigned int *max_nunits,
922 vec<slp_tree> *loads,
923 bool *matches, unsigned *npermutes, unsigned *tree_size,
924 unsigned max_tree_size)
925 {
926 unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
927 gimple *stmt;
928 slp_tree node;
929
930 matches[0] = false;
931
932 stmt = stmts[0];
933 if (is_gimple_call (stmt))
934 nops = gimple_call_num_args (stmt);
935 else if (is_gimple_assign (stmt))
936 {
937 nops = gimple_num_ops (stmt) - 1;
938 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
939 nops++;
940 }
941 else if (gimple_code (stmt) == GIMPLE_PHI)
942 nops = 0;
943 else
944 return NULL;
945
946 /* If the SLP node is a PHI (induction or reduction), terminate
947 the recursion. */
948 if (gimple_code (stmt) == GIMPLE_PHI)
949 {
950 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
951 /* Induction from different IVs is not supported. */
952 if (def_type == vect_induction_def)
953 {
954 FOR_EACH_VEC_ELT (stmts, i, stmt)
955 if (stmt != stmts[0])
956 return NULL;
957 }
958 else
959 {
960 /* Else def types have to match. */
961 FOR_EACH_VEC_ELT (stmts, i, stmt)
962 {
963 /* But for reduction chains only the first stmt is checked. */
964 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
965 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
966 continue;
967 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
968 return NULL;
969 }
970 }
971 node = vect_create_new_slp_node (stmts);
972 return node;
973 }
974
975
976 bool two_operators = false;
977 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
978 if (!vect_build_slp_tree_1 (vinfo, swap,
979 stmts, group_size, nops,
980 &this_max_nunits, matches, &two_operators))
981 return NULL;
982
983 /* If the SLP node is a load, terminate the recursion. */
984 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
985 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
986 {
987 *max_nunits = this_max_nunits;
988 node = vect_create_new_slp_node (stmts);
989 loads->safe_push (node);
990 return node;
991 }
992
993 /* Get at the operands, verifying they are compatible. */
994 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
995 slp_oprnd_info oprnd_info;
996 FOR_EACH_VEC_ELT (stmts, i, stmt)
997 {
998 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
999 stmt, i, &oprnds_info);
1000 if (res != 0)
1001 matches[(res == -1) ? 0 : i] = false;
1002 if (!matches[0])
1003 break;
1004 }
1005 for (i = 0; i < group_size; ++i)
1006 if (!matches[i])
1007 {
1008 vect_free_oprnd_info (oprnds_info);
1009 return NULL;
1010 }
1011
1012 auto_vec<slp_tree, 4> children;
1013 auto_vec<slp_tree> this_loads;
1014
1015 stmt = stmts[0];
1016
1017 /* Create SLP_TREE nodes for the definition node/s. */
1018 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1019 {
1020 slp_tree child;
1021 unsigned old_nloads = this_loads.length ();
1022 unsigned old_tree_size = this_tree_size;
1023 unsigned int j;
1024
1025 if (oprnd_info->first_dt != vect_internal_def
1026 && oprnd_info->first_dt != vect_reduction_def
1027 && oprnd_info->first_dt != vect_induction_def)
1028 continue;
1029
1030 if (++this_tree_size > max_tree_size)
1031 {
1032 FOR_EACH_VEC_ELT (children, j, child)
1033 vect_free_slp_tree (child);
1034 vect_free_oprnd_info (oprnds_info);
1035 return NULL;
1036 }
1037
1038 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1039 group_size, &this_max_nunits,
1040 &this_loads, matches, npermutes,
1041 &this_tree_size,
1042 max_tree_size)) != NULL)
1043 {
1044 /* If all children of this child were built up from scalars then just
1045 throw that away and build this node up from scalars instead. */
1046 if (!SLP_TREE_CHILDREN (child).is_empty ()
1047 /* ??? Rejecting patterns this way doesn't work. We'd have to
1048 do extra work to cancel the pattern so the uses see the
1049 scalar version. */
1050 && !is_pattern_stmt_p
1051 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1052 {
1053 slp_tree grandchild;
1054
1055 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1056 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1057 break;
1058 if (!grandchild)
1059 {
1060 /* Roll back. */
1061 this_loads.truncate (old_nloads);
1062 this_tree_size = old_tree_size;
1063 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1064 vect_free_slp_tree (grandchild);
1065 SLP_TREE_CHILDREN (child).truncate (0);
1066
1067 dump_printf_loc (MSG_NOTE, vect_location,
1068 "Building parent vector operands from "
1069 "scalars instead\n");
1070 oprnd_info->def_stmts = vNULL;
1071 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1072 children.safe_push (child);
1073 continue;
1074 }
1075 }
1076
1077 oprnd_info->def_stmts = vNULL;
1078 children.safe_push (child);
1079 continue;
1080 }
1081
1082 /* If the SLP build failed fatally and we are analyzing a basic
1083 block, simply treat nodes we fail to build as externally defined
1084 (and thus build vectors from the scalar defs).
1085 The cost model will reject outright expensive cases.
1086 ??? This doesn't treat cases where permutation ultimately
1087 fails (or we don't try permutation below). Ideally we'd
1088 even compute a permutation that will end up with the maximum
1089 SLP tree size... */
1090 if (is_a <bb_vec_info> (vinfo)
1091 && !matches[0]
1092 /* ??? Rejecting patterns this way doesn't work. We'd have to
1093 do extra work to cancel the pattern so the uses see the
1094 scalar version. */
1095 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1096 {
1097 dump_printf_loc (MSG_NOTE, vect_location,
1098 "Building vector operands from scalars\n");
1099 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1100 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1101 children.safe_push (child);
1102 oprnd_info->def_stmts = vNULL;
1103 continue;
1104 }
1105
1106 /* If the SLP build for operand zero failed and operands zero
1107 and one can be commuted, try that for the scalar stmts
1108 that failed the match. */
1109 if (i == 0
1110 /* A first scalar stmt mismatch signals a fatal mismatch. */
1111 && matches[0]
1112 /* ??? For COND_EXPRs we can swap the comparison operands
1113 as well as the arms under some constraints. */
1114 && nops == 2
1115 && oprnds_info[1]->first_dt == vect_internal_def
1116 && is_gimple_assign (stmt)
1117 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1118 && ! two_operators
1119 /* Do so only if the number of unsuccessful permutes was no more
1120 than a cut-off, as re-trying the recursive match on
1121 possibly each level of the tree would expose exponential
1122 behavior. */
1123 && *npermutes < 4)
1124 {
1125 /* Verify whether we can safely swap or whether we already committed
1126 to a specific operand order. */
1127 for (j = 0; j < group_size; ++j)
1128 if (!matches[j]
1129 && (swap[j] != 0
1130 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
1131 {
1132 if (dump_enabled_p ())
1133 {
1134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1135 "Build SLP failed: cannot swap operands "
1136 "of shared stmt ");
1137 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1138 stmts[j], 0);
1139 }
1140 goto fail;
1141 }
1142
1143 /* Swap mismatched definition stmts. */
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "Re-trying with swapped operands of stmts ");
1146 for (j = 0; j < group_size; ++j)
1147 if (!matches[j])
1148 {
1149 std::swap (oprnds_info[0]->def_stmts[j],
1150 oprnds_info[1]->def_stmts[j]);
1151 dump_printf (MSG_NOTE, "%d ", j);
1152 }
1153 dump_printf (MSG_NOTE, "\n");
1154 /* And try again with scratch 'matches' ... */
1155 bool *tem = XALLOCAVEC (bool, group_size);
1156 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1157 group_size, &this_max_nunits,
1158 &this_loads, tem, npermutes,
1159 &this_tree_size,
1160 max_tree_size)) != NULL)
1161 {
1162 /* ... so if successful we can apply the operand swapping
1163 to the GIMPLE IL. This is necessary because for example
1164 vect_get_slp_defs uses operand indexes and thus expects
1165 canonical operand order. This is also necessary even
1166 if we end up building the operand from scalars as
1167 we'll continue to process swapped operand two. */
1168 for (j = 0; j < group_size; ++j)
1169 {
1170 gimple *stmt = stmts[j];
1171 gimple_set_plf (stmt, GF_PLF_1, false);
1172 }
1173 for (j = 0; j < group_size; ++j)
1174 {
1175 gimple *stmt = stmts[j];
1176 if (!matches[j])
1177 {
1178 /* Avoid swapping operands twice. */
1179 if (gimple_plf (stmt, GF_PLF_1))
1180 continue;
1181 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1182 gimple_assign_rhs2_ptr (stmt));
1183 gimple_set_plf (stmt, GF_PLF_1, true);
1184 }
1185 }
1186 /* Verify we swap all duplicates or none. */
1187 if (flag_checking)
1188 for (j = 0; j < group_size; ++j)
1189 {
1190 gimple *stmt = stmts[j];
1191 gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
1192 }
1193
1194 /* If all children of this child were built up from scalars then
1195 just throw that away and build this node up from scalars instead. */
1196 if (!SLP_TREE_CHILDREN (child).is_empty ()
1197 /* ??? Rejecting patterns this way doesn't work. We'd have
1198 to do extra work to cancel the pattern so the uses see the
1199 scalar version. */
1200 && !is_pattern_stmt_p
1201 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1202 {
1203 unsigned int j;
1204 slp_tree grandchild;
1205
1206 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1207 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1208 break;
1209 if (!grandchild)
1210 {
1211 /* Roll back. */
1212 this_loads.truncate (old_nloads);
1213 this_tree_size = old_tree_size;
1214 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1215 vect_free_slp_tree (grandchild);
1216 SLP_TREE_CHILDREN (child).truncate (0);
1217
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "Building parent vector operands from "
1220 "scalars instead\n");
1221 oprnd_info->def_stmts = vNULL;
1222 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1223 children.safe_push (child);
1224 continue;
1225 }
1226 }
1227
1228 oprnd_info->def_stmts = vNULL;
1229 children.safe_push (child);
1230 continue;
1231 }
1232
1233 ++*npermutes;
1234 }
1235
1236 fail:
1237 gcc_assert (child == NULL);
1238 FOR_EACH_VEC_ELT (children, j, child)
1239 vect_free_slp_tree (child);
1240 vect_free_oprnd_info (oprnds_info);
1241 return NULL;
1242 }
1243
1244 vect_free_oprnd_info (oprnds_info);
1245
1246 if (tree_size)
1247 *tree_size += this_tree_size;
1248 *max_nunits = this_max_nunits;
1249 loads->safe_splice (this_loads);
1250
1251 node = vect_create_new_slp_node (stmts);
1252 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1253 SLP_TREE_CHILDREN (node).splice (children);
1254 return node;
1255 }
1256
1257 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1258
1259 static void
1260 vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
1261 {
1262 int i;
1263 gimple *stmt;
1264 slp_tree child;
1265
1266 dump_printf_loc (dump_kind, loc, "node%s\n",
1267 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1268 ? " (external)" : "");
1269 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1270 {
1271 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1272 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1273 }
1274 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1275 vect_print_slp_tree (dump_kind, loc, child);
1276 }
1277
1278
1279 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1280 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1281 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1282 stmts in NODE are to be marked. */
1283
1284 static void
1285 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1286 {
1287 int i;
1288 gimple *stmt;
1289 slp_tree child;
1290
1291 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1292 return;
1293
1294 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1295 if (j < 0 || i == j)
1296 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1297
1298 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1299 vect_mark_slp_stmts (child, mark, j);
1300 }
1301
1302
1303 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1304
1305 static void
1306 vect_mark_slp_stmts_relevant (slp_tree node)
1307 {
1308 int i;
1309 gimple *stmt;
1310 stmt_vec_info stmt_info;
1311 slp_tree child;
1312
1313 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1314 return;
1315
1316 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1317 {
1318 stmt_info = vinfo_for_stmt (stmt);
1319 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1320 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1321 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1322 }
1323
1324 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1325 vect_mark_slp_stmts_relevant (child);
1326 }
1327
1328
1329 /* Rearrange the statements of NODE according to PERMUTATION. */
1330
1331 static void
1332 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1333 vec<unsigned> permutation)
1334 {
1335 gimple *stmt;
1336 vec<gimple *> tmp_stmts;
1337 unsigned int i;
1338 slp_tree child;
1339
1340 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1341 vect_slp_rearrange_stmts (child, group_size, permutation);
1342
1343 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1344 tmp_stmts.create (group_size);
1345 tmp_stmts.quick_grow_cleared (group_size);
1346
1347 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1348 tmp_stmts[permutation[i]] = stmt;
1349
1350 SLP_TREE_SCALAR_STMTS (node).release ();
1351 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1352 }
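/* For example, rearranging { S0, S1, S2, S3 } with PERMUTATION
   { 2, 0, 3, 1 } stores S0 at index 2, S1 at index 0, S2 at index 3
   and S3 at index 1, yielding { S1, S3, S0, S2 }.  */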
1353
1354
1355 /* Attempt to reorder stmts in a reduction chain so that we don't
1356 require any load permutation. Return true if that was possible,
1357 otherwise return false. */
1358
1359 static bool
1360 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1361 {
1362 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1363 unsigned int i, j;
1364 unsigned int lidx;
1365 slp_tree node, load;
1366
1367 /* Compare all the permutation sequences to the first one. We know
1368 that at least one load is permuted. */
1369 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1370 if (!node->load_permutation.exists ())
1371 return false;
1372 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1373 {
1374 if (!load->load_permutation.exists ())
1375 return false;
1376 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1377 if (lidx != node->load_permutation[j])
1378 return false;
1379 }
1380
1381 /* Check that the loads in the first sequence are different and there
1382 are no gaps between them. */
1383 auto_sbitmap load_index (group_size);
1384 bitmap_clear (load_index);
1385 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1386 {
1387 if (lidx >= group_size)
1388 return false;
1389 if (bitmap_bit_p (load_index, lidx))
1390 return false;
1391
1392 bitmap_set_bit (load_index, lidx);
1393 }
1394 for (i = 0; i < group_size; i++)
1395 if (!bitmap_bit_p (load_index, i))
1396 return false;
1397
1398 /* This permutation is valid for reduction. Since the order of the
1399 statements in the nodes is not important unless they are memory
1400 accesses, we can rearrange the statements in all the nodes
1401 according to the order of the loads. */
1402 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1403 node->load_permutation);
1404
1405 /* We are done, no actual permutations need to be generated. */
1406 unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1407 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1408 {
1409 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1410 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
1411 /* But we have to keep those permutations that are required because
1412 of handling of gaps. */
1413 if (unrolling_factor == 1
1414 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1415 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
1416 SLP_TREE_LOAD_PERMUTATION (node).release ();
1417 else
1418 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1419 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1420 }
1421
1422 return true;
1423 }
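/* For instance, if every load node of a reduction instance carries the
   identical permutation { 1, 0, 3, 2 }, the scalar stmts of all nodes
   are rearranged in that order and the load permutations are dropped,
   or kept as the identity when gaps or unrolling require them.  */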
1424
1425 /* Check if the required load permutations in the SLP instance
1426 SLP_INSTN are supported. */
1427
1428 static bool
1429 vect_supported_load_permutation_p (slp_instance slp_instn)
1430 {
1431 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1432 unsigned int i, j, k, next;
1433 slp_tree node;
1434 gimple *stmt, *load, *next_load;
1435
1436 if (dump_enabled_p ())
1437 {
1438 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1439 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1440 if (node->load_permutation.exists ())
1441 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1442 dump_printf (MSG_NOTE, "%d ", next);
1443 else
1444 for (k = 0; k < group_size; ++k)
1445 dump_printf (MSG_NOTE, "%d ", k);
1446 dump_printf (MSG_NOTE, "\n");
1447 }
1448
1449 /* In case of reduction every load permutation is allowed, since the order
1450 of the reduction statements is not important (as opposed to the case of
1451 grouped stores). The only condition we need to check is that all the
1452 load nodes are of the same size and have the same permutation (and then
1453 rearrange all the nodes of the SLP instance according to this
1454 permutation). */
1455
1456 /* Check that all the load nodes are of the same size. */
1457 /* ??? Can't we assert this? */
1458 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1459 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1460 return false;
1461
1462 node = SLP_INSTANCE_TREE (slp_instn);
1463 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1464
1465 /* Reduction (there are no data-refs in the root).
1466 In a reduction chain the order of the loads is not important. */
1467 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1468 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1469 vect_attempt_slp_rearrange_stmts (slp_instn);
1470
1471 /* In basic block vectorization we allow any subchain of an interleaving
1472 chain.
1473 FORNOW: not supported in loop SLP because of realignment complications. */
1474 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1475 {
1476 /* Check whether the loads in an instance form a subchain and thus
1477 no permutation is necessary. */
1478 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1479 {
1480 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1481 continue;
1482 bool subchain_p = true;
1483 next_load = NULL;
1484 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1485 {
1486 if (j != 0
1487 && (next_load != load
1488 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
1489 {
1490 subchain_p = false;
1491 break;
1492 }
1493 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1494 }
1495 if (subchain_p)
1496 SLP_TREE_LOAD_PERMUTATION (node).release ();
1497 else
1498 {
1499 stmt_vec_info group_info
1500 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1501 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
1502 unsigned nunits
1503 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1504 unsigned k, maxk = 0;
1505 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1506 if (k > maxk)
1507 maxk = k;
1508 /* In BB vectorization we may not actually use a loaded vector
1509 accessing elements in excess of GROUP_SIZE. */
1510 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
1511 {
1512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1513 "BB vectorization with gaps at the end of "
1514 "a load is not supported\n");
1515 return false;
1516 }
1517
1518 /* Verify the permutation can be generated. */
1519 vec<tree> tem;
1520 unsigned n_perms;
1521 if (!vect_transform_slp_perm_load (node, tem, NULL,
1522 1, slp_instn, true, &n_perms))
1523 {
1524 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1525 vect_location,
1526 "unsupported load permutation\n");
1527 return false;
1528 }
1529 }
1530 }
1531 return true;
1532 }
1533
1534 /* For loop vectorization verify we can generate the permutation. */
1535 unsigned n_perms;
1536 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1537 if (node->load_permutation.exists ()
1538 && !vect_transform_slp_perm_load
1539 (node, vNULL, NULL,
1540 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
1541 &n_perms))
1542 return false;
1543
1544 return true;
1545 }
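/* As an example of the BB subchain case above: for an interleaving
   chain a[0], a[1], a[2], a[3], an SLP node loading { a[1], a[2] }
   forms a contiguous subchain (each element follows its predecessor
   with GROUP_GAP 1), so its load permutation can simply be released.  */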
1546
1547
1548 /* Find the last scalar stmt of the SLP tree rooted at NODE. */
1549
1550 gimple *
1551 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1552 {
1553 gimple *last = NULL, *stmt;
1554
1555 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1556 {
1557 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1558 if (is_pattern_stmt_p (stmt_vinfo))
1559 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1560 else
1561 last = get_later_stmt (stmt, last);
1562 }
1563
1564 return last;
1565 }
1566
1567 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1568
1569 static void
1570 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1571 stmt_vector_for_cost *prologue_cost_vec,
1572 stmt_vector_for_cost *body_cost_vec,
1573 unsigned ncopies_for_cost)
1574 {
1575 unsigned i, j;
1576 slp_tree child;
1577 gimple *stmt;
1578 stmt_vec_info stmt_info;
1579 tree lhs;
1580
1581 /* Recurse down the SLP tree. */
1582 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1583 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1584 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1585 body_cost_vec, ncopies_for_cost);
1586
1587 /* Look at the first scalar stmt to determine the cost. */
1588 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1589 stmt_info = vinfo_for_stmt (stmt);
1590 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1591 {
1592 vect_memory_access_type memory_access_type
1593 = (STMT_VINFO_STRIDED_P (stmt_info)
1594 ? VMAT_STRIDED_SLP
1595 : VMAT_CONTIGUOUS);
1596 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1597 vect_model_store_cost (stmt_info, ncopies_for_cost,
1598 memory_access_type, vect_uninitialized_def,
1599 node, prologue_cost_vec, body_cost_vec);
1600 else
1601 {
1602 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1603 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1604 {
1605 /* If the load is permuted then the alignment is determined by
1606 the first group element, not by the first scalar stmt's DR. */
1607 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1608 stmt_info = vinfo_for_stmt (stmt);
1609 /* Record the cost for the permutation. */
1610 unsigned n_perms;
1611 vect_transform_slp_perm_load (node, vNULL, NULL,
1612 ncopies_for_cost, instance, true,
1613 &n_perms);
1614 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
1615 stmt_info, 0, vect_body);
1616 unsigned nunits
1617 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1618 /* And adjust the number of loads performed. This handles
1619 redundancies as well as loads that are later dead. */
1620 auto_sbitmap perm (GROUP_SIZE (stmt_info));
1621 bitmap_clear (perm);
1622 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1623 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1624 ncopies_for_cost = 0;
1625 bool load_seen = false;
1626 for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1627 {
1628 if (i % nunits == 0)
1629 {
1630 if (load_seen)
1631 ncopies_for_cost++;
1632 load_seen = false;
1633 }
1634 if (bitmap_bit_p (perm, i))
1635 load_seen = true;
1636 }
1637 if (load_seen)
1638 ncopies_for_cost++;
1639 gcc_assert (ncopies_for_cost
1640 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1641 + nunits - 1) / nunits);
1642 ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
1643 }
1644 /* Record the cost for the vector loads. */
1645 vect_model_load_cost (stmt_info, ncopies_for_cost,
1646 memory_access_type, node, prologue_cost_vec,
1647 body_cost_vec);
1648 return;
1649 }
1650 }
1651 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
1652 {
1653 /* ncopies_for_cost is the number of IVs we generate. */
1654 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1655 stmt_info, 0, vect_body);
1656
1657 /* Prologue cost for the initial values and step vector. */
1658 record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
1659 CONSTANT_CLASS_P
1660 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1661 (stmt_info))
1662 ? vector_load : vec_construct,
1663 stmt_info, 0, vect_prologue);
1664 record_stmt_cost (prologue_cost_vec, 1,
1665 CONSTANT_CLASS_P
1666 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
1667 ? vector_load : vec_construct,
1668 stmt_info, 0, vect_prologue);
1669
1670 /* ??? No easy way to get at the actual number of vector stmts
1671 to be generated and thus the derived IVs. */
1672 }
1673 else
1674 {
1675 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1676 stmt_info, 0, vect_body);
1677 if (SLP_TREE_TWO_OPERATORS (node))
1678 {
1679 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1680 stmt_info, 0, vect_body);
1681 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1682 stmt_info, 0, vect_body);
1683 }
1684 }
1685
1686 /* Push SLP node def-type to stmts. */
1687 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1688 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1689 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1690 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1691
1692 /* Scan operands and account for prologue cost of constants/externals.
1693 ??? This over-estimates cost for multiple uses and should be
1694 re-engineered. */
1695 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1696 lhs = gimple_get_lhs (stmt);
1697 for (i = 0; i < gimple_num_ops (stmt); ++i)
1698 {
1699 tree op = gimple_op (stmt, i);
1700 gimple *def_stmt;
1701 enum vect_def_type dt;
1702 if (!op || op == lhs)
1703 continue;
1704 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1705 {
1706 /* Without looking at the actual initializer a vector of
1707 constants can be implemented as a load from the constant pool.
1708 ??? We need to pass down stmt_info for a vector type
1709 even if it points to the wrong stmt. */
1710 if (dt == vect_constant_def)
1711 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1712 stmt_info, 0, vect_prologue);
1713 else if (dt == vect_external_def)
1714 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1715 stmt_info, 0, vect_prologue);
1716 }
1717 }
1718
1719 /* Restore stmt def-types. */
1720 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1721 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1722 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1723 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1724 }
1725
1726 /* Compute the cost for the SLP instance INSTANCE. */
1727
1728 static void
1729 vect_analyze_slp_cost (slp_instance instance, void *data)
1730 {
1731 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1732 unsigned ncopies_for_cost;
1733 stmt_info_for_cost *si;
1734 unsigned i;
1735
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_NOTE, vect_location,
1738 "=== vect_analyze_slp_cost ===\n");
1739
1740 /* Calculate the number of vector stmts to create based on the unrolling
1741 factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and
1742 GROUP_SIZE / NUNITS otherwise). */
1743 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1744 slp_tree node = SLP_INSTANCE_TREE (instance);
1745 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1746 /* Adjust the group_size by the vectorization factor which is always one
1747 for basic-block vectorization. */
1748 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1749 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1750 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1751 /* For reductions look at a reduction operand in case the reduction
1752 operation is widening like DOT_PROD or SAD. */
1753 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1754 {
1755 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1756 switch (gimple_assign_rhs_code (stmt))
1757 {
1758 case DOT_PROD_EXPR:
1759 case SAD_EXPR:
1760 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1761 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1762 break;
1763 default:;
1764 }
1765 }
1766 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
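/* E.g. a group of 6 scalar stmts with 4-element vectors needs
   least_common_multiple (4, 6) / 4 == 3 vector stmts, while a group
   of 8 needs 8 / 4 == 2.  */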
1767
1768 prologue_cost_vec.create (10);
1769 body_cost_vec.create (10);
1770 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1771 &prologue_cost_vec, &body_cost_vec,
1772 ncopies_for_cost);
1773
1774 /* Record the prologue costs, which were delayed until we were
1775 sure that SLP was successful. */
1776 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1777 {
1778 struct _stmt_vec_info *stmt_info
1779 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1780 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1781 si->misalign, vect_prologue);
1782 }
1783
1784 /* Record the instance's instructions in the target cost model. */
1785 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1786 {
1787 struct _stmt_vec_info *stmt_info
1788 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1789 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1790 si->misalign, vect_body);
1791 }
1792
1793 prologue_cost_vec.release ();
1794 body_cost_vec.release ();
1795 }
1796
1797 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1798 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1799 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1800 containing the remainder.
1801 Return the first stmt in the second group. */
1802
1803 static gimple *
1804 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1805 {
1806 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1807 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1808 gcc_assert (group1_size > 0);
1809 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1810 gcc_assert (group2_size > 0);
1811 GROUP_SIZE (first_vinfo) = group1_size;
1812
1813 gimple *stmt = first_stmt;
1814 for (unsigned i = group1_size; i > 1; i--)
1815 {
1816 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1817 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1818 }
1819 /* STMT is now the last element of the first group. */
1820 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1821 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1822
1823 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1824 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1825 {
1826 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1827 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1828 }
1829
1830 	/* For the second group, the GROUP_GAP is that before the original group,
1831 	plus skipping over the first group's GROUP1_SIZE stmts.  */
1832 GROUP_GAP (vinfo_for_stmt (group2)) =
1833 GROUP_GAP (first_vinfo) + group1_size;
1834
1835 /* GROUP_GAP of the first group now has to skip over the second group too. */
1836 GROUP_GAP (first_vinfo) += group2_size;
1837
1838 if (dump_enabled_p ())
1839 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1840 group1_size, group2_size);
1841
1842 return group2;
1843 }
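
/* Worked example for the gap bookkeeping above (illustrative sizes):
   splitting a group of 8 stores with GROUP_GAP 0 at GROUP1_SIZE 4 gives
   the second group GROUP_SIZE 4 and GROUP_GAP 0 + 4 (the original gap
   plus the first group it skips), and leaves the first group with
   GROUP_SIZE 4 and GROUP_GAP 0 + 4 (it now skips the second group).  */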
1844
1845 /* Analyze an SLP instance starting from a group of grouped stores. Call
1846 vect_build_slp_tree to build a tree of packed stmts if possible.
1847 Return FALSE if it's impossible to SLP any stmt in the loop. */
1848
1849 static bool
1850 vect_analyze_slp_instance (vec_info *vinfo,
1851 gimple *stmt, unsigned max_tree_size)
1852 {
1853 slp_instance new_instance;
1854 slp_tree node;
1855 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1856 unsigned int unrolling_factor = 1, nunits;
1857 tree vectype, scalar_type = NULL_TREE;
1858 gimple *next;
1859 unsigned int i;
1860 unsigned int max_nunits = 0;
1861 vec<slp_tree> loads;
1862 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1863 vec<gimple *> scalar_stmts;
1864
1865 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1866 {
1867 if (dr)
1868 {
1869 scalar_type = TREE_TYPE (DR_REF (dr));
1870 vectype = get_vectype_for_scalar_type (scalar_type);
1871 }
1872 else
1873 {
1874 gcc_assert (is_a <loop_vec_info> (vinfo));
1875 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1876 }
1877
1878 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1879 }
1880 else
1881 {
1882 gcc_assert (is_a <loop_vec_info> (vinfo));
1883 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1884 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1885 }
1886
1887 if (!vectype)
1888 {
1889 if (dump_enabled_p ())
1890 {
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "Build SLP failed: unsupported data-type ");
1893 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1894 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1895 }
1896
1897 return false;
1898 }
1899 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1900
1901 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1902 scalar_stmts.create (group_size);
1903 next = stmt;
1904 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1905 {
1906 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1907 while (next)
1908 {
1909 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1910 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1911 scalar_stmts.safe_push (
1912 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1913 else
1914 scalar_stmts.safe_push (next);
1915 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1916 }
1917 /* Mark the first element of the reduction chain as reduction to properly
1918 transform the node. In the reduction analysis phase only the last
1919 element of the chain is marked as reduction. */
1920 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1921 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1922 }
1923 else
1924 {
1925 /* Collect reduction statements. */
1926 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1927 for (i = 0; reductions.iterate (i, &next); i++)
1928 scalar_stmts.safe_push (next);
1929 }
1930
1931 loads.create (group_size);
1932
1933 /* Build the tree for the SLP instance. */
1934 bool *matches = XALLOCAVEC (bool, group_size);
1935 unsigned npermutes = 0;
1936 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1937 &max_nunits, &loads, matches, &npermutes,
1938 NULL, max_tree_size);
1939 if (node != NULL)
1940 {
1941 /* Calculate the unrolling factor based on the smallest type. */
1942 unrolling_factor
1943 = least_common_multiple (max_nunits, group_size) / group_size;
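      /* E.g. (illustrative): max_nunits 4 and group_size 6 give
	 least_common_multiple (4, 6) / 6 = 12 / 6 = 2, i.e. the group has
	 to be unrolled twice to fill whole vectors.  */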
1944
1945 if (unrolling_factor != 1
1946 && is_a <bb_vec_info> (vinfo))
1947 {
1948
1949 if (max_nunits > group_size)
1950 {
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "Build SLP failed: store group "
1953 "size not a multiple of the vector size "
1954 "in basic block SLP\n");
1955 vect_free_slp_tree (node);
1956 loads.release ();
1957 return false;
1958 }
1959 /* Fatal mismatch. */
1960 matches[group_size/max_nunits * max_nunits] = false;
1961 vect_free_slp_tree (node);
1962 loads.release ();
1963 }
1964 else
1965 {
1966 /* Create a new SLP instance. */
1967 new_instance = XNEW (struct _slp_instance);
1968 SLP_INSTANCE_TREE (new_instance) = node;
1969 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1970 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1971 SLP_INSTANCE_LOADS (new_instance) = loads;
1972
1973 /* Compute the load permutation. */
1974 slp_tree load_node;
1975 bool loads_permuted = false;
1976 FOR_EACH_VEC_ELT (loads, i, load_node)
1977 {
1978 vec<unsigned> load_permutation;
1979 int j;
1980 gimple *load, *first_stmt;
1981 bool this_load_permuted = false;
1982 load_permutation.create (group_size);
1983 first_stmt = GROUP_FIRST_ELEMENT
1984 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1985 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1986 {
1987 int load_place = vect_get_place_in_interleaving_chain
1988 (load, first_stmt);
1989 gcc_assert (load_place != -1);
1990 if (load_place != j)
1991 this_load_permuted = true;
1992 load_permutation.safe_push (load_place);
1993 }
1994 if (!this_load_permuted
1995 /* The load requires permutation when unrolling exposes
1996 a gap either because the group is larger than the SLP
1997 group-size or because there is a gap between the groups. */
1998 && (unrolling_factor == 1
1999 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2000 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2001 {
2002 load_permutation.release ();
2003 continue;
2004 }
2005 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2006 loads_permuted = true;
2007 }
2008
2009 if (loads_permuted)
2010 {
2011 if (!vect_supported_load_permutation_p (new_instance))
2012 {
2013 if (dump_enabled_p ())
2014 {
2015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2016 "Build SLP failed: unsupported load "
2017 "permutation ");
2018 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2019 TDF_SLIM, stmt, 0);
2020 }
2021 vect_free_slp_instance (new_instance);
2022 return false;
2023 }
2024 }
2025
2026 	  /* If the loads and stores can be handled with load/store-lanes
2027 	  instructions, do not generate this SLP instance.  */
2028 if (is_a <loop_vec_info> (vinfo)
2029 && loads_permuted
2030 && dr && vect_store_lanes_supported (vectype, group_size))
2031 {
2032 slp_tree load_node;
2033 FOR_EACH_VEC_ELT (loads, i, load_node)
2034 {
2035 gimple *first_stmt = GROUP_FIRST_ELEMENT
2036 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2037 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2038 /* Use SLP for strided accesses (or if we
2039 can't load-lanes). */
2040 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2041 || ! vect_load_lanes_supported
2042 (STMT_VINFO_VECTYPE (stmt_vinfo),
2043 GROUP_SIZE (stmt_vinfo)))
2044 break;
2045 }
2046 if (i == loads.length ())
2047 {
2048 if (dump_enabled_p ())
2049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2050 "Built SLP cancelled: can use "
2051 "load/store-lanes\n");
2052 vect_free_slp_instance (new_instance);
2053 return false;
2054 }
2055 }
2056
2057 vinfo->slp_instances.safe_push (new_instance);
2058
2059 if (dump_enabled_p ())
2060 {
2061 dump_printf_loc (MSG_NOTE, vect_location,
2062 "Final SLP tree for instance:\n");
2063 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2064 }
2065
2066 return true;
2067 }
2068 }
2069 else
2070 {
2071 /* Failed to SLP. */
2072 /* Free the allocated memory. */
2073 scalar_stmts.release ();
2074 loads.release ();
2075 }
2076
2077 /* For basic block SLP, try to break the group up into multiples of the
2078 vector size. */
2079 if (is_a <bb_vec_info> (vinfo)
2080 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2081 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2082 {
2083 /* We consider breaking the group only on VF boundaries from the existing
2084 start. */
2085 for (i = 0; i < group_size; i++)
2086 if (!matches[i]) break;
2087
2088 if (i >= nunits && i < group_size)
2089 {
2090 /* Split into two groups at the first vector boundary before i. */
2091 gcc_assert ((nunits & (nunits - 1)) == 0);
2092 unsigned group1_size = i & ~(nunits - 1);
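	  /* E.g. (illustrative): nunits 4 (a power of two, per the assert
	     above) and a first mismatch at i == 6 give
	     group1_size = 6 & ~3 = 4, the last vector boundary at or
	     before i.  */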
2093
2094 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2095 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2096 /* If the first non-match was in the middle of a vector,
2097 skip the rest of that vector. */
2098 if (group1_size < i)
2099 {
2100 i = group1_size + nunits;
2101 if (i < group_size)
2102 rest = vect_split_slp_store_group (rest, nunits);
2103 }
2104 if (i < group_size)
2105 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2106 return res;
2107 }
2108 	/* Even though not all of the first vector matched, we might be able to SLP
2109 	(some of) the remainder.  FORNOW ignore this possibility.  */
2110 }
2111
2112 return false;
2113 }
2114
2115
2116 /* Check if there are stmts in the loop that can be vectorized using SLP.
2117 Build SLP trees of packed scalar stmts if SLP is possible.  */
2118
2119 bool
2120 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2121 {
2122 unsigned int i;
2123 gimple *first_element;
2124
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2127
2128 /* Find SLP sequences starting from groups of grouped stores. */
2129 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2130 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2131
2132 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2133 {
2134 if (loop_vinfo->reduction_chains.length () > 0)
2135 {
2136 /* Find SLP sequences starting from reduction chains. */
2137 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2138 if (! vect_analyze_slp_instance (vinfo, first_element,
2139 max_tree_size))
2140 {
2141 /* Dissolve reduction chain group. */
2142 gimple *next, *stmt = first_element;
2143 while (stmt)
2144 {
2145 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2146 next = GROUP_NEXT_ELEMENT (vinfo);
2147 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2148 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2149 stmt = next;
2150 }
2151 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2152 = vect_internal_def;
2153 }
2154 }
2155
2156 /* Find SLP sequences starting from groups of reductions. */
2157 if (loop_vinfo->reductions.length () > 1)
2158 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2159 max_tree_size);
2160 }
2161
2162 return true;
2163 }
2164
2165
2166 /* For each possible SLP instance decide whether to SLP it and calculate the
2167 overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
2168 to SLP at least one instance.  */
2169
2170 bool
2171 vect_make_slp_decision (loop_vec_info loop_vinfo)
2172 {
2173 unsigned int i, unrolling_factor = 1;
2174 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2175 slp_instance instance;
2176 int decided_to_slp = 0;
2177
2178 if (dump_enabled_p ())
2179 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2180 "\n");
2181
2182 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2183 {
2184 /* FORNOW: SLP if you can. */
2185 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2186 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2187
2188 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2189 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2190 loop-based vectorization. Such stmts will be marked as HYBRID. */
2191 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2192 decided_to_slp++;
2193 }
2194
2195 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2196
2197 if (decided_to_slp && dump_enabled_p ())
2198 dump_printf_loc (MSG_NOTE, vect_location,
2199 "Decided to SLP %d instances. Unrolling factor %d\n",
2200 decided_to_slp, unrolling_factor);
2201
2202 return (decided_to_slp > 0);
2203 }
2204
2205
2206 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2207 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2208
2209 static void
2210 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2211 {
2212 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2213 imm_use_iterator imm_iter;
2214 gimple *use_stmt;
2215 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2216 slp_tree child;
2217 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2218 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2219 int j;
2220
2221 /* Propagate hybrid down the SLP tree. */
2222 if (stype == hybrid)
2223 ;
2224 else if (HYBRID_SLP_STMT (stmt_vinfo))
2225 stype = hybrid;
2226 else
2227 {
2228 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2229 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2230 /* If we get a pattern stmt here we have to use the LHS of the
2231 original stmt for immediate uses. */
2232 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2233 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2234 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2235 tree def;
2236 if (gimple_code (stmt) == GIMPLE_PHI)
2237 def = gimple_phi_result (stmt);
2238 else
2239 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2240 if (def)
2241 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2242 {
2243 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2244 continue;
2245 use_vinfo = vinfo_for_stmt (use_stmt);
2246 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2247 && STMT_VINFO_RELATED_STMT (use_vinfo))
2248 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2249 if (!STMT_SLP_TYPE (use_vinfo)
2250 && (STMT_VINFO_RELEVANT (use_vinfo)
2251 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2252 && !(gimple_code (use_stmt) == GIMPLE_PHI
2253 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2254 {
2255 if (dump_enabled_p ())
2256 {
2257 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2258 "def in non-SLP stmt: ");
2259 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2260 }
2261 stype = hybrid;
2262 }
2263 }
2264 }
2265
2266 if (stype == hybrid
2267 && !HYBRID_SLP_STMT (stmt_vinfo))
2268 {
2269 if (dump_enabled_p ())
2270 {
2271 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2272 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2273 }
2274 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2275 }
2276
2277 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2278 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2279 vect_detect_hybrid_slp_stmts (child, i, stype);
2280 }
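
/* Illustrative example (hypothetical GIMPLE, not from any testcase): given
   the SLP group

     t0 = b[0] + c[0];  t1 = b[1] + c[1];
     a[0] = t0;         a[1] = t1;

   an additional non-SLP use such as  s = t0 * x  keeps the def of t0
   needed outside the instance, so its stmt is marked hybrid: it must be
   vectorized by the loop vectorizer in addition to being part of the
   SLP tree.  */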
2281
2282 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2283
2284 static tree
2285 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2286 {
2287 walk_stmt_info *wi = (walk_stmt_info *)data;
2288 struct loop *loopp = (struct loop *)wi->info;
2289
2290 if (wi->is_lhs)
2291 return NULL_TREE;
2292
2293 if (TREE_CODE (*tp) == SSA_NAME
2294 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2295 {
2296 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2297 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2298 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2299 {
2300 if (dump_enabled_p ())
2301 {
2302 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2303 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2304 }
2305 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2306 }
2307 }
2308
2309 return NULL_TREE;
2310 }
2311
2312 static tree
2313 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2314 walk_stmt_info *)
2315 {
2316 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2317 /* If the stmt is in a SLP instance then this isn't a reason
2318 to mark use definitions in other SLP instances as hybrid. */
2319 if (! STMT_SLP_TYPE (use_vinfo)
2320 && (STMT_VINFO_RELEVANT (use_vinfo)
2321 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2322 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2323 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2324 ;
2325 else
2326 *handled = true;
2327 return NULL_TREE;
2328 }
2329
2330 /* Find stmts that must be both vectorized and SLPed. */
2331
2332 void
2333 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2334 {
2335 unsigned int i;
2336 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2337 slp_instance instance;
2338
2339 if (dump_enabled_p ())
2340 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2341 "\n");
2342
2343 /* First walk all pattern stmts in the loop and mark defs of their uses as
2344 hybrid, because immediate uses inside pattern stmts are not recorded.  */
2345 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2346 {
2347 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2348 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2349 gsi_next (&gsi))
2350 {
2351 gimple *stmt = gsi_stmt (gsi);
2352 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2353 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2354 {
2355 walk_stmt_info wi;
2356 memset (&wi, 0, sizeof (wi));
2357 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2358 gimple_stmt_iterator gsi2
2359 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2360 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2361 vect_detect_hybrid_slp_1, &wi);
2362 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2363 vect_detect_hybrid_slp_2,
2364 vect_detect_hybrid_slp_1, &wi);
2365 }
2366 }
2367 }
2368
2369 /* Then walk the SLP instance trees marking stmts with uses in
2370 non-SLP stmts as hybrid, also propagating hybrid down the
2371 SLP tree, collecting the above info on-the-fly. */
2372 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2373 {
2374 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2375 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2376 i, pure_slp);
2377 }
2378 }
2379
2380
2381 /* Initialize a bb_vec_info struct for the statements between
2382 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2383
2384 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2385 gimple_stmt_iterator region_end_in)
2386 : vec_info (vec_info::bb, init_cost (NULL)),
2387 bb (gsi_bb (region_begin_in)),
2388 region_begin (region_begin_in),
2389 region_end (region_end_in)
2390 {
2391 gimple_stmt_iterator gsi;
2392
2393 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2394 gsi_next (&gsi))
2395 {
2396 gimple *stmt = gsi_stmt (gsi);
2397 gimple_set_uid (stmt, 0);
2398 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2399 }
2400
2401 bb->aux = this;
2402 }
2403
2404
2405 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2406 stmts in the basic block. */
2407
2408 _bb_vec_info::~_bb_vec_info ()
2409 {
2410 for (gimple_stmt_iterator si = region_begin;
2411 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2412 {
2413 gimple *stmt = gsi_stmt (si);
2414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2415
2416 if (stmt_info)
2417 /* Free stmt_vec_info. */
2418 free_stmt_vec_info (stmt);
2419
2420 /* Reset region marker. */
2421 gimple_set_uid (stmt, -1);
2422 }
2423
2424 bb->aux = NULL;
2425 }
2426
2427
2428 /* Analyze statements contained in SLP tree node after recursively analyzing
2429 the subtree. Return TRUE if the operations are supported. */
2430
2431 static bool
2432 vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance)
2433 {
2434 bool dummy;
2435 int i, j;
2436 gimple *stmt;
2437 slp_tree child;
2438
2439 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2440 return true;
2441
2442 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2443 if (!vect_slp_analyze_node_operations (child, node_instance))
2444 return false;
2445
2446 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2448 gcc_assert (stmt_info);
2449 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2450
2451 /* For BB vectorization vector types are assigned here.
2452 Memory accesses already got their vector type assigned
2453 in vect_analyze_data_refs. */
2454 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2455 if (bb_vinfo
2456 && ! STMT_VINFO_DATA_REF (stmt_info))
2457 {
2458 gcc_assert (PURE_SLP_STMT (stmt_info));
2459
2460 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2461 if (dump_enabled_p ())
2462 {
2463 dump_printf_loc (MSG_NOTE, vect_location,
2464 "get vectype for scalar type: ");
2465 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2466 dump_printf (MSG_NOTE, "\n");
2467 }
2468
2469 tree vectype = get_vectype_for_scalar_type (scalar_type);
2470 if (!vectype)
2471 {
2472 if (dump_enabled_p ())
2473 {
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "not SLPed: unsupported data-type ");
2476 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2477 scalar_type);
2478 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2479 }
2480 return false;
2481 }
2482
2483 if (dump_enabled_p ())
2484 {
2485 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2486 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2487 dump_printf (MSG_NOTE, "\n");
2488 }
2489
2490 gimple *sstmt;
2491 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2492 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2493 }
2494
2495 /* Push SLP node def-type to stmt operands. */
2496 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2497 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2498 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2499 = SLP_TREE_DEF_TYPE (child);
2500 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2501 /* Restore def-types. */
2502 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2503 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2504 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2505 = vect_internal_def;
2506 if (! res)
2507 return false;
2508
2509 return true;
2510 }
2511
2512
2513 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2514 operations are supported. */
2515
2516 bool
2517 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2518 {
2519 slp_instance instance;
2520 int i;
2521
2522 if (dump_enabled_p ())
2523 dump_printf_loc (MSG_NOTE, vect_location,
2524 "=== vect_slp_analyze_operations ===\n");
2525
2526 for (i = 0; slp_instances.iterate (i, &instance); )
2527 {
2528 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance),
2529 instance))
2530 {
2531 dump_printf_loc (MSG_NOTE, vect_location,
2532 "removing SLP instance operations starting from: ");
2533 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2534 SLP_TREE_SCALAR_STMTS
2535 (SLP_INSTANCE_TREE (instance))[0], 0);
2536 vect_free_slp_instance (instance);
2537 slp_instances.ordered_remove (i);
2538 }
2539 else
2540 {
2541 /* Compute the costs of the SLP instance. */
2542 vect_analyze_slp_cost (instance, data);
2543 i++;
2544 }
2545 }
2546
2547 if (!slp_instances.length ())
2548 return false;
2549
2550 return true;
2551 }
2552
2553
2554 /* Compute the scalar cost of the SLP node NODE and its children
2555 and return it. Do not account defs that are marked in LIFE and
2556 update LIFE according to uses of NODE. */
2557
2558 static unsigned
2559 vect_bb_slp_scalar_cost (basic_block bb,
2560 slp_tree node, vec<bool, va_heap> *life)
2561 {
2562 unsigned scalar_cost = 0;
2563 unsigned i;
2564 gimple *stmt;
2565 slp_tree child;
2566
2567 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2568 {
2569 unsigned stmt_cost;
2570 ssa_op_iter op_iter;
2571 def_operand_p def_p;
2572 stmt_vec_info stmt_info;
2573
2574 if ((*life)[i])
2575 continue;
2576
2577 /* If there is a non-vectorized use of the defs then the scalar
2578 stmt is kept live in which case we do not account it or any
2579 required defs in the SLP children in the scalar cost. This
2580 way we make the vectorization more costly when compared to
2581 the scalar cost. */
2582 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2583 {
2584 imm_use_iterator use_iter;
2585 gimple *use_stmt;
2586 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2587 if (!is_gimple_debug (use_stmt)
2588 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2589 use_stmt)
2590 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2591 {
2592 (*life)[i] = true;
2593 BREAK_FROM_IMM_USE_STMT (use_iter);
2594 }
2595 }
2596 if ((*life)[i])
2597 continue;
2598
2599 /* Count scalar stmts only once. */
2600 if (gimple_visited_p (stmt))
2601 continue;
2602 gimple_set_visited (stmt, true);
2603
2604 stmt_info = vinfo_for_stmt (stmt);
2605 if (STMT_VINFO_DATA_REF (stmt_info))
2606 {
2607 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2608 stmt_cost = vect_get_stmt_cost (scalar_load);
2609 else
2610 stmt_cost = vect_get_stmt_cost (scalar_store);
2611 }
2612 else
2613 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2614
2615 scalar_cost += stmt_cost;
2616 }
2617
2618 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2619 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2620 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2621
2622 return scalar_cost;
2623 }
2624
2625 /* Check if vectorization of the basic block is profitable. */
2626
2627 static bool
2628 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2629 {
2630 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2631 slp_instance instance;
2632 int i;
2633 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2634 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2635
2636 /* Calculate scalar cost. */
2637 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2638 {
2639 auto_vec<bool, 20> life;
2640 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2641 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2642 SLP_INSTANCE_TREE (instance),
2643 &life);
2644 }
2645
2646 /* Unset visited flag. */
2647 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2648 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2649 gimple_set_visited (gsi_stmt (gsi), false);
2650
2651 /* Complete the target-specific cost calculation. */
2652 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2653 &vec_inside_cost, &vec_epilogue_cost);
2654
2655 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2656
2657 if (dump_enabled_p ())
2658 {
2659 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2660 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2661 vec_inside_cost);
2662 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2663 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2664 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2665 }
2666
2667 /* Vectorization is profitable if its cost is no more than the cost of the
2668 scalar version.  Note that we err on the vector side for equal cost because
2669 the cost estimate is otherwise quite pessimistic (constant uses are
2670 free on the scalar side but cost a load on the vector side for
2671 example).  */
2672 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2673 return false;
2674
2675 return true;
2676 }
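
/* A minimal standalone sketch (plain C, hypothetical cost values; not one
   of the vectorizer's cost hooks) of the decision just made: keep the
   vector version only when its total cost does not exceed the scalar
   cost, with ties going to the vector side.  */

static bool
profitable_p_sketch (unsigned vec_prologue_cost, unsigned vec_inside_cost,
		     unsigned vec_epilogue_cost, unsigned scalar_cost)
{
  unsigned vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
  /* E.g. 2 + 4 + 1 = 7 against a scalar cost of 8: vectorize;
     against a scalar cost of 6: do not.  */
  return vec_outside_cost + vec_inside_cost <= scalar_cost;
}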
2677
2678 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2679 if so and sets fatal to true if failure is independent of
2680 current_vector_size. */
2681
2682 static bb_vec_info
2683 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2684 gimple_stmt_iterator region_end,
2685 vec<data_reference_p> datarefs, int n_stmts,
2686 bool &fatal)
2687 {
2688 bb_vec_info bb_vinfo;
2689 slp_instance instance;
2690 int i;
2691 int min_vf = 2;
2692
2693 /* The first group of checks is independent of the vector size. */
2694 fatal = true;
2695
2696 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2697 {
2698 if (dump_enabled_p ())
2699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2700 "not vectorized: too many instructions in "
2701 "basic block.\n");
2702 free_data_refs (datarefs);
2703 return NULL;
2704 }
2705
2706 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2707 if (!bb_vinfo)
2708 return NULL;
2709
2710 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2711
2712 /* Analyze the data references. */
2713
2714 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2715 {
2716 if (dump_enabled_p ())
2717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2718 "not vectorized: unhandled data-ref in basic "
2719 "block.\n");
2720
2721 delete bb_vinfo;
2722 return NULL;
2723 }
2724
2725 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2726 {
2727 if (dump_enabled_p ())
2728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2729 "not vectorized: not enough data-refs in "
2730 "basic block.\n");
2731
2732 delete bb_vinfo;
2733 return NULL;
2734 }
2735
2736 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2737 {
2738 if (dump_enabled_p ())
2739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2740 "not vectorized: unhandled data access in "
2741 "basic block.\n");
2742
2743 delete bb_vinfo;
2744 return NULL;
2745 }
2746
2747 /* If there are no grouped stores in the region there is no need
2748 to continue with pattern recog as vect_analyze_slp will fail
2749 anyway. */
2750 if (bb_vinfo->grouped_stores.is_empty ())
2751 {
2752 if (dump_enabled_p ())
2753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2754 "not vectorized: no grouped stores in "
2755 "basic block.\n");
2756
2757 delete bb_vinfo;
2758 return NULL;
2759 }
2760
2761 /* The rest of the analysis below depends on the vector size in some way,
so from here on failures are not fatal.  */
2762 fatal = false;
2763
2764 vect_pattern_recog (bb_vinfo);
2765
2766 /* Check the SLP opportunities in the basic block, analyze and build SLP
2767 trees. */
2768 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2769 {
2770 if (dump_enabled_p ())
2771 {
2772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2773 "Failed to SLP the basic block.\n");
2774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2775 "not vectorized: failed to find SLP opportunities "
2776 "in basic block.\n");
2777 }
2778
2779 delete bb_vinfo;
2780 return NULL;
2781 }
2782
2783 /* Analyze and verify the alignment of data references and the
2784 dependence in the SLP instances. */
2785 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2786 {
2787 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2788 || ! vect_slp_analyze_instance_dependence (instance))
2789 {
2790 dump_printf_loc (MSG_NOTE, vect_location,
2791 "removing SLP instance operations starting from: ");
2792 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2793 SLP_TREE_SCALAR_STMTS
2794 (SLP_INSTANCE_TREE (instance))[0], 0);
2795 vect_free_slp_instance (instance);
2796 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2797 continue;
2798 }
2799
2800 /* Mark all the statements that we want to vectorize as pure SLP and
2801 relevant. */
2802 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2803 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2804
2805 i++;
2806 }
2807 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2808 {
2809 delete bb_vinfo;
2810 return NULL;
2811 }
2812
2813 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2814 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2815 {
2816 if (dump_enabled_p ())
2817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2818 "not vectorized: bad operation in basic block.\n");
2819
2820 delete bb_vinfo;
2821 return NULL;
2822 }
2823
2824 /* Cost model: check if the vectorization is worthwhile. */
2825 if (!unlimited_cost_model (NULL)
2826 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2827 {
2828 if (dump_enabled_p ())
2829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2830 "not vectorized: vectorization is not "
2831 "profitable.\n");
2832
2833 delete bb_vinfo;
2834 return NULL;
2835 }
2836
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_NOTE, vect_location,
2839 "Basic block will be vectorized using SLP\n");
2840
2841 return bb_vinfo;
2842 }
2843
2844
2845 /* Main entry for the BB vectorizer.  Analyze and transform BB; return
2846 true if anything in the basic block was vectorized.  */
2847
2848 bool
2849 vect_slp_bb (basic_block bb)
2850 {
2851 bb_vec_info bb_vinfo;
2852 gimple_stmt_iterator gsi;
2853 unsigned int vector_sizes;
2854 bool any_vectorized = false;
2855
2856 if (dump_enabled_p ())
2857 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2858
2859 /* Autodetect first vector size we try. */
2860 current_vector_size = 0;
2861 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2862
2863 gsi = gsi_start_bb (bb);
2864
2865 while (1)
2866 {
2867 if (gsi_end_p (gsi))
2868 break;
2869
2870 gimple_stmt_iterator region_begin = gsi;
2871 vec<data_reference_p> datarefs = vNULL;
2872 int insns = 0;
2873
2874 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2875 {
2876 gimple *stmt = gsi_stmt (gsi);
2877 if (is_gimple_debug (stmt))
2878 continue;
2879 insns++;
2880
2881 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2882 vect_location = gimple_location (stmt);
2883
2884 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2885 break;
2886 }
2887
2888 /* Skip leading unhandled stmts. */
2889 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2890 {
2891 gsi_next (&gsi);
2892 continue;
2893 }
2894
2895 gimple_stmt_iterator region_end = gsi;
2896
2897 bool vectorized = false;
2898 bool fatal = false;
2899 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2900 datarefs, insns, fatal);
2901 if (bb_vinfo
2902 && dbg_cnt (vect_slp))
2903 {
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2906
2907 vect_schedule_slp (bb_vinfo);
2908
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_NOTE, vect_location,
2911 "basic block part vectorized\n");
2912
2913 vectorized = true;
2914 }
2915 delete bb_vinfo;
2916
2917 any_vectorized |= vectorized;
2918
2919 vector_sizes &= ~current_vector_size;
2920 if (vectorized
2921 || vector_sizes == 0
2922 || current_vector_size == 0
2923 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2924 vector sizes will fail do not bother iterating. */
2925 || fatal)
2926 {
2927 if (gsi_end_p (region_end))
2928 break;
2929
2930 /* Skip the unhandled stmt. */
2931 gsi_next (&gsi);
2932
2933 /* And reset vector sizes. */
2934 current_vector_size = 0;
2935 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2936 }
2937 else
2938 {
2939 /* Try the next biggest vector size. */
2940 current_vector_size = 1 << floor_log2 (vector_sizes);
2941 if (dump_enabled_p ())
2942 dump_printf_loc (MSG_NOTE, vect_location,
2943 "***** Re-trying analysis with "
2944 "vector size %d\n", current_vector_size);
2945
2946 /* Start over. */
2947 gsi = region_begin;
2948 }
2949 }
2950
2951 return any_vectorized;
2952 }
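
/* Illustrative walk-through (hypothetical target): if
   autovectorize_vector_sizes () returns 16 | 8 and the first,
   autodetected attempt settles on current_vector_size 16 but fails
   non-fatally, the mask drops 16 and the region is re-analyzed with
   1 << floor_log2 (8) = 8.  */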
2953
2954
2955 /* Return true if the vector type of the boolean constant that is the OPNUM
2956 operand in statement STMT is a boolean vector.  */
2957
2958 static bool
2959 vect_mask_constant_operand_p (gimple *stmt, int opnum)
2960 {
2961 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2962 enum tree_code code = gimple_expr_code (stmt);
2963 tree op, vectype;
2964 gimple *def_stmt;
2965 enum vect_def_type dt;
2966
2967 	/* For comparisons and COND_EXPR the type is chosen depending
2968 	on the other comparison operand.  */
2969 if (TREE_CODE_CLASS (code) == tcc_comparison)
2970 {
2971 if (opnum)
2972 op = gimple_assign_rhs1 (stmt);
2973 else
2974 op = gimple_assign_rhs2 (stmt);
2975
2976 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2977 &dt, &vectype))
2978 gcc_unreachable ();
2979
2980 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2981 }
2982
2983 if (code == COND_EXPR)
2984 {
2985 tree cond = gimple_assign_rhs1 (stmt);
2986
2987 if (TREE_CODE (cond) == SSA_NAME)
2988 op = cond;
2989 else if (opnum)
2990 op = TREE_OPERAND (cond, 1);
2991 else
2992 op = TREE_OPERAND (cond, 0);
2993
2994 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2995 &dt, &vectype))
2996 gcc_unreachable ();
2997
2998 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2999 }
3000
3001 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3002 }
3003
3004
3005 /* For constant and loop invariant defs of SLP_NODE this function returns
3006 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3007 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3008 scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.  */
3011
3012 static void
3013 vect_get_constant_vectors (tree op, slp_tree slp_node,
3014 vec<tree> *vec_oprnds,
3015 unsigned int op_num, unsigned int number_of_vectors)
3016 {
3017 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3018 gimple *stmt = stmts[0];
3019 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3020 unsigned nunits;
3021 tree vec_cst;
3022 tree *elts;
3023 unsigned j, number_of_places_left_in_vector;
3024 tree vector_type;
3025 tree vop;
3026 int group_size = stmts.length ();
3027 unsigned int vec_num, i;
3028 unsigned number_of_copies = 1;
3029 vec<tree> voprnds;
3030 voprnds.create (number_of_vectors);
3031 bool constant_p, is_store;
3032 tree neutral_op = NULL;
3033 enum tree_code code = gimple_expr_code (stmt);
3034 gimple_seq ctor_seq = NULL;
3035
3036 /* Check if vector type is a boolean vector. */
3037 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3038 && vect_mask_constant_operand_p (stmt, op_num))
3039 vector_type
3040 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3041 else
3042 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3043 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3044
3045 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3046 {
3047 is_store = true;
3048 op = gimple_assign_rhs1 (stmt);
3049 }
3050 else
3051 is_store = false;
3052
3053 gcc_assert (op);
3054
3055 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3056 created vectors. It is greater than 1 if unrolling is performed.
3057
3058 For example, we have two scalar operands, s1 and s2 (e.g., group of
3059 strided accesses of size two), while NUNITS is four (i.e., four scalars
3060 of this type can be packed in a vector). The output vector will contain
3061 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3062 will be 2).
3063
3064 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3065 containing the operands.
3066
3067 For example, NUNITS is four as before, and the group size is 8
3068 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3069 {s5, s6, s7, s8}. */
3070
3071 number_of_copies = nunits * number_of_vectors / group_size;
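  /* Tying the formula to the example above (illustrative numbers): nunits 4,
     number_of_vectors 1 and group_size 2 give 4 * 1 / 2 = 2 copies -- the
     single output vector {s1, s2, s1, s2}.  */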
3072
3073 number_of_places_left_in_vector = nunits;
3074 constant_p = true;
3075 elts = XALLOCAVEC (tree, nunits);
3076 bool place_after_defs = false;
3077 for (j = 0; j < number_of_copies; j++)
3078 {
3079 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3080 {
3081 if (is_store)
3082 op = gimple_assign_rhs1 (stmt);
3083 else
3084 {
3085 switch (code)
3086 {
3087 case COND_EXPR:
3088 {
3089 tree cond = gimple_assign_rhs1 (stmt);
3090 if (TREE_CODE (cond) == SSA_NAME)
3091 op = gimple_op (stmt, op_num + 1);
3092 else if (op_num == 0 || op_num == 1)
3093 op = TREE_OPERAND (cond, op_num);
3094 else
3095 {
3096 if (op_num == 2)
3097 op = gimple_assign_rhs2 (stmt);
3098 else
3099 op = gimple_assign_rhs3 (stmt);
3100 }
3101 }
3102 break;
3103
3104 case CALL_EXPR:
3105 op = gimple_call_arg (stmt, op_num);
3106 break;
3107
3108 case LSHIFT_EXPR:
3109 case RSHIFT_EXPR:
3110 case LROTATE_EXPR:
3111 case RROTATE_EXPR:
3112 op = gimple_op (stmt, op_num + 1);
3113 /* Unlike the other binary operators, shifts/rotates have
3114 the shift count being int, instead of the same type as
3115 the lhs, so make sure the scalar is the right type if
3116 we are dealing with vectors of
3117 long long/long/short/char. */
3118 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3119 op = fold_convert (TREE_TYPE (vector_type), op);
3120 break;
3121
3122 default:
3123 op = gimple_op (stmt, op_num + 1);
3124 break;
3125 }
3126 }
3127
3128 /* Create 'vect_ = {op0,op1,...,opn}'. */
3129 number_of_places_left_in_vector--;
3130 tree orig_op = op;
3131 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3132 {
3133 if (CONSTANT_CLASS_P (op))
3134 {
3135 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3136 {
3137 /* Can't use VIEW_CONVERT_EXPR for booleans because
3138 of possibly different sizes of scalar value and
3139 vector element. */
3140 if (integer_zerop (op))
3141 op = build_int_cst (TREE_TYPE (vector_type), 0);
3142 else if (integer_onep (op))
3143 op = build_all_ones_cst (TREE_TYPE (vector_type));
3144 else
3145 gcc_unreachable ();
3146 }
3147 else
3148 op = fold_unary (VIEW_CONVERT_EXPR,
3149 TREE_TYPE (vector_type), op);
3150 gcc_assert (op && CONSTANT_CLASS_P (op));
3151 }
3152 else
3153 {
3154 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3155 gimple *init_stmt;
3156 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3157 {
3158 tree true_val
3159 = build_all_ones_cst (TREE_TYPE (vector_type));
3160 tree false_val
3161 = build_zero_cst (TREE_TYPE (vector_type));
3162 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3163 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3164 op, true_val,
3165 false_val);
3166 }
3167 else
3168 {
3169 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3170 op);
3171 init_stmt
3172 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3173 op);
3174 }
3175 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3176 op = new_temp;
3177 }
3178 }
3179 elts[number_of_places_left_in_vector] = op;
3180 if (!CONSTANT_CLASS_P (op))
3181 constant_p = false;
3182 if (TREE_CODE (orig_op) == SSA_NAME
3183 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3184 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3185 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3186 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3187 place_after_defs = true;
3188
3189 if (number_of_places_left_in_vector == 0)
3190 {
3191 if (constant_p)
3192 vec_cst = build_vector (vector_type, elts);
3193 else
3194 {
3195 vec<constructor_elt, va_gc> *v;
3196 unsigned k;
3197 vec_alloc (v, nunits);
3198 for (k = 0; k < nunits; ++k)
3199 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3200 vec_cst = build_constructor (vector_type, v);
3201 }
3202 tree init;
3203 gimple_stmt_iterator gsi;
3204 if (place_after_defs)
3205 {
3206 gsi = gsi_for_stmt
3207 (vect_find_last_scalar_stmt_in_slp (slp_node));
3208 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3209 }
3210 else
3211 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3212 if (ctor_seq != NULL)
3213 {
3214 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3215 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3216 GSI_SAME_STMT);
3217 ctor_seq = NULL;
3218 }
3219 voprnds.quick_push (init);
3220 place_after_defs = false;
3221 number_of_places_left_in_vector = nunits;
3222 constant_p = true;
3223 }
3224 }
3225 }
3226
3227 	/* Since the vectors are created in reverse order, reverse them here
3228 	so VEC_OPRNDS ends up in the correct order.  */
3229 vec_num = voprnds.length ();
3230 for (j = vec_num; j != 0; j--)
3231 {
3232 vop = voprnds[j - 1];
3233 vec_oprnds->quick_push (vop);
3234 }
3235
3236 voprnds.release ();
3237
3238 	/* In case VF is greater than the unrolling factor needed for the SLP
3239 	group of stmts, NUMBER_OF_VECTORS to be created is greater than
3240 	NUMBER_OF_SCALARS / NUNITS, and hence we have to replicate the
3241 	vectors.  */
3242 while (number_of_vectors > vec_oprnds->length ())
3243 {
3244 tree neutral_vec = NULL;
3245
3246 if (neutral_op)
3247 {
3248 if (!neutral_vec)
3249 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3250
3251 vec_oprnds->quick_push (neutral_vec);
3252 }
3253 else
3254 {
3255 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3256 vec_oprnds->quick_push (vop);
3257 }
3258 }
3259 }
3260
3261
3262 /* Get vectorized definitions from SLP_NODE that contains corresponding
3263 vectorized def-stmts. */
3264
3265 static void
3266 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3267 {
3268 tree vec_oprnd;
3269 gimple *vec_def_stmt;
3270 unsigned int i;
3271
3272 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3273
3274 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3275 {
3276 gcc_assert (vec_def_stmt);
3277 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3278 vec_oprnd = gimple_phi_result (vec_def_stmt);
3279 else
3280 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3281 vec_oprnds->quick_push (vec_oprnd);
3282 }
3283 }
3284
3285
3286 /* Get vectorized definitions for SLP_NODE.
3287 If the scalar definitions are loop invariants or constants, collect them and
3288 call vect_get_constant_vectors() to create vector stmts.
3289 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3290 must be stored in the corresponding child of SLP_NODE, and we call
3291 vect_get_slp_vect_defs () to retrieve them. */
3292
3293 void
3294 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3295 vec<vec<tree> > *vec_oprnds)
3296 {
3297 gimple *first_stmt;
3298 int number_of_vects = 0, i;
3299 unsigned int child_index = 0;
3300 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3301 slp_tree child = NULL;
3302 vec<tree> vec_defs;
3303 tree oprnd;
3304 bool vectorized_defs;
3305
3306 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3307 FOR_EACH_VEC_ELT (ops, i, oprnd)
3308 {
3309 /* For each operand we check if it has vectorized definitions in a child
3310 node or we need to create them (for invariants and constants). We
3311 check if the LHS of the first stmt of the next child matches OPRND.
3312 If it does, we found the correct child. Otherwise, we call
3313 	vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
3314 	this child node is checked again for the next operand.  */
3315 vectorized_defs = false;
3316 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3317 {
3318 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3319
3320 /* We have to check both pattern and original def, if available. */
3321 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3322 {
3323 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3324 gimple *related
3325 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3326 tree first_def_op;
3327
3328 if (gimple_code (first_def) == GIMPLE_PHI)
3329 first_def_op = gimple_phi_result (first_def);
3330 else
3331 first_def_op = gimple_get_lhs (first_def);
3332 if (operand_equal_p (oprnd, first_def_op, 0)
3333 || (related
3334 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3335 {
3336 /* The number of vector defs is determined by the number of
3337 vector statements in the node from which we get those
3338 statements. */
3339 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3340 vectorized_defs = true;
3341 child_index++;
3342 }
3343 }
3344 else
3345 child_index++;
3346 }
3347
3348 if (!vectorized_defs)
3349 {
3350 if (i == 0)
3351 {
3352 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3353 /* Number of vector stmts was calculated according to LHS in
3354 vect_schedule_slp_instance (), fix it by replacing LHS with
3355 RHS, if necessary. See vect_get_smallest_scalar_type () for
3356 details. */
3357 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3358 &rhs_size_unit);
3359 if (rhs_size_unit != lhs_size_unit)
3360 {
3361 number_of_vects *= rhs_size_unit;
3362 number_of_vects /= lhs_size_unit;
3363 }
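	      /* E.g. (illustrative): lhs_size_unit 1 (char) and
		 rhs_size_unit 4 (int) quadruple NUMBER_OF_VECTS, since each
		 vector holds a quarter as many of the wider RHS elements.  */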
3364 }
3365 }
3366
3367 /* Allocate memory for vectorized defs. */
3368 vec_defs = vNULL;
3369 vec_defs.create (number_of_vects);
3370
3371 /* For reduction defs we call vect_get_constant_vectors (), since we are
3372 looking for initial loop invariant values. */
3373 if (vectorized_defs)
3374 /* The defs are already vectorized. */
3375 vect_get_slp_vect_defs (child, &vec_defs);
3376 else
3377 /* Build vectors from scalar defs. */
3378 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3379 number_of_vects);
3380
3381 vec_oprnds->quick_push (vec_defs);
3382 }
3383 }
3384
3385 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3386 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3387 permute statements for the SLP node NODE of the SLP instance
3388 SLP_NODE_INSTANCE. */
3389
3390 bool
3391 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3392 gimple_stmt_iterator *gsi, int vf,
3393 slp_instance slp_node_instance, bool analyze_only,
3394 unsigned *n_perms)
3395 {
3396 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3397 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3398 tree mask_element_type = NULL_TREE, mask_type;
3399 int nunits, vec_index = 0;
3400 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3401 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3402 int mask_element;
3403 unsigned char *mask;
3404 machine_mode mode;
3405
3406 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3407 return false;
3408
3409 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3410
3411 mode = TYPE_MODE (vectype);
3412
3413 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3414 same size as the vector element being permuted. */
3415 mask_element_type = lang_hooks.types.type_for_mode
3416 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3417 mask_type = get_vectype_for_scalar_type (mask_element_type);
3418 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3419 mask = XALLOCAVEC (unsigned char, nunits);
3420
3421 /* Initialize the vect stmts of NODE to properly insert the generated
3422 stmts later. */
3423 if (! analyze_only)
3424 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3425 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3426 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3427
3428 /* Generate permutation masks for every NODE. Number of masks for each NODE
3429 is equal to GROUP_SIZE.
3430 E.g., we have a group of three nodes with three loads from the same
3431 	location in each node, and the vector size is 4.  I.e., we have an
3432 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3433 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3434 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3435 ...
3436
3437 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3438 The last mask is illegal since we assume two operands for permute
3439 operation, and the mask element values can't be outside that range.
3440 Hence, the last mask must be converted into {2,5,5,5}.
3441 For the first two permutations we need the first and the second input
3442 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3443 we need the second and the third vectors: {b1,c1,a2,b2} and
3444 {c2,a3,b3,c3}. */
3445
3446 int vect_stmts_counter = 0;
3447 int index = 0;
3448 int first_vec_index = -1;
3449 int second_vec_index = -1;
3450 bool noop_p = true;
3451 *n_perms = 0;
3452
3453 for (int j = 0; j < vf; j++)
3454 {
3455 for (int k = 0; k < group_size; k++)
3456 {
3457 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3458 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3459 vec_index = i / nunits;
3460 mask_element = i % nunits;
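	  /* Worked instance of this arithmetic, using the example in the
	     comment above (group size 3, nunits 4): the a's have permutation
	     entry 0, so j == 3 gives i = 0 + 3 * 3 = 9, vec_index = 2 and
	     mask_element = 1; since that vector lands in the second operand
	     slot the element becomes 1 + 4 = 5, matching the corrected last
	     mask {2,5,5,5}.  */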
3461 if (vec_index == first_vec_index
3462 || first_vec_index == -1)
3463 {
3464 first_vec_index = vec_index;
3465 }
3466 else if (vec_index == second_vec_index
3467 || second_vec_index == -1)
3468 {
3469 second_vec_index = vec_index;
3470 mask_element += nunits;
3471 }
3472 else
3473 {
3474 if (dump_enabled_p ())
3475 {
3476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3477 "permutation requires at "
3478 "least three vectors ");
3479 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3480 stmt, 0);
3481 }
3482 return false;
3483 }
3484
3485 gcc_assert (mask_element >= 0
3486 && mask_element < 2 * nunits);
3487 if (mask_element != index)
3488 noop_p = false;
3489 mask[index++] = mask_element;
3490
3491 if (index == nunits)
3492 {
3493 if (! noop_p
3494 && ! can_vec_perm_p (mode, false, mask))
3495 {
3496 if (dump_enabled_p ())
3497 {
3498 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3499 vect_location,
3500 "unsupported vect permute { ");
3501 for (i = 0; i < nunits; ++i)
3502 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3503 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3504 }
3505 return false;
3506 }
3507
3508 if (! noop_p)
3509 ++*n_perms;
3510
3511 if (!analyze_only)
3512 {
3513 tree mask_vec = NULL_TREE;
3514
3515 if (! noop_p)
3516 {
3517 tree *mask_elts = XALLOCAVEC (tree, nunits);
3518 for (int l = 0; l < nunits; ++l)
3519 mask_elts[l] = build_int_cst (mask_element_type,
3520 mask[l]);
3521 mask_vec = build_vector (mask_type, mask_elts);
3522 }
3523
3524 if (second_vec_index == -1)
3525 second_vec_index = first_vec_index;
3526
3527 /* Generate the permute statement if necessary. */
3528 tree first_vec = dr_chain[first_vec_index];
3529 tree second_vec = dr_chain[second_vec_index];
3530 gimple *perm_stmt;
3531 if (! noop_p)
3532 {
3533 tree perm_dest
3534 = vect_create_destination_var (gimple_assign_lhs (stmt),
3535 vectype);
3536 perm_dest = make_ssa_name (perm_dest);
3537 perm_stmt = gimple_build_assign (perm_dest,
3538 VEC_PERM_EXPR,
3539 first_vec, second_vec,
3540 mask_vec);
3541 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3542 }
3543 else
3544 /* If mask was NULL_TREE generate the requested
3545 identity transform. */
3546 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3547
3548 /* Store the vector statement in NODE. */
3549 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3550 }
3551
3552 index = 0;
3553 first_vec_index = -1;
3554 second_vec_index = -1;
3555 noop_p = true;
3556 }
3557 }
3558 }
3559
3560 return true;
3561 }
3562
3563
3564
3565 /* Vectorize SLP instance tree in postorder. */
3566
3567 static bool
3568 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3569 unsigned int vectorization_factor)
3570 {
3571 gimple *stmt;
3572 bool grouped_store, is_store;
3573 gimple_stmt_iterator si;
3574 stmt_vec_info stmt_info;
3575 unsigned int vec_stmts_size, nunits, group_size;
3576 tree vectype;
3577 int i, j;
3578 slp_tree child;
3579
3580 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3581 return false;
3582
3583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3584 vect_schedule_slp_instance (child, instance, vectorization_factor);
3585
3586 /* Push SLP node def-type to stmts. */
3587 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3588 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3589 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3590 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3591
3592 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3593 stmt_info = vinfo_for_stmt (stmt);
3594
3595 /* VECTYPE is the type of the destination. */
3596 vectype = STMT_VINFO_VECTYPE (stmt_info);
3597 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3598 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3599
3600 /* For each SLP instance calculate number of vector stmts to be created
3601 for the scalar stmts in each node of the SLP tree. Number of vector
3602 elements in one vector iteration is the number of scalar elements in
3603 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3604 size.
3605 Unless this is a SLP reduction in which case the number of vector
3606 stmts is equal to the number of vector stmts of the children. */
3607 if (GROUP_FIRST_ELEMENT (stmt_info)
3608 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3609 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3610 else
3611 vec_stmts_size = (vectorization_factor * group_size) / nunits;
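/* A worked example of the formula above (hypothetical numbers): with
   GROUP_SIZE == 4, VF == 4 and NUNITS == 8 the group produces
   4 * 4 == 16 scalar results per iteration, which fit in
   (4 * 4) / 8 == 2 vector stmts.  */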
3612
3613 if (!SLP_TREE_VEC_STMTS (node).exists ())
3614 {
3615 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3616 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3617 }
3618
3619 if (dump_enabled_p ())
3620 {
3621 dump_printf_loc (MSG_NOTE, vect_location,
3622 "------>vectorizing SLP node starting from: ");
3623 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3624 }
3625
3626 /* Vectorized stmts go before the last scalar stmt, which is where
3627 all their operands are ready. */
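/* E.g. (illustrative) for a store group { a[0] = x_1; a[1] = y_2; } the
   vector store is emitted just before the second scalar store, the
   first point at which both x_1 and y_2 are defined.  */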
3628 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3629
3630 /* Mark the first element of the reduction chain as a reduction to
3631 properly transform the node.  In the analysis phase only the last
3632 element of the chain is marked as a reduction. */
3633 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3634 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3635 {
3636 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3637 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3638 }
3639
3640 /* Handle two-operation SLP nodes by vectorizing the group with
3641 both operations and then performing a merge. */
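/* E.g. (illustrative, assuming a group size and vector size of four):
   the group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } is vectorized once
   as v0 = va + vb and once as v1 = va - vb, and the results are merged
   with

     v_1 = VEC_PERM_EXPR <v0, v1, { 0, 5, 2, 7 }>;

   which a target could in principle match to a vector addsub
   instruction.  */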
3642 if (SLP_TREE_TWO_OPERATORS (node))
3643 {
3644 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3645 enum tree_code ocode = ERROR_MARK;
3646 gimple *ostmt;
3647 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3648 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3649 if (gimple_assign_rhs_code (ostmt) != code0)
3650 {
3651 mask[i] = 1;
3652 ocode = gimple_assign_rhs_code (ostmt);
3653 }
3654 else
3655 mask[i] = 0;
3656 if (ocode != ERROR_MARK)
3657 {
3658 vec<gimple *> v0;
3659 vec<gimple *> v1;
3660 unsigned j;
3661 tree tmask = NULL_TREE;
3662 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3663 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3664 SLP_TREE_VEC_STMTS (node).truncate (0);
3665 gimple_assign_set_rhs_code (stmt, ocode);
3666 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3667 gimple_assign_set_rhs_code (stmt, code0);
3668 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3669 SLP_TREE_VEC_STMTS (node).truncate (0);
3670 tree meltype = build_nonstandard_integer_type
3671 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3672 tree mvectype = get_same_sized_vectype (meltype, vectype);
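/* E.g. (illustrative): for a V4SF VECTYPE this builds a 32-bit
   unsigned integer MELTYPE and a corresponding four-element
   unsigned vector MVECTYPE to hold the merge mask below.  */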
3673 unsigned k = 0, l;
3674 for (j = 0; j < v0.length (); ++j)
3675 {
3676 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3677 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3678 {
3679 if (k >= group_size)
3680 k = 0;
3681 melts[l] = build_int_cst
3682 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3683 }
3684 tmask = build_vector (mvectype, melts);
3685
3686 /* ??? Not all targets support a VEC_PERM_EXPR with a
3687 constant mask that would translate to a vec_merge RTX
3688 (with their vec_perm_const_ok).  We can either give up
3689 on vectorizing in that case or let veclower do its job.
3690 Unfortunately neither option is great, and at least for
3691 plus/minus we'd eventually like to match targets'
3692 vector addsub instructions. */
3693 gimple *vstmt;
3694 vstmt = gimple_build_assign (make_ssa_name (vectype),
3695 VEC_PERM_EXPR,
3696 gimple_assign_lhs (v0[j]),
3697 gimple_assign_lhs (v1[j]), tmask);
3698 vect_finish_stmt_generation (stmt, vstmt, &si);
3699 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3700 }
3701 v0.release ();
3702 v1.release ();
3703 return false;
3704 }
3705 }
3706 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3707
3708 /* Restore stmt def-types. */
3709 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3710 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3711 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3712 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3713
3714 return is_store;
3715 }
3716
3717 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3718 For loop vectorization this is done in vectorizable_call, but for SLP
3719 it needs to be deferred until the end of vect_schedule_slp, because
3720 multiple SLP instances may refer to the same scalar stmt. */
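/* E.g. (illustrative): a scalar call "x_1 = foo (a_1);" whose result has
   been fully replaced by vectorized code becomes "x_1 = 0;", after which
   DCE can remove it together with any remaining scalar uses.  */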
3721
3722 static void
3723 vect_remove_slp_scalar_calls (slp_tree node)
3724 {
3725 gimple *stmt, *new_stmt;
3726 gimple_stmt_iterator gsi;
3727 int i;
3728 slp_tree child;
3729 tree lhs;
3730 stmt_vec_info stmt_info;
3731
3732 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3733 return;
3734
3735 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3736 vect_remove_slp_scalar_calls (child);
3737
3738 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3739 {
3740 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3741 continue;
3742 stmt_info = vinfo_for_stmt (stmt);
3743 if (stmt_info == NULL
3744 || is_pattern_stmt_p (stmt_info)
3745 || !PURE_SLP_STMT (stmt_info))
3746 continue;
3747 lhs = gimple_call_lhs (stmt);
3748 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3749 set_vinfo_for_stmt (new_stmt, stmt_info);
3750 set_vinfo_for_stmt (stmt, NULL);
3751 STMT_VINFO_STMT (stmt_info) = new_stmt;
3752 gsi = gsi_for_stmt (stmt);
3753 gsi_replace (&gsi, new_stmt, false);
3754 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3755 }
3756 }
3757
3758 /* Generate vector code for all SLP instances in the loop/basic block. */
3759
3760 bool
3761 vect_schedule_slp (vec_info *vinfo)
3762 {
3763 vec<slp_instance> slp_instances;
3764 slp_instance instance;
3765 unsigned int i, vf;
3766 bool is_store = false;
3767
3768 slp_instances = vinfo->slp_instances;
3769 if (is_a <loop_vec_info> (vinfo))
3770 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3771 else
3772 vf = 1;
3773
3774 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3775 {
3776 /* Schedule the tree of INSTANCE. */
3777 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3778 instance, vf);
3779 if (dump_enabled_p ())
3780 dump_printf_loc (MSG_NOTE, vect_location,
3781 "vectorizing stmts using SLP.\n");
3782 }
3783
3784 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3785 {
3786 slp_tree root = SLP_INSTANCE_TREE (instance);
3787 gimple *store;
3788 unsigned int j;
3789 gimple_stmt_iterator gsi;
3790
3791 /* Remove scalar call stmts.  Do not do this for basic-block
3792 vectorization as not all uses may be vectorized.
3793 ??? Why should this be necessary?  DCE should be able to
3794 remove the stmts themselves.
3795 ??? For BB vectorization we could just as well remove scalar
3796 stmts starting from the SLP tree root if they have no
3797 uses. */
3798 if (is_a <loop_vec_info> (vinfo))
3799 vect_remove_slp_scalar_calls (root);
3800
3801 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3802 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3803 {
3804 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3805 break;
3806
3807 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3808 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3809 /* Free the attached stmt_vec_info and remove the stmt. */
3810 gsi = gsi_for_stmt (store);
3811 unlink_stmt_vdef (store);
3812 gsi_remove (&gsi, true);
3813 release_defs (store);
3814 free_stmt_vec_info (store);
3815 }
3816 }
3817
3818 return is_store;
3819 }
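/* A minimal usage sketch (hypothetical caller, not part of this file):
   once analysis has recorded instances in VINFO->slp_instances, the
   transform phase invokes the scheduler roughly as

     vec_info *vinfo = ...;
     if (!vinfo->slp_instances.is_empty ())
       vect_schedule_slp (vinfo);

   after which the now-dead scalar group stores of each instance have
   been removed.  */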