/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
49 vect_free_slp_tree (slp_tree node
)
54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
55 vect_free_slp_tree (child
);
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
59 /* After transform some stmts are removed and thus their vinfo is gone. */
60 if (vinfo_for_stmt (stmt
))
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
66 SLP_TREE_CHILDREN (node
).release ();
67 SLP_TREE_SCALAR_STMTS (node
).release ();
68 SLP_TREE_VEC_STMTS (node
).release ();
69 SLP_TREE_LOAD_PERMUTATION (node
).release ();
75 /* Free the memory allocated for the SLP instance. */
78 vect_free_slp_instance (slp_instance instance
)
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
81 SLP_INSTANCE_LOADS (instance
).release ();
86 /* Create an SLP node for SCALAR_STMTS. */
89 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
92 gimple
*stmt
= scalar_stmts
[0];
95 if (is_gimple_call (stmt
))
96 nops
= gimple_call_num_args (stmt
);
97 else if (is_gimple_assign (stmt
))
99 nops
= gimple_num_ops (stmt
) - 1;
100 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
103 else if (gimple_code (stmt
) == GIMPLE_PHI
)
108 node
= XNEW (struct _slp_tree
);
109 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
110 SLP_TREE_VEC_STMTS (node
).create (0);
111 SLP_TREE_CHILDREN (node
).create (nops
);
112 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
113 SLP_TREE_TWO_OPERATORS (node
) = false;
114 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
117 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
124 /* This structure is used in creation of an SLP tree. Each instance
125 corresponds to the same operand in a group of scalar stmts in an SLP
127 typedef struct _slp_oprnd_info
129 /* Def-stmts for the operands. */
130 vec
<gimple
*> def_stmts
;
131 /* Information about the first statement, its vector def-type, type, the
132 operand itself in case it's constant, and an indication if it's a pattern
135 enum vect_def_type first_dt
;
141 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
143 static vec
<slp_oprnd_info
>
144 vect_create_oprnd_info (int nops
, int group_size
)
147 slp_oprnd_info oprnd_info
;
148 vec
<slp_oprnd_info
> oprnds_info
;
150 oprnds_info
.create (nops
);
151 for (i
= 0; i
< nops
; i
++)
153 oprnd_info
= XNEW (struct _slp_oprnd_info
);
154 oprnd_info
->def_stmts
.create (group_size
);
155 oprnd_info
->first_dt
= vect_uninitialized_def
;
156 oprnd_info
->first_op_type
= NULL_TREE
;
157 oprnd_info
->first_pattern
= false;
158 oprnd_info
->second_pattern
= false;
159 oprnds_info
.quick_push (oprnd_info
);
166 /* Free operands info. */
169 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
172 slp_oprnd_info oprnd_info
;
174 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
176 oprnd_info
->def_stmts
.release ();
177 XDELETE (oprnd_info
);
180 oprnds_info
.release ();
184 /* Find the place of the data-ref in STMT in the interleaving chain that starts
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
188 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
190 gimple
*next_stmt
= first_stmt
;
193 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
198 if (next_stmt
== stmt
)
200 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
202 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
210 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
211 they are of a valid type and that they match the defs of the first stmt of
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
213 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
214 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
215 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
216 and code of comparison needs to be inverted. If there is any operand swap
217 in this function, *SWAP is set to non-zero value.
218 If there was a fatal error return -1; if the error could be corrected by
219 swapping operands of father node of this one, return 1; if everything is
223 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
224 gimple
*stmt
, unsigned stmt_num
,
225 vec
<slp_oprnd_info
> *oprnds_info
)
228 unsigned int i
, number_of_oprnds
;
230 enum vect_def_type dt
= vect_uninitialized_def
;
231 bool pattern
= false;
232 slp_oprnd_info oprnd_info
;
233 int first_op_idx
= 1;
234 bool commutative
= false;
235 bool first_op_cond
= false;
236 bool first
= stmt_num
== 0;
237 bool second
= stmt_num
== 1;
239 if (is_gimple_call (stmt
))
241 number_of_oprnds
= gimple_call_num_args (stmt
);
244 else if (is_gimple_assign (stmt
))
246 enum tree_code code
= gimple_assign_rhs_code (stmt
);
247 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
248 /* Swap can only be done for cond_expr if asked to, otherwise we
249 could result in different comparison code to the first stmt. */
250 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
251 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
253 first_op_cond
= true;
257 commutative
= commutative_tree_code (code
);
262 bool swapped
= (*swap
!= 0);
263 gcc_assert (!swapped
|| first_op_cond
);
264 for (i
= 0; i
< number_of_oprnds
; i
++)
269 /* Map indicating how operands of cond_expr should be swapped. */
270 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
271 int *map
= maps
[*swap
];
274 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
276 oprnd
= gimple_op (stmt
, map
[i
]);
279 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
281 oprnd_info
= (*oprnds_info
)[i
];
283 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
285 if (dump_enabled_p ())
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
288 "Build SLP failed: can't analyze def for ");
289 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
290 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
296 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
297 from the pattern. Check that all the stmts of the node are in the
299 if (def_stmt
&& gimple_bb (def_stmt
)
300 && vect_stmt_in_region_p (vinfo
, def_stmt
)
301 && vinfo_for_stmt (def_stmt
)
302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
307 if (!first
&& !oprnd_info
->first_pattern
308 /* Allow different pattern state for the defs of the
309 first stmt in reduction chains. */
310 && (oprnd_info
->first_dt
!= vect_reduction_def
311 || (!second
&& !oprnd_info
->second_pattern
)))
321 if (dump_enabled_p ())
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
324 "Build SLP failed: some of the stmts"
325 " are in a pattern, and others are not ");
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
327 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
333 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
334 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
336 if (dt
== vect_unknown_def_type
)
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
340 "Unsupported pattern.\n");
344 switch (gimple_code (def_stmt
))
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
353 "unsupported defining stmt:\n");
359 oprnd_info
->second_pattern
= pattern
;
363 oprnd_info
->first_dt
= dt
;
364 oprnd_info
->first_pattern
= pattern
;
365 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
369 /* Not first stmt of the group, check that the def-stmt/s match
370 the def-stmt/s of the first stmt. Allow different definition
371 types for reduction chains: the first stmt must be a
372 vect_reduction_def (a phi node), and the rest
373 vect_internal_def. */
374 if (((oprnd_info
->first_dt
!= dt
375 && !(oprnd_info
->first_dt
== vect_reduction_def
376 && dt
== vect_internal_def
)
377 && !((oprnd_info
->first_dt
== vect_external_def
378 || oprnd_info
->first_dt
== vect_constant_def
)
379 && (dt
== vect_external_def
380 || dt
== vect_constant_def
)))
381 || !types_compatible_p (oprnd_info
->first_op_type
,
384 /* Try swapping operands if we got a mismatch. */
393 if (dump_enabled_p ())
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
395 "Build SLP failed: different types\n");
401 /* Check the types of the definitions. */
404 case vect_constant_def
:
405 case vect_external_def
:
408 case vect_reduction_def
:
409 case vect_induction_def
:
410 case vect_internal_def
:
411 oprnd_info
->def_stmts
.quick_push (def_stmt
);
415 /* FORNOW: Not supported. */
416 if (dump_enabled_p ())
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
419 "Build SLP failed: illegal type of def ");
420 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
421 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
431 /* If there are already uses of this stmt in a SLP instance then
432 we've committed to the operand order and can't swap it. */
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
435 if (dump_enabled_p ())
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
438 "Build SLP failed: cannot swap operands of "
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
447 tree cond
= gimple_assign_rhs1 (stmt
);
448 enum tree_code code
= TREE_CODE (cond
);
453 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
454 &TREE_OPERAND (cond
, 1));
455 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
460 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
461 gimple_assign_rhs3_ptr (stmt
));
462 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
463 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
464 gcc_assert (code
!= ERROR_MARK
);
465 TREE_SET_CODE (cond
, code
);
469 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
470 gimple_assign_rhs2_ptr (stmt
));
471 if (dump_enabled_p ())
473 dump_printf_loc (MSG_NOTE
, vect_location
,
474 "swapped operands to match def types in ");
475 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
484 /* Verify if the scalar stmts STMTS are isomorphic, require data
485 permutation or are of unsupported types of operation. Return
486 true if they are, otherwise return false and indicate in *MATCHES
487 which stmts are not isomorphic to the first one. If MATCHES[0]
488 is false then this indicates the comparison could not be
489 carried out or the stmts will never be vectorized by SLP.
491 Note COND_EXPR is possibly ismorphic to another one after swapping its
492 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
493 the first stmt by swapping the two operands of comparison; set SWAP[i]
494 to 2 if stmt I is isormorphic to the first stmt by inverting the code
495 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
496 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
499 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
500 vec
<gimple
*> stmts
, unsigned int group_size
,
501 unsigned nops
, unsigned int *max_nunits
,
502 bool *matches
, bool *two_operators
)
505 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
506 enum tree_code first_stmt_code
= ERROR_MARK
;
507 enum tree_code alt_stmt_code
= ERROR_MARK
;
508 enum tree_code rhs_code
= ERROR_MARK
;
509 enum tree_code first_cond_code
= ERROR_MARK
;
511 bool need_same_oprnds
= false;
512 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
515 machine_mode optab_op2_mode
;
516 machine_mode vec_mode
;
518 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
520 /* For every stmt in NODE find its def stmt/s. */
521 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
526 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
529 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
532 /* Fail to vectorize statements marked as unvectorizable. */
533 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
535 if (dump_enabled_p ())
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
538 "Build SLP failed: unvectorizable statement ");
539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
541 /* Fatal mismatch. */
546 lhs
= gimple_get_lhs (stmt
);
547 if (lhs
== NULL_TREE
)
549 if (dump_enabled_p ())
551 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
552 "Build SLP failed: not GIMPLE_ASSIGN nor "
554 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
556 /* Fatal mismatch. */
561 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
562 vectype
= get_vectype_for_scalar_type (scalar_type
);
565 if (dump_enabled_p ())
567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
568 "Build SLP failed: unsupported data-type ");
569 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
571 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
573 /* Fatal mismatch. */
578 /* If populating the vector type requires unrolling then fail
579 before adjusting *max_nunits for basic-block vectorization. */
580 if (is_a
<bb_vec_info
> (vinfo
)
581 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
584 "Build SLP failed: unrolling required "
585 "in basic block SLP\n");
586 /* Fatal mismatch. */
591 /* In case of multiple types we need to detect the smallest type. */
592 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
593 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
595 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
597 rhs_code
= CALL_EXPR
;
598 if (gimple_call_internal_p (call_stmt
)
599 || gimple_call_tail_p (call_stmt
)
600 || gimple_call_noreturn_p (call_stmt
)
601 || !gimple_call_nothrow_p (call_stmt
)
602 || gimple_call_chain (call_stmt
))
604 if (dump_enabled_p ())
606 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
607 "Build SLP failed: unsupported call type ");
608 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
611 /* Fatal mismatch. */
617 rhs_code
= gimple_assign_rhs_code (stmt
);
619 /* Check the operation. */
622 first_stmt_code
= rhs_code
;
624 /* Shift arguments should be equal in all the packed stmts for a
625 vector shift with scalar shift operand. */
626 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
627 || rhs_code
== LROTATE_EXPR
628 || rhs_code
== RROTATE_EXPR
)
630 vec_mode
= TYPE_MODE (vectype
);
632 /* First see if we have a vector/vector shift. */
633 optab
= optab_for_tree_code (rhs_code
, vectype
,
637 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
639 /* No vector/vector shift, try for a vector/scalar shift. */
640 optab
= optab_for_tree_code (rhs_code
, vectype
,
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
647 "Build SLP failed: no optab.\n");
648 /* Fatal mismatch. */
652 icode
= (int) optab_handler (optab
, vec_mode
);
653 if (icode
== CODE_FOR_nothing
)
655 if (dump_enabled_p ())
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
658 "op not supported by target.\n");
659 /* Fatal mismatch. */
663 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
664 if (!VECTOR_MODE_P (optab_op2_mode
))
666 need_same_oprnds
= true;
667 first_op1
= gimple_assign_rhs2 (stmt
);
671 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
673 need_same_oprnds
= true;
674 first_op1
= gimple_assign_rhs2 (stmt
);
679 if (first_stmt_code
!= rhs_code
680 && alt_stmt_code
== ERROR_MARK
)
681 alt_stmt_code
= rhs_code
;
682 if (first_stmt_code
!= rhs_code
683 && (first_stmt_code
!= IMAGPART_EXPR
684 || rhs_code
!= REALPART_EXPR
)
685 && (first_stmt_code
!= REALPART_EXPR
686 || rhs_code
!= IMAGPART_EXPR
)
687 /* Handle mismatches in plus/minus by computing both
688 and merging the results. */
689 && !((first_stmt_code
== PLUS_EXPR
690 || first_stmt_code
== MINUS_EXPR
)
691 && (alt_stmt_code
== PLUS_EXPR
692 || alt_stmt_code
== MINUS_EXPR
)
693 && rhs_code
== alt_stmt_code
)
694 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
695 && (first_stmt_code
== ARRAY_REF
696 || first_stmt_code
== BIT_FIELD_REF
697 || first_stmt_code
== INDIRECT_REF
698 || first_stmt_code
== COMPONENT_REF
699 || first_stmt_code
== MEM_REF
)))
701 if (dump_enabled_p ())
703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
704 "Build SLP failed: different operation "
706 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
709 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
717 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
719 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
722 "Build SLP failed: different shift "
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
730 if (rhs_code
== CALL_EXPR
)
732 gimple
*first_stmt
= stmts
[0];
733 if (gimple_call_num_args (stmt
) != nops
734 || !operand_equal_p (gimple_call_fn (first_stmt
),
735 gimple_call_fn (stmt
), 0)
736 || gimple_call_fntype (first_stmt
)
737 != gimple_call_fntype (stmt
))
739 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: different calls in ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
752 /* Grouped store or load. */
753 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
755 if (REFERENCE_CLASS_P (lhs
))
763 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
766 /* Check that there are no loads from different interleaving
767 chains in the same node. */
768 if (prev_first_load
!= first_load
)
770 if (dump_enabled_p ())
772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
774 "Build SLP failed: different "
775 "interleaving chains in one node ");
776 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
784 prev_first_load
= first_load
;
786 } /* Grouped access. */
789 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
791 /* Not grouped load. */
792 if (dump_enabled_p ())
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
795 "Build SLP failed: not grouped load ");
796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
799 /* FORNOW: Not grouped loads are not supported. */
800 /* Fatal mismatch. */
805 /* Not memory operation. */
806 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
807 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
808 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
809 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
810 && rhs_code
!= CALL_EXPR
)
812 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
815 "Build SLP failed: operation");
816 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
819 /* Fatal mismatch. */
824 if (rhs_code
== COND_EXPR
)
826 tree cond_expr
= gimple_assign_rhs1 (stmt
);
827 enum tree_code cond_code
= TREE_CODE (cond_expr
);
828 enum tree_code swap_code
= ERROR_MARK
;
829 enum tree_code invert_code
= ERROR_MARK
;
832 first_cond_code
= TREE_CODE (cond_expr
);
833 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
835 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
836 swap_code
= swap_tree_comparison (cond_code
);
837 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
840 if (first_cond_code
== cond_code
)
842 /* Isomorphic can be achieved by swapping. */
843 else if (first_cond_code
== swap_code
)
845 /* Isomorphic can be achieved by inverting. */
846 else if (first_cond_code
== invert_code
)
850 if (dump_enabled_p ())
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
853 "Build SLP failed: different"
855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
867 for (i
= 0; i
< group_size
; ++i
)
871 /* If we allowed a two-operation SLP node verify the target can cope
872 with the permute we are going to use. */
873 if (alt_stmt_code
!= ERROR_MARK
874 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
877 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype
));
878 for (i
= 0; i
< TYPE_VECTOR_SUBPARTS (vectype
); ++i
)
881 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
882 sel
[i
] += TYPE_VECTOR_SUBPARTS (vectype
);
884 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
886 for (i
= 0; i
< group_size
; ++i
)
887 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
890 if (dump_enabled_p ())
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
893 "Build SLP failed: different operation "
895 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
905 *two_operators
= true;
911 /* Recursively build an SLP tree starting from NODE.
912 Fail (and return a value not equal to zero) if def-stmts are not
913 isomorphic, require data permutation or are of unsupported types of
914 operation. Otherwise, return 0.
915 The value returned is the depth in the SLP tree where a mismatch
919 vect_build_slp_tree (vec_info
*vinfo
,
920 vec
<gimple
*> stmts
, unsigned int group_size
,
921 unsigned int *max_nunits
,
922 vec
<slp_tree
> *loads
,
923 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
924 unsigned max_tree_size
)
926 unsigned nops
, i
, this_tree_size
= 0, this_max_nunits
= *max_nunits
;
933 if (is_gimple_call (stmt
))
934 nops
= gimple_call_num_args (stmt
);
935 else if (is_gimple_assign (stmt
))
937 nops
= gimple_num_ops (stmt
) - 1;
938 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
941 else if (gimple_code (stmt
) == GIMPLE_PHI
)
946 /* If the SLP node is a PHI (induction or reduction), terminate
948 if (gimple_code (stmt
) == GIMPLE_PHI
)
950 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
951 /* Induction from different IVs is not supported. */
952 if (def_type
== vect_induction_def
)
954 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
955 if (stmt
!= stmts
[0])
960 /* Else def types have to match. */
961 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
963 /* But for reduction chains only check on the first stmt. */
964 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
965 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
967 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
971 node
= vect_create_new_slp_node (stmts
);
976 bool two_operators
= false;
977 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
978 if (!vect_build_slp_tree_1 (vinfo
, swap
,
979 stmts
, group_size
, nops
,
980 &this_max_nunits
, matches
, &two_operators
))
983 /* If the SLP node is a load, terminate the recursion. */
984 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
985 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
987 *max_nunits
= this_max_nunits
;
988 node
= vect_create_new_slp_node (stmts
);
989 loads
->safe_push (node
);
993 /* Get at the operands, verifying they are compatible. */
994 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
995 slp_oprnd_info oprnd_info
;
996 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
998 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
999 stmt
, i
, &oprnds_info
);
1001 matches
[(res
== -1) ? 0 : i
] = false;
1005 for (i
= 0; i
< group_size
; ++i
)
1008 vect_free_oprnd_info (oprnds_info
);
1012 auto_vec
<slp_tree
, 4> children
;
1013 auto_vec
<slp_tree
> this_loads
;
1017 /* Create SLP_TREE nodes for the definition node/s. */
1018 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1021 unsigned old_nloads
= this_loads
.length ();
1022 unsigned old_tree_size
= this_tree_size
;
1025 if (oprnd_info
->first_dt
!= vect_internal_def
1026 && oprnd_info
->first_dt
!= vect_reduction_def
1027 && oprnd_info
->first_dt
!= vect_induction_def
)
1030 if (++this_tree_size
> max_tree_size
)
1032 FOR_EACH_VEC_ELT (children
, j
, child
)
1033 vect_free_slp_tree (child
);
1034 vect_free_oprnd_info (oprnds_info
);
1038 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1039 group_size
, &this_max_nunits
,
1040 &this_loads
, matches
, npermutes
,
1042 max_tree_size
)) != NULL
)
1044 /* If we have all children of child built up from scalars then just
1045 throw that away and build it up this node from scalars. */
1046 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1047 /* ??? Rejecting patterns this way doesn't work. We'd have to
1048 do extra work to cancel the pattern so the uses see the
1050 && !is_pattern_stmt_p
1051 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1053 slp_tree grandchild
;
1055 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1056 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1061 this_loads
.truncate (old_nloads
);
1062 this_tree_size
= old_tree_size
;
1063 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1064 vect_free_slp_tree (grandchild
);
1065 SLP_TREE_CHILDREN (child
).truncate (0);
1067 dump_printf_loc (MSG_NOTE
, vect_location
,
1068 "Building parent vector operands from "
1069 "scalars instead\n");
1070 oprnd_info
->def_stmts
= vNULL
;
1071 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1072 children
.safe_push (child
);
1077 oprnd_info
->def_stmts
= vNULL
;
1078 children
.safe_push (child
);
1082 /* If the SLP build failed fatally and we analyze a basic-block
1083 simply treat nodes we fail to build as externally defined
1084 (and thus build vectors from the scalar defs).
1085 The cost model will reject outright expensive cases.
1086 ??? This doesn't treat cases where permutation ultimatively
1087 fails (or we don't try permutation below). Ideally we'd
1088 even compute a permutation that will end up with the maximum
1090 if (is_a
<bb_vec_info
> (vinfo
)
1092 /* ??? Rejecting patterns this way doesn't work. We'd have to
1093 do extra work to cancel the pattern so the uses see the
1095 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1097 dump_printf_loc (MSG_NOTE
, vect_location
,
1098 "Building vector operands from scalars\n");
1099 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1100 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1101 children
.safe_push (child
);
1102 oprnd_info
->def_stmts
= vNULL
;
1106 /* If the SLP build for operand zero failed and operand zero
1107 and one can be commutated try that for the scalar stmts
1108 that failed the match. */
1110 /* A first scalar stmt mismatch signals a fatal mismatch. */
1112 /* ??? For COND_EXPRs we can swap the comparison operands
1113 as well as the arms under some constraints. */
1115 && oprnds_info
[1]->first_dt
== vect_internal_def
1116 && is_gimple_assign (stmt
)
1117 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1119 /* Do so only if the number of not successful permutes was nor more
1120 than a cut-ff as re-trying the recursive match on
1121 possibly each level of the tree would expose exponential
1125 /* Verify if we can safely swap or if we committed to a specific
1126 operand order already. */
1127 for (j
= 0; j
< group_size
; ++j
)
1130 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1132 if (dump_enabled_p ())
1134 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1135 "Build SLP failed: cannot swap operands "
1137 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1143 /* Swap mismatched definition stmts. */
1144 dump_printf_loc (MSG_NOTE
, vect_location
,
1145 "Re-trying with swapped operands of stmts ");
1146 for (j
= 0; j
< group_size
; ++j
)
1149 std::swap (oprnds_info
[0]->def_stmts
[j
],
1150 oprnds_info
[1]->def_stmts
[j
]);
1151 dump_printf (MSG_NOTE
, "%d ", j
);
1153 dump_printf (MSG_NOTE
, "\n");
1154 /* And try again with scratch 'matches' ... */
1155 bool *tem
= XALLOCAVEC (bool, group_size
);
1156 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1157 group_size
, &this_max_nunits
,
1158 &this_loads
, tem
, npermutes
,
1160 max_tree_size
)) != NULL
)
1162 /* ... so if successful we can apply the operand swapping
1163 to the GIMPLE IL. This is necessary because for example
1164 vect_get_slp_defs uses operand indexes and thus expects
1165 canonical operand order. This is also necessary even
1166 if we end up building the operand from scalars as
1167 we'll continue to process swapped operand two. */
1168 for (j
= 0; j
< group_size
; ++j
)
1170 gimple
*stmt
= stmts
[j
];
1171 gimple_set_plf (stmt
, GF_PLF_1
, false);
1173 for (j
= 0; j
< group_size
; ++j
)
1175 gimple
*stmt
= stmts
[j
];
1178 /* Avoid swapping operands twice. */
1179 if (gimple_plf (stmt
, GF_PLF_1
))
1181 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1182 gimple_assign_rhs2_ptr (stmt
));
1183 gimple_set_plf (stmt
, GF_PLF_1
, true);
1186 /* Verify we swap all duplicates or none. */
1188 for (j
= 0; j
< group_size
; ++j
)
1190 gimple
*stmt
= stmts
[j
];
1191 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1194 /* If we have all children of child built up from scalars then
1195 just throw that away and build it up this node from scalars. */
1196 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1197 /* ??? Rejecting patterns this way doesn't work. We'd have
1198 to do extra work to cancel the pattern so the uses see the
1200 && !is_pattern_stmt_p
1201 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1204 slp_tree grandchild
;
1206 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1207 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1212 this_loads
.truncate (old_nloads
);
1213 this_tree_size
= old_tree_size
;
1214 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1215 vect_free_slp_tree (grandchild
);
1216 SLP_TREE_CHILDREN (child
).truncate (0);
1218 dump_printf_loc (MSG_NOTE
, vect_location
,
1219 "Building parent vector operands from "
1220 "scalars instead\n");
1221 oprnd_info
->def_stmts
= vNULL
;
1222 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1223 children
.safe_push (child
);
1228 oprnd_info
->def_stmts
= vNULL
;
1229 children
.safe_push (child
);
1237 gcc_assert (child
== NULL
);
1238 FOR_EACH_VEC_ELT (children
, j
, child
)
1239 vect_free_slp_tree (child
);
1240 vect_free_oprnd_info (oprnds_info
);
1244 vect_free_oprnd_info (oprnds_info
);
1247 *tree_size
+= this_tree_size
;
1248 *max_nunits
= this_max_nunits
;
1249 loads
->safe_splice (this_loads
);
1251 node
= vect_create_new_slp_node (stmts
);
1252 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1253 SLP_TREE_CHILDREN (node
).splice (children
);
1257 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1260 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1266 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1267 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1268 ? " (external)" : "");
1269 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1271 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1272 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1274 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1275 vect_print_slp_tree (dump_kind
, loc
, child
);
1279 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1280 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1281 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1282 stmts in NODE are to be marked. */
1285 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1291 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1294 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1295 if (j
< 0 || i
== j
)
1296 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1298 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1299 vect_mark_slp_stmts (child
, mark
, j
);
1303 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1306 vect_mark_slp_stmts_relevant (slp_tree node
)
1310 stmt_vec_info stmt_info
;
1313 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1316 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1318 stmt_info
= vinfo_for_stmt (stmt
);
1319 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1320 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1321 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1324 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1325 vect_mark_slp_stmts_relevant (child
);
1329 /* Rearrange the statements of NODE according to PERMUTATION. */
1332 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1333 vec
<unsigned> permutation
)
1336 vec
<gimple
*> tmp_stmts
;
1340 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1341 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1343 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1344 tmp_stmts
.create (group_size
);
1345 tmp_stmts
.quick_grow_cleared (group_size
);
1347 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1348 tmp_stmts
[permutation
[i
]] = stmt
;
1350 SLP_TREE_SCALAR_STMTS (node
).release ();
1351 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1355 /* Attempt to reorder stmts in a reduction chain so that we don't
1356 require any load permutation. Return true if that was possible,
1357 otherwise return false. */
1360 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1362 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1365 slp_tree node
, load
;
1367 /* Compare all the permutation sequences to the first one. We know
1368 that at least one load is permuted. */
1369 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1370 if (!node
->load_permutation
.exists ())
1372 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1374 if (!load
->load_permutation
.exists ())
1376 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1377 if (lidx
!= node
->load_permutation
[j
])
1381 /* Check that the loads in the first sequence are different and there
1382 are no gaps between them. */
1383 auto_sbitmap
load_index (group_size
);
1384 bitmap_clear (load_index
);
1385 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1387 if (lidx
>= group_size
)
1389 if (bitmap_bit_p (load_index
, lidx
))
1392 bitmap_set_bit (load_index
, lidx
);
1394 for (i
= 0; i
< group_size
; i
++)
1395 if (!bitmap_bit_p (load_index
, i
))
1398 /* This permutation is valid for reduction. Since the order of the
1399 statements in the nodes is not important unless they are memory
1400 accesses, we can rearrange the statements in all the nodes
1401 according to the order of the loads. */
1402 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1403 node
->load_permutation
);
1405 /* We are done, no actual permutations need to be generated. */
1406 unsigned int unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1407 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1409 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1410 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1411 /* But we have to keep those permutations that are required because
1412 of handling of gaps. */
1413 if (unrolling_factor
== 1
1414 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1415 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1416 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1418 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1419 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1425 /* Check if the required load permutations in the SLP instance
1426 SLP_INSTN are supported. */
1429 vect_supported_load_permutation_p (slp_instance slp_instn
)
1431 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1432 unsigned int i
, j
, k
, next
;
1434 gimple
*stmt
, *load
, *next_load
;
1436 if (dump_enabled_p ())
1438 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1439 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1440 if (node
->load_permutation
.exists ())
1441 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1442 dump_printf (MSG_NOTE
, "%d ", next
);
1444 for (k
= 0; k
< group_size
; ++k
)
1445 dump_printf (MSG_NOTE
, "%d ", k
);
1446 dump_printf (MSG_NOTE
, "\n");
1449 /* In case of reduction every load permutation is allowed, since the order
1450 of the reduction statements is not important (as opposed to the case of
1451 grouped stores). The only condition we need to check is that all the
1452 load nodes are of the same size and have the same permutation (and then
1453 rearrange all the nodes of the SLP instance according to this
1456 /* Check that all the load nodes are of the same size. */
1457 /* ??? Can't we assert this? */
1458 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1459 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1462 node
= SLP_INSTANCE_TREE (slp_instn
);
1463 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1465 /* Reduction (there are no data-refs in the root).
1466 In reduction chain the order of the loads is not important. */
1467 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1468 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1469 vect_attempt_slp_rearrange_stmts (slp_instn
);
1471 /* In basic block vectorization we allow any subchain of an interleaving
1473 FORNOW: not supported in loop SLP because of realignment complications. */
1474 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1476 /* Check whether the loads in an instance form a subchain and thus
1477 no permutation is necessary. */
1478 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1480 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1482 bool subchain_p
= true;
1484 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1487 && (next_load
!= load
1488 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1493 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1496 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1499 stmt_vec_info group_info
1500 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1501 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1503 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1504 unsigned k
, maxk
= 0;
1505 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1508 /* In BB vectorization we may not actually use a loaded vector
1509 accessing elements in excess of GROUP_SIZE. */
1510 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1513 "BB vectorization with gaps at the end of "
1514 "a load is not supported\n");
1518 /* Verify the permutation can be generated. */
1521 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1522 1, slp_instn
, true, &n_perms
))
1524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1526 "unsupported load permutation\n");
1534 /* For loop vectorization verify we can generate the permutation. */
1536 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1537 if (node
->load_permutation
.exists ()
1538 && !vect_transform_slp_perm_load
1540 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true,
1548 /* Find the last store in SLP INSTANCE. */
1551 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1553 gimple
*last
= NULL
, *stmt
;
1555 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1557 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1558 if (is_pattern_stmt_p (stmt_vinfo
))
1559 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1561 last
= get_later_stmt (stmt
, last
);
1567 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1570 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1571 stmt_vector_for_cost
*prologue_cost_vec
,
1572 stmt_vector_for_cost
*body_cost_vec
,
1573 unsigned ncopies_for_cost
)
1578 stmt_vec_info stmt_info
;
1581 /* Recurse down the SLP tree. */
1582 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1583 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1584 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1585 body_cost_vec
, ncopies_for_cost
);
1587 /* Look at the first scalar stmt to determine the cost. */
1588 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1589 stmt_info
= vinfo_for_stmt (stmt
);
1590 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1592 vect_memory_access_type memory_access_type
1593 = (STMT_VINFO_STRIDED_P (stmt_info
)
1596 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1597 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1598 memory_access_type
, vect_uninitialized_def
,
1599 node
, prologue_cost_vec
, body_cost_vec
);
1602 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1603 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1605 /* If the load is permuted then the alignment is determined by
1606 the first group element not by the first scalar stmt DR. */
1607 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1608 stmt_info
= vinfo_for_stmt (stmt
);
1609 /* Record the cost for the permutation. */
1611 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1612 ncopies_for_cost
, instance
, true,
1614 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1615 stmt_info
, 0, vect_body
);
1617 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1618 /* And adjust the number of loads performed. This handles
1619 redundancies as well as loads that are later dead. */
1620 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1621 bitmap_clear (perm
);
1622 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1623 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1624 ncopies_for_cost
= 0;
1625 bool load_seen
= false;
1626 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1628 if (i
% nunits
== 0)
1634 if (bitmap_bit_p (perm
, i
))
1639 gcc_assert (ncopies_for_cost
1640 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1641 + nunits
- 1) / nunits
);
1642 ncopies_for_cost
*= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1644 /* Record the cost for the vector loads. */
1645 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1646 memory_access_type
, node
, prologue_cost_vec
,
1651 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1653 /* ncopies_for_cost is the number of IVs we generate. */
1654 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1655 stmt_info
, 0, vect_body
);
1657 /* Prologue cost for the initial values and step vector. */
1658 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1660 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1662 ? vector_load
: vec_construct
,
1663 stmt_info
, 0, vect_prologue
);
1664 record_stmt_cost (prologue_cost_vec
, 1,
1666 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1667 ? vector_load
: vec_construct
,
1668 stmt_info
, 0, vect_prologue
);
1670 /* ??? No easy way to get at the actual number of vector stmts
1671 to be generated and thus the derived IVs. */
1675 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1676 stmt_info
, 0, vect_body
);
1677 if (SLP_TREE_TWO_OPERATORS (node
))
1679 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1680 stmt_info
, 0, vect_body
);
1681 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1682 stmt_info
, 0, vect_body
);
1686 /* Push SLP node def-type to stmts. */
1687 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1688 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1689 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1690 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1692 /* Scan operands and account for prologue cost of constants/externals.
1693 ??? This over-estimates cost for multiple uses and should be
1695 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1696 lhs
= gimple_get_lhs (stmt
);
1697 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1699 tree op
= gimple_op (stmt
, i
);
1701 enum vect_def_type dt
;
1702 if (!op
|| op
== lhs
)
1704 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1706 /* Without looking at the actual initializer a vector of
1707 constants can be implemented as load from the constant pool.
1708 ??? We need to pass down stmt_info for a vector type
1709 even if it points to the wrong stmt. */
1710 if (dt
== vect_constant_def
)
1711 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1712 stmt_info
, 0, vect_prologue
);
1713 else if (dt
== vect_external_def
)
1714 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1715 stmt_info
, 0, vect_prologue
);
1719 /* Restore stmt def-types. */
1720 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1721 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1722 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1723 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1726 /* Compute the cost for the SLP instance INSTANCE. */
1729 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1731 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1732 unsigned ncopies_for_cost
;
1733 stmt_info_for_cost
*si
;
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_NOTE
, vect_location
,
1738 "=== vect_analyze_slp_cost ===\n");
1740 /* Calculate the number of vector stmts to create based on the unrolling
1741 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1742 GROUP_SIZE / NUNITS otherwise. */
1743 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1744 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1745 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1746 /* Adjust the group_size by the vectorization factor which is always one
1747 for basic-block vectorization. */
1748 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1749 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1750 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1751 /* For reductions look at a reduction operand in case the reduction
1752 operation is widening like DOT_PROD or SAD. */
1753 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1755 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1756 switch (gimple_assign_rhs_code (stmt
))
1760 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1761 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1766 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1768 prologue_cost_vec
.create (10);
1769 body_cost_vec
.create (10);
1770 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1771 &prologue_cost_vec
, &body_cost_vec
,
1774 /* Record the prologue costs, which were delayed until we were
1775 sure that SLP was successful. */
1776 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1778 struct _stmt_vec_info
*stmt_info
1779 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1780 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1781 si
->misalign
, vect_prologue
);
1784 /* Record the instance's instructions in the target cost model. */
1785 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1787 struct _stmt_vec_info
*stmt_info
1788 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1789 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1790 si
->misalign
, vect_body
);
1793 prologue_cost_vec
.release ();
1794 body_cost_vec
.release ();
1797 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1798 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1799 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1800 containing the remainder.
1801 Return the first stmt in the second group. */
1804 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1806 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1807 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1808 gcc_assert (group1_size
> 0);
1809 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
1810 gcc_assert (group2_size
> 0);
1811 GROUP_SIZE (first_vinfo
) = group1_size
;
1813 gimple
*stmt
= first_stmt
;
1814 for (unsigned i
= group1_size
; i
> 1; i
--)
1816 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1817 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1819 /* STMT is now the last element of the first group. */
1820 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1821 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
1823 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1824 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1826 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1827 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1830 /* For the second group, the GROUP_GAP is that before the original group,
1831 plus skipping over the first vector. */
1832 GROUP_GAP (vinfo_for_stmt (group2
)) =
1833 GROUP_GAP (first_vinfo
) + group1_size
;
1835 /* GROUP_GAP of the first group now has to skip over the second group too. */
1836 GROUP_GAP (first_vinfo
) += group2_size
;
1838 if (dump_enabled_p ())
1839 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1840 group1_size
, group2_size
);
1845 /* Analyze an SLP instance starting from a group of grouped stores. Call
1846 vect_build_slp_tree to build a tree of packed stmts if possible.
1847 Return FALSE if it's impossible to SLP any stmt in the loop. */
1850 vect_analyze_slp_instance (vec_info
*vinfo
,
1851 gimple
*stmt
, unsigned max_tree_size
)
1853 slp_instance new_instance
;
1855 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1856 unsigned int unrolling_factor
= 1, nunits
;
1857 tree vectype
, scalar_type
= NULL_TREE
;
1860 unsigned int max_nunits
= 0;
1861 vec
<slp_tree
> loads
;
1862 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1863 vec
<gimple
*> scalar_stmts
;
1865 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1869 scalar_type
= TREE_TYPE (DR_REF (dr
));
1870 vectype
= get_vectype_for_scalar_type (scalar_type
);
1874 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1875 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1878 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1882 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1883 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1884 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1889 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1892 "Build SLP failed: unsupported data-type ");
1893 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1894 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1899 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1901 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1902 scalar_stmts
.create (group_size
);
1904 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1906 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1909 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1910 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1911 scalar_stmts
.safe_push (
1912 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1914 scalar_stmts
.safe_push (next
);
1915 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1917 /* Mark the first element of the reduction chain as reduction to properly
1918 transform the node. In the reduction analysis phase only the last
1919 element of the chain is marked as reduction. */
1920 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1921 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1925 /* Collect reduction statements. */
1926 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1927 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1928 scalar_stmts
.safe_push (next
);
1931 loads
.create (group_size
);
1933 /* Build the tree for the SLP instance. */
1934 bool *matches
= XALLOCAVEC (bool, group_size
);
1935 unsigned npermutes
= 0;
1936 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
1937 &max_nunits
, &loads
, matches
, &npermutes
,
1938 NULL
, max_tree_size
);
1941 /* Calculate the unrolling factor based on the smallest type. */
1943 = least_common_multiple (max_nunits
, group_size
) / group_size
;
1945 if (unrolling_factor
!= 1
1946 && is_a
<bb_vec_info
> (vinfo
))
1949 if (max_nunits
> group_size
)
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1952 "Build SLP failed: store group "
1953 "size not a multiple of the vector size "
1954 "in basic block SLP\n");
1955 vect_free_slp_tree (node
);
1959 /* Fatal mismatch. */
1960 matches
[group_size
/max_nunits
* max_nunits
] = false;
1961 vect_free_slp_tree (node
);
1966 /* Create a new SLP instance. */
1967 new_instance
= XNEW (struct _slp_instance
);
1968 SLP_INSTANCE_TREE (new_instance
) = node
;
1969 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1970 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1971 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1973 /* Compute the load permutation. */
1975 bool loads_permuted
= false;
1976 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1978 vec
<unsigned> load_permutation
;
1980 gimple
*load
, *first_stmt
;
1981 bool this_load_permuted
= false;
1982 load_permutation
.create (group_size
);
1983 first_stmt
= GROUP_FIRST_ELEMENT
1984 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1985 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1987 int load_place
= vect_get_place_in_interleaving_chain
1989 gcc_assert (load_place
!= -1);
1990 if (load_place
!= j
)
1991 this_load_permuted
= true;
1992 load_permutation
.safe_push (load_place
);
1994 if (!this_load_permuted
1995 /* The load requires permutation when unrolling exposes
1996 a gap either because the group is larger than the SLP
1997 group-size or because there is a gap between the groups. */
1998 && (unrolling_factor
== 1
1999 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2000 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2002 load_permutation
.release ();
2005 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2006 loads_permuted
= true;
2011 if (!vect_supported_load_permutation_p (new_instance
))
2013 if (dump_enabled_p ())
2015 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2016 "Build SLP failed: unsupported load "
2018 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2021 vect_free_slp_instance (new_instance
);
2026 /* If the loads and stores can be handled with load/store-lanes
2027 instructions do not generate this SLP instance. */
2028 if (is_a
<loop_vec_info
> (vinfo
)
2030 && dr
&& vect_store_lanes_supported (vectype
, group_size
))
2033 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2035 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2036 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2037 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2038 /* Use SLP for strided accesses (or if we
2039 can't load-lanes). */
2040 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2041 || ! vect_load_lanes_supported
2042 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2043 GROUP_SIZE (stmt_vinfo
)))
2046 if (i
== loads
.length ())
2048 if (dump_enabled_p ())
2049 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2050 "Built SLP cancelled: can use "
2051 "load/store-lanes\n");
2052 vect_free_slp_instance (new_instance
);
2057 vinfo
->slp_instances
.safe_push (new_instance
);
2059 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_NOTE
, vect_location
,
2062 "Final SLP tree for instance:\n");
2063 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2071 /* Failed to SLP. */
2072 /* Free the allocated memory. */
2073 scalar_stmts
.release ();
2077 /* For basic block SLP, try to break the group up into multiples of the
2079 if (is_a
<bb_vec_info
> (vinfo
)
2080 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2081 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2083 /* We consider breaking the group only on VF boundaries from the existing
2085 for (i
= 0; i
< group_size
; i
++)
2086 if (!matches
[i
]) break;
2088 if (i
>= nunits
&& i
< group_size
)
2090 /* Split into two groups at the first vector boundary before i. */
2091 gcc_assert ((nunits
& (nunits
- 1)) == 0);
2092 unsigned group1_size
= i
& ~(nunits
- 1);
2094 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2095 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2096 /* If the first non-match was in the middle of a vector,
2097 skip the rest of that vector. */
2098 if (group1_size
< i
)
2100 i
= group1_size
+ nunits
;
2102 rest
= vect_split_slp_store_group (rest
, nunits
);
2105 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2108 /* Even though the first vector did not all match, we might be able to SLP
2109 (some) of the remainder. FORNOW ignore this possibility. */
2116 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2117 trees of packed scalar stmts if SLP is possible. */
2120 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2123 gimple
*first_element
;
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2128 /* Find SLP sequences starting from groups of grouped stores. */
2129 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2130 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2132 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2134 if (loop_vinfo
->reduction_chains
.length () > 0)
2136 /* Find SLP sequences starting from reduction chains. */
2137 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2138 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2141 /* Dissolve reduction chain group. */
2142 gimple
*next
, *stmt
= first_element
;
2145 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2146 next
= GROUP_NEXT_ELEMENT (vinfo
);
2147 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2148 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2151 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2152 = vect_internal_def
;
2156 /* Find SLP sequences starting from groups of reductions. */
2157 if (loop_vinfo
->reductions
.length () > 1)
2158 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2166 /* For each possible SLP instance decide whether to SLP it and calculate overall
2167 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2168 least one instance. */
2171 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2173 unsigned int i
, unrolling_factor
= 1;
2174 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2175 slp_instance instance
;
2176 int decided_to_slp
= 0;
2178 if (dump_enabled_p ())
2179 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2182 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2184 /* FORNOW: SLP if you can. */
2185 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
2186 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2188 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2189 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2190 loop-based vectorization. Such stmts will be marked as HYBRID. */
2191 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2195 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2197 if (decided_to_slp
&& dump_enabled_p ())
2198 dump_printf_loc (MSG_NOTE
, vect_location
,
2199 "Decided to SLP %d instances. Unrolling factor %d\n",
2200 decided_to_slp
, unrolling_factor
);
2202 return (decided_to_slp
> 0);
2206 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2207 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2210 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2212 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2213 imm_use_iterator imm_iter
;
2215 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2217 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2218 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2221 /* Propagate hybrid down the SLP tree. */
2222 if (stype
== hybrid
)
2224 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2228 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2229 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2230 /* If we get a pattern stmt here we have to use the LHS of the
2231 original stmt for immediate uses. */
2232 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2233 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2234 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2236 if (gimple_code (stmt
) == GIMPLE_PHI
)
2237 def
= gimple_phi_result (stmt
);
2239 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2241 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2243 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2245 use_vinfo
= vinfo_for_stmt (use_stmt
);
2246 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2247 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2248 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2249 if (!STMT_SLP_TYPE (use_vinfo
)
2250 && (STMT_VINFO_RELEVANT (use_vinfo
)
2251 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2252 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2253 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2255 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2258 "def in non-SLP stmt: ");
2259 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2267 && !HYBRID_SLP_STMT (stmt_vinfo
))
2269 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2272 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2274 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2277 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2278 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2279 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2282 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2285 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2287 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2288 struct loop
*loopp
= (struct loop
*)wi
->info
;
2293 if (TREE_CODE (*tp
) == SSA_NAME
2294 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2296 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2297 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2298 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2300 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2303 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2305 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2313 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2316 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2317 /* If the stmt is in a SLP instance then this isn't a reason
2318 to mark use definitions in other SLP instances as hybrid. */
2319 if (! STMT_SLP_TYPE (use_vinfo
)
2320 && (STMT_VINFO_RELEVANT (use_vinfo
)
2321 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2322 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2323 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_detect_hybrid_slp ===\n");

  /* First walk all pattern stmt in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      /* Walk both the pattern main stmt and its def sequence,
		 marking pure-SLP defs used by them as hybrid.  */
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
				      i, pure_slp);
    }
}
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
			    gimple_stmt_iterator region_end_in)
  : vec_info (vec_info::bb, init_cost (NULL)),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  /* Give every stmt in the region a vinfo and uid 0, marking it as
     belonging to this vectorization region.  */
  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
    }

  /* NOTE(review): reconstructed — the containing block's aux field is
     presumably pointed at this bb_vec_info; the destructor clears it.  */
  bb->aux = this;
}
2405 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2406 stmts in the basic block. */
2408 _bb_vec_info::~_bb_vec_info ()
2410 for (gimple_stmt_iterator si
= region_begin
;
2411 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2413 gimple
*stmt
= gsi_stmt (si
);
2414 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2417 /* Free stmt_vec_info. */
2418 free_stmt_vec_info (stmt
);
2420 /* Reset region marker. */
2421 gimple_set_uid (stmt
, -1);
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree. Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance)
{
  bool dummy;
  int i, j;
  gimple *stmt;
  slp_tree child;

  /* External/constant nodes need no analysis.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* Analyze children first (postorder).  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (child, node_instance))
      return false;

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gcc_assert (stmt_info);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* All scalar stmts of the node share the vector type.  */
      gimple *sstmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
	STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
    }

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
	= SLP_TREE_DEF_TYPE (child);

  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);

  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
	= vect_internal_def;

  return res;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  Unsupported instances are removed from
   SLP_INSTANCES (and freed); costs of the surviving ones are recorded
   into DATA (the target cost data).  */

bool
vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_slp_analyze_operations ===\n");

  /* Iterate without auto-increment: removal keeps I in place.  */
  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance),
					     instance))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  slp_instances.ordered_remove (i);
	}
      else
	{
	  /* Compute the costs of the SLP instance.  */
	  vect_analyze_slp_cost (instance, data);
	  i++;
	}
    }

  if (!slp_instances.length ())
    return false;

  return true;
}
/* Compute the scalar cost of the SLP node NODE and its children
   and return it. Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      /* Already known to stay live in the scalar code: not accounted.  */
      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
					     use_stmt)
		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
	continue;
      gimple_set_visited (stmt, true);

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Recurse into internal children, sharing LIFE.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is more than the cost of scalar
     version.  Note that we err on the vector side for equal cost because
     the cost estimate is otherwise quite pessimistic (constant uses are
     free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
		       gimple_stmt_iterator region_end,
		       vec<data_reference_p> datarefs, int n_stmts,
		       bool &fatal)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  int min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new _bb_vec_info (region_begin, region_end);
  if (!bb_vinfo)
    return NULL;

  /* bb_vinfo now owns DATAREFS; all failure paths below free it by
     deleting bb_vinfo.  */
  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: no grouped stores in "
			 "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}
      delete bb_vinfo;
      return NULL;
    }

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
	  || ! vect_slp_analyze_instance_dependence (instance))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
	  continue;
	}

      /* Mark all the statements that we want to vectorize as pure SLP and
	 relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;
  bool any_vectorized = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  gsi = gsi_start_bb (bb);

  while (1)
    {
      if (gsi_end_p (gsi))
	break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      /* Grow the region until a stmt we cannot handle.  */
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  insns++;

	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
	    vect_location = gimple_location (stmt);

	  if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
	    break;
	}

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
	{
	  gsi_next (&gsi);
	  continue;
	}

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
					datarefs, insns, fatal);
      if (bb_vinfo
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

	  vect_schedule_slp (bb_vinfo);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "basic block part vectorized\n");

	  vectorized = true;
	}
      delete bb_vinfo;

      any_vectorized |= vectorized;

      vector_sizes &= ~current_vector_size;
      if (vectorized
	  || vector_sizes == 0
	  || current_vector_size == 0
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	{
	  if (gsi_end_p (region_end))
	    break;

	  /* Skip the unhandled stmt.  */
	  gsi_next (&gsi);

	  /* And reset vector sizes.  */
	  current_vector_size = 0;
	  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
	}
      else
	{
	  /* Try the next biggest vector size.  */
	  current_vector_size = 1 << floor_log2 (vector_sizes);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Re-trying analysis with "
			     "vector size %d\n", current_vector_size);

	  /* Re-analyze the same region with the new size.  */
	  gsi = region_begin;
	}
    }

  return any_vectorized;
}
/* Return 1 if vector type of boolean constant which is OPNUM
   operand in statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
	op = gimple_assign_rhs1 (stmt);
      else
	op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
			       &dt, &vectype))
	gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
	op = cond;
      else if (opnum)
	op = TREE_OPERAND (cond, 1);
      else
	op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
			       &dt, &vectype))
	gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
			   vec<tree> *vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  elts = XALLOCAVEC (tree, nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Walk the group backwards; vectors are built in reverse and
	 inverted below.  */
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
	{
	  if (is_store)
	    op = gimple_assign_rhs1 (stmt);
	  else
	    {
	      switch (code)
		{
		case COND_EXPR:
		  {
		    tree cond = gimple_assign_rhs1 (stmt);
		    if (TREE_CODE (cond) == SSA_NAME)
		      op = gimple_op (stmt, op_num + 1);
		    else if (op_num == 0 || op_num == 1)
		      op = TREE_OPERAND (cond, op_num);
		    else
		      {
			if (op_num == 2)
			  op = gimple_assign_rhs2 (stmt);
			else
			  op = gimple_assign_rhs3 (stmt);
		      }
		  }
		  break;

		case CALL_EXPR:
		  op = gimple_call_arg (stmt, op_num);
		  break;

		case LSHIFT_EXPR:
		case RSHIFT_EXPR:
		case LROTATE_EXPR:
		case RROTATE_EXPR:
		  op = gimple_op (stmt, op_num + 1);
		  /* Unlike the other binary operators, shifts/rotates have
		     the shift count being int, instead of the same type as
		     the lhs, so make sure the scalar is the right type if
		     we are dealing with vectors of
		     long long/long/short/char.  */
		  if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		    op = fold_convert (TREE_TYPE (vector_type), op);
		  break;

		default:
		  op = gimple_op (stmt, op_num + 1);
		  break;
		}
	    }

	  /* Create 'vect_ = {op0,op1,...,opn}'.  */
	  number_of_places_left_in_vector--;
	  tree orig_op = op;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  /* For BB vectorization defs from the same BB as the scalar
	     stmts force placement of the vector after the last def.  */
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
	    place_after_defs = true;

	  if (number_of_places_left_in_vector == 0)
	    {
	      /* A full vector has been collected: materialize it.  */
	      if (constant_p)
		vec_cst = build_vector (vector_type, elts);
	      else
		{
		  vec<constructor_elt, va_gc> *v;
		  unsigned k;
		  vec_alloc (v, nunits);
		  for (k = 0; k < nunits; ++k)
		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
		  vec_cst = build_constructor (vector_type, v);
		}
	      tree init;
	      gimple_stmt_iterator gsi;
	      if (place_after_defs)
		{
		  gsi = gsi_for_stmt
			  (vect_find_last_scalar_stmt_in_slp (slp_node));
		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
		}
	      else
		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
	      if (ctor_seq != NULL)
		{
		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
							GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (init);
	      place_after_defs = false;
	      number_of_places_left_in_vector = nunits;
	      constant_p = true;
	    }
	}
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
	{
	  if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

	  vec_oprnds->quick_push (neutral_vec);
	}
      else
	{
	  for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
	    vec_oprnds->quick_push (vop);
	}
    }
}
3262 /* Get vectorized definitions from SLP_NODE that contains corresponding
3263 vectorized def-stmts. */
3266 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3269 gimple
*vec_def_stmt
;
3272 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3274 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3276 gcc_assert (vec_def_stmt
);
3277 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3278 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3280 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3281 vec_oprnds
->quick_push (vec_oprnd
);
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
	{
	  child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	    {
	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	      gimple *related
		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
	      tree first_def_op;

	      if (gimple_code (first_def) == GIMPLE_PHI)
		first_def_op = gimple_phi_result (first_def);
	      else
		first_def_op = gimple_get_lhs (first_def);
	      if (operand_equal_p (oprnd, first_def_op, 0)
		  || (related
		      && operand_equal_p (oprnd,
					  gimple_get_lhs (related), 0)))
		{
		  /* The number of vector defs is determined by the number of
		     vector statements in the node from which we get those
		     statements.  */
		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
		  vectorized_defs = true;
		  child_index++;
		}
	    }
	  else
	    child_index++;
	}

      if (!vectorized_defs)
	{
	  if (i == 0)
	    {
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      /* Number of vector stmts was calculated according to LHS in
		 vect_schedule_slp_instance (), fix it by replacing LHS with
		 RHS, if necessary.  See vect_get_smallest_scalar_type () for
		 details.  */
	      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
					     &rhs_size_unit);
	      if (rhs_size_unit != lhs_size_unit)
		{
		  number_of_vects *= rhs_size_unit;
		  number_of_vects /= lhs_size_unit;
		}
	    }
	}

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
	 looking for initial loop invariant values.  */
      if (vectorized_defs)
	/* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, int vf,
			      slp_instance slp_node_instance, bool analyze_only,
			      unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  unsigned char *mask;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (int j = 0; j < vf; j++)
    {
      for (int k = 0; k < group_size; k++)
	{
	  /* Flat element index within the VF-times repeated group.  */
	  int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
		   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
	  vec_index = i / nunits;
	  mask_element = i % nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      /* Elements of the second input select with index+nunits.  */
	      mask_element += nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "permutation requires at "
				   "least three vectors ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt, 0);
		}
	      return false;
	    }

	  gcc_assert (mask_element >= 0
		      && mask_element < 2 * nunits);
	  if (mask_element != index)
	    noop_p = false;
	  mask[index++] = mask_element;

	  if (index == nunits)
	    {
	      /* One full mask collected; verify and optionally emit.  */
	      if (! noop_p
		  && ! can_vec_perm_p (mode, false, mask))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (i = 0; i < nunits; ++i)
			dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  return false;
		}

	      if (! noop_p)
		++*n_perms;

	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    {
		      tree *mask_elts = XALLOCAVEC (tree, nunits);
		      for (int l = 0; l < nunits; ++l)
			mask_elts[l] = build_int_cst (mask_element_type,
						      mask[l]);
		      mask_vec = build_vector (mask_type, mask_elts);
		    }

		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;

		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt = gimple_build_assign (perm_dest,
						       VEC_PERM_EXPR,
						       first_vec, second_vec,
						       mask_vec);
		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}

	      index = 0;
	      noop_p = true;
	      first_vec_index = -1;
	      second_vec_index = -1;
	    }
	}
    }

  return true;
}
3565 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): this extraction has dropped several original source lines
   (the function's opening brace, some local declarations, closing braces
   and a few statements); the surviving lines below are kept byte-identical.
   The function schedules the SLP tree rooted at NODE: it first recurses
   into the children, then emits NODE's own vector statements via
   vect_transform_stmt.  Presumably it returns IS_STORE -- the return
   statement itself is outside this extraction; verify against the full
   file.  */
3568 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3569 unsigned int vectorization_factor
)
/* Locals (some declarations appear to be missing from the extraction).  */
3572 bool grouped_store
, is_store
;
3573 gimple_stmt_iterator si
;
3574 stmt_vec_info stmt_info
;
3575 unsigned int vec_stmts_size
, nunits
, group_size
;
/* Only internal-def nodes carry statements to vectorize; the early-exit
   body for other node kinds is missing from this extraction.  */
3580 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Postorder: vectorize all children before NODE itself.  */
3583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3584 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3586 /* Push SLP node def-type to stmts. */
3587 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3588 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3589 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3590 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
/* The first scalar stmt acts as the representative for the whole group.  */
3592 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3593 stmt_info
= vinfo_for_stmt (stmt
);
3595 /* VECTYPE is the type of the destination. */
3596 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3597 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3598 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3600 /* For each SLP instance calculate number of vector stmts to be created
3601 for the scalar stmts in each node of the SLP tree. Number of vector
3602 elements in one vector iteration is the number of scalar elements in
3603 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3605 Unless this is a SLP reduction in which case the number of vector
3606 stmts is equal to the number of vector stmts of the children. */
3607 if (GROUP_FIRST_ELEMENT (stmt_info
)
3608 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3609 vec_stmts_size
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
/* Otherwise (the 'else' keyword appears to be missing from the
   extraction): vec_stmts_size = VF * GROUP_SIZE / NUNITS.  */
3611 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
/* Allocate the vector-stmt array lazily, once per node.  */
3613 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3615 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3616 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3619 if (dump_enabled_p ())
3621 dump_printf_loc (MSG_NOTE
,vect_location
,
3622 "------>vectorizing SLP node starting from: ");
3623 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3626 /* Vectorized stmts go before the last scalar stmt which is where
3627 all uses are ready. */
3628 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3630 /* Mark the first element of the reduction chain as reduction to properly
3631 transform the node. In the analysis phase only the last element of the
3632 chain is marked as reduction. */
3633 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3634 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3636 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3637 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3640 /* Handle two-operation SLP nodes by vectorizing the group with
3641 both operations and then performing a merge. */
3642 if (SLP_TREE_TWO_OPERATORS (node
))
/* CODE0 is the rhs code of the representative stmt; OCODE records the
   second, differing rhs code found in the group (if any).  */
3644 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3645 enum tree_code ocode
= ERROR_MARK
;
/* MASK[i] records which of the two operations scalar stmt i uses; the
   assignment for the CODE0 case appears to be missing from the
   extraction.  */
3647 unsigned char *mask
= XALLOCAVEC (unsigned char, group_size
);
3648 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3649 if (gimple_assign_rhs_code (ostmt
) != code0
)
3652 ocode
= gimple_assign_rhs_code (ostmt
);
/* If the group really mixes two operations, vectorize it twice --
   once per opcode -- and blend the results with VEC_PERM_EXPRs.  */
3656 if (ocode
!= ERROR_MARK
)
3661 tree tmask
= NULL_TREE
;
/* First pass: vectorize with CODE0; stash the results in V0.  */
3662 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3663 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3664 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* Second pass: temporarily flip the rhs code to OCODE, vectorize
   again, restore CODE0, stash results in V1.  */
3665 gimple_assign_set_rhs_code (stmt
, ocode
);
3666 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3667 gimple_assign_set_rhs_code (stmt
, code0
);
3668 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3669 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* Build an integer element type of the same bit size as VECTYPE's
   element, and a same-sized vector type, for the permutation mask.  */
3670 tree meltype
= build_nonstandard_integer_type
3671 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3672 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
/* For each result pair (v0[j], v1[j]) emit a VEC_PERM_EXPR selecting
   each element from V0 or V1 according to MASK.  */
3674 for (j
= 0; j
< v0
.length (); ++j
)
3676 tree
*melts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (vectype
));
3677 for (l
= 0; l
< TYPE_VECTOR_SUBPARTS (vectype
); ++l
)
/* Wrap K around when it walks past the group; the reset statement
   appears to be missing from the extraction.  */
3679 if (k
>= group_size
)
3681 melts
[l
] = build_int_cst
3682 (meltype
, mask
[k
++] * TYPE_VECTOR_SUBPARTS (vectype
) + l
);
3684 tmask
= build_vector (mvectype
, melts
);
3686 /* ??? Not all targets support a VEC_PERM_EXPR with a
3687 constant mask that would translate to a vec_merge RTX
3688 (with their vec_perm_const_ok). We can either not
3689 vectorize in that case or let veclower do its job.
3690 Unfortunately that isn't too great and at least for
3691 plus/minus we'd eventually like to match targets
3692 vector addsub instructions. */
3694 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3696 gimple_assign_lhs (v0
[j
]),
3697 gimple_assign_lhs (v1
[j
]), tmask
);
3698 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3699 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
/* Common case: a single operation -- vectorize the node directly.
   (The 'else' introducing this arm appears to be missing from the
   extraction.)  */
3706 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3708 /* Restore stmt def-types. */
3709 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3710 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3711 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3712 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3717 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3718 For loop vectorization this is done in vectorizable_call, but for SLP
3719 it needs to be deferred until end of vect_schedule_slp, because multiple
3720 SLP instances may refer to the same scalar stmt. */
/* NOTE(review): some original lines (opening brace, a few declarations,
   'continue'/'return' statements and closing braces) were dropped by this
   extraction; the surviving lines are kept byte-identical below.  */
3723 vect_remove_slp_scalar_calls (slp_tree node
)
3725 gimple
*stmt
, *new_stmt
;
3726 gimple_stmt_iterator gsi
;
3730 stmt_vec_info stmt_info
;
/* Only internal-def nodes have scalar stmts to clean up; the early
   exit's body is missing from the extraction.  */
3732 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Recurse into all children first.  */
3735 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3736 vect_remove_slp_scalar_calls (child
);
3738 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
/* Skip non-calls and stmts already removed from a basic block; also
   skip stmts without vinfo, pattern stmts, and stmts that are not
   pure-SLP (their scalar form may still be needed).  The 'continue'
   bodies for these guards are missing from the extraction.  */
3740 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3742 stmt_info
= vinfo_for_stmt (stmt
);
3743 if (stmt_info
== NULL
3744 || is_pattern_stmt_p (stmt_info
)
3745 || !PURE_SLP_STMT (stmt_info
))
/* Replace the call "lhs = f (...)" with "lhs = 0" and transfer the
   stmt_vec_info from the old call to the new assignment, keeping the
   SSA def of LHS consistent.  */
3747 lhs
= gimple_call_lhs (stmt
);
3748 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3749 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3750 set_vinfo_for_stmt (stmt
, NULL
);
3751 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3752 gsi
= gsi_for_stmt (stmt
);
3753 gsi_replace (&gsi
, new_stmt
, false);
3754 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3758 /* Generate vector code for all SLP instances in the loop/basic block. */
3761 vect_schedule_slp (vec_info
*vinfo
)
3763 vec
<slp_instance
> slp_instances
;
3764 slp_instance instance
;
3766 bool is_store
= false;
3768 slp_instances
= vinfo
->slp_instances
;
3769 if (is_a
<loop_vec_info
> (vinfo
))
3770 vf
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
3774 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3776 /* Schedule the tree of INSTANCE. */
3777 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3779 if (dump_enabled_p ())
3780 dump_printf_loc (MSG_NOTE
, vect_location
,
3781 "vectorizing stmts using SLP.\n");
3784 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3786 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3789 gimple_stmt_iterator gsi
;
3791 /* Remove scalar call stmts. Do not do this for basic-block
3792 vectorization as not all uses may be vectorized.
3793 ??? Why should this be necessary? DCE should be able to
3794 remove the stmts itself.
3795 ??? For BB vectorization we can as well remove scalar
3796 stmts starting from the SLP tree root if they have no
3798 if (is_a
<loop_vec_info
> (vinfo
))
3799 vect_remove_slp_scalar_calls (root
);
3801 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3802 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3804 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3807 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3808 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3809 /* Free the attached stmt_vec_info and remove the stmt. */
3810 gsi
= gsi_for_stmt (store
);
3811 unlink_stmt_vdef (store
);
3812 gsi_remove (&gsi
, true);
3813 release_defs (store
);
3814 free_stmt_vec_info (store
);