/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
							tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					   tree *, tree *);
static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);
static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						    tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mult_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     sum_0 = phi <init, sum_1>
     ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
   vectorized without preserving all the intermediate results.  It
   produces only N/2 (widened) results (by summing up pairs of
   intermediate results) rather than all N results.  Therefore, we
   cannot allow this pattern when we want to get all the results and in
   the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
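/* For illustration only (this example is not part of the original sources):
   a scalar loop of roughly the following shape is what the dot-product
   pattern targets, assuming unsigned char inputs and an int accumulator:

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += (int) x[i] * (int) y[i];

   On targets providing a dot-product instruction, each vector of widened
   products is folded into the accumulator with a single DOT_PROD_EXPR
   instead of keeping all N intermediate results.  */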
258 vect_recog_dot_prod_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
261 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
263 tree oprnd00
, oprnd01
;
264 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
265 tree type
, half_type
;
266 gimple
*pattern_stmt
;
268 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
276 loop
= LOOP_VINFO_LOOP (loop_info
);
278 /* We don't allow changing the order of the computation in the inner-loop
279 when doing outer-loop vectorization. */
280 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
283 if (!is_gimple_assign (last_stmt
))
286 type
= gimple_expr_type (last_stmt
);
288 /* Look for the following pattern
292 DDPROD = (TYPE2) DPROD;
293 sum_1 = DDPROD + sum_0;
295 - DX is double the size of X
296 - DY is double the size of Y
297 - DX, DY, DPROD all have the same type
298 - sum is the same size of DPROD or bigger
299 - sum has been recognized as a reduction variable.
301 This is equivalent to:
302 DPROD = X w* Y; #widen mult
303 sum_1 = DPROD w+ sum_0; #widen summation
305 DPROD = X w* Y; #widen mult
306 sum_1 = DPROD + sum_0; #summation
309 /* Starting from LAST_STMT, follow the defs of its uses in search
310 of the above pattern. */
312 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
315 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
317 /* Has been detected as widening-summation? */
319 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
320 type
= gimple_expr_type (stmt
);
321 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
323 oprnd0
= gimple_assign_rhs1 (stmt
);
324 oprnd1
= gimple_assign_rhs2 (stmt
);
325 half_type
= TREE_TYPE (oprnd0
);
331 oprnd0
= gimple_assign_rhs1 (last_stmt
);
332 oprnd1
= gimple_assign_rhs2 (last_stmt
);
333 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
334 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
338 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
343 oprnd0
= gimple_assign_rhs1 (stmt
);
349 /* So far so good. Since last_stmt was detected as a (summation) reduction,
350 we know that oprnd1 is the reduction variable (defined by a loop-header
351 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
352 Left to check that oprnd0 is defined by a (widen_)mult_expr */
353 if (TREE_CODE (oprnd0
) != SSA_NAME
)
356 prod_type
= half_type
;
357 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
359 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
360 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
365 if (!is_gimple_assign (stmt
))
367 stmt_vinfo
= vinfo_for_stmt (stmt
);
368 gcc_assert (stmt_vinfo
);
369 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
371 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
373 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
375 /* Has been detected as a widening multiplication? */
377 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
378 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
380 stmt_vinfo
= vinfo_for_stmt (stmt
);
381 gcc_assert (stmt_vinfo
);
382 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
383 oprnd00
= gimple_assign_rhs1 (stmt
);
384 oprnd01
= gimple_assign_rhs2 (stmt
);
385 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
386 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
390 tree half_type0
, half_type1
;
394 oprnd0
= gimple_assign_rhs1 (stmt
);
395 oprnd1
= gimple_assign_rhs2 (stmt
);
396 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
397 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
399 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
403 oprnd00
= gimple_assign_rhs1 (def_stmt
);
404 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
408 oprnd01
= gimple_assign_rhs1 (def_stmt
);
409 if (!types_compatible_p (half_type0
, half_type1
))
411 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
415 half_type
= TREE_TYPE (oprnd00
);
416 *type_in
= half_type
;
419 /* Pattern detected. Create a stmt to be used to replace the pattern: */
420 var
= vect_recog_temp_ssa_var (type
, NULL
);
421 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
422 oprnd00
, oprnd01
, oprnd1
);
424 if (dump_enabled_p ())
426 dump_printf_loc (MSG_NOTE
, vect_location
,
427 "vect_recog_dot_prod_pattern: detected: ");
428 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
429 dump_printf (MSG_NOTE
, "\n");
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     signed TYPE1 diff, abs_diff;
     sum_0 = phi <init, sum_1>
     ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>  */
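/* For illustration only (not part of the original sources): the SAD pattern
   corresponds to scalar code of roughly this shape, assuming unsigned char
   inputs and an int accumulator:

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       {
	 int diff = (int) x[i] - (int) y[i];
	 sum += diff >= 0 ? diff : -diff;
       }

   which targets such as x86 (PSADBW) can implement with a single
   sum-of-absolute-differences instruction per vector.  */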
476 vect_recog_sad_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
479 gimple
*last_stmt
= (*stmts
)[0];
480 tree sad_oprnd0
, sad_oprnd1
;
481 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
483 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
490 loop
= LOOP_VINFO_LOOP (loop_info
);
492 /* We don't allow changing the order of the computation in the inner-loop
493 when doing outer-loop vectorization. */
494 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
497 if (!is_gimple_assign (last_stmt
))
500 tree sum_type
= gimple_expr_type (last_stmt
);
502 /* Look for the following pattern
506 DAD = ABS_EXPR <DDIFF>;
507 DDPROD = (TYPE2) DPROD;
510 - DX is at least double the size of X
511 - DY is at least double the size of Y
512 - DX, DY, DDIFF, DAD all have the same type
513 - sum is the same size of DAD or bigger
514 - sum has been recognized as a reduction variable.
516 This is equivalent to:
517 DDIFF = X w- Y; #widen sub
518 DAD = ABS_EXPR <DDIFF>;
519 sum_1 = DAD w+ sum_0; #widen summation
521 DDIFF = X w- Y; #widen sub
522 DAD = ABS_EXPR <DDIFF>;
523 sum_1 = DAD + sum_0; #summation
526 /* Starting from LAST_STMT, follow the defs of its uses in search
527 of the above pattern. */
529 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
532 tree plus_oprnd0
, plus_oprnd1
;
534 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
536 /* Has been detected as widening-summation? */
538 gimple
*stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
539 sum_type
= gimple_expr_type (stmt
);
540 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
542 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
543 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
544 half_type
= TREE_TYPE (plus_oprnd0
);
550 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
551 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
552 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
553 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
556 /* The type conversion could be promotion, demotion,
557 or just signed -> unsigned. */
558 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
559 &half_type
, &def_stmt
, &promotion
))
560 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
562 half_type
= sum_type
;
565 /* So far so good. Since last_stmt was detected as a (summation) reduction,
566 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
567 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
568 Then check that plus_oprnd0 is defined by an abs_expr. */
570 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
573 tree abs_type
= half_type
;
574 gimple
*abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
576 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
577 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
582 if (!is_gimple_assign (abs_stmt
))
585 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
586 gcc_assert (abs_stmt_vinfo
);
587 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
589 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
592 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
593 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
595 if (TYPE_UNSIGNED (abs_type
))
598 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
600 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
603 gimple
*diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
605 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
606 if (!gimple_bb (diff_stmt
)
607 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
612 if (!is_gimple_assign (diff_stmt
))
615 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
616 gcc_assert (diff_stmt_vinfo
);
617 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
619 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
622 tree half_type0
, half_type1
;
625 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
626 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
628 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
629 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
631 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
632 &half_type0
, &def_stmt
, &promotion
)
635 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
637 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
638 &half_type1
, &def_stmt
, &promotion
)
641 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
643 if (!types_compatible_p (half_type0
, half_type1
))
645 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
646 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
649 *type_in
= TREE_TYPE (sad_oprnd0
);
650 *type_out
= sum_type
;
652 /* Pattern detected. Create a stmt to be used to replace the pattern: */
653 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
654 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
655 sad_oprnd1
, plus_oprnd1
);
657 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE
, vect_location
,
660 "vect_recog_sad_pattern: detected: ");
661 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
662 dump_printf (MSG_NOTE
, "\n");
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
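/* For illustration only (not from the original sources): with HALF_TYPE ==
   unsigned char and TYPE == unsigned int, a constant shift such as

     unsigned char a_t;
     unsigned int a_T = (unsigned int) a_t;
     unsigned int res = a_T << 12;

   does not fit the 8-bit half type (the shift amount 12 exceeds it), but
   TYPE is at least 4 times bigger than HALF_TYPE and 12 does fit an
   intermediate unsigned short, so the promotion is rewritten as
   a_it = (unsigned short) a_t and the widening is performed from the
   intermediate type instead.  */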
682 vect_handle_widen_op_by_const (gimple
*stmt
, enum tree_code code
,
683 tree const_oprnd
, tree
*oprnd
,
684 gimple
**wstmt
, tree type
,
685 tree
*half_type
, gimple
*def_stmt
)
687 tree new_type
, new_oprnd
;
689 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
692 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
693 || (code
== LSHIFT_EXPR
694 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
696 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
698 /* CONST_OPRND is a constant of HALF_TYPE. */
699 *oprnd
= gimple_assign_rhs1 (def_stmt
);
703 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
706 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
709 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
710 a type 2 times bigger than HALF_TYPE. */
711 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
712 TYPE_UNSIGNED (type
));
713 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
714 || (code
== LSHIFT_EXPR
715 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
718 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
719 *oprnd
= gimple_assign_rhs1 (def_stmt
);
720 new_oprnd
= make_ssa_name (new_type
);
721 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
724 *half_type
= new_type
;
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     TYPE a_T, b_T, prod_T;
     ...
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;
     ...
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     ...
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     TYPE a_T, prod_T, prod_T';
     ...
         '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
         '--> prod_T' = a_it w* CONST;

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
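/* For illustration only (not part of the original sources): the most common
   widen-mult case is a loop such as

     short a[256], b[256];
     int c[256];
     for (int i = 0; i < 256; i++)
       c[i] = (int) a[i] * (int) b[i];

   where the two promotions S3/S4 and the multiplication S5 are replaced by a
   single WIDEN_MULT_EXPR on the narrow operands.  */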
807 vect_recog_widen_mult_pattern (vec
<gimple
*> *stmts
,
808 tree
*type_in
, tree
*type_out
)
810 gimple
*last_stmt
= stmts
->pop ();
811 gimple
*def_stmt0
, *def_stmt1
;
813 tree type
, half_type0
, half_type1
;
814 gimple
*new_stmt
= NULL
, *pattern_stmt
= NULL
;
815 tree vectype
, vecitype
;
817 enum tree_code dummy_code
;
823 if (!is_gimple_assign (last_stmt
))
826 type
= gimple_expr_type (last_stmt
);
828 /* Starting from LAST_STMT, follow the defs of its uses in search
829 of the above pattern. */
831 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
834 oprnd0
= gimple_assign_rhs1 (last_stmt
);
835 oprnd1
= gimple_assign_rhs2 (last_stmt
);
836 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
837 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
840 /* Check argument 0. */
841 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
845 /* Check argument 1. */
846 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
847 &def_stmt1
, &promotion
);
849 if (op1_ok
&& promotion
)
851 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
852 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
856 if (TREE_CODE (oprnd1
) == INTEGER_CST
857 && TREE_CODE (half_type0
) == INTEGER_TYPE
858 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
859 &oprnd0
, &new_stmt
, type
,
860 &half_type0
, def_stmt0
))
862 half_type1
= half_type0
;
863 oprnd1
= fold_convert (half_type1
, oprnd1
);
869 /* If the two arguments have different sizes, convert the one with
870 the smaller type into the larger type. */
871 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
873 /* If we already used up the single-stmt slot give up. */
878 gimple
*def_stmt
= NULL
;
880 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
882 def_stmt
= def_stmt0
;
883 half_type0
= half_type1
;
888 def_stmt
= def_stmt1
;
889 half_type1
= half_type0
;
893 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
894 tree new_oprnd
= make_ssa_name (half_type0
);
895 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
899 /* Handle unsigned case. Look for
900 S6 u_prod_T = (unsigned TYPE) prod_T;
901 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
902 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
908 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
911 use_stmt
= vect_single_imm_use (last_stmt
);
912 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
913 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
916 use_lhs
= gimple_assign_lhs (use_stmt
);
917 use_type
= TREE_TYPE (use_lhs
);
918 if (!INTEGRAL_TYPE_P (use_type
)
919 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
920 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
924 last_stmt
= use_stmt
;
927 if (!types_compatible_p (half_type0
, half_type1
))
930 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
931 to get an intermediate result of type ITYPE. In this case we need
932 to build a statement to convert this intermediate result to type TYPE. */
934 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
935 itype
= build_nonstandard_integer_type
936 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
937 TYPE_UNSIGNED (type
));
939 /* Pattern detected. */
940 if (dump_enabled_p ())
941 dump_printf_loc (MSG_NOTE
, vect_location
,
942 "vect_recog_widen_mult_pattern: detected:\n");
944 /* Check target support */
945 vectype
= get_vectype_for_scalar_type (half_type0
);
946 vecitype
= get_vectype_for_scalar_type (itype
);
949 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
951 &dummy_code
, &dummy_code
,
952 &dummy_int
, &dummy_vec
))
956 *type_out
= get_vectype_for_scalar_type (type
);
958 /* Pattern supported. Create a stmt to be used to replace the pattern: */
959 var
= vect_recog_temp_ssa_var (itype
, NULL
);
960 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
962 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
963 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
970 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
971 stmt_vec_info new_stmt_info
972 = new_stmt_vec_info (new_stmt
, stmt_vinfo
->vinfo
);
973 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
974 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
981 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
982 stmt_vec_info pattern_stmt_info
983 = new_stmt_vec_info (pattern_stmt
, stmt_vinfo
->vinfo
);
984 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
985 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
986 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
988 gimple_assign_lhs (pattern_stmt
));
991 if (dump_enabled_p ())
992 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
994 stmts
->safe_push (last_stmt
);
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   * LAST_STMT: A stmt from which the pattern search begins.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
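/* For illustration only (not from the original sources): calls such as

     double y = pow (x, 2.0);
     double z = pow (x, 0.5);

   are rewritten here into y = x * x and z = sqrt (x) respectively, provided
   the target can vectorize the resulting multiplication or sqrt call.  */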
1026 vect_recog_pow_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1029 gimple
*last_stmt
= (*stmts
)[0];
1030 tree fn
, base
, exp
= NULL
;
1034 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1037 fn
= gimple_call_fndecl (last_stmt
);
1038 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1041 switch (DECL_FUNCTION_CODE (fn
))
1043 case BUILT_IN_POWIF
:
1047 base
= gimple_call_arg (last_stmt
, 0);
1048 exp
= gimple_call_arg (last_stmt
, 1);
1049 if (TREE_CODE (exp
) != REAL_CST
1050 && TREE_CODE (exp
) != INTEGER_CST
)
  /* We now have a pow or powi builtin function call with a constant
     exponent.  */
1061 *type_out
= NULL_TREE
;
1063 /* Catch squaring. */
1064 if ((tree_fits_shwi_p (exp
)
1065 && tree_to_shwi (exp
) == 2)
1066 || (TREE_CODE (exp
) == REAL_CST
1067 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1069 *type_in
= TREE_TYPE (base
);
1071 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1072 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1076 /* Catch square root. */
1077 if (TREE_CODE (exp
) == REAL_CST
1078 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1080 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1081 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1084 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1085 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1088 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1089 gimple_call_set_lhs (stmt
, var
);
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     TYPE x_T, sum = init;
     sum_0 = phi <init, sum_1>
     ...
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
   vectorized without preserving all the intermediate results.  It
   produces only N/2 (widened) results (by summing up pairs of
   intermediate results) rather than all N results.  Therefore, we
   cannot allow this pattern when we want to get all the results and in
   the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
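/* For illustration only (not part of the original sources): the
   widening-summation pattern corresponds to scalar code such as

     unsigned char x[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += (int) x[i];

   which is replaced by a WIDEN_SUM_EXPR accumulating the narrow elements
   directly into the wider accumulator.  */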
1139 vect_recog_widen_sum_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1142 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
1143 tree oprnd0
, oprnd1
;
1144 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1145 tree type
, half_type
;
1146 gimple
*pattern_stmt
;
1147 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1155 loop
= LOOP_VINFO_LOOP (loop_info
);
1157 /* We don't allow changing the order of the computation in the inner-loop
1158 when doing outer-loop vectorization. */
1159 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1162 if (!is_gimple_assign (last_stmt
))
1165 type
= gimple_expr_type (last_stmt
);
1167 /* Look for the following pattern
1170 In which DX is at least double the size of X, and sum_1 has been
1171 recognized as a reduction variable.
1174 /* Starting from LAST_STMT, follow the defs of its uses in search
1175 of the above pattern. */
1177 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1180 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1181 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1182 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1183 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1186 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1187 we know that oprnd1 is the reduction variable (defined by a loop-header
1188 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1189 Left to check that oprnd0 is defined by a cast from type 'type' to type
1192 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1197 oprnd0
= gimple_assign_rhs1 (stmt
);
1198 *type_in
= half_type
;
1201 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1202 var
= vect_recog_temp_ssa_var (type
, NULL
);
1203 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1205 if (dump_enabled_p ())
1207 dump_printf_loc (MSG_NOTE
, vect_location
,
1208 "vect_recog_widen_sum_pattern: detected: ");
1209 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1210 dump_printf (MSG_NOTE
, "\n");
1213 return pattern_stmt
;
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence.)

   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */
1246 vect_operation_fits_smaller_type (gimple
*stmt
, tree def
, tree
*new_type
,
1247 tree
*op0
, tree
*op1
, gimple
**new_def_stmt
,
1248 vec
<gimple
*> *stmts
)
1250 enum tree_code code
;
1251 tree const_oprnd
, oprnd
;
1252 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1253 gimple
*def_stmt
, *new_stmt
;
1259 *new_def_stmt
= NULL
;
1261 if (!is_gimple_assign (stmt
))
1264 code
= gimple_assign_rhs_code (stmt
);
1265 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1266 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1269 oprnd
= gimple_assign_rhs1 (stmt
);
1270 const_oprnd
= gimple_assign_rhs2 (stmt
);
1271 type
= gimple_expr_type (stmt
);
1273 if (TREE_CODE (oprnd
) != SSA_NAME
1274 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1277 /* If oprnd has other uses besides that in stmt we cannot mark it
1278 as being part of a pattern only. */
1279 if (!has_single_use (oprnd
))
1282 /* If we are in the middle of a sequence, we use DEF from a previous
1283 statement. Otherwise, OPRND has to be a result of type promotion. */
1286 half_type
= *new_type
;
1292 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1295 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1299 /* Can we perform the operation on a smaller type? */
1305 if (!int_fits_type_p (const_oprnd
, half_type
))
1307 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1308 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1311 interm_type
= build_nonstandard_integer_type (
1312 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1313 if (!int_fits_type_p (const_oprnd
, interm_type
))
1320 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1321 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1324 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1325 (e.g., if the original value was char, the shift amount is at most 8
1326 if we want to use short). */
1327 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1330 interm_type
= build_nonstandard_integer_type (
1331 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1333 if (!vect_supportable_shift (code
, interm_type
))
1339 if (vect_supportable_shift (code
, half_type
))
1342 /* Try intermediate type - HALF_TYPE is not supported. */
1343 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1346 interm_type
= build_nonstandard_integer_type (
1347 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1349 if (!vect_supportable_shift (code
, interm_type
))
1358 /* There are four possible cases:
1359 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1360 the first statement in the sequence)
1361 a. The original, HALF_TYPE, is not enough - we replace the promotion
1362 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1363 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1365 2. OPRND is defined by a pattern statement we created.
1366 a. Its type is not sufficient for the operation, we create a new stmt:
1367 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1368 this statement in NEW_DEF_STMT, and it is later put in
1369 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1370 b. OPRND is good to use in the new statement. */
1375 /* Replace the original type conversion HALF_TYPE->TYPE with
1376 HALF_TYPE->INTERM_TYPE. */
1377 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1379 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1380 /* Check if the already created pattern stmt is what we need. */
1381 if (!is_gimple_assign (new_stmt
)
1382 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1383 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1386 stmts
->safe_push (def_stmt
);
1387 oprnd
= gimple_assign_lhs (new_stmt
);
1391 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1392 oprnd
= gimple_assign_rhs1 (def_stmt
);
1393 new_oprnd
= make_ssa_name (interm_type
);
1394 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1395 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1396 stmts
->safe_push (def_stmt
);
1402 /* Retrieve the operand before the type promotion. */
1403 oprnd
= gimple_assign_rhs1 (def_stmt
);
1410 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1411 new_oprnd
= make_ssa_name (interm_type
);
1412 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1414 *new_def_stmt
= new_stmt
;
1417 /* Otherwise, OPRND is already set. */
1421 *new_type
= interm_type
;
1423 *new_type
= half_type
;
1426 *op1
= fold_convert (*new_type
, const_oprnd
);
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     TYPE x_T, res0_T, res1_T;
     ...
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
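/* For illustration only (not from the original sources): C integer promotion
   frequently produces such over-widened sequences, e.g.

     unsigned char in[256], out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (in[i] << 1) ^ 0x55;

   where the shift and xor are performed in int only because of the promotion
   rules; this pattern lets them be done in a narrower type, since the result
   is immediately demoted back to unsigned char.  */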
1451 vect_recog_over_widening_pattern (vec
<gimple
*> *stmts
,
1452 tree
*type_in
, tree
*type_out
)
1454 gimple
*stmt
= stmts
->pop ();
1455 gimple
*pattern_stmt
= NULL
, *new_def_stmt
, *prev_stmt
= NULL
,
1457 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1458 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1465 if (!vinfo_for_stmt (stmt
)
1466 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1469 new_def_stmt
= NULL
;
1470 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1471 &op0
, &op1
, &new_def_stmt
,
1480 /* STMT can be performed on a smaller type. Check its uses. */
1481 use_stmt
= vect_single_imm_use (stmt
);
1482 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1485 /* Create pattern statement for STMT. */
1486 vectype
= get_vectype_for_scalar_type (new_type
);
  /* We want to collect all the statements for which we create pattern
     statements, except for the case when the last statement in the
     sequence doesn't have a corresponding pattern statement.  In such
     a case we associate the last pattern statement with the last statement
     in the sequence.  Therefore, we only add the original statement to
     the list if we know that it is not the last.  */
1497 stmts
->safe_push (prev_stmt
);
1499 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1501 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1502 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1503 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1505 if (dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE
, vect_location
,
1508 "created pattern stmt: ");
1509 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1510 dump_printf (MSG_NOTE
, "\n");
1513 type
= gimple_expr_type (stmt
);
1520 /* We got a sequence. We expect it to end with a type demotion operation.
1521 Otherwise, we quit (for now). There are three possible cases: the
1522 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1523 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1524 NEW_TYPE differs (we create a new conversion statement). */
1525 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1527 use_lhs
= gimple_assign_lhs (use_stmt
);
1528 use_type
= TREE_TYPE (use_lhs
);
      /* Support only type demotion or signedness change.  */
1530 if (!INTEGRAL_TYPE_P (use_type
)
1531 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1534 /* Check that NEW_TYPE is not bigger than the conversion result. */
1535 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1538 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1539 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1541 /* Create NEW_TYPE->USE_TYPE conversion. */
1542 new_oprnd
= make_ssa_name (use_type
);
1543 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1544 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1546 *type_in
= get_vectype_for_scalar_type (new_type
);
1547 *type_out
= get_vectype_for_scalar_type (use_type
);
1549 /* We created a pattern statement for the last statement in the
1550 sequence, so we don't need to associate it with the pattern
1551 statement created for PREV_STMT. Therefore, we add PREV_STMT
1552 to the list in order to mark it later in vect_pattern_recog_1. */
1554 stmts
->safe_push (prev_stmt
);
1559 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1560 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1563 *type_out
= NULL_TREE
;
1566 stmts
->safe_push (use_stmt
);
1569 /* TODO: support general case, create a conversion to the correct type. */
1572 /* Pattern detected. */
1573 if (dump_enabled_p ())
1575 dump_printf_loc (MSG_NOTE
, vect_location
,
1576 "vect_recog_over_widening_pattern: detected: ");
1577 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1578 dump_printf (MSG_NOTE
, "\n");
1581 return pattern_stmt
;
/* Detect widening shift pattern:

     ...
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     result_type res_result;
     ...
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;
     S4  res_result = (result_type) res_T;
         '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     TYPE a_T, res_T, res_T';
     ...
     S2  a_T = (TYPE) a_t;
         '--> a_it = (interm_type) a_t;
     S3  res_T = a_T << CONST;
         '--> res_T' = a_it <<* CONST;

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
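/* For illustration only (not part of the original sources): a typical
   widening-shift input is

     short a[256];
     int out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (int) a[i] << 8;

   which becomes a single WIDEN_LSHIFT_EXPR on the narrow operand where the
   target supports a widening vector shift.  */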
1642 vect_recog_widen_shift_pattern (vec
<gimple
*> *stmts
,
1643 tree
*type_in
, tree
*type_out
)
1645 gimple
*last_stmt
= stmts
->pop ();
1647 tree oprnd0
, oprnd1
;
1648 tree type
, half_type0
;
1649 gimple
*pattern_stmt
;
1650 tree vectype
, vectype_out
= NULL_TREE
;
1652 enum tree_code dummy_code
;
1654 vec
<tree
> dummy_vec
;
1658 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1661 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1664 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1667 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1668 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1669 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1672 /* Check operand 0: it has to be defined by a type promotion. */
1673 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1678 /* Check operand 1: has to be positive. We check that it fits the type
1679 in vect_handle_widen_op_by_const (). */
1680 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1683 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1684 type
= gimple_expr_type (last_stmt
);
1686 /* Check for subsequent conversion to another type. */
1687 use_stmt
= vect_single_imm_use (last_stmt
);
1688 if (use_stmt
&& is_gimple_assign (use_stmt
)
1689 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1690 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1692 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1693 tree use_type
= TREE_TYPE (use_lhs
);
1695 if (INTEGRAL_TYPE_P (use_type
)
1696 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1698 last_stmt
= use_stmt
;
  /* Check if this is a widening operation.  */
1704 gimple
*wstmt
= NULL
;
1705 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1707 type
, &half_type0
, def_stmt0
))
1710 /* Pattern detected. */
1711 if (dump_enabled_p ())
1712 dump_printf_loc (MSG_NOTE
, vect_location
,
1713 "vect_recog_widen_shift_pattern: detected:\n");
1715 /* Check target support. */
1716 vectype
= get_vectype_for_scalar_type (half_type0
);
1717 vectype_out
= get_vectype_for_scalar_type (type
);
1721 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1722 vectype_out
, vectype
,
1723 &dummy_code
, &dummy_code
,
1724 &dummy_int
, &dummy_vec
))
1728 *type_out
= vectype_out
;
1730 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1731 var
= vect_recog_temp_ssa_var (type
, NULL
);
1733 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1736 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1737 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1738 stmt_vec_info new_stmt_info
1739 = new_stmt_vec_info (wstmt
, stmt_vinfo
->vinfo
);
1740 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1741 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1744 if (dump_enabled_p ())
1745 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1747 stmts
->safe_push (last_stmt
);
1748 return pattern_stmt
;
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

     S0 a_t = b_t r<< c_t;

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
     with a sequence:

     S1 d_t = -c_t;
     S2 e_t = d_t & (B - 1);
     S3 f_t = b_t << c_t;
     S4 g_t = b_t >> e_t;
     S0 a_t = f_t | g_t;

     where B is element bitsize of type.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
     S0 stmt.  */
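/* For illustration only (not from the original sources): a rotate such as

     unsigned int x[256], n;
     for (int i = 0; i < 256; i++)
       x[i] = (x[i] << n) | (x[i] >> ((-n) & 31));

   is recognized earlier as x r<< n; when the target has vector shifts but no
   vector rotate, this pattern re-expands it into the masked shift/shift/ior
   sequence described above.  */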
1781 vect_recog_rotate_pattern (vec
<gimple
*> *stmts
, tree
*type_in
, tree
*type_out
)
1783 gimple
*last_stmt
= stmts
->pop ();
1784 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1785 gimple
*pattern_stmt
, *def_stmt
;
1786 enum tree_code rhs_code
;
1787 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1788 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1789 enum vect_def_type dt
;
1790 optab optab1
, optab2
;
1791 edge ext_def
= NULL
;
1793 if (!is_gimple_assign (last_stmt
))
1796 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1806 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1809 lhs
= gimple_assign_lhs (last_stmt
);
1810 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1811 type
= TREE_TYPE (oprnd0
);
1812 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1813 if (TREE_CODE (oprnd0
) != SSA_NAME
1814 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1815 || !INTEGRAL_TYPE_P (type
)
1816 || !TYPE_UNSIGNED (type
))
1819 if (!vect_is_simple_use (oprnd1
, vinfo
, &def_stmt
, &dt
))
1822 if (dt
!= vect_internal_def
1823 && dt
!= vect_constant_def
1824 && dt
!= vect_external_def
)
1827 vectype
= get_vectype_for_scalar_type (type
);
1828 if (vectype
== NULL_TREE
)
1831 /* If vector/vector or vector/scalar rotate is supported by the target,
1832 don't do anything here. */
1833 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1835 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1838 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1840 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1842 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1846 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1847 don't do anything here either. */
1848 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1849 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1851 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1853 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1855 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
1857 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1858 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1860 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1862 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1867 *type_out
= vectype
;
1868 if (*type_in
== NULL_TREE
)
1871 if (dt
== vect_external_def
1872 && TREE_CODE (oprnd1
) == SSA_NAME
1873 && is_a
<loop_vec_info
> (vinfo
))
1875 struct loop
*loop
= as_a
<loop_vec_info
> (vinfo
)->loop
;
1876 ext_def
= loop_preheader_edge (loop
);
1877 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1879 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1881 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1887 if (TREE_CODE (oprnd1
) == INTEGER_CST
1888 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1890 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1892 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1893 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1894 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1895 == TYPE_PRECISION (type
))
1899 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1900 if (def
== NULL_TREE
)
1902 def
= vect_recog_temp_ssa_var (type
, NULL
);
1903 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1907 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1908 gcc_assert (!new_bb
);
1911 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1913 stype
= TREE_TYPE (def
);
1915 if (TREE_CODE (def
) == INTEGER_CST
)
1917 if (!tree_fits_uhwi_p (def
)
1918 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1919 || integer_zerop (def
))
1921 def2
= build_int_cst (stype
,
1922 GET_MODE_PRECISION (TYPE_MODE (type
))
1923 - tree_to_uhwi (def
));
1927 tree vecstype
= get_vectype_for_scalar_type (stype
);
1928 stmt_vec_info def_stmt_vinfo
;
1930 if (vecstype
== NULL_TREE
)
1932 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1933 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1937 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1938 gcc_assert (!new_bb
);
1942 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1943 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1944 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1945 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1948 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1950 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1951 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1952 gimple_assign_lhs (def_stmt
), mask
);
1956 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1957 gcc_assert (!new_bb
);
1961 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1962 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1963 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1964 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1968 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1969 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
1970 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1972 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1974 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1975 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
1976 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
1978 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1980 /* Pattern detected. */
1981 if (dump_enabled_p ())
1982 dump_printf_loc (MSG_NOTE
, vect_location
,
1983 "vect_recog_rotate_pattern: detected:\n");
1985 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1986 var
= vect_recog_temp_ssa_var (type
, NULL
);
1987 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
1989 if (dump_enabled_p ())
1990 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1992 stmts
->safe_push (last_stmt
);
1993 return pattern_stmt
;
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     ...
     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

     TYPE b_T, c_T, res_T;
     ...
     S1 a_t = (type) c_T;
     ...
     S3 res_T = b_T op a_t;

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
     with a shift/rotate which has same type on both operands, in the
     second case just b_T op c_T, in the first case with added cast
     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
     S3 stmt.  */
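/* For illustration only (not part of the original sources): a vector-by-vector
   shift instruction shifts every element of the first operand by the matching
   element of the second operand, so both vector operands must have the same
   element size.  Given GIMPLE such as

     char a_t;
     long long b_T, c_T, res_T;
     a_t = (char) c_T;
     res_T = b_T << a_t;

   the replacement shift uses either c_T directly (when it has b_T's mode and
   precision) or a new cast of a_t to b_T's type, kept in
   STMT_VINFO_PATTERN_DEF_SEQ.  */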
2037 vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *stmts
,
2038 tree
*type_in
, tree
*type_out
)
2040 gimple
*last_stmt
= stmts
->pop ();
2041 tree oprnd0
, oprnd1
, lhs
, var
;
2042 gimple
*pattern_stmt
, *def_stmt
;
2043 enum tree_code rhs_code
;
2044 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2045 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2046 enum vect_def_type dt
;
2048 if (!is_gimple_assign (last_stmt
))
2051 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2063 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2066 lhs
= gimple_assign_lhs (last_stmt
);
2067 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2068 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2069 if (TREE_CODE (oprnd0
) != SSA_NAME
2070 || TREE_CODE (oprnd1
) != SSA_NAME
2071 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2072 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2073 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2074 || TYPE_PRECISION (TREE_TYPE (lhs
))
2075 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2078 if (!vect_is_simple_use (oprnd1
, vinfo
, &def_stmt
, &dt
))
2081 if (dt
!= vect_internal_def
)
2084 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2085 *type_out
= *type_in
;
2086 if (*type_in
== NULL_TREE
)
2089 tree def
= NULL_TREE
;
2090 if (gimple_assign_cast_p (def_stmt
))
2092 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2093 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2094 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2095 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2099 if (def
== NULL_TREE
)
2101 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2102 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2103 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2106 /* Pattern detected. */
2107 if (dump_enabled_p ())
2108 dump_printf_loc (MSG_NOTE
, vect_location
,
2109 "vect_recog_vector_vector_shift_pattern: detected:\n");
2111 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2112 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2113 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2115 if (dump_enabled_p ())
2116 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2118 stmts
->safe_push (last_stmt
);
2119 return pattern_stmt
;
/* Detect multiplication by a constant that is a positive or negative
   power of 2, and convert it to a shift pattern.

   Mult with constants that are a positive power of two:
   type a_t;
   type b_t;
   S1: b_t = a_t * n

   or

   Mult with constants that are a negative power of two:
   S2: b_t = a_t * -n

   Input/Output:

   STMTS: Contains a stmt from which the pattern search begins,
   i.e. the mult stmt.  Convert the mult operation to LSHIFT if the
   constant operand is a power of 2.
   type a_t, b_t;
   S1': b_t = a_t << log2 (n)

   Convert the mult operation to LSHIFT followed by a NEGATE
   if the constant operand is a negative power of 2.
   type a_t, b_t, res_T;
   S2': b_t = a_t << log2 (n)
   S3': res_T = - (b_t)

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the multiplication
     S1 or S2 stmt.  */
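/* E.g. (illustrative): for  b_t = a_t * 8  the pattern emits
   b_t = a_t << 3, and for  b_t = a_t * -8  it emits
   tmp = a_t << 3;  b_t = -tmp;  since log2 (8) == 3.  */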
static gimple *
vect_recog_mult_pattern (vec<gimple *> *stmts,
                         tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype;
  gimple *pattern_stmt, *def_stmt;
  optab optab;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  int power2_val, power2_neg_val;
  tree shift;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);

  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || !INTEGRAL_TYPE_P (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized multiplication natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  /* If the target cannot handle vector left shift then we cannot
     optimize and bail out.  */
  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    return NULL;

  power2_val = wi::exact_log2 (oprnd1);
  power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));

  /* Handle constant operands that are positive or negative powers of 2.  */
  if (power2_val != -1)
    {
      shift = build_int_cst (itype, power2_val);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               LSHIFT_EXPR, oprnd0, shift);
    }
  else if (power2_neg_val != -1)
    {
      /* If the target cannot handle vector NEGATE then we cannot
         do the optimization.  */
      optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
      if (!optab
          || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;

      shift = build_int_cst (itype, power2_neg_val);
      def_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               LSHIFT_EXPR, oprnd0, shift);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               NEGATE_EXPR, gimple_assign_lhs (def_stmt));
    }
  else
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mult_pattern: detected:\n");

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
                          pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  *type_in = vectype;
  *type_out = vectype;

  return pattern_stmt;
}
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced, if N is a power
    of two constant and type is signed, by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced, if N is a power of two constant and
    type is signed, by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
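/* Worked example (illustrative): for  a_t = b_t / 4  with signed 'type'
   the replacement is

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   e.g. b_t == -5 gives y_t == 3, x_t == -2 and a_t == -2 >> 2 == -1,
   matching truncating division (-5 / 4 == -1).  */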
static gimple *
vect_recog_divmod_pattern (vec<gimple *> *stmts,
                           tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign (var, COND_EXPR, cond,
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)),
                                   build_int_cst (itype, 0));
          new_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (var, PLUS_EXPR, oprnd0,
                                   gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   RSHIFT_EXPR, var, shift);
        }
      else
        {
          tree signmask;
          STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
                                              build_int_cst (itype, 1),
                                              build_int_cst (itype, 0));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt = gimple_build_assign (var, COND_EXPR, cond,
                                              build_int_cst (utype, -1),
                                              build_int_cst (utype, 0));
              def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
                                              gimple_assign_lhs (def_stmt),
                                              shift);
              def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (signmask, NOP_EXPR, var);
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   PLUS_EXPR, oprnd0, signmask);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
        }

      if (dump_enabled_p ())
        dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = floor_log2 (d & -d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                          build_int_cst (itype, ml));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t4,
                                       build_int_cst (itype, post_shift - 1));
            }
          else
            {
              q = t4;
              pattern_stmt = def_stmt;
            }
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
                                       build_int_cst (NULL, pre_shift));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
                                          build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t2,
                                       build_int_cst (itype, post_shift));
            }
          else
            {
              q = t2;
              pattern_stmt = def_stmt;
            }
        }
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      else if (HOST_BITS_PER_WIDE_INT >= prec
               && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        {
          add = true;
          ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                      build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
                                          build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
        {
          if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt = gimple_build_assign (t4, INTEGER_CST,
                                            build_int_cst (itype, msb));
          else
            def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
                                            build_int_cst (itype, prec - 1));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
                                              d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
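/* E.g. (illustrative), with

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 17 : -1;

   the comparison is done on 'short' elements while the result is 'int',
   so the pattern first computes the COND_EXPR in a 'short'-sized integer
   type and then widens the result:
     a_it = x_t < y_t ? 17 : -1;
     a_T = (int) a_it;  */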
static gimple *
vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                  COND_EXPR, unshare_expr (cond_expr),
                                  fold_convert (itype, then_clause),
                                  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                      NOP_EXPR, gimple_assign_lhs (def_stmt));

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (var))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple *cast_stmt, *pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                   NOP_EXPR, gimple_assign_lhs (pattern_stmt));
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     vec<gimple *> *stmts)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         the BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple *tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple *tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               rhs_code, irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || (TYPE_PRECISION (TREE_TYPE (rhs1))
              != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               COND_EXPR, cond_expr, trueval,
                               build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming size of TYPE is the same as size of all comparisons
        (otherwise some casts would be added where needed), for the above
        sequence we create the related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T & b_T;
        but the above is more efficient.  */
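/* E.g. (illustrative), after if-conversion of

     for (i = 0; i < N; i++)
       out[i] = (in1[i] < in2[i] && in3[i] != 0) ? 42 : 13;

   the loop body contains bool statements of the S1..S6 shape above,
   which this recognizer rewrites into integer COND_EXPRs of matching
   width.  */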
static gimple *
vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
                         tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
        return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      else
        pattern_stmt
          = gimple_build_assign (lhs, NOP_EXPR, rhs);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = wi::udiv_trunc (TYPE_SIZE (vectype),
                          TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign (lhs, COND_EXPR,
                               build2 (NE_EXPR, boolean_type_node,
                                       rhs, build_int_cst (type, 0)),
                               gimple_assign_rhs2 (last_stmt),
                               gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
      if (!check_bool_pattern (var, vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
          new_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ...;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &as_a <bb_vec_info> (vinfo)->bb;
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_a <bb_vec_info> (vinfo)
              && (stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
              vect_recog_func = vect_vect_recog_func_ptrs[j];
              vect_pattern_recog_1 (vect_recog_func, si,
                                    &stmts_to_replace);
            }
        }
    }
}