/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mult_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary SSA name of TYPE for a pattern statement.
   If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the returned
   SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

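/* For illustration (editor's sketch, not taken from the original sources):
   with 'type' = short and 'TYPE1' = 'TYPE2' = int, the pattern above is
   what the C loop

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) a[i] * (int) b[i];

   looks like in gimple: S3/S4 are the casts to int, S5 is the int
   multiplication, and S7 accumulates into the int reduction, so the whole
   sequence can be collapsed into DOT_PROD_EXPR <a[i], b[i], sum_0>.  */
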
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It cannot be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
   */

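/* For illustration (editor's sketch, not taken from the original sources):
   with 'type' = unsigned char and 'TYPE1' = 'TYPE2' = int, the pattern
   corresponds to the classic SAD kernel from video encoders:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       {
         int diff = (int) x[i] - (int) y[i];
         sum += __builtin_abs (diff);   /* folds to ABS_EXPR  */
       }

   The subtract is done in int only so it cannot wrap; the whole body
   collapses into SAD_EXPR <x[i], y[i], sum_0>.  */
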
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It cannot be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It cannot be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}


/* Handle a widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */

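/* For illustration (editor's sketch, not taken from the original sources):
   if TYPE is int (32 bits), HALF_TYPE is char (8 bits) and the operation
   is a_T << 9, the shift amount does not fit an 8-bit HALF_TYPE, but int
   is 4x wider than char, so we retry with a 16-bit NEW_TYPE: 9 <= 16, so
   the widening shift can be done from short instead, and we emit
   a_it = (short) a_t as *WSTMT.  */
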
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, so try a widening operation on
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for the widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of types 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
     '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/

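/* For illustration (editor's sketch, not taken from the original sources):
   with 'type1' = 'type2' = short and 'TYPE' = int, the first variant above
   corresponds to

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = (int) a[i] * (int) b[i];

   which becomes c[i] = a[i] w* b[i], i.e. WIDEN_MULT_EXPR on the narrow
   operands.  The constant variant would be e.g. c[i] = (int) a[i] * 3,
   with 3 folded into the narrow type.  */
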
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot, give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle the unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/

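/* For illustration (editor's note, restating the comment above):

     x = pow (y, 2.0);   -->   x = y * y;
     x = pow (y, 0.5);   -->   x = sqrt (y);

   Squaring has a direct vector form on essentially every target, while a
   vectorized pow call usually does not exist; the sqrt rewrite is only
   used below when vectorizable_function confirms a vector sqrt.  */
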
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

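/* For illustration (editor's sketch, not taken from the original sources):
   with 'type' = unsigned char and 'TYPE' = unsigned int:

     unsigned char a[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i];   /* a[i] is promoted before the add  */

   The promotion plus add becomes WIDEN_SUM <a[i], sum_0>, so the vector
   loop can load full vectors of chars instead of unpacking them first.  */
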
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */

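/* For illustration (editor's sketch, not taken from the original sources):
   given

     S2  x_T = (int) x_t;          <-- x_t of type unsigned char
     S3  res_T = x_T & 0x7f00;

   HALF_TYPE is unsigned char and 0x7f00 does not fit it, but 'int' is 4x
   wider than char, so we retry with the 16-bit INTERM_TYPE: 0x7f00 fits,
   S3 can be done on 16 bits, and the promotion S2 is narrowed to a
   promotion to INTERM_TYPE.  */
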
static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                    TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                    TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */

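/* For illustration (editor's sketch, not taken from the original sources):

     unsigned char a[N];
     unsigned short b[N];
     for (int i = 0; i < N; i++)
       b[i] = ((int) a[i] << 2) | 3;   /* done in int only because of
					  C's integer promotions  */

   The shift and the bitwise OR both fit a 16-bit intermediate type, and
   the result is demoted to unsigned short anyway, so the pattern lets the
   vectorizer work on 16-bit lanes and pack twice as many elements per
   vector as the over-widened int version would.  */
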
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last
         statement in the sequence.  Therefore, we only add the original
         statement to the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

   type a_t;
   TYPE a_T, res_T;
   result_type res_result;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;
   S4 res_result = (result_type) res_T;
   '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

   type a_t;
   interm_type a_it;
   TYPE a_T, res_T, res_T';

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   '--> a_it = (interm_type) a_t;
   S3 res_T = a_T << CONST;
   '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

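/* For illustration (editor's sketch, not taken from the original sources):

     unsigned char a[N];
     unsigned short r[N];
     for (int i = 0; i < N; i++)
       r[i] = a[i] << 4;   /* the shift is done in int by C's promotion
			      rules, then truncated to unsigned short  */

   Here 'type' = unsigned char, 'TYPE' = int and 'result_type' = unsigned
   short, so S3/S4 collapse into r[i] = a[i] w<< 4, i.e.
   WIDEN_LSHIFT_EXPR <a[i], 4>.  */
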
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: it has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1750
1751 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1752
1753 type a_t, b_t, c_t;
1754
1755 S0 a_t = b_t r<< c_t;
1756
1757 Input/Output:
1758
1759 * STMTS: Contains a stmt from which the pattern search begins,
1760 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1761 with a sequence:
1762
1763 S1 d_t = -c_t;
1764 S2 e_t = d_t & (B - 1);
1765 S3 f_t = b_t << c_t;
1766 S4 g_t = b_t >> e_t;
1767 S0 a_t = f_t | g_t;
1768
1769 where B is element bitsize of type.
1770
1771 Output:
1772
1773 * TYPE_IN: The type of the input arguments to the pattern.
1774
1775 * TYPE_OUT: The type of the output of this pattern.
1776
1777 * Return value: A new stmt that will be used to replace the rotate
1778 S0 stmt. */
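
/* A worked example (illustrative only), assuming a 32-bit unsigned
'type', so B == 32 and B - 1 == 31:

S0 a_t = b_t r<< c_t;

becomes

S1 d_t = -c_t;
S2 e_t = d_t & 31;
S3 f_t = b_t << c_t;
S4 g_t = b_t >> e_t;
S0 a_t = f_t | g_t;

For c_t in [1, 31], e_t == 32 - c_t, so ORing the two shifts
reassembles the rotated value; for c_t == 0 both shifts are by zero
and the IOR is a no-op. */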
1779
1780 static gimple *
1781 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1782 {
1783 gimple *last_stmt = stmts->pop ();
1784 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1785 gimple *pattern_stmt, *def_stmt;
1786 enum tree_code rhs_code;
1787 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1788 vec_info *vinfo = stmt_vinfo->vinfo;
1789 enum vect_def_type dt;
1790 optab optab1, optab2;
1791 edge ext_def = NULL;
1792
1793 if (!is_gimple_assign (last_stmt))
1794 return NULL;
1795
1796 rhs_code = gimple_assign_rhs_code (last_stmt);
1797 switch (rhs_code)
1798 {
1799 case LROTATE_EXPR:
1800 case RROTATE_EXPR:
1801 break;
1802 default:
1803 return NULL;
1804 }
1805
1806 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1807 return NULL;
1808
1809 lhs = gimple_assign_lhs (last_stmt);
1810 oprnd0 = gimple_assign_rhs1 (last_stmt);
1811 type = TREE_TYPE (oprnd0);
1812 oprnd1 = gimple_assign_rhs2 (last_stmt);
1813 if (TREE_CODE (oprnd0) != SSA_NAME
1814 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1815 || !INTEGRAL_TYPE_P (type)
1816 || !TYPE_UNSIGNED (type))
1817 return NULL;
1818
1819 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1820 return NULL;
1821
1822 if (dt != vect_internal_def
1823 && dt != vect_constant_def
1824 && dt != vect_external_def)
1825 return NULL;
1826
1827 vectype = get_vectype_for_scalar_type (type);
1828 if (vectype == NULL_TREE)
1829 return NULL;
1830
1831 /* If vector/vector or vector/scalar rotate is supported by the target,
1832 don't do anything here. */
1833 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1834 if (optab1
1835 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1836 return NULL;
1837
1838 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1839 {
1840 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1841 if (optab2
1842 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1843 return NULL;
1844 }
1845
1846 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1847 don't do anything here either. */
1848 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1849 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1850 if (!optab1
1851 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1852 || !optab2
1853 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1854 {
1855 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1856 return NULL;
1857 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1858 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1859 if (!optab1
1860 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1861 || !optab2
1862 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1863 return NULL;
1864 }
1865
1866 *type_in = vectype;
1867 *type_out = vectype;
1868 if (*type_in == NULL_TREE)
1869 return NULL;
1870
1871 if (dt == vect_external_def
1872 && TREE_CODE (oprnd1) == SSA_NAME
1873 && is_a <loop_vec_info> (vinfo))
1874 {
1875 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1876 ext_def = loop_preheader_edge (loop);
1877 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1878 {
1879 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1880 if (bb == NULL
1881 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1882 ext_def = NULL;
1883 }
1884 }
1885
1886 def = NULL_TREE;
1887 if (TREE_CODE (oprnd1) == INTEGER_CST
1888 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1889 def = oprnd1;
1890 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1891 {
1892 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1893 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1894 && TYPE_PRECISION (TREE_TYPE (rhs1))
1895 == TYPE_PRECISION (type))
1896 def = rhs1;
1897 }
1898
1899 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1900 if (def == NULL_TREE)
1901 {
1902 def = vect_recog_temp_ssa_var (type, NULL);
1903 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1904 if (ext_def)
1905 {
1906 basic_block new_bb
1907 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1908 gcc_assert (!new_bb);
1909 }
1910 else
1911 append_pattern_def_seq (stmt_vinfo, def_stmt);
1912 }
1913 stype = TREE_TYPE (def);
1914
1915 if (TREE_CODE (def) == INTEGER_CST)
1916 {
1917 if (!tree_fits_uhwi_p (def)
1918 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1919 || integer_zerop (def))
1920 return NULL;
1921 def2 = build_int_cst (stype,
1922 GET_MODE_PRECISION (TYPE_MODE (type))
1923 - tree_to_uhwi (def));
1924 }
1925 else
1926 {
1927 tree vecstype = get_vectype_for_scalar_type (stype);
1928 stmt_vec_info def_stmt_vinfo;
1929
1930 if (vecstype == NULL_TREE)
1931 return NULL;
1932 def2 = vect_recog_temp_ssa_var (stype, NULL);
1933 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1934 if (ext_def)
1935 {
1936 basic_block new_bb
1937 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1938 gcc_assert (!new_bb);
1939 }
1940 else
1941 {
1942 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1943 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1944 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1945 append_pattern_def_seq (stmt_vinfo, def_stmt);
1946 }
1947
1948 def2 = vect_recog_temp_ssa_var (stype, NULL);
1949 tree mask
1950 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1951 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1952 gimple_assign_lhs (def_stmt), mask);
1953 if (ext_def)
1954 {
1955 basic_block new_bb
1956 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1957 gcc_assert (!new_bb);
1958 }
1959 else
1960 {
1961 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1962 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1963 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1964 append_pattern_def_seq (stmt_vinfo, def_stmt);
1965 }
1966 }
1967
1968 var1 = vect_recog_temp_ssa_var (type, NULL);
1969 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1970 ? LSHIFT_EXPR : RSHIFT_EXPR,
1971 oprnd0, def);
1972 append_pattern_def_seq (stmt_vinfo, def_stmt);
1973
1974 var2 = vect_recog_temp_ssa_var (type, NULL);
1975 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1976 ? RSHIFT_EXPR : LSHIFT_EXPR,
1977 oprnd0, def2);
1978 append_pattern_def_seq (stmt_vinfo, def_stmt);
1979
1980 /* Pattern detected. */
1981 if (dump_enabled_p ())
1982 dump_printf_loc (MSG_NOTE, vect_location,
1983 "vect_recog_rotate_pattern: detected:\n");
1984
1985 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1986 var = vect_recog_temp_ssa_var (type, NULL);
1987 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1988
1989 if (dump_enabled_p ())
1990 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1991
1992 stmts->safe_push (last_stmt);
1993 return pattern_stmt;
1994 }
1995
1996 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1997 vectorized:
1998
1999 type a_t;
2000 TYPE b_T, res_T;
2001
2002 S1 a_t = ;
2003 S2 b_T = ;
2004 S3 res_T = b_T op a_t;
2005
2006 where type 'TYPE' is a type with a different size than 'type',
2007 and op is <<, >> or rotate.
2008
2009 Also detect cases:
2010
2011 type a_t;
2012 TYPE b_T, c_T, res_T;
2013
2014 S0 c_T = ;
2015 S1 a_t = (type) c_T;
2016 S2 b_T = ;
2017 S3 res_T = b_T op a_t;
2018
2019 Input/Output:
2020
2021 * STMTS: Contains a stmt from which the pattern search begins,
2022 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2023 with a shift/rotate which has the same type on both operands, in
2024 the second case just b_T op c_T, in the first case with an added
2025 cast from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2026
2027 Output:
2028
2029 * TYPE_IN: The type of the input arguments to the pattern.
2030
2031 * TYPE_OUT: The type of the output of this pattern.
2032
2033 * Return value: A new stmt that will be used to replace the shift/rotate
2034 S3 stmt. */
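
/* A concrete instance (illustrative only), assuming 8-bit 'char' and
32-bit 'int':

char a_t = ...;
int b_T = ...;
int res_T = b_T << a_t;

is rewritten as

int c_T = (int) a_t; <-- added to STMT_VINFO_PATTERN_DEF_SEQ
int res_T = b_T << c_T;

so that both shift operands share a single vector type. */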
2035
2036 static gimple *
2037 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2038 tree *type_in, tree *type_out)
2039 {
2040 gimple *last_stmt = stmts->pop ();
2041 tree oprnd0, oprnd1, lhs, var;
2042 gimple *pattern_stmt, *def_stmt;
2043 enum tree_code rhs_code;
2044 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2045 vec_info *vinfo = stmt_vinfo->vinfo;
2046 enum vect_def_type dt;
2047
2048 if (!is_gimple_assign (last_stmt))
2049 return NULL;
2050
2051 rhs_code = gimple_assign_rhs_code (last_stmt);
2052 switch (rhs_code)
2053 {
2054 case LSHIFT_EXPR:
2055 case RSHIFT_EXPR:
2056 case LROTATE_EXPR:
2057 case RROTATE_EXPR:
2058 break;
2059 default:
2060 return NULL;
2061 }
2062
2063 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2064 return NULL;
2065
2066 lhs = gimple_assign_lhs (last_stmt);
2067 oprnd0 = gimple_assign_rhs1 (last_stmt);
2068 oprnd1 = gimple_assign_rhs2 (last_stmt);
2069 if (TREE_CODE (oprnd0) != SSA_NAME
2070 || TREE_CODE (oprnd1) != SSA_NAME
2071 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2072 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2073 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2074 || TYPE_PRECISION (TREE_TYPE (lhs))
2075 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2076 return NULL;
2077
2078 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2079 return NULL;
2080
2081 if (dt != vect_internal_def)
2082 return NULL;
2083
2084 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2085 *type_out = *type_in;
2086 if (*type_in == NULL_TREE)
2087 return NULL;
2088
2089 tree def = NULL_TREE;
2090 if (gimple_assign_cast_p (def_stmt))
2091 {
2092 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2093 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2094 && TYPE_PRECISION (TREE_TYPE (rhs1))
2095 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2096 def = rhs1;
2097 }
2098
2099 if (def == NULL_TREE)
2100 {
2101 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2102 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2103 new_pattern_def_seq (stmt_vinfo, def_stmt);
2104 }
2105
2106 /* Pattern detected. */
2107 if (dump_enabled_p ())
2108 dump_printf_loc (MSG_NOTE, vect_location,
2109 "vect_recog_vector_vector_shift_pattern: detected:\n");
2110
2111 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2112 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2113 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2114
2115 if (dump_enabled_p ())
2116 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2117
2118 stmts->safe_push (last_stmt);
2119 return pattern_stmt;
2120 }
2121
2122 /* Detect multiplications by constants that are positive or negative powers of 2,
2123 and convert them to shift patterns.
2124
2125 Mult with constants that are a positive power of two.
2126 type a_t;
2127 type b_t;
2128 S1: b_t = a_t * n
2129
2130 or
2131
2132 Mult with constants that are a negative power of two.
2133 S2: b_t = a_t * -n
2134
2135 Input/Output:
2136
2137 STMTS: Contains a stmt from which the pattern search begins,
2138 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2139 the constant operand is a power of 2.
2140 type a_t, b_t
2141 S1': b_t = a_t << log2 (n)
2142
2143 Convert the mult operation to LSHIFT followed by a NEGATE
2144 if the constant operand is a negative power of 2.
2145 type a_t, b_t, res_T;
2146 S2': b_t = a_t << log2 (n)
2147 S3': res_T = - (b_t)
2148
2149 Output:
2150
2151 * TYPE_IN: The type of the input arguments to the pattern.
2152
2153 * TYPE_OUT: The type of the output of this pattern.
2154
2155 * Return value: A new stmt that will be used to replace the multiplication
2156 S1 or S2 stmt. */
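
/* Worked instances (illustrative only), with n == 8, so log2 (n) == 3:

S1: b_t = a_t * 8 becomes S1': b_t = a_t << 3
S2: b_t = a_t * -8 becomes S2': c_t = a_t << 3
S3': b_t = - (c_t) */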
2157
2158 static gimple *
2159 vect_recog_mult_pattern (vec<gimple *> *stmts,
2160 tree *type_in, tree *type_out)
2161 {
2162 gimple *last_stmt = stmts->pop ();
2163 tree oprnd0, oprnd1, vectype, itype;
2164 gimple *pattern_stmt, *def_stmt;
2165 optab optab;
2166 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2167 int power2_val, power2_neg_val;
2168 tree shift;
2169
2170 if (!is_gimple_assign (last_stmt))
2171 return NULL;
2172
2173 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2174 return NULL;
2175
2176 oprnd0 = gimple_assign_rhs1 (last_stmt);
2177 oprnd1 = gimple_assign_rhs2 (last_stmt);
2178 itype = TREE_TYPE (oprnd0);
2179
2180 if (TREE_CODE (oprnd0) != SSA_NAME
2181 || TREE_CODE (oprnd1) != INTEGER_CST
2182 || !INTEGRAL_TYPE_P (itype)
2183 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2184 return NULL;
2185
2186 vectype = get_vectype_for_scalar_type (itype);
2187 if (vectype == NULL_TREE)
2188 return NULL;
2189
2190 /* If the target can handle vectorized multiplication natively,
2191 don't attempt to optimize this. */
2192 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2193 if (optab != unknown_optab)
2194 {
2195 machine_mode vec_mode = TYPE_MODE (vectype);
2196 int icode = (int) optab_handler (optab, vec_mode);
2197 if (icode != CODE_FOR_nothing)
2198 return NULL;
2199 }
2200
2201 /* If the target cannot handle the vector left shift then we cannot
2202 optimize, so bail out. */
2203 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2204 if (!optab
2205 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2206 return NULL;
2207
2208 power2_val = wi::exact_log2 (oprnd1);
2209 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2210
2211 /* Handle constant operands that are positive or negative powers of 2. */
2212 if (power2_val != -1)
2213 {
2214 shift = build_int_cst (itype, power2_val);
2215 pattern_stmt
2216 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2217 LSHIFT_EXPR, oprnd0, shift);
2218 }
2219 else if (power2_neg_val != -1)
2220 {
2221 /* If the target cannot handle vector NEGATE then we cannot
2222 do the optimization. */
2223 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2224 if (!optab
2225 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2226 return NULL;
2227
2228 shift = build_int_cst (itype, power2_neg_val);
2229 def_stmt
2230 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2231 LSHIFT_EXPR, oprnd0, shift);
2232 new_pattern_def_seq (stmt_vinfo, def_stmt);
2233 pattern_stmt
2234 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2235 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2236 }
2237 else
2238 return NULL;
2239
2240 /* Pattern detected. */
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_NOTE, vect_location,
2243 "vect_recog_mult_pattern: detected:\n");
2244
2245 if (dump_enabled_p ())
2246 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2247 pattern_stmt, 0);
2248
2249 stmts->safe_push (last_stmt);
2250 *type_in = vectype;
2251 *type_out = vectype;
2252
2253 return pattern_stmt;
2254 }
2255
2256 /* Detect a signed division by a constant that wouldn't be
2257 otherwise vectorized:
2258
2259 type a_t, b_t;
2260
2261 S1 a_t = b_t / N;
2262
2263 where type 'type' is an integral type and N is a constant.
2264
2265 Similarly handle modulo by a constant:
2266
2267 S4 a_t = b_t % N;
2268
2269 Input/Output:
2270
2271 * STMTS: Contains a stmt from which the pattern search begins,
2272 i.e. the division stmt. If N is a power of two constant
2273 and type is signed, S1 is replaced by:
2274 S3 y_t = b_t < 0 ? N - 1 : 0;
2275 S2 x_t = b_t + y_t;
2276 S1' a_t = x_t >> log2 (N);
2277
2278 If N is a power of two constant and type is signed,
2279 S4 is replaced by (where *_T temporaries have unsigned type):
2280 S9 y_T = b_t < 0 ? -1U : 0U;
2281 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2282 S7 z_t = (type) z_T;
2283 S6 w_t = b_t + z_t;
2284 S5 x_t = w_t & (N - 1);
2285 S4' a_t = x_t - z_t;
2286
2287 Output:
2288
2289 * TYPE_IN: The type of the input arguments to the pattern.
2290
2291 * TYPE_OUT: The type of the output of this pattern.
2292
2293 * Return value: A new stmt that will be used to replace the division
2294 S1 or modulo S4 stmt. */
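
/* A worked instance of the division case (illustrative only), with
N == 4 and a signed 32-bit 'type':

a_t = b_t / 4;

becomes

y_t = b_t < 0 ? 3 : 0;
x_t = b_t + y_t;
a_t = x_t >> 2;

Biasing a negative dividend by N - 1 makes the arithmetic right
shift round towards zero, matching C's truncating division:
e.g. (-7 + 3) >> 2 == -1 == -7 / 4. */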
2295
2296 static gimple *
2297 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2298 tree *type_in, tree *type_out)
2299 {
2300 gimple *last_stmt = stmts->pop ();
2301 tree oprnd0, oprnd1, vectype, itype, cond;
2302 gimple *pattern_stmt, *def_stmt;
2303 enum tree_code rhs_code;
2304 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2305 vec_info *vinfo = stmt_vinfo->vinfo;
2306 optab optab;
2307 tree q;
2308 int dummy_int, prec;
2309 stmt_vec_info def_stmt_vinfo;
2310
2311 if (!is_gimple_assign (last_stmt))
2312 return NULL;
2313
2314 rhs_code = gimple_assign_rhs_code (last_stmt);
2315 switch (rhs_code)
2316 {
2317 case TRUNC_DIV_EXPR:
2318 case TRUNC_MOD_EXPR:
2319 break;
2320 default:
2321 return NULL;
2322 }
2323
2324 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2325 return NULL;
2326
2327 oprnd0 = gimple_assign_rhs1 (last_stmt);
2328 oprnd1 = gimple_assign_rhs2 (last_stmt);
2329 itype = TREE_TYPE (oprnd0);
2330 if (TREE_CODE (oprnd0) != SSA_NAME
2331 || TREE_CODE (oprnd1) != INTEGER_CST
2332 || TREE_CODE (itype) != INTEGER_TYPE
2333 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2334 return NULL;
2335
2336 vectype = get_vectype_for_scalar_type (itype);
2337 if (vectype == NULL_TREE)
2338 return NULL;
2339
2340 /* If the target can handle vectorized division or modulo natively,
2341 don't attempt to optimize this. */
2342 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2343 if (optab != unknown_optab)
2344 {
2345 machine_mode vec_mode = TYPE_MODE (vectype);
2346 int icode = (int) optab_handler (optab, vec_mode);
2347 if (icode != CODE_FOR_nothing)
2348 return NULL;
2349 }
2350
2351 prec = TYPE_PRECISION (itype);
2352 if (integer_pow2p (oprnd1))
2353 {
2354 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2355 return NULL;
2356
2357 /* Pattern detected. */
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_NOTE, vect_location,
2360 "vect_recog_divmod_pattern: detected:\n");
2361
2362 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2363 build_int_cst (itype, 0));
2364 if (rhs_code == TRUNC_DIV_EXPR)
2365 {
2366 tree var = vect_recog_temp_ssa_var (itype, NULL);
2367 tree shift;
2368 def_stmt
2369 = gimple_build_assign (var, COND_EXPR, cond,
2370 fold_build2 (MINUS_EXPR, itype, oprnd1,
2371 build_int_cst (itype, 1)),
2372 build_int_cst (itype, 0));
2373 new_pattern_def_seq (stmt_vinfo, def_stmt);
2374 var = vect_recog_temp_ssa_var (itype, NULL);
2375 def_stmt
2376 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2377 gimple_assign_lhs (def_stmt));
2378 append_pattern_def_seq (stmt_vinfo, def_stmt);
2379
2380 shift = build_int_cst (itype, tree_log2 (oprnd1));
2381 pattern_stmt
2382 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2383 RSHIFT_EXPR, var, shift);
2384 }
2385 else
2386 {
2387 tree signmask;
2388 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2389 if (compare_tree_int (oprnd1, 2) == 0)
2390 {
2391 signmask = vect_recog_temp_ssa_var (itype, NULL);
2392 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2393 build_int_cst (itype, 1),
2394 build_int_cst (itype, 0));
2395 append_pattern_def_seq (stmt_vinfo, def_stmt);
2396 }
2397 else
2398 {
2399 tree utype
2400 = build_nonstandard_integer_type (prec, 1);
2401 tree vecutype = get_vectype_for_scalar_type (utype);
2402 tree shift
2403 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2404 - tree_log2 (oprnd1));
2405 tree var = vect_recog_temp_ssa_var (utype, NULL);
2406
2407 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2408 build_int_cst (utype, -1),
2409 build_int_cst (utype, 0));
2410 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2411 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2412 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2413 append_pattern_def_seq (stmt_vinfo, def_stmt);
2414 var = vect_recog_temp_ssa_var (utype, NULL);
2415 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2416 gimple_assign_lhs (def_stmt),
2417 shift);
2418 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2419 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2420 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2421 append_pattern_def_seq (stmt_vinfo, def_stmt);
2422 signmask = vect_recog_temp_ssa_var (itype, NULL);
2423 def_stmt
2424 = gimple_build_assign (signmask, NOP_EXPR, var);
2425 append_pattern_def_seq (stmt_vinfo, def_stmt);
2426 }
2427 def_stmt
2428 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2429 PLUS_EXPR, oprnd0, signmask);
2430 append_pattern_def_seq (stmt_vinfo, def_stmt);
2431 def_stmt
2432 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2433 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2434 fold_build2 (MINUS_EXPR, itype, oprnd1,
2435 build_int_cst (itype, 1)));
2436 append_pattern_def_seq (stmt_vinfo, def_stmt);
2437
2438 pattern_stmt
2439 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2440 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2441 signmask);
2442 }
2443
2444 if (dump_enabled_p ())
2445 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2446 0);
2447
2448 stmts->safe_push (last_stmt);
2449
2450 *type_in = vectype;
2451 *type_out = vectype;
2452 return pattern_stmt;
2453 }
2454
2455 if (prec > HOST_BITS_PER_WIDE_INT
2456 || integer_zerop (oprnd1))
2457 return NULL;
2458
2459 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2460 return NULL;
2461
2462 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2463
2464 if (TYPE_UNSIGNED (itype))
2465 {
2466 unsigned HOST_WIDE_INT mh, ml;
2467 int pre_shift, post_shift;
2468 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2469 & GET_MODE_MASK (TYPE_MODE (itype)));
2470 tree t1, t2, t3, t4;
2471
2472 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2473 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2474 return NULL;
2475
2476 /* Find a suitable multiplier and right shift count
2477 instead of multiplying with D. */
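/* E.g. for d = 3 and prec = 32 (an illustrative case) this yields
ml = 0xAAAAAAAB with post_shift = 1, so below the quotient is
computed as (oprnd0 h* 0xAAAAAAAB) >> 1. */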
2478 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2479
2480 /* If the suggested multiplier is more than PREC bits, we can do better
2481 for even divisors, using an initial right shift. */
2482 if (mh != 0 && (d & 1) == 0)
2483 {
2484 pre_shift = floor_log2 (d & -d);
2485 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2486 &ml, &post_shift, &dummy_int);
2487 gcc_assert (!mh);
2488 }
2489 else
2490 pre_shift = 0;
2491
2492 if (mh != 0)
2493 {
2494 if (post_shift - 1 >= prec)
2495 return NULL;
2496
2497 /* t1 = oprnd0 h* ml;
2498 t2 = oprnd0 - t1;
2499 t3 = t2 >> 1;
2500 t4 = t1 + t3;
2501 q = t4 >> (post_shift - 1); */
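/* E.g. for d = 7 and prec = 32 (an illustrative case), ml is
0x24924925 and post_shift is 3, so the quotient becomes
(t1 + ((oprnd0 - t1) >> 1)) >> 2. */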
2502 t1 = vect_recog_temp_ssa_var (itype, NULL);
2503 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2504 build_int_cst (itype, ml));
2505 append_pattern_def_seq (stmt_vinfo, def_stmt);
2506
2507 t2 = vect_recog_temp_ssa_var (itype, NULL);
2508 def_stmt
2509 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2510 append_pattern_def_seq (stmt_vinfo, def_stmt);
2511
2512 t3 = vect_recog_temp_ssa_var (itype, NULL);
2513 def_stmt
2514 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2515 append_pattern_def_seq (stmt_vinfo, def_stmt);
2516
2517 t4 = vect_recog_temp_ssa_var (itype, NULL);
2518 def_stmt
2519 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2520
2521 if (post_shift != 1)
2522 {
2523 append_pattern_def_seq (stmt_vinfo, def_stmt);
2524
2525 q = vect_recog_temp_ssa_var (itype, NULL);
2526 pattern_stmt
2527 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2528 build_int_cst (itype, post_shift - 1));
2529 }
2530 else
2531 {
2532 q = t4;
2533 pattern_stmt = def_stmt;
2534 }
2535 }
2536 else
2537 {
2538 if (pre_shift >= prec || post_shift >= prec)
2539 return NULL;
2540
2541 /* t1 = oprnd0 >> pre_shift;
2542 t2 = t1 h* ml;
2543 q = t2 >> post_shift; */
2544 if (pre_shift)
2545 {
2546 t1 = vect_recog_temp_ssa_var (itype, NULL);
2547 def_stmt
2548 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2549 build_int_cst (NULL, pre_shift));
2550 append_pattern_def_seq (stmt_vinfo, def_stmt);
2551 }
2552 else
2553 t1 = oprnd0;
2554
2555 t2 = vect_recog_temp_ssa_var (itype, NULL);
2556 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2557 build_int_cst (itype, ml));
2558
2559 if (post_shift)
2560 {
2561 append_pattern_def_seq (stmt_vinfo, def_stmt);
2562
2563 q = vect_recog_temp_ssa_var (itype, NULL);
2564 def_stmt
2565 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2566 build_int_cst (itype, post_shift));
2567 }
2568 else
2569 q = t2;
2570
2571 pattern_stmt = def_stmt;
2572 }
2573 }
2574 else
2575 {
2576 unsigned HOST_WIDE_INT ml;
2577 int post_shift;
2578 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2579 unsigned HOST_WIDE_INT abs_d;
2580 bool add = false;
2581 tree t1, t2, t3, t4;
2582
2583 /* Give up for -1. */
2584 if (d == -1)
2585 return NULL;
2586
2587 /* Since d might be INT_MIN, we have to cast to
2588 unsigned HOST_WIDE_INT before negating to avoid
2589 undefined signed overflow. */
2590 abs_d = (d >= 0
2591 ? (unsigned HOST_WIDE_INT) d
2592 : - (unsigned HOST_WIDE_INT) d);
2593
2594 /* n rem d = n rem -d */
2595 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2596 {
2597 d = abs_d;
2598 oprnd1 = build_int_cst (itype, abs_d);
2599 }
2600 else if (HOST_BITS_PER_WIDE_INT >= prec
2601 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2602 /* This case is not handled correctly below. */
2603 return NULL;
2604
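/* Find a suitable multiplier and shift count for |d|; e.g. for
d = 3 and prec = 32 (an illustrative case) this yields
ml = 0x55555556 with post_shift = 0, and the quotient below
becomes (oprnd0 h* 0x55555556) - (oprnd0 >> 31). */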
2605 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2606 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2607 {
2608 add = true;
2609 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2610 }
2611 if (post_shift >= prec)
2612 return NULL;
2613
2614 /* t1 = oprnd0 h* ml; */
2615 t1 = vect_recog_temp_ssa_var (itype, NULL);
2616 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2617 build_int_cst (itype, ml));
2618
2619 if (add)
2620 {
2621 /* t2 = t1 + oprnd0; */
2622 append_pattern_def_seq (stmt_vinfo, def_stmt);
2623 t2 = vect_recog_temp_ssa_var (itype, NULL);
2624 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2625 }
2626 else
2627 t2 = t1;
2628
2629 if (post_shift)
2630 {
2631 /* t3 = t2 >> post_shift; */
2632 append_pattern_def_seq (stmt_vinfo, def_stmt);
2633 t3 = vect_recog_temp_ssa_var (itype, NULL);
2634 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2635 build_int_cst (itype, post_shift));
2636 }
2637 else
2638 t3 = t2;
2639
2640 wide_int oprnd0_min, oprnd0_max;
2641 int msb = 1;
2642 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2643 {
2644 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2645 msb = 0;
2646 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2647 msb = -1;
2648 }
2649
2650 if (msb == 0 && d >= 0)
2651 {
2652 /* q = t3; */
2653 q = t3;
2654 pattern_stmt = def_stmt;
2655 }
2656 else
2657 {
2658 /* t4 = oprnd0 >> (prec - 1);
2659 or if we know from VRP that oprnd0 >= 0
2660 t4 = 0;
2661 or if we know from VRP that oprnd0 < 0
2662 t4 = -1; */
2663 append_pattern_def_seq (stmt_vinfo, def_stmt);
2664 t4 = vect_recog_temp_ssa_var (itype, NULL);
2665 if (msb != 1)
2666 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2667 build_int_cst (itype, msb));
2668 else
2669 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2670 build_int_cst (itype, prec - 1));
2671 append_pattern_def_seq (stmt_vinfo, def_stmt);
2672
2673 /* q = t3 - t4; or q = t4 - t3; */
2674 q = vect_recog_temp_ssa_var (itype, NULL);
2675 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2676 d < 0 ? t3 : t4);
2677 }
2678 }
2679
2680 if (rhs_code == TRUNC_MOD_EXPR)
2681 {
2682 tree r, t1;
2683
2684 /* We divided. Now finish by:
2685 t1 = q * oprnd1;
2686 r = oprnd0 - t1; */
2687 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2688
2689 t1 = vect_recog_temp_ssa_var (itype, NULL);
2690 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2691 append_pattern_def_seq (stmt_vinfo, def_stmt);
2692
2693 r = vect_recog_temp_ssa_var (itype, NULL);
2694 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2695 }
2696
2697 /* Pattern detected. */
2698 if (dump_enabled_p ())
2699 {
2700 dump_printf_loc (MSG_NOTE, vect_location,
2701 "vect_recog_divmod_pattern: detected: ");
2702 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2703 dump_printf (MSG_NOTE, "\n");
2704 }
2705
2706 stmts->safe_push (last_stmt);
2707
2708 *type_in = vectype;
2709 *type_out = vectype;
2710 return pattern_stmt;
2711 }
2712
2713 /* Function vect_recog_mixed_size_cond_pattern
2714
2715 Try to find the following pattern:
2716
2717 type x_t, y_t;
2718 TYPE a_T, b_T, c_T;
2719 loop:
2720 S1 a_T = x_t CMP y_t ? b_T : c_T;
2721
2722 where type 'TYPE' is an integral type which has a different size
2723 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2724 than 'type', the constants need to fit into an integer type
2725 with the same width as 'type') or results of conversion from 'type'.
2726
2727 Input:
2728
2729 * LAST_STMT: A stmt from which the pattern search begins.
2730
2731 Output:
2732
2733 * TYPE_IN: The type of the input arguments to the pattern.
2734
2735 * TYPE_OUT: The type of the output of this pattern.
2736
2737 * Return value: A new stmt that will be used to replace the pattern.
2738 Additionally a def_stmt is added.
2739
2740 a_it = x_t CMP y_t ? b_it : c_it;
2741 a_T = (TYPE) a_it; */
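
/* A concrete instance (illustrative only), assuming 16-bit 'short'
and 32-bit 'int':

short x_t, y_t;
int a_T;
a_T = x_t < y_t ? 1 : 2;

becomes

a_it = x_t < y_t ? 1 : 2;
a_T = (int) a_it;

where a_it has the 16-bit intermediate type, so the COND_EXPR can
use the comparison's narrower vector type. */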
2742
2743 static gimple *
2744 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2745 tree *type_out)
2746 {
2747 gimple *last_stmt = (*stmts)[0];
2748 tree cond_expr, then_clause, else_clause;
2749 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2750 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2751 gimple *pattern_stmt, *def_stmt;
2752 vec_info *vinfo = stmt_vinfo->vinfo;
2753 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2754 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2755 bool promotion;
2756 tree comp_scalar_type;
2757
2758 if (!is_gimple_assign (last_stmt)
2759 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2760 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2761 return NULL;
2762
2763 cond_expr = gimple_assign_rhs1 (last_stmt);
2764 then_clause = gimple_assign_rhs2 (last_stmt);
2765 else_clause = gimple_assign_rhs3 (last_stmt);
2766
2767 if (!COMPARISON_CLASS_P (cond_expr))
2768 return NULL;
2769
2770 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2771 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2772 if (comp_vectype == NULL_TREE)
2773 return NULL;
2774
2775 type = gimple_expr_type (last_stmt);
2776 if (types_compatible_p (type, comp_scalar_type)
2777 || ((TREE_CODE (then_clause) != INTEGER_CST
2778 || TREE_CODE (else_clause) != INTEGER_CST)
2779 && !INTEGRAL_TYPE_P (comp_scalar_type))
2780 || !INTEGRAL_TYPE_P (type))
2781 return NULL;
2782
2783 if ((TREE_CODE (then_clause) != INTEGER_CST
2784 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2785 &def_stmt0, &promotion))
2786 || (TREE_CODE (else_clause) != INTEGER_CST
2787 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2788 &def_stmt1, &promotion)))
2789 return NULL;
2790
2791 if (orig_type0 && orig_type1
2792 && !types_compatible_p (orig_type0, orig_type1))
2793 return NULL;
2794
2795 if (orig_type0)
2796 {
2797 if (!types_compatible_p (orig_type0, comp_scalar_type))
2798 return NULL;
2799 then_clause = gimple_assign_rhs1 (def_stmt0);
2800 itype = orig_type0;
2801 }
2802
2803 if (orig_type1)
2804 {
2805 if (!types_compatible_p (orig_type1, comp_scalar_type))
2806 return NULL;
2807 else_clause = gimple_assign_rhs1 (def_stmt1);
2808 itype = orig_type1;
2809 }
2810
2812 HOST_WIDE_INT cmp_mode_size
2813 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2814
2815 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2816 return NULL;
2817
2818 vectype = get_vectype_for_scalar_type (type);
2819 if (vectype == NULL_TREE)
2820 return NULL;
2821
2822 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2823 return NULL;
2824
2825 if (itype == NULL_TREE)
2826 itype = build_nonstandard_integer_type (cmp_mode_size,
2827 TYPE_UNSIGNED (type));
2828
2829 if (itype == NULL_TREE
2830 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2831 return NULL;
2832
2833 vecitype = get_vectype_for_scalar_type (itype);
2834 if (vecitype == NULL_TREE)
2835 return NULL;
2836
2837 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2838 return NULL;
2839
2840 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2841 {
2842 if ((TREE_CODE (then_clause) == INTEGER_CST
2843 && !int_fits_type_p (then_clause, itype))
2844 || (TREE_CODE (else_clause) == INTEGER_CST
2845 && !int_fits_type_p (else_clause, itype)))
2846 return NULL;
2847 }
2848
2849 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2850 COND_EXPR, unshare_expr (cond_expr),
2851 fold_convert (itype, then_clause),
2852 fold_convert (itype, else_clause));
2853 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2854 NOP_EXPR, gimple_assign_lhs (def_stmt));
2855
2856 new_pattern_def_seq (stmt_vinfo, def_stmt);
2857 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2858 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2859 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2860 *type_in = vecitype;
2861 *type_out = vectype;
2862
2863 if (dump_enabled_p ())
2864 dump_printf_loc (MSG_NOTE, vect_location,
2865 "vect_recog_mixed_size_cond_pattern: detected:\n");
2866
2867 return pattern_stmt;
2868 }
2869
2870
2871 /* Helper function of vect_recog_bool_pattern. Called recursively; return
2872 true if bool VAR can be optimized that way. */
2873
2874 static bool
2875 check_bool_pattern (tree var, vec_info *vinfo)
2876 {
2877 gimple *def_stmt;
2878 enum vect_def_type dt;
2879 tree rhs1;
2880 enum tree_code rhs_code;
2881
2882 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2883 return false;
2884
2885 if (dt != vect_internal_def)
2886 return false;
2887
2888 if (!is_gimple_assign (def_stmt))
2889 return false;
2890
2891 if (!has_single_use (var))
2892 return false;
2893
2894 rhs1 = gimple_assign_rhs1 (def_stmt);
2895 rhs_code = gimple_assign_rhs_code (def_stmt);
2896 switch (rhs_code)
2897 {
2898 case SSA_NAME:
2899 return check_bool_pattern (rhs1, vinfo);
2900
2901 CASE_CONVERT:
2902 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2903 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2904 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2905 return false;
2906 return check_bool_pattern (rhs1, vinfo);
2907
2908 case BIT_NOT_EXPR:
2909 return check_bool_pattern (rhs1, vinfo);
2910
2911 case BIT_AND_EXPR:
2912 case BIT_IOR_EXPR:
2913 case BIT_XOR_EXPR:
2914 if (!check_bool_pattern (rhs1, vinfo))
2915 return false;
2916 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2917
2918 default:
2919 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2920 {
2921 tree vecitype, comp_vectype;
2922
2923 /* If the comparison can throw, then is_gimple_condexpr will be
2924 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2925 if (stmt_could_throw_p (def_stmt))
2926 return false;
2927
2928 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2929 if (comp_vectype == NULL_TREE)
2930 return false;
2931
2932 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2933 {
2934 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2935 tree itype
2936 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2937 vecitype = get_vectype_for_scalar_type (itype);
2938 if (vecitype == NULL_TREE)
2939 return false;
2940 }
2941 else
2942 vecitype = comp_vectype;
2943 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2944 }
2945 return false;
2946 }
2947 }
2948
2949
2950 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2951 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2952 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2953
2954 static tree
2955 adjust_bool_pattern_cast (tree type, tree var)
2956 {
2957 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2958 gimple *cast_stmt, *pattern_stmt;
2959
2960 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2961 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2962 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2963 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2964 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2965 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2966 return gimple_assign_lhs (cast_stmt);
2967 }
2968
2969
2970 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2971 recursively. VAR is an SSA_NAME that should be transformed from bool
2972 to a wider integer type, OUT_TYPE is the desired final integer type of
2973 the whole pattern, TRUEVAL should be NULL unless optimizing
2974 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2975 in the then_clause. STMTS is where statements with added pattern stmts
2976 should be pushed. */
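
/* For instance (illustrative only), the BIT_NOT_EXPR case rewrites

b_b = ~a_b;

where a_b has already been widened to irhs1 of integer type itype,
as

tmp = irhs1 ^ 1;

which is correct because irhs1 is known to be 0 or 1. */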
2977
2978 static tree
2979 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2980 vec<gimple *> *stmts)
2981 {
2982 gimple *stmt = SSA_NAME_DEF_STMT (var);
2983 enum tree_code rhs_code, def_rhs_code;
2984 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2985 location_t loc;
2986 gimple *pattern_stmt, *def_stmt;
2987
2988 rhs1 = gimple_assign_rhs1 (stmt);
2989 rhs2 = gimple_assign_rhs2 (stmt);
2990 rhs_code = gimple_assign_rhs_code (stmt);
2991 loc = gimple_location (stmt);
2992 switch (rhs_code)
2993 {
2994 case SSA_NAME:
2995 CASE_CONVERT:
2996 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2997 itype = TREE_TYPE (irhs1);
2998 pattern_stmt
2999 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3000 SSA_NAME, irhs1);
3001 break;
3002
3003 case BIT_NOT_EXPR:
3004 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3005 itype = TREE_TYPE (irhs1);
3006 pattern_stmt
3007 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3008 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3009 break;
3010
3011 case BIT_AND_EXPR:
3012 /* Try to optimize x = y & (a < b ? 1 : 0); into
3013 x = (a < b ? y : 0);
3014
3015 E.g. for:
3016 bool a_b, b_b, c_b;
3017 TYPE d_T;
3018
3019 S1 a_b = x1 CMP1 y1;
3020 S2 b_b = x2 CMP2 y2;
3021 S3 c_b = a_b & b_b;
3022 S4 d_T = (TYPE) c_b;
3023
3024 we would normally emit:
3025
3026 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3027 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3028 S3' c_T = a_T & b_T;
3029 S4' d_T = c_T;
3030
3031 but we can save one stmt by using the
3032 result of one of the COND_EXPRs in the other COND_EXPR and leave
3033 BIT_AND_EXPR stmt out:
3034
3035 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3036 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3037 S4' f_T = c_T;
3038
3039 At least when VEC_COND_EXPR is implemented using masks,
3040 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3041 computes the comparison mask and ANDs it, in one case with an
3042 all-ones vector, in the other case with a vector register.
3043 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3044 often more expensive. */
3045 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3046 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3047 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3048 {
3049 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3050 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3051 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3052 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3053 {
3054 gimple *tstmt;
3055 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3056 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3057 tstmt = stmts->pop ();
3058 gcc_assert (tstmt == def_stmt);
3059 stmts->quick_push (stmt);
3060 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3061 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3062 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3063 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3064 return irhs2;
3065 }
3066 else
3067 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3068 goto and_ior_xor;
3069 }
3070 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3071 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3072 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3073 {
3074 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3075 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3076 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3077 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3078 {
3079 gimple *tstmt;
3080 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3081 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3082 tstmt = stmts->pop ();
3083 gcc_assert (tstmt == def_stmt);
3084 stmts->quick_push (stmt);
3085 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3086 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3087 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3088 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3089 return irhs1;
3090 }
3091 else
3092 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3093 goto and_ior_xor;
3094 }
3095 /* FALLTHRU */
3096 case BIT_IOR_EXPR:
3097 case BIT_XOR_EXPR:
3098 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3099 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3100 and_ior_xor:
3101 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3102 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3103 {
3104 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3105 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3106 int out_prec = TYPE_PRECISION (out_type);
3107 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3108 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3109 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3110 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3111 else
3112 {
3113 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3114 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3115 }
3116 }
3117 itype = TREE_TYPE (irhs1);
3118 pattern_stmt
3119 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3120 rhs_code, irhs1, irhs2);
3121 break;
3122
3123 default:
3124 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3125 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3126 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3127 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3128 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3129 {
3130 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3131 itype
3132 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3133 }
3134 else
3135 itype = TREE_TYPE (rhs1);
3136 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3137 if (trueval == NULL_TREE)
3138 trueval = build_int_cst (itype, 1);
3139 else
3140 gcc_checking_assert (useless_type_conversion_p (itype,
3141 TREE_TYPE (trueval)));
3142 pattern_stmt
3143 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3144 COND_EXPR, cond_expr, trueval,
3145 build_int_cst (itype, 0));
3146 break;
3147 }
3148
3149 stmts->safe_push (stmt);
3150 gimple_set_location (pattern_stmt, loc);
3151 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3152 return gimple_assign_lhs (pattern_stmt);
3153 }
3154
3155
3156 /* Function vect_recog_bool_pattern
3157
3158 Try to find pattern like following:
3159
3160 bool a_b, b_b, c_b, d_b, e_b;
3161 TYPE f_T;
3162 loop:
3163 S1 a_b = x1 CMP1 y1;
3164 S2 b_b = x2 CMP2 y2;
3165 S3 c_b = a_b & b_b;
3166 S4 d_b = x3 CMP3 y3;
3167 S5 e_b = c_b | d_b;
3168 S6 f_T = (TYPE) e_b;
3169
3170 where type 'TYPE' is an integral type. Or a similar pattern
3171 ending in
3172
3173 S6 f_Y = e_b ? r_Y : s_Y;
3174
3175 as results from if-conversion of a complex condition.
3176
3177 Input:
3178
3179 * LAST_STMT: A stmt at the end from which the pattern
3180 search begins, i.e. cast of a bool to
3181 an integer type.
3182
3183 Output:
3184
3185 * TYPE_IN: The type of the input arguments to the pattern.
3186
3187 * TYPE_OUT: The type of the output of this pattern.
3188
3189 * Return value: A new stmt that will be used to replace the pattern.
3190
3191 Assuming the size of TYPE is the same as the size of all comparisons
3192 (otherwise some casts would be added where needed), for the above
3193 sequence we create the following related pattern stmts:
3194 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3195 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3196 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3197 S5' e_T = c_T | d_T;
3198 S6' f_T = e_T;
3199
3200 Instead of the above S3' we could emit:
3201 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3202 S3' c_T = a_T & b_T;
3203 but the above is more efficient. */
3204
3205 static gimple *
3206 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3207 tree *type_out)
3208 {
3209 gimple *last_stmt = stmts->pop ();
3210 enum tree_code rhs_code;
3211 tree var, lhs, rhs, vectype;
3212 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3213 vec_info *vinfo = stmt_vinfo->vinfo;
3214 gimple *pattern_stmt;
3215
3216 if (!is_gimple_assign (last_stmt))
3217 return NULL;
3218
3219 var = gimple_assign_rhs1 (last_stmt);
3220 lhs = gimple_assign_lhs (last_stmt);
3221
3222 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3223 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3224 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3225 return NULL;
3226
3227 rhs_code = gimple_assign_rhs_code (last_stmt);
3228 if (CONVERT_EXPR_CODE_P (rhs_code))
3229 {
3230 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3231 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3232 return NULL;
3233 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3234 if (vectype == NULL_TREE)
3235 return NULL;
3236
3237 if (!check_bool_pattern (var, vinfo))
3238 return NULL;
3239
3240 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3241 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3242 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3243 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3244 else
3245 pattern_stmt
3246 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3247 *type_out = vectype;
3248 *type_in = vectype;
3249 stmts->safe_push (last_stmt);
3250 if (dump_enabled_p ())
3251 dump_printf_loc (MSG_NOTE, vect_location,
3252 "vect_recog_bool_pattern: detected:\n");
3253
3254 return pattern_stmt;
3255 }
3256 else if (rhs_code == COND_EXPR
3257 && TREE_CODE (var) == SSA_NAME)
3258 {
3259 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3260 if (vectype == NULL_TREE)
3261 return NULL;
3262
3263 /* Build a scalar type for the boolean result that when
3264 vectorized matches the vector type of the result in
3265 size and number of elements. */
3266 unsigned prec
3267 = wi::udiv_trunc (TYPE_SIZE (vectype),
3268 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3269 tree type
3270 = build_nonstandard_integer_type (prec,
3271 TYPE_UNSIGNED (TREE_TYPE (var)));
3272 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3273 return NULL;
3274
3275 if (!check_bool_pattern (var, vinfo))
3276 return NULL;
3277
3278 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3279 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3280 pattern_stmt
3281 = gimple_build_assign (lhs, COND_EXPR,
3282 build2 (NE_EXPR, boolean_type_node,
3283 rhs, build_int_cst (type, 0)),
3284 gimple_assign_rhs2 (last_stmt),
3285 gimple_assign_rhs3 (last_stmt));
3286 *type_out = vectype;
3287 *type_in = vectype;
3288 stmts->safe_push (last_stmt);
3289 if (dump_enabled_p ())
3290 dump_printf_loc (MSG_NOTE, vect_location,
3291 "vect_recog_bool_pattern: detected:\n");
3292
3293 return pattern_stmt;
3294 }
3295 else if (rhs_code == SSA_NAME
3296 && STMT_VINFO_DATA_REF (stmt_vinfo))
3297 {
3298 stmt_vec_info pattern_stmt_info;
3299 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3300 gcc_assert (vectype != NULL_TREE);
3301 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3302 return NULL;
3303 if (!check_bool_pattern (var, vinfo))
3304 return NULL;
3305
3306 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3307 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3308 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3309 {
3310 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3311 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3312 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3313 rhs = rhs2;
3314 }
3315 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3316 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3317 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3318 STMT_VINFO_DATA_REF (pattern_stmt_info)
3319 = STMT_VINFO_DATA_REF (stmt_vinfo);
3320 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3321 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3322 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3323 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3324 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3325 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3326 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3327 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3328 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3329 *type_out = vectype;
3330 *type_in = vectype;
3331 stmts->safe_push (last_stmt);
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_NOTE, vect_location,
3334 "vect_recog_bool_pattern: detected:\n");
3335 return pattern_stmt;
3336 }
3337 else
3338 return NULL;
3339 }
3340
3341
3342 /* Mark statements that are involved in a pattern. */
3343
3344 static inline void
3345 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3346 tree pattern_vectype)
3347 {
3348 stmt_vec_info pattern_stmt_info, def_stmt_info;
3349 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3350 vec_info *vinfo = orig_stmt_info->vinfo;
3351 gimple *def_stmt;
3352
3353 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3354 if (pattern_stmt_info == NULL)
3355 {
3356 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3357 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3358 }
3359 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3360
3361 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3362 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3363 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3364 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3365 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3366 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3367 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3368 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3369 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3370 {
3371 gimple_stmt_iterator si;
3372 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3373 !gsi_end_p (si); gsi_next (&si))
3374 {
3375 def_stmt = gsi_stmt (si);
3376 def_stmt_info = vinfo_for_stmt (def_stmt);
3377 if (def_stmt_info == NULL)
3378 {
3379 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3380 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3381 }
3382 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3383 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3384 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3385 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3386 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3387 }
3388 }
3389 }
3390
3391 /* Function vect_pattern_recog_1
3392
3393 Input:
3394 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3395 computation pattern.
3396 STMT: A stmt from which the pattern search should start.
3397
3398 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3399 expression that computes the same functionality and can be used to
3400 replace the sequence of stmts that are involved in the pattern.
3401
3402 Output:
3403 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3404 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3405 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3406 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3407 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3408 fits the available target pattern.
3409
3410 This function also does some bookkeeping, as explained in the documentation
3411 for vect_pattern_recog. */
3412
3413 static void
3414 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3415 gimple_stmt_iterator si,
3416 vec<gimple *> *stmts_to_replace)
3417 {
3418 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3419 stmt_vec_info stmt_info;
3420 loop_vec_info loop_vinfo;
3421 tree pattern_vectype;
3422 tree type_in, type_out;
3423 enum tree_code code;
3424 int i;
3425 gimple *next;
3426
3427 stmts_to_replace->truncate (0);
3428 stmts_to_replace->quick_push (stmt);
3429 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3430 if (!pattern_stmt)
3431 return;
3432
3433 stmt = stmts_to_replace->last ();
3434 stmt_info = vinfo_for_stmt (stmt);
3435 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3436
3437 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3438 {
3439 /* No need to check target support (already checked by the pattern
3440 recognition function). */
3441 pattern_vectype = type_out ? type_out : type_in;
3442 }
3443 else
3444 {
3445 machine_mode vec_mode;
3446 enum insn_code icode;
3447 optab optab;
3448
3449 /* Check target support */
3450 type_in = get_vectype_for_scalar_type (type_in);
3451 if (!type_in)
3452 return;
3453 if (type_out)
3454 type_out = get_vectype_for_scalar_type (type_out);
3455 else
3456 type_out = type_in;
3457 if (!type_out)
3458 return;
3459 pattern_vectype = type_out;
3460
3461 if (is_gimple_assign (pattern_stmt))
3462 code = gimple_assign_rhs_code (pattern_stmt);
3463 else
3464 {
3465 gcc_assert (is_gimple_call (pattern_stmt));
3466 code = CALL_EXPR;
3467 }
3468
3469 optab = optab_for_tree_code (code, type_in, optab_default);
3470 vec_mode = TYPE_MODE (type_in);
3471 if (!optab
3472 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3473 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3474 return;
3475 }
3476
3477 /* Found a vectorizable pattern. */
3478 if (dump_enabled_p ())
3479 {
3480 dump_printf_loc (MSG_NOTE, vect_location,
3481 "pattern recognized: ");
3482 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3483 }
3484
3485 /* Mark the stmts that are involved in the pattern. */
3486 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3487
3488 /* Patterns cannot be vectorized using SLP, because they change the order of
3489 computation. */
3490 if (loop_vinfo)
3491 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3492 if (next == stmt)
3493 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3494
3495 /* It is possible that additional pattern stmts are created and inserted in
3496 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3497 relevant statements. */
3498 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3499 && (unsigned) i < (stmts_to_replace->length () - 1);
3500 i++)
3501 {
3502 stmt_info = vinfo_for_stmt (stmt);
3503 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3504 if (dump_enabled_p ())
3505 {
3506 dump_printf_loc (MSG_NOTE, vect_location,
3507 "additional pattern stmt: ");
3508 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3509 }
3510
3511 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3512 }
3513 }
3514
3515
3516 /* Function vect_pattern_recog
3517
3518 Input:
3519 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3520 computation idioms.
3521
3522 Output - for each computation idiom that is detected we create a new stmt
3523 that provides the same functionality and that can be vectorized. We
3524 also record some information in the struct_stmt_info of the relevant
3525 stmts, as explained below:
3526
3527 At the entry to this function we have the following stmts, with the
3528 following initial value in the STMT_VINFO fields:
3529
3530 stmt in_pattern_p related_stmt vec_stmt
3531 S1: a_i = .... - - -
3532 S2: a_2 = ..use(a_i).. - - -
3533 S3: a_1 = ..use(a_2).. - - -
3534 S4: a_0 = ..use(a_1).. - - -
3535 S5: ... = ..use(a_0).. - - -
3536
3537 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3538 represented by a single stmt. We then:
3539 - create a new stmt S6 equivalent to the pattern (the stmt is not
3540 inserted into the code)
3541 - fill in the STMT_VINFO fields as follows:
3542
3543 in_pattern_p related_stmt vec_stmt
3544 S1: a_i = .... - - -
3545 S2: a_2 = ..use(a_i).. - - -
3546 S3: a_1 = ..use(a_2).. - - -
3547 S4: a_0 = ..use(a_1).. true S6 -
3548 '---> S6: a_new = .... - S4 -
3549 S5: ... = ..use(a_0).. - - -
3550
3551 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3552 to each other through the RELATED_STMT field).
3553
3554 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3555 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3556 remain irrelevant unless used by stmts other than S4.
3557
3558 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3559 (because they are marked as irrelevant). It will vectorize S6, and record
3560 a pointer to the new vector stmt VS6 from S6 (as usual).
3561 S4 will be skipped, and S5 will be vectorized as usual:
3562
3563 in_pattern_p related_stmt vec_stmt
3564 S1: a_i = .... - - -
3565 S2: a_2 = ..use(a_i).. - - -
3566 S3: a_1 = ..use(a_2).. - - -
3567 > VS6: va_new = .... - - -
3568 S4: a_0 = ..use(a_1).. true S6 VS6
3569 '---> S6: a_new = .... - S4 VS6
3570 > VS5: ... = ..vuse(va_new).. - - -
3571 S5: ... = ..use(a_0).. - - -
3572
3573 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3574 elsewhere), and we'll end up with:
3575
3576 VS6: va_new = ....
3577 VS5: ... = ..vuse(va_new)..
3578
3579 In case of more than one pattern statements, e.g., widen-mult with
3580 intermediate type:
3581
3582 S1 a_t = ;
3583 S2 a_T = (TYPE) a_t;
3584 '--> S3: a_it = (interm_type) a_t;
3585 S4 prod_T = a_T * CONST;
3586 '--> S5: prod_T' = a_it w* CONST;
3587
3588 there may be other users of a_T outside the pattern. In that case S2 will
3589 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3590 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3591 be recorded in S3. */
3592
3593 void
3594 vect_pattern_recog (vec_info *vinfo)
3595 {
3596 struct loop *loop;
3597 basic_block *bbs;
3598 unsigned int nbbs;
3599 gimple_stmt_iterator si;
3600 unsigned int i, j;
3601 vect_recog_func_ptr vect_recog_func;
3602 auto_vec<gimple *, 1> stmts_to_replace;
3603 gimple *stmt;
3604
3605 if (dump_enabled_p ())
3606 dump_printf_loc (MSG_NOTE, vect_location,
3607 "=== vect_pattern_recog ===\n");
3608
3609 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3610 {
3611 loop = LOOP_VINFO_LOOP (loop_vinfo);
3612 bbs = LOOP_VINFO_BBS (loop_vinfo);
3613 nbbs = loop->num_nodes;
3614 }
3615 else
3616 {
3617 bbs = &as_a <bb_vec_info> (vinfo)->bb;
3618 nbbs = 1;
3619 }
3620
3621 /* Scan through the loop stmts, applying the pattern recognition
3622 functions starting at each stmt visited: */
3623 for (i = 0; i < nbbs; i++)
3624 {
3625 basic_block bb = bbs[i];
3626 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3627 {
3628 if (is_a <bb_vec_info> (vinfo)
3629 && (stmt = gsi_stmt (si))
3630 && vinfo_for_stmt (stmt)
3631 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3632 continue;
3633
3634 /* Scan over all generic vect_recog_xxx_pattern functions. */
3635 for (j = 0; j < NUM_PATTERNS; j++)
3636 {
3637 vect_recog_func = vect_vect_recog_func_ptrs[j];
3638 vect_pattern_recog_1 (vect_recog_func, si,
3639 &stmts_to_replace);
3640 }
3641 }
3642 }
3643 }