gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
52
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
55 {
56 value_range_kind vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
61 {
62 if (dump_enabled_p ())
63 {
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
70 }
71 return true;
72 }
73 else
74 {
75 if (dump_enabled_p ())
76 {
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
79 }
80 return false;
81 }
82 }
83
84 /* Report that we've found an instance of pattern NAME in
85 statement STMT. */
86
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
89 {
90 if (dump_enabled_p ())
91 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
92 }
93
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95 return the pattern statement's stmt_vec_info. Set its vector type to
96 VECTYPE if it doesn't have one already. */
97
98 static stmt_vec_info
99 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 tree vectype)
101 {
102 vec_info *vinfo = orig_stmt_info->vinfo;
103 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
104 if (pattern_stmt_info == NULL)
105 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
106 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
107
108 pattern_stmt_info->pattern_stmt_p = true;
109 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
110 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
114 return pattern_stmt_info;
115 }
116
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119 have one already. */
120
121 static void
122 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 tree vectype)
124 {
125 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
126 STMT_VINFO_RELATED_STMT (orig_stmt_info)
127 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
128 }
129
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
131 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132 be different from the vector type of the final pattern statement. */
133
134 static inline void
135 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 tree vectype = NULL_TREE)
137 {
138 vec_info *vinfo = stmt_info->vinfo;
139 if (vectype)
140 {
141 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
142 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
143 }
144 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
145 new_stmt);
146 }
147
148 /* The caller wants to perform new operations on vect_external variable
149 VAR, so that the result of the operations would also be vect_external.
150 Return the edge on which the operations can be performed, if one exists.
151 Return null if the operations should instead be treated as part of
152 the pattern that needs them. */
153
154 static edge
155 vect_get_external_def_edge (vec_info *vinfo, tree var)
156 {
157 edge e = NULL;
158 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
159 {
160 e = loop_preheader_edge (loop_vinfo->loop);
161 if (!SSA_NAME_IS_DEFAULT_DEF (var))
162 {
163 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 if (bb == NULL
165 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 e = NULL;
167 }
168 }
169 return e;
170 }
171
172 /* Return true if the target supports a vector version of CODE,
173 where CODE is known to map to a direct optab. ITYPE specifies
174 the type of (some of) the scalar inputs and OTYPE specifies the
175 type of the scalar result.
176
177 If CODE allows the inputs and outputs to have different type
178 (such as for WIDEN_SUM_EXPR), it is the input mode rather
179 than the output mode that determines the appropriate target pattern.
180 Operand 0 of the target pattern then specifies the mode that the output
181 must have.
182
183 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185 is nonnull. */
186
187 static bool
188 vect_supportable_direct_optab_p (tree otype, tree_code code,
189 tree itype, tree *vecotype_out,
190 tree *vecitype_out = NULL)
191 {
192 tree vecitype = get_vectype_for_scalar_type (itype);
193 if (!vecitype)
194 return false;
195
196 tree vecotype = get_vectype_for_scalar_type (otype);
197 if (!vecotype)
198 return false;
199
200 optab optab = optab_for_tree_code (code, vecitype, optab_default);
201 if (!optab)
202 return false;
203
204 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205 if (icode == CODE_FOR_nothing
206 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207 return false;
208
209 *vecotype_out = vecotype;
210 if (vecitype_out)
211 *vecitype_out = vecitype;
212 return true;
213 }
214
215 /* Round bit precision PRECISION up to a full element. */
216
217 static unsigned int
218 vect_element_precision (unsigned int precision)
219 {
220 precision = 1 << ceil_log2 (precision);
221 return MAX (precision, BITS_PER_UNIT);
222 }
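
/* For example, assuming BITS_PER_UNIT is 8 (as on typical targets),
   the rounding above gives:

     vect_element_precision (17) == 32   (1 << ceil_log2 (17))
     vect_element_precision (8)  == 8
     vect_element_precision (3)  == 8    (rounded up to BITS_PER_UNIT)

   These values are illustrative only.  */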
223
224 /* If OP is defined by a statement that's being considered for vectorization,
225 return information about that statement, otherwise return NULL. */
226
227 static stmt_vec_info
228 vect_get_internal_def (vec_info *vinfo, tree op)
229 {
230 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231 if (def_stmt_info
232 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233 return def_stmt_info;
234 return NULL;
235 }
236
237 /* Check whether NAME, an ssa-name used in STMT_VINFO,
238 is a result of a type promotion, such that:
239 DEF_STMT: NAME = NOP (name0)
240 If CHECK_SIGN is TRUE, check that either both types are signed or both are
241 unsigned. */
242
243 static bool
244 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
245 tree *orig_type, gimple **def_stmt, bool *promotion)
246 {
247 tree type = TREE_TYPE (name);
248 tree oprnd0;
249 enum vect_def_type dt;
250
251 stmt_vec_info def_stmt_info;
252 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 def_stmt))
254 return false;
255
256 if (dt != vect_internal_def
257 && dt != vect_external_def && dt != vect_constant_def)
258 return false;
259
260 if (!*def_stmt)
261 return false;
262
263 if (!is_gimple_assign (*def_stmt))
264 return false;
265
266 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
267 return false;
268
269 oprnd0 = gimple_assign_rhs1 (*def_stmt);
270
271 *orig_type = TREE_TYPE (oprnd0);
272 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274 return false;
275
276 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277 *promotion = true;
278 else
279 *promotion = false;
280
281 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
282 return false;
283
284 return true;
285 }
286
287 /* Holds information about an input operand after some sign changes
288 and type promotions have been peeled away. */
289 struct vect_unpromoted_value {
290 vect_unpromoted_value ();
291
292 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
293
294 /* The value obtained after peeling away zero or more casts. */
295 tree op;
296
297 /* The type of OP. */
298 tree type;
299
300 /* The definition type of OP. */
301 vect_def_type dt;
302
303 /* If OP is the result of peeling at least one cast, and if the cast
304 of OP itself is a vectorizable statement, CASTER identifies that
305 statement, otherwise it is null. */
306 stmt_vec_info caster;
307 };
308
309 inline vect_unpromoted_value::vect_unpromoted_value ()
310 : op (NULL_TREE),
311 type (NULL_TREE),
312 dt (vect_uninitialized_def),
313 caster (NULL)
314 {
315 }
316
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318 statement that casts it to CASTER_IN. */
319
320 inline void
321 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 stmt_vec_info caster_in)
323 {
324 op = op_in;
325 type = TREE_TYPE (op);
326 dt = dt_in;
327 caster = caster_in;
328 }
329
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331 to reach some vectorizable inner operand OP', continuing as long as it
332 is possible to convert OP' back to OP using a possible sign change
333 followed by a possible promotion P. Return this OP', or null if OP is
334 not a vectorizable SSA name. If there is a promotion P, describe its
335 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
336 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337 have more than one user.
338
339 A successful return means that it is possible to go from OP' to OP
340 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
341 whereas the cast from UNPROM to OP might be a promotion, a sign
342 change, or a nop.
343
344 E.g. say we have:
345
346 signed short *ptr = ...;
347 signed short C = *ptr;
348 unsigned short B = (unsigned short) C; // sign change
349 signed int A = (signed int) B; // unsigned promotion
350 ...possible other uses of A...
351 unsigned int OP = (unsigned int) A; // sign change
352
353 In this case it's possible to go directly from C to OP using:
354
355 OP = (unsigned int) (unsigned short) C;
356 +------------+ +--------------+
357 promotion sign change
358
359 so OP' would be C. The input to the promotion is B, so UNPROM
360 would describe B. */
361
362 static tree
363 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
364 vect_unpromoted_value *unprom,
365 bool *single_use_p = NULL)
366 {
367 tree res = NULL_TREE;
368 tree op_type = TREE_TYPE (op);
369 unsigned int orig_precision = TYPE_PRECISION (op_type);
370 unsigned int min_precision = orig_precision;
371 stmt_vec_info caster = NULL;
372 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
373 {
374 /* See whether OP is simple enough to vectorize. */
375 stmt_vec_info def_stmt_info;
376 gimple *def_stmt;
377 vect_def_type dt;
378 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
379 break;
380
381 /* If OP is the input of a demotion, skip over it to see whether
382 OP is itself the result of a promotion. If so, the combined
383 effect of the promotion and the demotion might fit the required
384 pattern, otherwise neither operation fits.
385
386 This copes with cases such as the result of an arithmetic
387 operation being truncated before being stored, and where that
388 arithmetic operation has been recognized as an over-widened one. */
389 if (TYPE_PRECISION (op_type) <= min_precision)
390 {
391 /* Use OP as the UNPROM described above if we haven't yet
392 found a promotion, or if using the new input preserves the
393 sign of the previous promotion. */
394 if (!res
395 || TYPE_PRECISION (unprom->type) == orig_precision
396 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
397 {
398 unprom->set_op (op, dt, caster);
399 min_precision = TYPE_PRECISION (op_type);
400 }
401 /* Stop if we've already seen a promotion and if this
402 conversion does more than change the sign. */
403 else if (TYPE_PRECISION (op_type)
404 != TYPE_PRECISION (unprom->type))
405 break;
406
407 /* The sequence now extends to OP. */
408 res = op;
409 }
410
411 /* See whether OP is defined by a cast. Record it as CASTER if
412 the cast is potentially vectorizable. */
413 if (!def_stmt)
414 break;
415 caster = def_stmt_info;
416
417 /* Ignore pattern statements, since we don't link uses for them. */
418 if (caster
419 && single_use_p
420 && !STMT_VINFO_RELATED_STMT (caster)
421 && !has_single_use (res))
422 *single_use_p = false;
423
424 gassign *assign = dyn_cast <gassign *> (def_stmt);
425 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
426 break;
427
428 /* Continue with the input to the cast. */
429 op = gimple_assign_rhs1 (def_stmt);
430 op_type = TREE_TYPE (op);
431 }
432 return res;
433 }
434
435 /* OP is an integer operand to an operation that returns TYPE, and we
436 want to treat the operation as a widening one. So far we can treat
437 it as widening from *COMMON_TYPE.
438
439 Return true if OP is suitable for such a widening operation,
440 either widening from *COMMON_TYPE or from some supertype of it.
441 Update *COMMON_TYPE to the supertype in the latter case.
442
443 SHIFT_P is true if OP is a shift amount. */
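
/* A worked example (illustrative, assuming 16-bit shorts and 32-bit ints):
   with TYPE == int, *COMMON_TYPE == short, SHIFT_P false and OP == 300,
   OP needs 10 bits of signed precision; 10 * 2 <= 32, so the operation
   can still be treated as widening, and *COMMON_TYPE stays short because
   vect_element_precision (10) == 16.  If OP were 70000 instead, it would
   need 18 bits and the function would return false.  */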
444
445 static bool
446 vect_joust_widened_integer (tree type, bool shift_p, tree op,
447 tree *common_type)
448 {
449 /* Calculate the minimum precision required by OP, without changing
450 the sign of either operand. */
451 unsigned int precision;
452 if (shift_p)
453 {
454 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
455 return false;
456 precision = TREE_INT_CST_LOW (op);
457 }
458 else
459 {
460 precision = wi::min_precision (wi::to_widest (op),
461 TYPE_SIGN (*common_type));
462 if (precision * 2 > TYPE_PRECISION (type))
463 return false;
464 }
465
466 /* If OP requires a wider type, switch to that type. The checks
467 above ensure that this is still narrower than the result. */
468 precision = vect_element_precision (precision);
469 if (TYPE_PRECISION (*common_type) < precision)
470 *common_type = build_nonstandard_integer_type
471 (precision, TYPE_UNSIGNED (*common_type));
472 return true;
473 }
474
475 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
476 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
477
478 static bool
479 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
480 {
481 if (types_compatible_p (*common_type, new_type))
482 return true;
483
484 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
485 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
486 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
487 return true;
488
489 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
490 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
491 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
492 {
493 *common_type = new_type;
494 return true;
495 }
496
497 /* We have mismatched signs, with the signed type being
498 no wider than the unsigned type. In this case we need
499 a wider signed type. */
500 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
501 TYPE_PRECISION (new_type));
502 precision *= 2;
503 if (precision * 2 > TYPE_PRECISION (type))
504 return false;
505
506 *common_type = build_nonstandard_integer_type (precision, false);
507 return true;
508 }
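
/* Example of the mismatched-sign case above (illustrative, assuming 8-bit
   chars, 16-bit shorts and 32-bit ints): with TYPE == int,
   *COMMON_TYPE == unsigned char and NEW_TYPE == signed char, neither input
   type can hold all values of the other, so *COMMON_TYPE becomes a 16-bit
   signed type and the function returns true.  With unsigned short and
   signed short inputs the required 32-bit signed supertype would be as
   wide as TYPE itself, so the function returns false.  */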
509
510 /* Check whether STMT_INFO can be viewed as a tree of integer operations
511 in which each node either performs CODE or WIDENED_CODE, and where
512 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
513 specifies the maximum number of leaf operands. SHIFT_P says whether
514 CODE and WIDENED_CODE are some sort of shift.
515
516 If STMT_INFO is such a tree, return the number of leaf operands
517 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
518 to a type that (a) is narrower than the result of STMT_INFO and
519 (b) can hold all leaf operand values.
520
521 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
522 exists. */
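
/* As an illustrative example (assuming 16-bit shorts and 32-bit ints),
   a call with CODE == MULT_EXPR, WIDENED_CODE == WIDEN_MULT_EXPR and
   MAX_NOPS == 2 (as the dot-product pattern below uses this function),
   applied to:

     short a, b;
     int prod = (int) a * (int) b;

   returns 2, with UNPROM[0] and UNPROM[1] describing A and B and
   *COMMON_TYPE set to short.  */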
523
524 static unsigned int
525 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
526 tree_code widened_code, bool shift_p,
527 unsigned int max_nops,
528 vect_unpromoted_value *unprom, tree *common_type)
529 {
530 /* Check for an integer operation with the right code. */
531 vec_info *vinfo = stmt_info->vinfo;
532 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
533 if (!assign)
534 return 0;
535
536 tree_code rhs_code = gimple_assign_rhs_code (assign);
537 if (rhs_code != code && rhs_code != widened_code)
538 return 0;
539
540 tree type = gimple_expr_type (assign);
541 if (!INTEGRAL_TYPE_P (type))
542 return 0;
543
544 /* Assume that both operands will be leaf operands. */
545 max_nops -= 2;
546
547 /* Check the operands. */
548 unsigned int next_op = 0;
549 for (unsigned int i = 0; i < 2; ++i)
550 {
551 vect_unpromoted_value *this_unprom = &unprom[next_op];
552 unsigned int nops = 1;
553 tree op = gimple_op (assign, i + 1);
554 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
555 {
556 /* We already have a common type from earlier operands.
557 Update it to account for OP. */
558 this_unprom->set_op (op, vect_constant_def);
559 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
560 return 0;
561 }
562 else
563 {
564 /* Only allow shifts by constants. */
565 if (shift_p && i == 1)
566 return 0;
567
568 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
569 this_unprom))
570 return 0;
571
572 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
573 {
574 /* The operand isn't widened. If STMT_INFO has the code
575 for an unwidened operation, recursively check whether
576 this operand is a node of the tree. */
577 if (rhs_code != code
578 || max_nops == 0
579 || this_unprom->dt != vect_internal_def)
580 return 0;
581
582 /* Give back the leaf slot allocated above now that we're
583 not treating this as a leaf operand. */
584 max_nops += 1;
585
586 /* Recursively process the definition of the operand. */
587 stmt_vec_info def_stmt_info
588 = vinfo->lookup_def (this_unprom->op);
589 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
590 shift_p, max_nops, this_unprom,
591 common_type);
592 if (nops == 0)
593 return 0;
594
595 max_nops -= nops;
596 }
597 else
598 {
599 /* Make sure that the operand is narrower than the result. */
600 if (TYPE_PRECISION (this_unprom->type) * 2
601 > TYPE_PRECISION (type))
602 return 0;
603
604 /* Update COMMON_TYPE for the new operand. */
605 if (i == 0)
606 *common_type = this_unprom->type;
607 else if (!vect_joust_widened_type (type, this_unprom->type,
608 common_type))
609 return 0;
610 }
611 }
612 next_op += nops;
613 }
614 return next_op;
615 }
616
617 /* Helper to return a new temporary of type TYPE for a pattern statement STMT. If STMT
618 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
619
620 static tree
621 vect_recog_temp_ssa_var (tree type, gimple *stmt)
622 {
623 return make_temp_ssa_name (type, stmt, "patt");
624 }
625
626 /* STMT2_INFO describes a type conversion that could be split into STMT1
627 followed by a version of STMT2_INFO that takes NEW_RHS as its first
628 input. Try to do this using pattern statements, returning true on
629 success. */
630
631 static bool
632 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
633 gimple *stmt1, tree vectype)
634 {
635 if (is_pattern_stmt_p (stmt2_info))
636 {
637 /* STMT2_INFO is part of a pattern. Get the statement to which
638 the pattern is attached. */
639 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
640 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
641
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "Splitting pattern statement: %G", stmt2_info->stmt);
645
646 /* Since STMT2_INFO is a pattern statement, we can change it
647 in-situ without worrying about changing the code for the
648 containing block. */
649 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
650
651 if (dump_enabled_p ())
652 {
653 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
654 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
655 stmt2_info->stmt);
656 }
657
658 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
659 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
660 /* STMT2_INFO is the actual pattern statement. Add STMT1
661 to the end of the definition sequence. */
662 gimple_seq_add_stmt_without_update (def_seq, stmt1);
663 else
664 {
665 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
666 before it. */
667 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
668 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
669 }
670 return true;
671 }
672 else
673 {
674 /* STMT2_INFO doesn't yet have a pattern. Try to create a
675 two-statement pattern now. */
676 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
677 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
678 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
679 if (!lhs_vectype)
680 return false;
681
682 if (dump_enabled_p ())
683 dump_printf_loc (MSG_NOTE, vect_location,
684 "Splitting statement: %G", stmt2_info->stmt);
685
686 /* Add STMT1 as a singleton pattern definition sequence. */
687 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
688 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
689 gimple_seq_add_stmt_without_update (def_seq, stmt1);
690
691 /* Build the second of the two pattern statements. */
692 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
693 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
694 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
695
696 if (dump_enabled_p ())
697 {
698 dump_printf_loc (MSG_NOTE, vect_location,
699 "into pattern statements: %G", stmt1);
700 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
701 }
702
703 return true;
704 }
705 }
706
707 /* Convert UNPROM to TYPE and return the result, adding new statements
708 to STMT_INFO's pattern definition statements if no better way is
709 available. VECTYPE is the vector form of TYPE. */
710
711 static tree
712 vect_convert_input (stmt_vec_info stmt_info, tree type,
713 vect_unpromoted_value *unprom, tree vectype)
714 {
715 /* Check for a no-op conversion. */
716 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
717 return unprom->op;
718
719 /* Allow the caller to create constant vect_unpromoted_values. */
720 if (TREE_CODE (unprom->op) == INTEGER_CST)
721 return wide_int_to_tree (type, wi::to_widest (unprom->op));
722
723 tree input = unprom->op;
724 if (unprom->caster)
725 {
726 tree lhs = gimple_get_lhs (unprom->caster->stmt);
727 tree lhs_type = TREE_TYPE (lhs);
728
729 /* If the result of the existing cast is the right width, use it
730 instead of the source of the cast. */
731 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
732 input = lhs;
733 /* If the precision we want is between the source and result
734 precisions of the existing cast, try splitting the cast into
735 two and tapping into a mid-way point. */
736 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
737 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
738 {
739 /* In order to preserve the semantics of the original cast,
740 give the mid-way point the same signedness as the input value.
741
742 It would be possible to use a signed type here instead if
743 TYPE is signed and UNPROM->TYPE is unsigned, but that would
744 make the sign of the midtype sensitive to the order in
745 which we process the statements, since the signedness of
746 TYPE is the signedness required by just one of possibly
747 many users. Also, unsigned promotions are usually as cheap
748 as or cheaper than signed ones, so it's better to keep an
749 unsigned promotion. */
750 tree midtype = build_nonstandard_integer_type
751 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
752 tree vec_midtype = get_vectype_for_scalar_type (midtype);
753 if (vec_midtype)
754 {
755 input = vect_recog_temp_ssa_var (midtype, NULL);
756 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
757 unprom->op);
758 if (!vect_split_statement (unprom->caster, input, new_stmt,
759 vec_midtype))
760 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
761 }
762 }
763
764 /* See if we can reuse an existing result. */
765 if (types_compatible_p (type, TREE_TYPE (input)))
766 return input;
767 }
768
769 /* We need a new conversion statement. */
770 tree new_op = vect_recog_temp_ssa_var (type, NULL);
771 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
772
773 /* If OP is an external value, see if we can insert the new statement
774 on an incoming edge. */
775 if (input == unprom->op && unprom->dt == vect_external_def)
776 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
777 {
778 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
779 gcc_assert (!new_bb);
780 return new_op;
781 }
782
783 /* As a (common) last resort, add the statement to the pattern itself. */
784 append_pattern_def_seq (stmt_info, new_stmt, vectype);
785 return new_op;
786 }
787
788 /* Invoke vect_convert_input for N elements of UNPROM and store the
789 result in the corresponding elements of RESULT. */
790
791 static void
792 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
793 tree *result, tree type, vect_unpromoted_value *unprom,
794 tree vectype)
795 {
796 for (unsigned int i = 0; i < n; ++i)
797 {
798 unsigned int j;
799 for (j = 0; j < i; ++j)
800 if (unprom[j].op == unprom[i].op)
801 break;
802 if (j < i)
803 result[i] = result[j];
804 else
805 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
806 }
807 }
808
809 /* The caller has created a (possibly empty) sequence of pattern definition
810 statements followed by a single statement PATTERN_STMT. Cast the result
811 of this final statement to TYPE. If a new statement is needed, add
812 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
813 and return the new statement, otherwise return PATTERN_STMT as-is.
814 VECITYPE is the vector form of PATTERN_STMT's result type. */
815
816 static gimple *
817 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
818 tree vecitype)
819 {
820 tree lhs = gimple_get_lhs (pattern_stmt);
821 if (!types_compatible_p (type, TREE_TYPE (lhs)))
822 {
823 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
824 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
825 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
826 }
827 return pattern_stmt;
828 }
829
830 /* Return true if STMT_VINFO describes a reduction for which reassociation
831 is allowed. If STMT_VINFO is part of a group, assume that it's part of
832 a reduction chain and optimistically assume that all statements
833 except the last allow reassociation. */
834
835 static bool
836 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
837 {
838 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
839 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
840 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
841 }
842
843 /* As above, but also require it to have code CODE and to be a reduction
844 in the outermost loop. When returning true, store the operands in
845 *OP0_OUT and *OP1_OUT. */
846
847 static bool
848 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
849 tree *op0_out, tree *op1_out)
850 {
851 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
852 if (!loop_info)
853 return false;
854
855 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
856 if (!assign || gimple_assign_rhs_code (assign) != code)
857 return false;
858
859 /* We don't allow changing the order of the computation in the inner-loop
860 when doing outer-loop vectorization. */
861 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
862 if (loop && nested_in_vect_loop_p (loop, stmt_info))
863 return false;
864
865 if (!vect_reassociating_reduction_p (stmt_info))
866 return false;
867
868 *op0_out = gimple_assign_rhs1 (assign);
869 *op1_out = gimple_assign_rhs2 (assign);
870 return true;
871 }
872
873 /* Function vect_recog_dot_prod_pattern
874
875 Try to find the following pattern:
876
877 type x_t, y_t;
878 TYPE1 prod;
879 TYPE2 sum = init;
880 loop:
881 sum_0 = phi <init, sum_1>
882 S1 x_t = ...
883 S2 y_t = ...
884 S3 x_T = (TYPE1) x_t;
885 S4 y_T = (TYPE1) y_t;
886 S5 prod = x_T * y_T;
887 [S6 prod = (TYPE2) prod; #optional]
888 S7 sum_1 = prod + sum_0;
889
890 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
891 same size as 'TYPE1' or bigger. This is a special case of a reduction
892 computation.
893
894 Input:
895
896 * STMT_VINFO: The stmt from which the pattern search begins. In the
897 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
898 will be detected.
899
900 Output:
901
902 * TYPE_OUT: The type of the output of this pattern.
903
904 * Return value: A new stmt that will be used to replace the sequence of
905 stmts that constitute the pattern. In this case it will be:
906 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
907
908 Note: The dot-prod idiom is a widening reduction pattern that is
909 vectorized without preserving all the intermediate results. It
910 produces only N/2 (widened) results (by summing up pairs of
911 intermediate results) rather than all N results. Therefore, we
912 cannot allow this pattern when we want to get all the results and in
913 the correct order (as is the case when this computation is in an
914 inner-loop nested in an outer-loop that is being vectorized). */
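
/* As a concrete (illustrative) example, on a target with 16-bit shorts
   and 32-bit ints the pattern above corresponds to source code such as:

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   with type == short and TYPE1 == TYPE2 == int.  The loop is rewritten
   to use DOT_PROD_EXPR when the target provides a suitable dot-product
   instruction for those vector types.  */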
915
916 static gimple *
917 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
918 {
919 tree oprnd0, oprnd1;
920 gimple *last_stmt = stmt_vinfo->stmt;
921 vec_info *vinfo = stmt_vinfo->vinfo;
922 tree type, half_type;
923 gimple *pattern_stmt;
924 tree var;
925
926 /* Look for the following pattern
927 DX = (TYPE1) X;
928 DY = (TYPE1) Y;
929 DPROD = DX * DY;
930 DDPROD = (TYPE2) DPROD;
931 sum_1 = DDPROD + sum_0;
932 In which
933 - DX is double the size of X
934 - DY is double the size of Y
935 - DX, DY, DPROD all have the same type
936 - sum is the same size as DPROD or bigger
937 - sum has been recognized as a reduction variable.
938
939 This is equivalent to:
940 DPROD = X w* Y; #widen mult
941 sum_1 = DPROD w+ sum_0; #widen summation
942 or
943 DPROD = X w* Y; #widen mult
944 sum_1 = DPROD + sum_0; #summation
945 */
946
947 /* Starting from LAST_STMT, follow the defs of its uses in search
948 of the above pattern. */
949
950 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
951 &oprnd0, &oprnd1))
952 return NULL;
953
954 type = gimple_expr_type (last_stmt);
955
956 vect_unpromoted_value unprom_mult;
957 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
958
959 /* So far so good. Since last_stmt was detected as a (summation) reduction,
960 we know that oprnd1 is the reduction variable (defined by a loop-header
961 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
962 Left to check that oprnd0 is defined by a (widen_)mult_expr */
963 if (!oprnd0)
964 return NULL;
965
966 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
967 if (!mult_vinfo)
968 return NULL;
969
970 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
971 inside the loop (in case we are analyzing an outer-loop). */
972 vect_unpromoted_value unprom0[2];
973 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
974 false, 2, unprom0, &half_type))
975 return NULL;
976
977 /* If there are two widening operations, make sure they agree on
978 the sign of the extension. */
979 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
980 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
981 return NULL;
982
983 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
984
985 tree half_vectype;
986 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
987 type_out, &half_vectype))
988 return NULL;
989
990 /* Get the inputs in the appropriate types. */
991 tree mult_oprnd[2];
992 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
993 unprom0, half_vectype);
994
995 var = vect_recog_temp_ssa_var (type, NULL);
996 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
997 mult_oprnd[0], mult_oprnd[1], oprnd1);
998
999 return pattern_stmt;
1000 }
1001
1002
1003 /* Function vect_recog_sad_pattern
1004
1005 Try to find the following Sum of Absolute Difference (SAD) pattern:
1006
1007 type x_t, y_t;
1008 signed TYPE1 diff, abs_diff;
1009 TYPE2 sum = init;
1010 loop:
1011 sum_0 = phi <init, sum_1>
1012 S1 x_t = ...
1013 S2 y_t = ...
1014 S3 x_T = (TYPE1) x_t;
1015 S4 y_T = (TYPE1) y_t;
1016 S5 diff = x_T - y_T;
1017 S6 abs_diff = ABS_EXPR <diff>;
1018 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1019 S8 sum_1 = abs_diff + sum_0;
1020
1021 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1022 same size as 'TYPE1' or bigger. This is a special case of a reduction
1023 computation.
1024
1025 Input:
1026
1027 * STMT_VINFO: The stmt from which the pattern search begins. In the
1028 example, when this function is called with S8, the pattern
1029 {S3,S4,S5,S6,S7,S8} will be detected.
1030
1031 Output:
1032
1033 * TYPE_OUT: The type of the output of this pattern.
1034
1035 * Return value: A new stmt that will be used to replace the sequence of
1036 stmts that constitute the pattern. In this case it will be:
1037 SAD_EXPR <x_t, y_t, sum_0>
1038 */
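
/* A concrete (illustrative) example, assuming 8-bit chars and 32-bit
   ints: the pattern corresponds to source code such as:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs (x[i] - y[i]);

   with type == unsigned char and TYPE1 == TYPE2 == int.  The loop is
   rewritten to use SAD_EXPR when the target provides a suitable
   sum-of-absolute-differences instruction (such as x86's psadbw).  */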
1039
1040 static gimple *
1041 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1042 {
1043 gimple *last_stmt = stmt_vinfo->stmt;
1044 vec_info *vinfo = stmt_vinfo->vinfo;
1045 tree half_type;
1046
1047 /* Look for the following pattern
1048 DX = (TYPE1) X;
1049 DY = (TYPE1) Y;
1050 DDIFF = DX - DY;
1051 DAD = ABS_EXPR <DDIFF>;
1052 DAD = (TYPE2) DAD;
1053 sum_1 = DAD + sum_0;
1054 In which
1055 - DX is at least double the size of X
1056 - DY is at least double the size of Y
1057 - DX, DY, DDIFF, DAD all have the same type
1058 - sum is the same size as DAD or bigger
1059 - sum has been recognized as a reduction variable.
1060
1061 This is equivalent to:
1062 DDIFF = X w- Y; #widen sub
1063 DAD = ABS_EXPR <DDIFF>;
1064 sum_1 = DAD w+ sum_0; #widen summation
1065 or
1066 DDIFF = X w- Y; #widen sub
1067 DAD = ABS_EXPR <DDIFF>;
1068 sum_1 = DAD + sum_0; #summation
1069 */
1070
1071 /* Starting from LAST_STMT, follow the defs of its uses in search
1072 of the above pattern. */
1073
1074 tree plus_oprnd0, plus_oprnd1;
1075 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1076 &plus_oprnd0, &plus_oprnd1))
1077 return NULL;
1078
1079 tree sum_type = gimple_expr_type (last_stmt);
1080
1081 /* Any non-truncating sequence of conversions is OK here, since
1082 with a successful match, the result of the ABS(U) is known to fit
1083 within the nonnegative range of the result type. (It cannot be the
1084 negative of the minimum signed value due to the range of the widening
1085 MINUS_EXPR.) */
1086 vect_unpromoted_value unprom_abs;
1087 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1088 &unprom_abs);
1089
1090 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1091 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1092 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1093 Then check that plus_oprnd0 is defined by an abs_expr. */
1094
1095 if (!plus_oprnd0)
1096 return NULL;
1097
1098 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1099 if (!abs_stmt_vinfo)
1100 return NULL;
1101
1102 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
1103 inside the loop (in case we are analyzing an outer-loop). */
1104 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1105 if (!abs_stmt
1106 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1107 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1108 return NULL;
1109
1110 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1111 tree abs_type = TREE_TYPE (abs_oprnd);
1112 if (TYPE_UNSIGNED (abs_type))
1113 return NULL;
1114
1115 /* Peel off conversions from the ABS input. This can involve sign
1116 changes (e.g. from an unsigned subtraction to a signed ABS input)
1117 or signed promotion, but it can't include unsigned promotion.
1118 (Note that ABS of an unsigned promotion should have been folded
1119 away before now anyway.) */
1120 vect_unpromoted_value unprom_diff;
1121 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1122 &unprom_diff);
1123 if (!abs_oprnd)
1124 return NULL;
1125 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1126 && TYPE_UNSIGNED (unprom_diff.type))
1127 return NULL;
1128
1129 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1130 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1131 if (!diff_stmt_vinfo)
1132 return NULL;
1133
1134 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
1135 inside the loop (in case we are analyzing an outer-loop). */
1136 vect_unpromoted_value unprom[2];
1137 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1138 false, 2, unprom, &half_type))
1139 return NULL;
1140
1141 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1142
1143 tree half_vectype;
1144 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1145 type_out, &half_vectype))
1146 return NULL;
1147
1148 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1149 tree sad_oprnd[2];
1150 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1151 unprom, half_vectype);
1152
1153 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1154 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1155 sad_oprnd[1], plus_oprnd1);
1156
1157 return pattern_stmt;
1158 }
1159
1160 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1161 so that it can be treated as though it had the form:
1162
1163 A_TYPE a;
1164 B_TYPE b;
1165 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1166 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1167 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1168 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1169 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1170
1171 Try to replace the pattern with:
1172
1173 A_TYPE a;
1174 B_TYPE b;
1175 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1176 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1177 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1178 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1179
1180 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1181
1182 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1183 name of the pattern being matched, for dump purposes. */
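
/* For instance (illustrative, with 16-bit shorts and 32-bit ints),
   calling this function with ORIG_CODE == MULT_EXPR and WIDE_CODE ==
   WIDEN_MULT_EXPR turns:

     short a, b;
     int res = (int) a * (int) b;

   into the equivalent of:

     int res = a w* b;    (a single WIDEN_MULT_EXPR on the narrow inputs)

   provided supportable_widening_operation reports that the target can
   perform a widening multiplication on the corresponding vector types.  */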
1184
1185 static gimple *
1186 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1187 tree_code orig_code, tree_code wide_code,
1188 bool shift_p, const char *name)
1189 {
1190 gimple *last_stmt = last_stmt_info->stmt;
1191
1192 vect_unpromoted_value unprom[2];
1193 tree half_type;
1194 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1195 shift_p, 2, unprom, &half_type))
1196 return NULL;
1197
1198 /* Pattern detected. */
1199 vect_pattern_detected (name, last_stmt);
1200
1201 tree type = gimple_expr_type (last_stmt);
1202 tree itype = type;
1203 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1204 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1205 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1206 TYPE_UNSIGNED (half_type));
1207
1208 /* Check target support */
1209 tree vectype = get_vectype_for_scalar_type (half_type);
1210 tree vecitype = get_vectype_for_scalar_type (itype);
1211 enum tree_code dummy_code;
1212 int dummy_int;
1213 auto_vec<tree> dummy_vec;
1214 if (!vectype
1215 || !vecitype
1216 || !supportable_widening_operation (wide_code, last_stmt_info,
1217 vecitype, vectype,
1218 &dummy_code, &dummy_code,
1219 &dummy_int, &dummy_vec))
1220 return NULL;
1221
1222 *type_out = get_vectype_for_scalar_type (type);
1223 if (!*type_out)
1224 return NULL;
1225
1226 tree oprnd[2];
1227 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1228
1229 tree var = vect_recog_temp_ssa_var (itype, NULL);
1230 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1231 oprnd[0], oprnd[1]);
1232
1233 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1234 }
1235
1236 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1237 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1238
1239 static gimple *
1240 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1241 {
1242 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1243 WIDEN_MULT_EXPR, false,
1244 "vect_recog_widen_mult_pattern");
1245 }
1246
1247 /* Function vect_recog_pow_pattern
1248
1249 Try to find the following pattern:
1250
1251 x = POW (y, N);
1252
1253 with POW being one of pow, powf, powi, powif and N being
1254 either 2 or 0.5.
1255
1256 Input:
1257
1258 * STMT_VINFO: The stmt from which the pattern search begins.
1259
1260 Output:
1261
1262 * TYPE_OUT: The type of the output of this pattern.
1263
1264 * Return value: A new stmt that will be used to replace the sequence of
1265 stmts that constitute the pattern. In this case it will be:
1266 x = x * x
1267 or
1268 x = sqrt (x)
1269 */
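
/* Besides the constant-exponent cases above, the body below also handles
   a constant base under -funsafe-math-optimizations: pow (C, x) is
   rewritten as exp (log (C) * x) so that the exp call can be vectorized
   through its "omp declare simd" clones.  For example (illustrative):

     y = pow (2.5, x);   becomes   y = exp (0.9162907... * x);

   where 0.9162907... is log (2.5).  */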
1270
1271 static gimple *
1272 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1273 {
1274 gimple *last_stmt = stmt_vinfo->stmt;
1275 tree base, exp;
1276 gimple *stmt;
1277 tree var;
1278
1279 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1280 return NULL;
1281
1282 switch (gimple_call_combined_fn (last_stmt))
1283 {
1284 CASE_CFN_POW:
1285 CASE_CFN_POWI:
1286 break;
1287
1288 default:
1289 return NULL;
1290 }
1291
1292 base = gimple_call_arg (last_stmt, 0);
1293 exp = gimple_call_arg (last_stmt, 1);
1294 if (TREE_CODE (exp) != REAL_CST
1295 && TREE_CODE (exp) != INTEGER_CST)
1296 {
1297 if (flag_unsafe_math_optimizations
1298 && TREE_CODE (base) == REAL_CST
1299 && !gimple_call_internal_p (last_stmt))
1300 {
1301 combined_fn log_cfn;
1302 built_in_function exp_bfn;
1303 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1304 {
1305 case BUILT_IN_POW:
1306 log_cfn = CFN_BUILT_IN_LOG;
1307 exp_bfn = BUILT_IN_EXP;
1308 break;
1309 case BUILT_IN_POWF:
1310 log_cfn = CFN_BUILT_IN_LOGF;
1311 exp_bfn = BUILT_IN_EXPF;
1312 break;
1313 case BUILT_IN_POWL:
1314 log_cfn = CFN_BUILT_IN_LOGL;
1315 exp_bfn = BUILT_IN_EXPL;
1316 break;
1317 default:
1318 return NULL;
1319 }
1320 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1321 tree exp_decl = builtin_decl_implicit (exp_bfn);
1322 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1323 does that, but if C is a power of 2, we want to use
1324 exp2 (log2 (C) * x) in the non-vectorized version, but for
1325 vectorization we don't have vectorized exp2. */
1326 if (logc
1327 && TREE_CODE (logc) == REAL_CST
1328 && exp_decl
1329 && lookup_attribute ("omp declare simd",
1330 DECL_ATTRIBUTES (exp_decl)))
1331 {
1332 cgraph_node *node = cgraph_node::get_create (exp_decl);
1333 if (node->simd_clones == NULL)
1334 {
1335 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1336 || node->definition)
1337 return NULL;
1338 expand_simd_clones (node);
1339 if (node->simd_clones == NULL)
1340 return NULL;
1341 }
1342 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1343 if (!*type_out)
1344 return NULL;
1345 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1346 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1347 append_pattern_def_seq (stmt_vinfo, g);
1348 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1349 g = gimple_build_call (exp_decl, 1, def);
1350 gimple_call_set_lhs (g, res);
1351 return g;
1352 }
1353 }
1354
1355 return NULL;
1356 }
1357
1358 /* We now have a pow or powi builtin function call with a constant
1359 exponent. */
1360
1361 /* Catch squaring. */
1362 if ((tree_fits_shwi_p (exp)
1363 && tree_to_shwi (exp) == 2)
1364 || (TREE_CODE (exp) == REAL_CST
1365 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1366 {
1367 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1368 TREE_TYPE (base), type_out))
1369 return NULL;
1370
1371 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1372 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1373 return stmt;
1374 }
1375
1376 /* Catch square root. */
1377 if (TREE_CODE (exp) == REAL_CST
1378 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1379 {
1380 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1381 if (*type_out
1382 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1383 OPTIMIZE_FOR_SPEED))
1384 {
1385 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1386 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1387 gimple_call_set_lhs (stmt, var);
1388 gimple_call_set_nothrow (stmt, true);
1389 return stmt;
1390 }
1391 }
1392
1393 return NULL;
1394 }
1395
1396
1397 /* Function vect_recog_widen_sum_pattern
1398
1399 Try to find the following pattern:
1400
1401 type x_t;
1402 TYPE x_T, sum = init;
1403 loop:
1404 sum_0 = phi <init, sum_1>
1405 S1 x_t = *p;
1406 S2 x_T = (TYPE) x_t;
1407 S3 sum_1 = x_T + sum_0;
1408
1409 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1410 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1411 a special case of a reduction computation.
1412
1413 Input:
1414
1415 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1416 when this function is called with S3, the pattern {S2,S3} will be detected.
1417
1418 Output:
1419
1420 * TYPE_OUT: The type of the output of this pattern.
1421
1422 * Return value: A new stmt that will be used to replace the sequence of
1423 stmts that constitute the pattern. In this case it will be:
1424 WIDEN_SUM <x_t, sum_0>
1425
1426 Note: The widening-sum idiom is a widening reduction pattern that is
1427 vectorized without preserving all the intermediate results. It
1428 produces only N/2 (widened) results (by summing up pairs of
1429 intermediate results) rather than all N results. Therefore, we
1430 cannot allow this pattern when we want to get all the results and in
1431 the correct order (as is the case when this computation is in an
1432 inner-loop nested in an outer-loop that is being vectorized). */
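
/* A concrete (illustrative) example, assuming 16-bit shorts and 32-bit
   ints: source code such as:

     unsigned short x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   matches with type == unsigned short and TYPE == int, and is rewritten
   to use WIDEN_SUM_EXPR when the target provides a widening-sum
   instruction for those vector types.  */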
1433
1434 static gimple *
1435 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1436 {
1437 gimple *last_stmt = stmt_vinfo->stmt;
1438 tree oprnd0, oprnd1;
1439 vec_info *vinfo = stmt_vinfo->vinfo;
1440 tree type;
1441 gimple *pattern_stmt;
1442 tree var;
1443
1444 /* Look for the following pattern
1445 DX = (TYPE) X;
1446 sum_1 = DX + sum_0;
1447 In which DX is at least double the size of X, and sum_1 has been
1448 recognized as a reduction variable.
1449 */
1450
1451 /* Starting from LAST_STMT, follow the defs of its uses in search
1452 of the above pattern. */
1453
1454 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1455 &oprnd0, &oprnd1))
1456 return NULL;
1457
1458 type = gimple_expr_type (last_stmt);
1459
1460 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1461 we know that oprnd1 is the reduction variable (defined by a loop-header
1462 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1463 Left to check that oprnd0 is defined by a cast from type 'type' to type
1464 'TYPE'. */
1465
1466 vect_unpromoted_value unprom0;
1467 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1468 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1469 return NULL;
1470
1471 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1472
1473 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1474 type_out))
1475 return NULL;
1476
1477 var = vect_recog_temp_ssa_var (type, NULL);
1478 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1479
1480 return pattern_stmt;
1481 }
1482
1483 /* Recognize cases in which an operation is performed in one type WTYPE
1484 but could be done more efficiently in a narrower type NTYPE. For example,
1485 if we have:
1486
1487 ATYPE a; // narrower than NTYPE
1488 BTYPE b; // narrower than NTYPE
1489 WTYPE aw = (WTYPE) a;
1490 WTYPE bw = (WTYPE) b;
1491 WTYPE res = aw + bw; // only uses of aw and bw
1492
1493 then it would be more efficient to do:
1494
1495 NTYPE an = (NTYPE) a;
1496 NTYPE bn = (NTYPE) b;
1497 NTYPE resn = an + bn;
1498 WTYPE res = (WTYPE) resn;
1499
1500 Other situations include things like:
1501
1502 ATYPE a; // NTYPE or narrower
1503 WTYPE aw = (WTYPE) a;
1504 WTYPE res = aw + b;
1505
1506 when only "(NTYPE) res" is significant. In that case it's more efficient
1507 to truncate "b" and do the operation on NTYPE instead:
1508
1509 NTYPE an = (NTYPE) a;
1510 NTYPE bn = (NTYPE) b; // truncation
1511 NTYPE resn = an + bn;
1512 WTYPE res = (WTYPE) resn;
1513
1514 All users of "res" should then use "resn" instead, making the final
1515 statement dead (not marked as relevant). The final statement is still
1516 needed to maintain the type correctness of the IR.
1517
1518 vect_determine_precisions has already determined the minimum
1519 precision of the operation and the minimum precision required
1520 by users of the result. */
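
/* As an illustrative example (assuming 8-bit chars and 32-bit ints):

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] + b[i];

   C's integer promotions make the addition happen in int, but only the
   low 8 bits of each result are stored, so vect_determine_precisions
   will have recorded that the addition only needs 8 bits.  This function
   then replaces the int addition with a narrow addition followed by an
   extension back to int, allowing more elements to be processed per
   vector.  */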
1521
1522 static gimple *
1523 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1524 {
1525 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1526 if (!last_stmt)
1527 return NULL;
1528
1529 /* See whether we have found that this operation can be done on a
1530 narrower type without changing its semantics. */
1531 unsigned int new_precision = last_stmt_info->operation_precision;
1532 if (!new_precision)
1533 return NULL;
1534
1535 vec_info *vinfo = last_stmt_info->vinfo;
1536 tree lhs = gimple_assign_lhs (last_stmt);
1537 tree type = TREE_TYPE (lhs);
1538 tree_code code = gimple_assign_rhs_code (last_stmt);
1539
1540 /* Keep the first operand of a COND_EXPR as-is: only the other two
1541 operands are interesting. */
1542 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1543
1544 /* Check the operands. */
1545 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1546 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1547 unprom.quick_grow (nops);
1548 unsigned int min_precision = 0;
1549 bool single_use_p = false;
1550 for (unsigned int i = 0; i < nops; ++i)
1551 {
1552 tree op = gimple_op (last_stmt, first_op + i);
1553 if (TREE_CODE (op) == INTEGER_CST)
1554 unprom[i].set_op (op, vect_constant_def);
1555 else if (TREE_CODE (op) == SSA_NAME)
1556 {
1557 bool op_single_use_p = true;
1558 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1559 &op_single_use_p))
1560 return NULL;
1561 /* If:
1562
1563 (1) N bits of the result are needed;
1564 (2) all inputs are widened from M<N bits; and
1565 (3) one operand OP is a single-use SSA name
1566
1567 we can shift the M->N widening from OP to the output
1568 without changing the number or type of extensions involved.
1569 This then reduces the number of copies of STMT_INFO.
1570
1571 If instead of (3) more than one operand is a single-use SSA name,
1572 shifting the extension to the output is even more of a win.
1573
1574 If instead:
1575
1576 (1) N bits of the result are needed;
1577 (2) one operand OP2 is widened from M2<N bits;
1578 (3) another operand OP1 is widened from M1<M2 bits; and
1579 (4) both OP1 and OP2 are single-use
1580
1581 the choice is between:
1582
1583 (a) truncating OP2 to M1, doing the operation on M1,
1584 and then widening the result to N
1585
1586 (b) widening OP1 to M2, doing the operation on M2, and then
1587 widening the result to N
1588
1589 Both shift the M2->N widening of the inputs to the output.
1590 (a) additionally shifts the M1->M2 widening to the output;
1591 it requires fewer copies of STMT_INFO but requires an extra
1592 M2->M1 truncation.
1593
1594 Which is better will depend on the complexity and cost of
1595 STMT_INFO, which is hard to predict at this stage. However,
1596 a clear tie-breaker in favor of (b) is the fact that the
1597 truncation in (a) increases the length of the operation chain.
1598
1599 If instead of (4) only one of OP1 or OP2 is single-use,
1600 (b) is still a win over doing the operation in N bits:
1601 it still shifts the M2->N widening on the single-use operand
1602 to the output and reduces the number of STMT_INFO copies.
1603
1604 If neither operand is single-use then operating on fewer than
1605 N bits might lead to more extensions overall. Whether it does
1606 or not depends on global information about the vectorization
1607 region, and whether that's a good trade-off would again
1608 depend on the complexity and cost of the statements involved,
1609 as well as things like register pressure that are not normally
1610 modelled at this stage. We therefore ignore these cases
1611 and just optimize the clear single-use wins above.
1612
1613 Thus we take the maximum precision of the unpromoted operands
1614 and record whether any operand is single-use. */
1615 if (unprom[i].dt == vect_internal_def)
1616 {
1617 min_precision = MAX (min_precision,
1618 TYPE_PRECISION (unprom[i].type));
1619 single_use_p |= op_single_use_p;
1620 }
1621 }
1622 }
1623
1624 /* Although the operation could be done in operation_precision, we have
1625 to balance that against introducing extra truncations or extensions.
1626 Calculate the minimum precision that can be handled efficiently.
1627
1628 The loop above determined that the operation could be handled
1629 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1630 extension from the inputs to the output without introducing more
1631 instructions, and would reduce the number of instructions required
1632 for STMT_INFO itself.
1633
1634 vect_determine_precisions has also determined that the result only
1635 needs min_output_precision bits. Truncating by a factor of N times
1636 requires a tree of N - 1 instructions, so if TYPE is N times wider
1637 than min_output_precision, doing the operation in TYPE and truncating
1638 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1639 In contrast:
1640
1641 - truncating the input to a unary operation and doing the operation
1642 in the new type requires at most N - 1 + 1 = N instructions per
1643 output vector
1644
1645 - doing the same for a binary operation requires at most
1646 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1647
1648 Both unary and binary operations require fewer instructions than
1649 this if the operands were extended from a suitable truncated form.
1650 Thus there is usually nothing to lose by doing operations in
1651 min_output_precision bits, but there can be something to gain. */
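  /* As a concrete illustration of the counts above (a sketch, assuming
     TYPE is N = 2 times wider than min_output_precision): doing the
     operation in TYPE and truncating the result costs 2 + (2 - 1) = 3
     instructions per output vector, truncating the input of a unary
     operation costs (2 - 1) + 1 = 2, and truncating both inputs of a
     binary operation costs (2 - 1) * 2 + 1 = 3.  */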
1652 if (!single_use_p)
1653 min_precision = last_stmt_info->min_output_precision;
1654 else
1655 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1656
1657 /* Apply the minimum efficient precision we just calculated. */
1658 if (new_precision < min_precision)
1659 new_precision = min_precision;
1660 if (new_precision >= TYPE_PRECISION (type))
1661 return NULL;
1662
1663 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1664
1665 *type_out = get_vectype_for_scalar_type (type);
1666 if (!*type_out)
1667 return NULL;
1668
1669 /* We've found a viable pattern. Get the new type of the operation. */
1670 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1671 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1672
1673 /* If we're truncating an operation, we need to make sure that we
1674 don't introduce new undefined overflow. The codes tested here are
1675 a subset of those accepted by vect_truncatable_operation_p. */
1676 tree op_type = new_type;
1677 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1678 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1679 op_type = build_nonstandard_integer_type (new_precision, true);
1680
1681 /* We specifically don't check here whether the target supports the
1682 new operation, since it might be something that a later pattern
1683 wants to rewrite anyway. If targets have a minimum element size
1684 for some optabs, we should pattern-match smaller ops to larger ops
1685 where beneficial. */
1686 tree new_vectype = get_vectype_for_scalar_type (new_type);
1687 tree op_vectype = get_vectype_for_scalar_type (op_type);
1688 if (!new_vectype || !op_vectype)
1689 return NULL;
1690
1691 if (dump_enabled_p ())
1692 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1693 type, new_type);
1694
1695 /* Calculate the rhs operands for an operation on OP_TYPE. */
1696 tree ops[3] = {};
1697 for (unsigned int i = 1; i < first_op; ++i)
1698 ops[i - 1] = gimple_op (last_stmt, i);
1699 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1700 op_type, &unprom[0], op_vectype);
1701
1702 /* Use the operation to produce a result of type OP_TYPE. */
1703 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1704 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1705 ops[0], ops[1], ops[2]);
1706 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1707
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_NOTE, vect_location,
1710 "created pattern stmt: %G", pattern_stmt);
1711
1712 /* Convert back to the original signedness, if OP_TYPE is different
1713 from NEW_TYPE. */
1714 if (op_type != new_type)
1715 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1716 pattern_stmt, op_vectype);
1717
1718 /* Promote the result to the original type. */
1719 pattern_stmt = vect_convert_output (last_stmt_info, type,
1720 pattern_stmt, new_vectype);
1721
1722 return pattern_stmt;
1723 }
1724
1725 /* Recognize the patterns:
1726
1727 ATYPE a; // narrower than TYPE
1728 BTYPE b; // narrower than TYPE
1729 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1730 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1731
1732 where only the bottom half of avg is used. Try to transform them into:
1733
1734 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1735 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1736
1737 followed by:
1738
1739 TYPE avg = (TYPE) avg';
1740
1741 where NTYPE is no wider than half of TYPE. Since only the bottom half
1742 of avg is used, all or part of the cast of avg' should become redundant. */
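/* For instance (an illustrative sketch, not taken from the testsuite),
   pattern (2) typically arises from rounding-average loops such as:

     void
     f (unsigned char *r, unsigned char *a, unsigned char *b, int n)
     {
       for (int i = 0; i < n; ++i)
	 r[i] = (a[i] + b[i] + 1) >> 1;
     }

   The additions are performed in int after promotion, but only the low
   8 bits of each result are stored, so the whole computation can use
   .AVG_CEIL on narrow elements.  */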
1743
1744 static gimple *
1745 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1746 {
1747 /* Check for a shift right by one bit. */
1748 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1749 vec_info *vinfo = last_stmt_info->vinfo;
1750 if (!last_stmt
1751 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1752 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1753 return NULL;
1754
1755 /* Check that the shift result is wider than the users of the
1756 result need (i.e. that narrowing would be a natural choice). */
1757 tree lhs = gimple_assign_lhs (last_stmt);
1758 tree type = TREE_TYPE (lhs);
1759 unsigned int target_precision
1760 = vect_element_precision (last_stmt_info->min_output_precision);
1761 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1762 return NULL;
1763
1764 /* Look through any change in sign on the shift input. */
1765 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1766 vect_unpromoted_value unprom_plus;
1767 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1768 &unprom_plus);
1769 if (!rshift_rhs
1770 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1771 return NULL;
1772
1773 /* Get the definition of the shift input. */
1774 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1775 if (!plus_stmt_info)
1776 return NULL;
1777
1778 /* Check whether the shift input can be seen as a tree of additions on
1779 2 or 3 widened inputs.
1780
1781 Note that the pattern should be a win even if the result of one or
1782 more additions is reused elsewhere: if the pattern matches, we'd be
1783 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1784 internal_fn ifn = IFN_AVG_FLOOR;
1785 vect_unpromoted_value unprom[3];
1786 tree new_type;
1787 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1788 PLUS_EXPR, false, 3,
1789 unprom, &new_type);
1790 if (nops == 0)
1791 return NULL;
1792 if (nops == 3)
1793 {
1794 /* Check that one operand is 1. */
1795 unsigned int i;
1796 for (i = 0; i < 3; ++i)
1797 if (integer_onep (unprom[i].op))
1798 break;
1799 if (i == 3)
1800 return NULL;
1801 /* Throw away the 1 operand and keep the other two. */
1802 if (i < 2)
1803 unprom[i] = unprom[2];
1804 ifn = IFN_AVG_CEIL;
1805 }
1806
1807 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1808
1809 /* We know that:
1810
1811 (a) the operation can be viewed as:
1812
1813 TYPE widened0 = (TYPE) UNPROM[0];
1814 TYPE widened1 = (TYPE) UNPROM[1];
1815 TYPE tmp1 = widened0 + widened1 {+ 1};
1816 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1817
1818 (b) the first two statements are equivalent to:
1819
1820 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1821 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1822
1823 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1824 where sensible;
1825
1826 (d) all the operations can be performed correctly at twice the width of
1827 NEW_TYPE, due to the nature of the average operation; and
1828
1829 (e) users of the result of the right shift need only TARGET_PRECISION
1830 bits, where TARGET_PRECISION is no more than half of TYPE's
1831 precision.
1832
1833 Under these circumstances, the only situation in which NEW_TYPE
1834 could be narrower than TARGET_PRECISION is if widened0, widened1
1835 and an addition result are all used more than once. Thus we can
1836 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1837 as "free", whereas widening the result of the average instruction
1838 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1839 therefore better not to go narrower than TARGET_PRECISION. */
1840 if (TYPE_PRECISION (new_type) < target_precision)
1841 new_type = build_nonstandard_integer_type (target_precision,
1842 TYPE_UNSIGNED (new_type));
1843
1844 /* Check for target support. */
1845 tree new_vectype = get_vectype_for_scalar_type (new_type);
1846 if (!new_vectype
1847 || !direct_internal_fn_supported_p (ifn, new_vectype,
1848 OPTIMIZE_FOR_SPEED))
1849 return NULL;
1850
1851 /* The IR requires a valid vector type for the cast result, even though
1852 it's likely to be discarded. */
1853 *type_out = get_vectype_for_scalar_type (type);
1854 if (!*type_out)
1855 return NULL;
1856
1857 /* Generate the IFN_AVG* call. */
1858 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1859 tree new_ops[2];
1860 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1861 unprom, new_vectype);
1862 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1863 new_ops[1]);
1864 gimple_call_set_lhs (average_stmt, new_var);
1865 gimple_set_location (average_stmt, gimple_location (last_stmt));
1866
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE, vect_location,
1869 "created pattern stmt: %G", average_stmt);
1870
1871 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1872 }
1873
1874 /* Recognize cases in which the input to a cast is wider than its
1875 output, and the input is fed by a widening operation. Fold this
1876 by removing the unnecessary intermediate widening. E.g.:
1877
1878 unsigned char a;
1879 unsigned int b = (unsigned int) a;
1880 unsigned short c = (unsigned short) b;
1881
1882 -->
1883
1884 unsigned short c = (unsigned short) a;
1885
1886 Although this is rare in input IR, it is an expected side-effect
1887 of the over-widening pattern above.
1888
1889 This is also beneficial for integer-to-float conversions, if the
1890 widened integer has more bits than the float but the unwidened
1891 input doesn't. */
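/* A minimal sketch of the integer-to-float case mentioned above:

     unsigned int a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   The 64-bit B is wider than float, but the unwidened 32-bit A is not,
   so converting A directly ("f = (float) a") avoids the intermediate
   widening.  */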
1892
1893 static gimple *
1894 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1895 {
1896 /* Check for a cast, including an integer-to-float conversion. */
1897 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1898 if (!last_stmt)
1899 return NULL;
1900 tree_code code = gimple_assign_rhs_code (last_stmt);
1901 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1902 return NULL;
1903
1904 /* Make sure that the rhs is a scalar with a natural bitsize. */
1905 tree lhs = gimple_assign_lhs (last_stmt);
1906 if (!lhs)
1907 return NULL;
1908 tree lhs_type = TREE_TYPE (lhs);
1909 scalar_mode lhs_mode;
1910 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1911 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1912 return NULL;
1913
1914 /* Check for a narrowing operation (from a vector point of view). */
1915 tree rhs = gimple_assign_rhs1 (last_stmt);
1916 tree rhs_type = TREE_TYPE (rhs);
1917 if (!INTEGRAL_TYPE_P (rhs_type)
1918 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1919 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1920 return NULL;
1921
1922 /* Try to find an unpromoted input. */
1923 vec_info *vinfo = last_stmt_info->vinfo;
1924 vect_unpromoted_value unprom;
1925 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1926 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1927 return NULL;
1928
1929 /* If the bits above RHS_TYPE matter, make sure that they're the
1930 same when extending from UNPROM as they are when extending from RHS. */
1931 if (!INTEGRAL_TYPE_P (lhs_type)
1932 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1933 return NULL;
1934
1935 /* We can get the same result by casting UNPROM directly, to avoid
1936 the unnecessary widening and narrowing. */
1937 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1938
1939 *type_out = get_vectype_for_scalar_type (lhs_type);
1940 if (!*type_out)
1941 return NULL;
1942
1943 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1944 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1945 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1946
1947 return pattern_stmt;
1948 }
1949
1950 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1951 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1952
1953 static gimple *
1954 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1955 {
1956 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1957 WIDEN_LSHIFT_EXPR, true,
1958 "vect_recog_widen_shift_pattern");
1959 }
1960
1961 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1962
1963 type a_t, b_t, c_t;
1964
1965 S0 a_t = b_t r<< c_t;
1966
1967 Input/Output:
1968
1969 * STMT_VINFO: The stmt from which the pattern search begins,
1970 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1971 with a sequence:
1972
1973 S1 d_t = -c_t;
1974 S2 e_t = d_t & (B - 1);
1975 S3 f_t = b_t << c_t;
1976 S4 g_t = b_t >> e_t;
1977 S0 a_t = f_t | g_t;
1978
1979 where B is element bitsize of type.
1980
1981 Output:
1982
1983 * TYPE_OUT: The type of the output of this pattern.
1984
1985 * Return value: A new stmt that will be used to replace the rotate
1986 S0 stmt. */
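/* An illustrative scalar sketch of the replacement sequence for a
   32-bit unsigned rotate-left (so B == 32):

     static inline unsigned int
     rotl32 (unsigned int b, unsigned int c)
     {
       unsigned int e = (-c) & 31;
       return (b << c) | (b >> e);
     }

   Here "e" corresponds to S1 and S2, the two shifts to S3 and S4, and
   the final OR to S0.  Masking the negated count keeps both shift
   amounts in [0, B - 1], so the expansion is well defined even for
   c == 0.  */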
1987
1988 static gimple *
1989 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1990 {
1991 gimple *last_stmt = stmt_vinfo->stmt;
1992 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1993 gimple *pattern_stmt, *def_stmt;
1994 enum tree_code rhs_code;
1995 vec_info *vinfo = stmt_vinfo->vinfo;
1996 enum vect_def_type dt;
1997 optab optab1, optab2;
1998 edge ext_def = NULL;
1999
2000 if (!is_gimple_assign (last_stmt))
2001 return NULL;
2002
2003 rhs_code = gimple_assign_rhs_code (last_stmt);
2004 switch (rhs_code)
2005 {
2006 case LROTATE_EXPR:
2007 case RROTATE_EXPR:
2008 break;
2009 default:
2010 return NULL;
2011 }
2012
2013 lhs = gimple_assign_lhs (last_stmt);
2014 oprnd0 = gimple_assign_rhs1 (last_stmt);
2015 type = TREE_TYPE (oprnd0);
2016 oprnd1 = gimple_assign_rhs2 (last_stmt);
2017 if (TREE_CODE (oprnd0) != SSA_NAME
2018 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2019 || !INTEGRAL_TYPE_P (type)
2020 || !TYPE_UNSIGNED (type))
2021 return NULL;
2022
2023 stmt_vec_info def_stmt_info;
2024 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2025 return NULL;
2026
2027 if (dt != vect_internal_def
2028 && dt != vect_constant_def
2029 && dt != vect_external_def)
2030 return NULL;
2031
2032 vectype = get_vectype_for_scalar_type (type);
2033 if (vectype == NULL_TREE)
2034 return NULL;
2035
2036 /* If vector/vector or vector/scalar rotate is supported by the target,
2037 don't do anything here. */
2038 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2039 if (optab1
2040 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2041 return NULL;
2042
2043 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2044 {
2045 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2046 if (optab2
2047 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2048 return NULL;
2049 }
2050
2051 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2052 don't do anything here either. */
2053 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2054 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2055 if (!optab1
2056 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2057 || !optab2
2058 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2059 {
2060 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2061 return NULL;
2062 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2063 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2064 if (!optab1
2065 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2066 || !optab2
2067 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2068 return NULL;
2069 }
2070
2071 *type_out = vectype;
2072
2073 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2074 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2075
2076 def = NULL_TREE;
2077 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2078 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2079 def = oprnd1;
2080 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2081 {
2082 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2083 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2084 && TYPE_PRECISION (TREE_TYPE (rhs1))
2085 == TYPE_PRECISION (type))
2086 def = rhs1;
2087 }
2088
2089 if (def == NULL_TREE)
2090 {
2091 def = vect_recog_temp_ssa_var (type, NULL);
2092 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2093 append_pattern_def_seq (stmt_vinfo, def_stmt);
2094 }
2095 stype = TREE_TYPE (def);
2096
2097 if (TREE_CODE (def) == INTEGER_CST)
2098 {
2099 if (!tree_fits_uhwi_p (def)
2100 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2101 || integer_zerop (def))
2102 return NULL;
2103 def2 = build_int_cst (stype,
2104 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2105 }
2106 else
2107 {
2108 tree vecstype = get_vectype_for_scalar_type (stype);
2109
2110 if (vecstype == NULL_TREE)
2111 return NULL;
2112 def2 = vect_recog_temp_ssa_var (stype, NULL);
2113 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2114 if (ext_def)
2115 {
2116 basic_block new_bb
2117 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2118 gcc_assert (!new_bb);
2119 }
2120 else
2121 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2122
2123 def2 = vect_recog_temp_ssa_var (stype, NULL);
2124 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2125 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2126 gimple_assign_lhs (def_stmt), mask);
2127 if (ext_def)
2128 {
2129 basic_block new_bb
2130 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2131 gcc_assert (!new_bb);
2132 }
2133 else
2134 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2135 }
2136
2137 var1 = vect_recog_temp_ssa_var (type, NULL);
2138 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2139 ? LSHIFT_EXPR : RSHIFT_EXPR,
2140 oprnd0, def);
2141 append_pattern_def_seq (stmt_vinfo, def_stmt);
2142
2143 var2 = vect_recog_temp_ssa_var (type, NULL);
2144 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2145 ? RSHIFT_EXPR : LSHIFT_EXPR,
2146 oprnd0, def2);
2147 append_pattern_def_seq (stmt_vinfo, def_stmt);
2148
2149 /* Pattern detected. */
2150 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2151
2152 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2153 var = vect_recog_temp_ssa_var (type, NULL);
2154 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2155
2156 return pattern_stmt;
2157 }
2158
2159 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2160 vectorized:
2161
2162 type a_t;
2163 TYPE b_T, res_T;
2164
2165 S1 a_t = ;
2166 S2 b_T = ;
2167 S3 res_T = b_T op a_t;
2168
2169 where type 'TYPE' is a type with a different size from 'type',
2170 and op is <<, >> or rotate.
2171
2172 Also detect cases:
2173
2174 type a_t;
2175 TYPE b_T, c_T, res_T;
2176
2177 S0 c_T = ;
2178 S1 a_t = (type) c_T;
2179 S2 b_T = ;
2180 S3 res_T = b_T op a_t;
2181
2182 Input/Output:
2183
2184 * STMT_VINFO: The stmt from which the pattern search begins,
2185 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2186 with a shift/rotate which has the same type on both operands: in the
2187 second case just b_T op c_T, in the first case with an added cast
2188 from a_t to the type of b_T in STMT_VINFO_PATTERN_DEF_SEQ.
2189
2190 Output:
2191
2192 * TYPE_OUT: The type of the output of this pattern.
2193
2194 * Return value: A new stmt that will be used to replace the shift/rotate
2195 S3 stmt. */
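/* A small illustrative example of the first form above (a sketch):

     unsigned char a_t;
     unsigned int b_T, res_T;
     ...
     res_T = b_T << a_t;

   is rewritten so that the shift amount has the same type as b_T: a
   cast "c_T = (unsigned int) a_t" is added to the pattern def sequence
   and the shift becomes "res_T = b_T << c_T".  */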
2196
2197 static gimple *
2198 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2199 tree *type_out)
2200 {
2201 gimple *last_stmt = stmt_vinfo->stmt;
2202 tree oprnd0, oprnd1, lhs, var;
2203 gimple *pattern_stmt;
2204 enum tree_code rhs_code;
2205 vec_info *vinfo = stmt_vinfo->vinfo;
2206
2207 if (!is_gimple_assign (last_stmt))
2208 return NULL;
2209
2210 rhs_code = gimple_assign_rhs_code (last_stmt);
2211 switch (rhs_code)
2212 {
2213 case LSHIFT_EXPR:
2214 case RSHIFT_EXPR:
2215 case LROTATE_EXPR:
2216 case RROTATE_EXPR:
2217 break;
2218 default:
2219 return NULL;
2220 }
2221
2222 lhs = gimple_assign_lhs (last_stmt);
2223 oprnd0 = gimple_assign_rhs1 (last_stmt);
2224 oprnd1 = gimple_assign_rhs2 (last_stmt);
2225 if (TREE_CODE (oprnd0) != SSA_NAME
2226 || TREE_CODE (oprnd1) != SSA_NAME
2227 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2228 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2229 || TYPE_PRECISION (TREE_TYPE (lhs))
2230 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2231 return NULL;
2232
2233 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2234 if (!def_vinfo)
2235 return NULL;
2236
2237 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2238 if (*type_out == NULL_TREE)
2239 return NULL;
2240
2241 tree def = NULL_TREE;
2242 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2243 if (def_stmt && gimple_assign_cast_p (def_stmt))
2244 {
2245 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2246 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2247 && TYPE_PRECISION (TREE_TYPE (rhs1))
2248 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2249 {
2250 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2251 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2252 def = rhs1;
2253 else
2254 {
2255 tree mask
2256 = build_low_bits_mask (TREE_TYPE (rhs1),
2257 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2258 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2259 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2260 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2261 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2262 }
2263 }
2264 }
2265
2266 if (def == NULL_TREE)
2267 {
2268 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2269 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2270 append_pattern_def_seq (stmt_vinfo, def_stmt);
2271 }
2272
2273 /* Pattern detected. */
2274 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2275
2276 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2277 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2278 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2279
2280 return pattern_stmt;
2281 }
2282
2283 /* Return true iff the target has a vector optab implementing the operation
2284 CODE on type VECTYPE. */
2285
2286 static bool
2287 target_has_vecop_for_code (tree_code code, tree vectype)
2288 {
2289 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2290 return voptab
2291 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2292 }
2293
2294 /* Verify that the target has optabs of VECTYPE to perform all the steps
2295 needed by the multiplication-by-immediate synthesis algorithm described by
2296 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2297 present. Return true iff the target supports all the steps. */
2298
2299 static bool
2300 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2301 tree vectype, bool synth_shift_p)
2302 {
2303 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2304 return false;
2305
2306 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2307 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2308
2309 if (var == negate_variant
2310 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2311 return false;
2312
2313 /* If we must synthesize shifts with additions make sure that vector
2314 addition is available. */
2315 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2316 return false;
2317
2318 for (int i = 1; i < alg->ops; i++)
2319 {
2320 switch (alg->op[i])
2321 {
2322 case alg_shift:
2323 break;
2324 case alg_add_t_m2:
2325 case alg_add_t2_m:
2326 case alg_add_factor:
2327 if (!supports_vplus)
2328 return false;
2329 break;
2330 case alg_sub_t_m2:
2331 case alg_sub_t2_m:
2332 case alg_sub_factor:
2333 if (!supports_vminus)
2334 return false;
2335 break;
2336 case alg_unknown:
2337 case alg_m:
2338 case alg_zero:
2339 case alg_impossible:
2340 return false;
2341 default:
2342 gcc_unreachable ();
2343 }
2344 }
2345
2346 return true;
2347 }
2348
2349 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2350 putting the final result in DEST. Append all statements but the last into
2351 VINFO. Return the last statement. */
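/* For example (a sketch), a left shift by AMNT == 3 is synthesized as a
   chain of three self-additions:

     t1 = op + op;        (op * 2)
     t2 = t1 + t1;        (op * 4)
     dest = t2 + t2;      (op * 8, i.e. op << 3)

   with the first two statements appended to the pattern sequence and
   the last one returned.  */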
2352
2353 static gimple *
2354 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2355 stmt_vec_info vinfo)
2356 {
2357 HOST_WIDE_INT i;
2358 tree itype = TREE_TYPE (op);
2359 tree prev_res = op;
2360 gcc_assert (amnt >= 0);
2361 for (i = 0; i < amnt; i++)
2362 {
2363 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2364 : dest;
2365 gimple *stmt
2366 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2367 prev_res = tmp_var;
2368 if (i < amnt - 1)
2369 append_pattern_def_seq (vinfo, stmt);
2370 else
2371 return stmt;
2372 }
2373 gcc_unreachable ();
2374 return NULL;
2375 }
2376
2377 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2378 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2379 the process if necessary. Append the resulting assignment statements
2380 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2381 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2382 left shifts using additions. */
2383
2384 static tree
2385 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2386 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2387 {
2388 if (integer_zerop (op2)
2389 && (code == LSHIFT_EXPR
2390 || code == PLUS_EXPR))
2391 {
2392 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2393 return op1;
2394 }
2395
2396 gimple *stmt;
2397 tree itype = TREE_TYPE (op1);
2398 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2399
2400 if (code == LSHIFT_EXPR
2401 && synth_shift_p)
2402 {
2403 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2404 stmt_vinfo);
2405 append_pattern_def_seq (stmt_vinfo, stmt);
2406 return tmp_var;
2407 }
2408
2409 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2410 append_pattern_def_seq (stmt_vinfo, stmt);
2411 return tmp_var;
2412 }
2413
2414 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2415 and simple arithmetic operations to be vectorized. Record the statements
2416 produced in STMT_VINFO and return the last statement in the sequence or
2417 NULL if it's not possible to synthesize such a multiplication.
2418 This function mirrors the behavior of expand_mult_const in expmed.c but
2419 works on tree-ssa form. */
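/* For instance (an illustrative sketch; the exact sequence depends on
   what choose_mult_variant selects for the mode), a multiplication by 9
   could be synthesized as:

     t1 = op << 3;
     result = t1 + op;

   i.e. a single alg_add_t_m2 step, with the shift itself expanded into
   additions when SYNTH_SHIFT_P is true.  */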
2420
2421 static gimple *
2422 vect_synth_mult_by_constant (tree op, tree val,
2423 stmt_vec_info stmt_vinfo)
2424 {
2425 tree itype = TREE_TYPE (op);
2426 machine_mode mode = TYPE_MODE (itype);
2427 struct algorithm alg;
2428 mult_variant variant;
2429 if (!tree_fits_shwi_p (val))
2430 return NULL;
2431
2432 /* Multiplication synthesis by shifts, adds and subs can introduce
2433 signed overflow where the original operation didn't. Perform the
2434 operations on an unsigned type and cast back to avoid this.
2435 In the future we may want to relax this for synthesis algorithms
2436 that we can prove do not cause unexpected overflow. */
2437 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2438
2439 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2440
2441 /* Targets that don't support vector shifts but support vector additions
2442 can synthesize shifts that way. */
2443 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2444
2445 HOST_WIDE_INT hwval = tree_to_shwi (val);
2446 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2447 The vectorizer's benefit analysis will decide whether it's beneficial
2448 to do this. */
2449 bool possible = choose_mult_variant (mode, hwval, &alg,
2450 &variant, MAX_COST);
2451 if (!possible)
2452 return NULL;
2453
2454 tree vectype = get_vectype_for_scalar_type (multtype);
2455
2456 if (!vectype
2457 || !target_supports_mult_synth_alg (&alg, variant,
2458 vectype, synth_shift_p))
2459 return NULL;
2460
2461 tree accumulator;
2462
2463 /* STMT tracks the last statement created below. */
2464 gimple *stmt = NULL;
2465
2466 if (cast_to_unsigned_p)
2467 {
2468 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2469 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2470 append_pattern_def_seq (stmt_vinfo, stmt);
2471 op = tmp_op;
2472 }
2473
2474 if (alg.op[0] == alg_zero)
2475 accumulator = build_int_cst (multtype, 0);
2476 else
2477 accumulator = op;
2478
2479 bool needs_fixup = (variant == negate_variant)
2480 || (variant == add_variant);
2481
2482 for (int i = 1; i < alg.ops; i++)
2483 {
2484 tree shft_log = build_int_cst (multtype, alg.log[i]);
2485 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2486 tree tmp_var = NULL_TREE;
2487
2488 switch (alg.op[i])
2489 {
2490 case alg_shift:
2491 if (synth_shift_p)
2492 stmt
2493 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2494 stmt_vinfo);
2495 else
2496 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2497 shft_log);
2498 break;
2499 case alg_add_t_m2:
2500 tmp_var
2501 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2502 stmt_vinfo, synth_shift_p);
2503 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2504 tmp_var);
2505 break;
2506 case alg_sub_t_m2:
2507 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2508 shft_log, stmt_vinfo,
2509 synth_shift_p);
2510 /* In some algorithms the first step involves zeroing the
2511 accumulator. If subtracting from such an accumulator
2512 just emit the negation directly. */
2513 if (integer_zerop (accumulator))
2514 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2515 else
2516 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2517 tmp_var);
2518 break;
2519 case alg_add_t2_m:
2520 tmp_var
2521 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2522 stmt_vinfo, synth_shift_p);
2523 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2524 break;
2525 case alg_sub_t2_m:
2526 tmp_var
2527 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2528 stmt_vinfo, synth_shift_p);
2529 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2530 break;
2531 case alg_add_factor:
2532 tmp_var
2533 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2534 stmt_vinfo, synth_shift_p);
2535 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2536 tmp_var);
2537 break;
2538 case alg_sub_factor:
2539 tmp_var
2540 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2541 stmt_vinfo, synth_shift_p);
2542 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2543 accumulator);
2544 break;
2545 default:
2546 gcc_unreachable ();
2547 }
2548 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2549 but rather return it directly. */
2550
2551 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2552 append_pattern_def_seq (stmt_vinfo, stmt);
2553 accumulator = accum_tmp;
2554 }
2555 if (variant == negate_variant)
2556 {
2557 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2558 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2559 accumulator = accum_tmp;
2560 if (cast_to_unsigned_p)
2561 append_pattern_def_seq (stmt_vinfo, stmt);
2562 }
2563 else if (variant == add_variant)
2564 {
2565 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2566 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2567 accumulator = accum_tmp;
2568 if (cast_to_unsigned_p)
2569 append_pattern_def_seq (stmt_vinfo, stmt);
2570 }
2571 /* Move back to a signed type if needed. */
2572 if (cast_to_unsigned_p)
2573 {
2574 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2575 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2576 }
2577
2578 return stmt;
2579 }
2580
2581 /* Detect multiplication by a constant and convert it into a sequence of
2582 shifts, additions, subtractions and negations. We reuse the
2583 choose_mult_variant algorithms from expmed.c.
2584
2585 Input/Output:
2586
2587 STMT_VINFO: The stmt from which the pattern search begins,
2588 i.e. the mult stmt.
2589
2590 Output:
2591
2592 * TYPE_OUT: The type of the output of this pattern.
2593
2594 * Return value: A new stmt that will be used to replace
2595 the multiplication. */
2596
2597 static gimple *
2598 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2599 {
2600 gimple *last_stmt = stmt_vinfo->stmt;
2601 tree oprnd0, oprnd1, vectype, itype;
2602 gimple *pattern_stmt;
2603
2604 if (!is_gimple_assign (last_stmt))
2605 return NULL;
2606
2607 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2608 return NULL;
2609
2610 oprnd0 = gimple_assign_rhs1 (last_stmt);
2611 oprnd1 = gimple_assign_rhs2 (last_stmt);
2612 itype = TREE_TYPE (oprnd0);
2613
2614 if (TREE_CODE (oprnd0) != SSA_NAME
2615 || TREE_CODE (oprnd1) != INTEGER_CST
2616 || !INTEGRAL_TYPE_P (itype)
2617 || !type_has_mode_precision_p (itype))
2618 return NULL;
2619
2620 vectype = get_vectype_for_scalar_type (itype);
2621 if (vectype == NULL_TREE)
2622 return NULL;
2623
2624 /* If the target can handle vectorized multiplication natively,
2625 don't attempt to optimize this. */
2626 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2627 if (mul_optab != unknown_optab)
2628 {
2629 machine_mode vec_mode = TYPE_MODE (vectype);
2630 int icode = (int) optab_handler (mul_optab, vec_mode);
2631 if (icode != CODE_FOR_nothing)
2632 return NULL;
2633 }
2634
2635 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2636 if (!pattern_stmt)
2637 return NULL;
2638
2639 /* Pattern detected. */
2640 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2641
2642 *type_out = vectype;
2643
2644 return pattern_stmt;
2645 }
2646
2647 /* Detect a signed division by a constant that wouldn't otherwise
2648 be vectorized:
2649
2650 type a_t, b_t;
2651
2652 S1 a_t = b_t / N;
2653
2654 where type 'type' is an integral type and N is a constant.
2655
2656 Similarly handle modulo by a constant:
2657
2658 S4 a_t = b_t % N;
2659
2660 Input/Output:
2661
2662 * STMT_VINFO: The stmt from which the pattern search begins,
2663 i.e. the division stmt. If N is a power of two constant and
2664 the type is signed, S1 is replaced by:
2665 S3 y_t = b_t < 0 ? N - 1 : 0;
2666 S2 x_t = b_t + y_t;
2667 S1' a_t = x_t >> log2 (N);
2668
2669 If N is a power of two constant and the type is signed, S4 is
2670 replaced by (where *_T temporaries have unsigned type):
2671 S9 y_T = b_t < 0 ? -1U : 0U;
2672 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2673 S7 z_t = (type) z_T;
2674 S6 w_t = b_t + z_t;
2675 S5 x_t = w_t & (N - 1);
2676 S4' a_t = x_t - z_t;
2677
2678 Output:
2679
2680 * TYPE_OUT: The type of the output of this pattern.
2681
2682 * Return value: A new stmt that will be used to replace the division
2683 S1 or modulo S4 stmt. */
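  /* A concrete sketch of the power-of-two case above, for N == 4 and a
     32-bit signed b_t:

       y_t = b_t < 0 ? 3 : 0;
       x_t = b_t + y_t;
       a_t = x_t >> 2;

     For negative b_t the added 3 makes the arithmetic shift round
     towards zero, as TRUNC_DIV_EXPR requires, rather than towards
     negative infinity.  */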
2684
2685 static gimple *
2686 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2687 {
2688 gimple *last_stmt = stmt_vinfo->stmt;
2689 tree oprnd0, oprnd1, vectype, itype, cond;
2690 gimple *pattern_stmt, *def_stmt;
2691 enum tree_code rhs_code;
2692 optab optab;
2693 tree q;
2694 int dummy_int, prec;
2695
2696 if (!is_gimple_assign (last_stmt))
2697 return NULL;
2698
2699 rhs_code = gimple_assign_rhs_code (last_stmt);
2700 switch (rhs_code)
2701 {
2702 case TRUNC_DIV_EXPR:
2703 case EXACT_DIV_EXPR:
2704 case TRUNC_MOD_EXPR:
2705 break;
2706 default:
2707 return NULL;
2708 }
2709
2710 oprnd0 = gimple_assign_rhs1 (last_stmt);
2711 oprnd1 = gimple_assign_rhs2 (last_stmt);
2712 itype = TREE_TYPE (oprnd0);
2713 if (TREE_CODE (oprnd0) != SSA_NAME
2714 || TREE_CODE (oprnd1) != INTEGER_CST
2715 || TREE_CODE (itype) != INTEGER_TYPE
2716 || !type_has_mode_precision_p (itype))
2717 return NULL;
2718
2719 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2720 vectype = get_vectype_for_scalar_type (itype);
2721 if (vectype == NULL_TREE)
2722 return NULL;
2723
2724 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2725 {
2726 /* If the target can handle vectorized division or modulo natively,
2727 don't attempt to optimize this, since native division is likely
2728 to give smaller code. */
2729 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2730 if (optab != unknown_optab)
2731 {
2732 machine_mode vec_mode = TYPE_MODE (vectype);
2733 int icode = (int) optab_handler (optab, vec_mode);
2734 if (icode != CODE_FOR_nothing)
2735 return NULL;
2736 }
2737 }
2738
2739 prec = TYPE_PRECISION (itype);
2740 if (integer_pow2p (oprnd1))
2741 {
2742 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2743 return NULL;
2744
2745 /* Pattern detected. */
2746 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2747
2748 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2749 build_int_cst (itype, 0));
2750 if (rhs_code == TRUNC_DIV_EXPR
2751 || rhs_code == EXACT_DIV_EXPR)
2752 {
2753 tree var = vect_recog_temp_ssa_var (itype, NULL);
2754 tree shift;
2755 def_stmt
2756 = gimple_build_assign (var, COND_EXPR, cond,
2757 fold_build2 (MINUS_EXPR, itype, oprnd1,
2758 build_int_cst (itype, 1)),
2759 build_int_cst (itype, 0));
2760 append_pattern_def_seq (stmt_vinfo, def_stmt);
2761 var = vect_recog_temp_ssa_var (itype, NULL);
2762 def_stmt
2763 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2764 gimple_assign_lhs (def_stmt));
2765 append_pattern_def_seq (stmt_vinfo, def_stmt);
2766
2767 shift = build_int_cst (itype, tree_log2 (oprnd1));
2768 pattern_stmt
2769 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2770 RSHIFT_EXPR, var, shift);
2771 }
2772 else
2773 {
2774 tree signmask;
2775 if (compare_tree_int (oprnd1, 2) == 0)
2776 {
2777 signmask = vect_recog_temp_ssa_var (itype, NULL);
2778 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2779 build_int_cst (itype, 1),
2780 build_int_cst (itype, 0));
2781 append_pattern_def_seq (stmt_vinfo, def_stmt);
2782 }
2783 else
2784 {
2785 tree utype
2786 = build_nonstandard_integer_type (prec, 1);
2787 tree vecutype = get_vectype_for_scalar_type (utype);
2788 tree shift
2789 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2790 - tree_log2 (oprnd1));
2791 tree var = vect_recog_temp_ssa_var (utype, NULL);
2792
2793 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2794 build_int_cst (utype, -1),
2795 build_int_cst (utype, 0));
2796 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2797 var = vect_recog_temp_ssa_var (utype, NULL);
2798 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2799 gimple_assign_lhs (def_stmt),
2800 shift);
2801 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2802 signmask = vect_recog_temp_ssa_var (itype, NULL);
2803 def_stmt
2804 = gimple_build_assign (signmask, NOP_EXPR, var);
2805 append_pattern_def_seq (stmt_vinfo, def_stmt);
2806 }
2807 def_stmt
2808 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2809 PLUS_EXPR, oprnd0, signmask);
2810 append_pattern_def_seq (stmt_vinfo, def_stmt);
2811 def_stmt
2812 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2813 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2814 fold_build2 (MINUS_EXPR, itype, oprnd1,
2815 build_int_cst (itype, 1)));
2816 append_pattern_def_seq (stmt_vinfo, def_stmt);
2817
2818 pattern_stmt
2819 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2820 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2821 signmask);
2822 }
2823
2824 *type_out = vectype;
2825 return pattern_stmt;
2826 }
2827
2828 if (prec > HOST_BITS_PER_WIDE_INT
2829 || integer_zerop (oprnd1))
2830 return NULL;
2831
2832 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2833 return NULL;
2834
2835 if (TYPE_UNSIGNED (itype))
2836 {
2837 unsigned HOST_WIDE_INT mh, ml;
2838 int pre_shift, post_shift;
2839 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2840 & GET_MODE_MASK (itype_mode));
2841 tree t1, t2, t3, t4;
2842
2843 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2844 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2845 return NULL;
2846
2847 /* Find a suitable multiplier and right shift count
2848 instead of multiplying with D. */
2849 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2850
2851 /* If the suggested multiplier needs more than PREC bits, we can do better
2852 for even divisors, using an initial right shift. */
2853 if (mh != 0 && (d & 1) == 0)
2854 {
2855 pre_shift = ctz_or_zero (d);
2856 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2857 &ml, &post_shift, &dummy_int);
2858 gcc_assert (!mh);
2859 }
2860 else
2861 pre_shift = 0;
2862
2863 if (mh != 0)
2864 {
2865 if (post_shift - 1 >= prec)
2866 return NULL;
2867
2868 /* t1 = oprnd0 h* ml;
2869 t2 = oprnd0 - t1;
2870 t3 = t2 >> 1;
2871 t4 = t1 + t3;
2872 q = t4 >> (post_shift - 1); */
2873 t1 = vect_recog_temp_ssa_var (itype, NULL);
2874 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2875 build_int_cst (itype, ml));
2876 append_pattern_def_seq (stmt_vinfo, def_stmt);
2877
2878 t2 = vect_recog_temp_ssa_var (itype, NULL);
2879 def_stmt
2880 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2881 append_pattern_def_seq (stmt_vinfo, def_stmt);
2882
2883 t3 = vect_recog_temp_ssa_var (itype, NULL);
2884 def_stmt
2885 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2886 append_pattern_def_seq (stmt_vinfo, def_stmt);
2887
2888 t4 = vect_recog_temp_ssa_var (itype, NULL);
2889 def_stmt
2890 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2891
2892 if (post_shift != 1)
2893 {
2894 append_pattern_def_seq (stmt_vinfo, def_stmt);
2895
2896 q = vect_recog_temp_ssa_var (itype, NULL);
2897 pattern_stmt
2898 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2899 build_int_cst (itype, post_shift - 1));
2900 }
2901 else
2902 {
2903 q = t4;
2904 pattern_stmt = def_stmt;
2905 }
2906 }
2907 else
2908 {
2909 if (pre_shift >= prec || post_shift >= prec)
2910 return NULL;
2911
2912 /* t1 = oprnd0 >> pre_shift;
2913 t2 = t1 h* ml;
2914 q = t2 >> post_shift; */
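	  /* For example (a sketch), an unsigned 32-bit division by 3
	     typically reaches this point with pre_shift == 0,
	     ml == 0xAAAAAAAB and post_shift == 1, giving:

	       t2 = oprnd0 h* 0xAAAAAAAB;
	       q = t2 >> 1;  */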
2915 if (pre_shift)
2916 {
2917 t1 = vect_recog_temp_ssa_var (itype, NULL);
2918 def_stmt
2919 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2920 build_int_cst (NULL, pre_shift));
2921 append_pattern_def_seq (stmt_vinfo, def_stmt);
2922 }
2923 else
2924 t1 = oprnd0;
2925
2926 t2 = vect_recog_temp_ssa_var (itype, NULL);
2927 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2928 build_int_cst (itype, ml));
2929
2930 if (post_shift)
2931 {
2932 append_pattern_def_seq (stmt_vinfo, def_stmt);
2933
2934 q = vect_recog_temp_ssa_var (itype, NULL);
2935 def_stmt
2936 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2937 build_int_cst (itype, post_shift));
2938 }
2939 else
2940 q = t2;
2941
2942 pattern_stmt = def_stmt;
2943 }
2944 }
2945 else
2946 {
2947 unsigned HOST_WIDE_INT ml;
2948 int post_shift;
2949 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2950 unsigned HOST_WIDE_INT abs_d;
2951 bool add = false;
2952 tree t1, t2, t3, t4;
2953
2954 /* Give up for -1. */
2955 if (d == -1)
2956 return NULL;
2957
2958 /* Since d might be INT_MIN, we have to cast to
2959 unsigned HOST_WIDE_INT before negating to avoid
2960 undefined signed overflow. */
2961 abs_d = (d >= 0
2962 ? (unsigned HOST_WIDE_INT) d
2963 : - (unsigned HOST_WIDE_INT) d);
2964
2965 /* n rem d = n rem -d */
2966 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2967 {
2968 d = abs_d;
2969 oprnd1 = build_int_cst (itype, abs_d);
2970 }
2971 if (HOST_BITS_PER_WIDE_INT >= prec
2972 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2973 /* This case is not handled correctly below. */
2974 return NULL;
2975
2976 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2977 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2978 {
2979 add = true;
2980 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2981 }
2982 if (post_shift >= prec)
2983 return NULL;
2984
2985 /* t1 = oprnd0 h* ml; */
2986 t1 = vect_recog_temp_ssa_var (itype, NULL);
2987 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2988 build_int_cst (itype, ml));
2989
2990 if (add)
2991 {
2992 /* t2 = t1 + oprnd0; */
2993 append_pattern_def_seq (stmt_vinfo, def_stmt);
2994 t2 = vect_recog_temp_ssa_var (itype, NULL);
2995 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2996 }
2997 else
2998 t2 = t1;
2999
3000 if (post_shift)
3001 {
3002 /* t3 = t2 >> post_shift; */
3003 append_pattern_def_seq (stmt_vinfo, def_stmt);
3004 t3 = vect_recog_temp_ssa_var (itype, NULL);
3005 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3006 build_int_cst (itype, post_shift));
3007 }
3008 else
3009 t3 = t2;
3010
3011 wide_int oprnd0_min, oprnd0_max;
3012 int msb = 1;
3013 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3014 {
3015 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3016 msb = 0;
3017 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3018 msb = -1;
3019 }
3020
3021 if (msb == 0 && d >= 0)
3022 {
3023 /* q = t3; */
3024 q = t3;
3025 pattern_stmt = def_stmt;
3026 }
3027 else
3028 {
3029 /* t4 = oprnd0 >> (prec - 1);
3030 or if we know from VRP that oprnd0 >= 0
3031 t4 = 0;
3032 or if we know from VRP that oprnd0 < 0
3033 t4 = -1; */
3034 append_pattern_def_seq (stmt_vinfo, def_stmt);
3035 t4 = vect_recog_temp_ssa_var (itype, NULL);
3036 if (msb != 1)
3037 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3038 build_int_cst (itype, msb));
3039 else
3040 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3041 build_int_cst (itype, prec - 1));
3042 append_pattern_def_seq (stmt_vinfo, def_stmt);
3043
3044 /* q = t3 - t4; or q = t4 - t3; */
3045 q = vect_recog_temp_ssa_var (itype, NULL);
3046 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3047 d < 0 ? t3 : t4);
3048 }
3049 }
3050
3051 if (rhs_code == TRUNC_MOD_EXPR)
3052 {
3053 tree r, t1;
3054
3055 /* We divided. Now finish by:
3056 t1 = q * oprnd1;
3057 r = oprnd0 - t1; */
3058 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3059
3060 t1 = vect_recog_temp_ssa_var (itype, NULL);
3061 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3062 append_pattern_def_seq (stmt_vinfo, def_stmt);
3063
3064 r = vect_recog_temp_ssa_var (itype, NULL);
3065 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3066 }
3067
3068 /* Pattern detected. */
3069 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3070
3071 *type_out = vectype;
3072 return pattern_stmt;
3073 }
3074
3075 /* Function vect_recog_mixed_size_cond_pattern
3076
3077 Try to find the following pattern:
3078
3079 type x_t, y_t;
3080 TYPE a_T, b_T, c_T;
3081 loop:
3082 S1 a_T = x_t CMP y_t ? b_T : c_T;
3083
3084 where type 'TYPE' is an integral type which has a different size
3085 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3086 than 'type', the constants need to fit into an integer type
3087 with the same width as 'type') or results of conversion from 'type'.
3088
3089 Input:
3090
3091 * STMT_VINFO: The stmt from which the pattern search begins.
3092
3093 Output:
3094
3095 * TYPE_OUT: The type of the output of this pattern.
3096
3097 * Return value: A new stmt that will be used to replace the pattern.
3098 Additionally a def_stmt is added.
3099
3100 a_it = x_t CMP y_t ? b_it : c_it;
3101 a_T = (TYPE) a_it; */
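/* A minimal sketch of the kind of source this matches:

     short x_t, y_t;
     long a_T;
     ...
     a_T = x_t < y_t ? 65 : 33;

   Both arms fit in a 16-bit integer, so the COND_EXPR can be evaluated
   on short-sized elements and only the final widening to long
   remains.  */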
3102
3103 static gimple *
3104 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3105 {
3106 gimple *last_stmt = stmt_vinfo->stmt;
3107 tree cond_expr, then_clause, else_clause;
3108 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3109 gimple *pattern_stmt, *def_stmt;
3110 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3111 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3112 bool promotion;
3113 tree comp_scalar_type;
3114
3115 if (!is_gimple_assign (last_stmt)
3116 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3117 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3118 return NULL;
3119
3120 cond_expr = gimple_assign_rhs1 (last_stmt);
3121 then_clause = gimple_assign_rhs2 (last_stmt);
3122 else_clause = gimple_assign_rhs3 (last_stmt);
3123
3124 if (!COMPARISON_CLASS_P (cond_expr))
3125 return NULL;
3126
3127 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3128 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3129 if (comp_vectype == NULL_TREE)
3130 return NULL;
3131
3132 type = gimple_expr_type (last_stmt);
3133 if (types_compatible_p (type, comp_scalar_type)
3134 || ((TREE_CODE (then_clause) != INTEGER_CST
3135 || TREE_CODE (else_clause) != INTEGER_CST)
3136 && !INTEGRAL_TYPE_P (comp_scalar_type))
3137 || !INTEGRAL_TYPE_P (type))
3138 return NULL;
3139
3140 if ((TREE_CODE (then_clause) != INTEGER_CST
3141 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3142 &def_stmt0, &promotion))
3143 || (TREE_CODE (else_clause) != INTEGER_CST
3144 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3145 &def_stmt1, &promotion)))
3146 return NULL;
3147
3148 if (orig_type0 && orig_type1
3149 && !types_compatible_p (orig_type0, orig_type1))
3150 return NULL;
3151
3152 if (orig_type0)
3153 {
3154 if (!types_compatible_p (orig_type0, comp_scalar_type))
3155 return NULL;
3156 then_clause = gimple_assign_rhs1 (def_stmt0);
3157 itype = orig_type0;
3158 }
3159
3160 if (orig_type1)
3161 {
3162 if (!types_compatible_p (orig_type1, comp_scalar_type))
3163 return NULL;
3164 else_clause = gimple_assign_rhs1 (def_stmt1);
3165 itype = orig_type1;
3166 }
3167
3168
3169 HOST_WIDE_INT cmp_mode_size
3170 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3171
3172 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3173 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3174 return NULL;
3175
3176 vectype = get_vectype_for_scalar_type (type);
3177 if (vectype == NULL_TREE)
3178 return NULL;
3179
3180 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3181 return NULL;
3182
3183 if (itype == NULL_TREE)
3184 itype = build_nonstandard_integer_type (cmp_mode_size,
3185 TYPE_UNSIGNED (type));
3186
3187 if (itype == NULL_TREE
3188 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3189 return NULL;
3190
3191 vecitype = get_vectype_for_scalar_type (itype);
3192 if (vecitype == NULL_TREE)
3193 return NULL;
3194
3195 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3196 return NULL;
3197
3198 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3199 {
3200 if ((TREE_CODE (then_clause) == INTEGER_CST
3201 && !int_fits_type_p (then_clause, itype))
3202 || (TREE_CODE (else_clause) == INTEGER_CST
3203 && !int_fits_type_p (else_clause, itype)))
3204 return NULL;
3205 }
3206
3207 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3208 COND_EXPR, unshare_expr (cond_expr),
3209 fold_convert (itype, then_clause),
3210 fold_convert (itype, else_clause));
3211 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3212 NOP_EXPR, gimple_assign_lhs (def_stmt));
3213
3214 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3215 *type_out = vectype;
3216
3217 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3218
3219 return pattern_stmt;
3220 }
3221
3222
3223 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
3224 true if bool VAR can and should be optimized that way. Assume it shouldn't
3225 be if it is the result of a comparison which can be directly vectorized into
3226 a vector comparison. Fills in STMTS with all stmts visited during the
3227 walk. */
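/* For illustration (a sketch), given

     _1 = x_5 < y_6;
     _2 = z_7 != 0;
     c_8 = _1 & _2;

   a call to check_bool_pattern on c_8 walks both comparisons and the
   BIT_AND_EXPR, records them in STMTS, and returns true only if (among
   the other checks in this function) neither comparison can be
   vectorized directly as a mask comparison.  */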
3228
3229 static bool
3230 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3231 {
3232 tree rhs1;
3233 enum tree_code rhs_code;
3234
3235 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3236 if (!def_stmt_info)
3237 return false;
3238
3239 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3240 if (!def_stmt)
3241 return false;
3242
3243 if (stmts.contains (def_stmt))
3244 return true;
3245
3246 rhs1 = gimple_assign_rhs1 (def_stmt);
3247 rhs_code = gimple_assign_rhs_code (def_stmt);
3248 switch (rhs_code)
3249 {
3250 case SSA_NAME:
3251 if (! check_bool_pattern (rhs1, vinfo, stmts))
3252 return false;
3253 break;
3254
3255 CASE_CONVERT:
3256 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3257 return false;
3258 if (! check_bool_pattern (rhs1, vinfo, stmts))
3259 return false;
3260 break;
3261
3262 case BIT_NOT_EXPR:
3263 if (! check_bool_pattern (rhs1, vinfo, stmts))
3264 return false;
3265 break;
3266
3267 case BIT_AND_EXPR:
3268 case BIT_IOR_EXPR:
3269 case BIT_XOR_EXPR:
3270 if (! check_bool_pattern (rhs1, vinfo, stmts)
3271 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3272 return false;
3273 break;
3274
3275 default:
3276 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3277 {
3278 tree vecitype, comp_vectype;
3279
3280 /* If the comparison can throw, then is_gimple_condexpr will be
3281 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3282 if (stmt_could_throw_p (cfun, def_stmt))
3283 return false;
3284
3285 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3286 if (comp_vectype == NULL_TREE)
3287 return false;
3288
3289 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3290 if (mask_type
3291 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3292 return false;
3293
3294 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3295 {
3296 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3297 tree itype
3298 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3299 vecitype = get_vectype_for_scalar_type (itype);
3300 if (vecitype == NULL_TREE)
3301 return false;
3302 }
3303 else
3304 vecitype = comp_vectype;
3305 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3306 return false;
3307 }
3308 else
3309 return false;
3310 break;
3311 }
3312
3313 bool res = stmts.add (def_stmt);
3314 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3315 gcc_assert (!res);
3316
3317 return true;
3318 }
3319
3320
3321 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE, append the
3322 cast statement to STMT_INFO's pattern definition sequence and return
3323 the result of the cast. */
3324
3325 static tree
3326 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3327 {
3328 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3329 NOP_EXPR, var);
3330 append_pattern_def_seq (stmt_info, cast_stmt,
3331 get_vectype_for_scalar_type (type));
3332 return gimple_assign_lhs (cast_stmt);
3333 }
3334
3335 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3336 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3337 type, OUT_TYPE is the desired final integer type of the whole pattern.
3338 STMT_INFO is the info of the pattern root, which is what pattern stmts
3339 should be associated with. DEFS is a map of pattern defs. */
3340
3341 static void
3342 adjust_bool_pattern (tree var, tree out_type,
3343 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3344 {
3345 gimple *stmt = SSA_NAME_DEF_STMT (var);
3346 enum tree_code rhs_code, def_rhs_code;
3347 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3348 location_t loc;
3349 gimple *pattern_stmt, *def_stmt;
3350 tree trueval = NULL_TREE;
3351
3352 rhs1 = gimple_assign_rhs1 (stmt);
3353 rhs2 = gimple_assign_rhs2 (stmt);
3354 rhs_code = gimple_assign_rhs_code (stmt);
3355 loc = gimple_location (stmt);
3356 switch (rhs_code)
3357 {
3358 case SSA_NAME:
3359 CASE_CONVERT:
3360 irhs1 = *defs.get (rhs1);
3361 itype = TREE_TYPE (irhs1);
3362 pattern_stmt
3363 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3364 SSA_NAME, irhs1);
3365 break;
3366
3367 case BIT_NOT_EXPR:
3368 irhs1 = *defs.get (rhs1);
3369 itype = TREE_TYPE (irhs1);
3370 pattern_stmt
3371 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3372 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3373 break;
3374
3375 case BIT_AND_EXPR:
3376 /* Try to optimize x = y & (a < b ? 1 : 0); into
3377 x = (a < b ? y : 0);
3378
3379 E.g. for:
3380 bool a_b, b_b, c_b;
3381 TYPE d_T;
3382
3383 S1 a_b = x1 CMP1 y1;
3384 S2 b_b = x2 CMP2 y2;
3385 S3 c_b = a_b & b_b;
3386 S4 d_T = (TYPE) c_b;
3387
3388 we would normally emit:
3389
3390 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3391 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3392 S3' c_T = a_T & b_T;
3393 S4' d_T = c_T;
3394
3395 but we can save one stmt by using the
3396 result of one of the COND_EXPRs in the other COND_EXPR and leave
3397 the BIT_AND_EXPR stmt out:
3398
3399 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3400 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3401 S4' d_T = c_T;
3402
3403 At least when VEC_COND_EXPR is implemented using masks,
3404 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3405 computes the comparison mask and ANDs it, in one case with an
3406 all-ones vector, in the other case with a vector register.
3407 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3408 often more expensive. */
3409 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3410 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3411 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3412 {
3413 irhs1 = *defs.get (rhs1);
3414 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3415 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3416 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3417 {
3418 rhs_code = def_rhs_code;
3419 rhs1 = def_rhs1;
3420 rhs2 = gimple_assign_rhs2 (def_stmt);
3421 trueval = irhs1;
3422 goto do_compare;
3423 }
3424 else
3425 irhs2 = *defs.get (rhs2);
3426 goto and_ior_xor;
3427 }
3428 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3429 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3430 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3431 {
3432 irhs2 = *defs.get (rhs2);
3433 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3434 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3435 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3436 {
3437 rhs_code = def_rhs_code;
3438 rhs1 = def_rhs1;
3439 rhs2 = gimple_assign_rhs2 (def_stmt);
3440 trueval = irhs2;
3441 goto do_compare;
3442 }
3443 else
3444 irhs1 = *defs.get (rhs1);
3445 goto and_ior_xor;
3446 }
3447 /* FALLTHRU */
3448 case BIT_IOR_EXPR:
3449 case BIT_XOR_EXPR:
3450 irhs1 = *defs.get (rhs1);
3451 irhs2 = *defs.get (rhs2);
3452 and_ior_xor:
3453 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3454 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3455 {
3456 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3457 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3458 int out_prec = TYPE_PRECISION (out_type);
3459 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3460 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3461 stmt_info);
3462 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3463 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3464 stmt_info);
3465 else
3466 {
3467 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3468 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3469 }
3470 }
3471 itype = TREE_TYPE (irhs1);
3472 pattern_stmt
3473 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3474 rhs_code, irhs1, irhs2);
3475 break;
3476
3477 default:
3478 do_compare:
3479 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3480 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3481 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3482 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3483 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3484 {
3485 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3486 itype
3487 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3488 }
3489 else
3490 itype = TREE_TYPE (rhs1);
3491 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3492 if (trueval == NULL_TREE)
3493 trueval = build_int_cst (itype, 1);
3494 else
3495 gcc_checking_assert (useless_type_conversion_p (itype,
3496 TREE_TYPE (trueval)));
3497 pattern_stmt
3498 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3499 COND_EXPR, cond_expr, trueval,
3500 build_int_cst (itype, 0));
3501 break;
3502 }
3503
3504 gimple_set_location (pattern_stmt, loc);
3505 append_pattern_def_seq (stmt_info, pattern_stmt,
3506 get_vectype_for_scalar_type (itype));
3507 defs.put (var, gimple_assign_lhs (pattern_stmt));
3508 }
3509
3510 /* Comparison function to qsort a vector of gimple stmts by UID. */
3511
3512 static int
3513 sort_after_uid (const void *p1, const void *p2)
3514 {
3515 const gimple *stmt1 = *(const gimple * const *)p1;
3516 const gimple *stmt2 = *(const gimple * const *)p2;
3517 return gimple_uid (stmt1) - gimple_uid (stmt2);
3518 }
3519
3520 /* Create pattern stmts for all stmts participating in the bool pattern
3521 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3522 OUT_TYPE. Return the def of the pattern root. */
3523
3524 static tree
3525 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3526 tree out_type, stmt_vec_info stmt_info)
3527 {
3528 /* Gather original stmts in the bool pattern in their order of appearance
3529 in the IL. */
3530 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3531 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3532 i != bool_stmt_set.end (); ++i)
3533 bool_stmts.quick_push (*i);
3534 bool_stmts.qsort (sort_after_uid);
3535
3536 /* Now process them in that order, producing pattern stmts. */
3537 hash_map <tree, tree> defs;
3538 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3539 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3540 out_type, stmt_info, defs);
3541
3542 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3543 gimple *pattern_stmt
3544 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3545 return gimple_assign_lhs (pattern_stmt);
3546 }
3547
3548 /* Helper for search_type_for_mask. */
3549
3550 static tree
3551 search_type_for_mask_1 (tree var, vec_info *vinfo,
3552 hash_map<gimple *, tree> &cache)
3553 {
3554 tree rhs1;
3555 enum tree_code rhs_code;
3556 tree res = NULL_TREE, res2;
3557
3558 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3559 return NULL_TREE;
3560
3561 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3562 if (!def_stmt_info)
3563 return NULL_TREE;
3564
3565 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3566 if (!def_stmt)
3567 return NULL_TREE;
3568
3569 tree *c = cache.get (def_stmt);
3570 if (c)
3571 return *c;
3572
3573 rhs_code = gimple_assign_rhs_code (def_stmt);
3574 rhs1 = gimple_assign_rhs1 (def_stmt);
3575
3576 switch (rhs_code)
3577 {
3578 case SSA_NAME:
3579 case BIT_NOT_EXPR:
3580 CASE_CONVERT:
3581 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3582 break;
3583
3584 case BIT_AND_EXPR:
3585 case BIT_IOR_EXPR:
3586 case BIT_XOR_EXPR:
3587 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3588 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3589 cache);
3590 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3591 res = res2;
3592 break;
3593
3594 default:
3595 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3596 {
3597 tree comp_vectype, mask_type;
3598
3599 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3600 {
3601 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3602 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3603 vinfo, cache);
3604 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3605 res = res2;
3606 break;
3607 }
3608
3609 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3610 if (comp_vectype == NULL_TREE)
3611 {
3612 res = NULL_TREE;
3613 break;
3614 }
3615
3616 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3617 if (!mask_type
3618 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3619 {
3620 res = NULL_TREE;
3621 break;
3622 }
3623
3624 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3625 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3626 {
3627 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3628 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3629 }
3630 else
3631 res = TREE_TYPE (rhs1);
3632 }
3633 }
3634
3635 cache.put (def_stmt, res);
3636 return res;
3637 }
3638
3639 /* Return the proper type for converting bool VAR into
3640 an integer value or NULL_TREE if no such type exists.
3641 The type is chosen so that the converted value has the
3642 same number of elements as VAR's vector type. */
3643
3644 static tree
3645 search_type_for_mask (tree var, vec_info *vinfo)
3646 {
3647 hash_map<gimple *, tree> cache;
3648 return search_type_for_mask_1 (var, vinfo, cache);
3649 }
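/* Illustrative sketch only (an assumption, not from the original sources):
   given scalar code such as

     int x, y;
     _Bool b = x == y;

   search_type_for_mask (b, vinfo) would, on a target that supports the
   vector integer comparison, return a 32-bit unsigned integer type, so
   any mask derived from B is treated as a mask over 32-bit elements.  */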
3650
3651 /* Function vect_recog_bool_pattern
3652
3653 Try to find pattern like following:
3654
3655 bool a_b, b_b, c_b, d_b, e_b;
3656 TYPE f_T;
3657 loop:
3658 S1 a_b = x1 CMP1 y1;
3659 S2 b_b = x2 CMP2 y2;
3660 S3 c_b = a_b & b_b;
3661 S4 d_b = x3 CMP3 y3;
3662 S5 e_b = c_b | d_b;
3663 S6 f_T = (TYPE) e_b;
3664
3665 where type 'TYPE' is an integral type. Or a similar pattern
3666 ending in
3667
3668 S6 f_Y = e_b ? r_Y : s_Y;
3669
3670 as results from if-conversion of a complex condition.
3671
3672 Input:
3673
3674 * STMT_VINFO: The stmt at the end from which the pattern
3675 search begins, i.e. cast of a bool to
3676 an integer type.
3677
3678 Output:
3679
3680 * TYPE_OUT: The type of the output of this pattern.
3681
3682 * Return value: A new stmt that will be used to replace the pattern.
3683
3684 Assuming the size of TYPE is the same as the size of all comparisons
3685 (otherwise some casts would be added where needed), for the above
3686 sequence we create the following related pattern stmts:
3687 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3688 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3689 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3690 S5' e_T = c_T | d_T;
3691 S6' f_T = e_T;
3692
3693 Instead of the above S3' we could emit:
3694 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3695 S3' c_T = a_T | b_T;
3696 but the above is more efficient. */
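/* As an illustration only (an assumption, not part of the original file),
   scalar source of the form

     for (int i = 0; i < n; i++)
       f[i] = (x1[i] < y1[i] && x2[i] < y2[i]) || x3[i] < y3[i];

   turns into the S1..S6 shape above after gimplification and if-conversion:
   the logical operators become BIT_AND_EXPR/BIT_IOR_EXPR on booleans and
   the store of F provides the final bool-to-integer conversion S6.  */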
3697
3698 static gimple *
3699 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3700 {
3701 gimple *last_stmt = stmt_vinfo->stmt;
3702 enum tree_code rhs_code;
3703 tree var, lhs, rhs, vectype;
3704 vec_info *vinfo = stmt_vinfo->vinfo;
3705 gimple *pattern_stmt;
3706
3707 if (!is_gimple_assign (last_stmt))
3708 return NULL;
3709
3710 var = gimple_assign_rhs1 (last_stmt);
3711 lhs = gimple_assign_lhs (last_stmt);
3712 rhs_code = gimple_assign_rhs_code (last_stmt);
3713
3714 if (rhs_code == VIEW_CONVERT_EXPR)
3715 var = TREE_OPERAND (var, 0);
3716
3717 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3718 return NULL;
3719
3720 hash_set<gimple *> bool_stmts;
3721
3722 if (CONVERT_EXPR_CODE_P (rhs_code)
3723 || rhs_code == VIEW_CONVERT_EXPR)
3724 {
3725 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3726 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3727 return NULL;
3728 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3729 if (vectype == NULL_TREE)
3730 return NULL;
3731
3732 if (check_bool_pattern (var, vinfo, bool_stmts))
3733 {
3734 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3735 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3736 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3737 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3738 else
3739 pattern_stmt
3740 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3741 }
3742 else
3743 {
3744 tree type = search_type_for_mask (var, vinfo);
3745 tree cst0, cst1, tmp;
3746
3747 if (!type)
3748 return NULL;
3749
3750 /* We may directly use the cond with a narrowed type, to avoid
3751 multiple cond exprs followed by result packing, and instead
3752 perform a single cond with a packed mask. In case
3753 of widening it is better to do the cond first and then extract
3754 the results. */
3755 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3756 type = TREE_TYPE (lhs);
3757
3758 cst0 = build_int_cst (type, 0);
3759 cst1 = build_int_cst (type, 1);
3760 tmp = vect_recog_temp_ssa_var (type, NULL);
3761 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3762
3763 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3764 {
3765 tree new_vectype = get_vectype_for_scalar_type (type);
3766 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3767
3768 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3769 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3770 }
3771 }
3772
3773 *type_out = vectype;
3774 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3775
3776 return pattern_stmt;
3777 }
3778 else if (rhs_code == COND_EXPR
3779 && TREE_CODE (var) == SSA_NAME)
3780 {
3781 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3782 if (vectype == NULL_TREE)
3783 return NULL;
3784
3785 /* Build a scalar type for the boolean result that when
3786 vectorized matches the vector type of the result in
3787 size and number of elements. */
3788 unsigned prec
3789 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3790 TYPE_VECTOR_SUBPARTS (vectype));
3791
3792 tree type
3793 = build_nonstandard_integer_type (prec,
3794 TYPE_UNSIGNED (TREE_TYPE (var)));
3795 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3796 return NULL;
3797
3798 if (!check_bool_pattern (var, vinfo, bool_stmts))
3799 return NULL;
3800
3801 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3802
3803 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3804 pattern_stmt
3805 = gimple_build_assign (lhs, COND_EXPR,
3806 build2 (NE_EXPR, boolean_type_node,
3807 rhs, build_int_cst (type, 0)),
3808 gimple_assign_rhs2 (last_stmt),
3809 gimple_assign_rhs3 (last_stmt));
3810 *type_out = vectype;
3811 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3812
3813 return pattern_stmt;
3814 }
3815 else if (rhs_code == SSA_NAME
3816 && STMT_VINFO_DATA_REF (stmt_vinfo))
3817 {
3818 stmt_vec_info pattern_stmt_info;
3819 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3820 gcc_assert (vectype != NULL_TREE);
3821 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3822 return NULL;
3823
3824 if (check_bool_pattern (var, vinfo, bool_stmts))
3825 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3826 else
3827 {
3828 tree type = search_type_for_mask (var, vinfo);
3829 tree cst0, cst1, new_vectype;
3830
3831 if (!type)
3832 return NULL;
3833
3834 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3835 type = TREE_TYPE (vectype);
3836
3837 cst0 = build_int_cst (type, 0);
3838 cst1 = build_int_cst (type, 1);
3839 new_vectype = get_vectype_for_scalar_type (type);
3840
3841 rhs = vect_recog_temp_ssa_var (type, NULL);
3842 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3843 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3844 }
3845
3846 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3847 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3848 {
3849 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3850 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3851 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3852 rhs = rhs2;
3853 }
3854 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3855 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3856 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3857 *type_out = vectype;
3858 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3859
3860 return pattern_stmt;
3861 }
3862 else
3863 return NULL;
3864 }
3865
3866
3867 /* A helper for vect_recog_mask_conversion_pattern. Build a
3868 conversion of MASK to a type suitable for masking VECTYPE.
3869 The built statement gets the required vectype and is appended
3870 to STMT_VINFO's pattern sequence.
3871
3872 Return the converted mask. */
3873
3874 static tree
3875 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3876 {
3877 gimple *stmt;
3878 tree masktype, tmp;
3879
3880 masktype = build_same_sized_truth_vector_type (vectype);
3881 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3882 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3883 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3884
3885 return tmp;
3886 }
3887
3888
3889 /* Function vect_recog_mask_conversion_pattern
3890
3891 Try to find statements which require boolean type
3892 conversion. Additional conversion statements are
3893 added to handle such cases. For example:
3894
3895 bool m_1, m_2, m_3;
3896 int i_4, i_5;
3897 double d_6, d_7;
3898 char c_1, c_2, c_3;
3899
3900 S1 m_1 = i_4 > i_5;
3901 S2 m_2 = d_6 < d_7;
3902 S3 m_3 = m_1 & m_2;
3903 S4 c_1 = m_3 ? c_2 : c_3;
3904
3905 Will be transformed into:
3906
3907 S1 m_1 = i_4 > i_5;
3908 S2 m_2 = d_6 < d_7;
3909 S3'' m_2' = (_Bool[bitsize=32])m_2
3910 S3' m_3' = m_1 & m_2';
3911 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3912 S4' c_1' = m_3'' ? c_2 : c_3; */
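/* Illustrative scalar source (an assumption, not from the original file)
   that typically leads to the mixed mask widths above:

     for (int k = 0; k < n; k++)
       c_1[k] = (i_4[k] > i_5[k] && d_6[k] < d_7[k]) ? c_2[k] : c_3[k];

   The int comparison yields a mask over 32-bit elements, the double
   comparison a mask over 64-bit elements, and the char selection needs a
   mask over 8-bit elements, hence the inserted S3'' and S4'' conversions.  */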
3913
3914 static gimple *
3915 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3916 {
3917 gimple *last_stmt = stmt_vinfo->stmt;
3918 enum tree_code rhs_code;
3919 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3920 tree vectype1, vectype2;
3921 stmt_vec_info pattern_stmt_info;
3922 vec_info *vinfo = stmt_vinfo->vinfo;
3923
3924 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3925 if (is_gimple_call (last_stmt)
3926 && gimple_call_internal_p (last_stmt))
3927 {
3928 gcall *pattern_stmt;
3929
3930 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3931 int mask_argno = internal_fn_mask_index (ifn);
3932 if (mask_argno < 0)
3933 return NULL;
3934
3935 bool store_p = internal_store_fn_p (ifn);
3936 if (store_p)
3937 {
3938 int rhs_index = internal_fn_stored_value_index (ifn);
3939 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3940 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3941 }
3942 else
3943 {
3944 lhs = gimple_call_lhs (last_stmt);
3945 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3946 }
3947
3948 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3949 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3950 if (!mask_arg_type)
3951 return NULL;
3952 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3953
3954 if (!vectype1 || !vectype2
3955 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3956 TYPE_VECTOR_SUBPARTS (vectype2)))
3957 return NULL;
3958
3959 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3960
3961 auto_vec<tree, 8> args;
3962 unsigned int nargs = gimple_call_num_args (last_stmt);
3963 args.safe_grow (nargs);
3964 for (unsigned int i = 0; i < nargs; ++i)
3965 args[i] = ((int) i == mask_argno
3966 ? tmp
3967 : gimple_call_arg (last_stmt, i));
3968 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3969
3970 if (!store_p)
3971 {
3972 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3973 gimple_call_set_lhs (pattern_stmt, lhs);
3974 }
3975 gimple_call_set_nothrow (pattern_stmt, true);
3976
3977 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3978 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3979 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3980
3981 *type_out = vectype1;
3982 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3983
3984 return pattern_stmt;
3985 }
3986
3987 if (!is_gimple_assign (last_stmt))
3988 return NULL;
3989
3990 gimple *pattern_stmt;
3991 lhs = gimple_assign_lhs (last_stmt);
3992 rhs1 = gimple_assign_rhs1 (last_stmt);
3993 rhs_code = gimple_assign_rhs_code (last_stmt);
3994
3995 /* Check for cond expression requiring mask conversion. */
3996 if (rhs_code == COND_EXPR)
3997 {
3998 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3999
4000 if (TREE_CODE (rhs1) == SSA_NAME)
4001 {
4002 rhs1_type = search_type_for_mask (rhs1, vinfo);
4003 if (!rhs1_type)
4004 return NULL;
4005 }
4006 else if (COMPARISON_CLASS_P (rhs1))
4007 {
4008 /* Check whether we're comparing scalar booleans and (if so)
4009 whether a better mask type exists than the mask associated
4010 with boolean-sized elements. This avoids unnecessary packs
4011 and unpacks if the booleans are set from comparisons of
4012 wider types. E.g. in:
4013
4014 int x1, x2, x3, x4, y1, y2;
4015 ...
4016 bool b1 = (x1 == x2);
4017 bool b2 = (x3 == x4);
4018 ... = b1 == b2 ? y1 : y2;
4019
4020 it is better for b1 and b2 to use the mask type associated
4021 with int elements rather than bool (byte) elements. */
4022 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4023 if (!rhs1_type)
4024 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4025 }
4026 else
4027 return NULL;
4028
4029 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4030
4031 if (!vectype1 || !vectype2)
4032 return NULL;
4033
4034 /* Continue if a conversion is needed. Also continue if we have
4035 a comparison whose vector type would normally be different from
4036 VECTYPE2 when considered in isolation. In that case we'll
4037 replace the comparison with an SSA name (so that we can record
4038 its vector type) and behave as though the comparison was an SSA
4039 name from the outset. */
4040 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4041 TYPE_VECTOR_SUBPARTS (vectype2))
4042 && (TREE_CODE (rhs1) == SSA_NAME
4043 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4044 return NULL;
4045
4046 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4047 in place; we can handle it in vectorizable_condition. This avoids
4048 unnecessary promotion stmts and an increased vectorization factor. */
4049 if (COMPARISON_CLASS_P (rhs1)
4050 && INTEGRAL_TYPE_P (rhs1_type)
4051 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4052 TYPE_VECTOR_SUBPARTS (vectype2)))
4053 {
4054 enum vect_def_type dt;
4055 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4056 && dt == vect_external_def
4057 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4058 && (dt == vect_external_def
4059 || dt == vect_constant_def))
4060 {
4061 tree wide_scalar_type = build_nonstandard_integer_type
4062 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4063 TYPE_UNSIGNED (rhs1_type));
4064 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4065 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4066 return NULL;
4067 }
4068 }
4069
4070 /* If rhs1 is a comparison we need to move it into a
4071 separate statement. */
4072 if (TREE_CODE (rhs1) != SSA_NAME)
4073 {
4074 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4075 pattern_stmt = gimple_build_assign (tmp, rhs1);
4076 rhs1 = tmp;
4077 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4078 }
4079
4080 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4081 TYPE_VECTOR_SUBPARTS (vectype2)))
4082 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4083 else
4084 tmp = rhs1;
4085
4086 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4087 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4088 gimple_assign_rhs2 (last_stmt),
4089 gimple_assign_rhs3 (last_stmt));
4090
4091 *type_out = vectype1;
4092 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4093
4094 return pattern_stmt;
4095 }
4096
4097 /* Now check for binary boolean operations requiring conversion for
4098 one of the operands. */
4099 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4100 return NULL;
4101
4102 if (rhs_code != BIT_IOR_EXPR
4103 && rhs_code != BIT_XOR_EXPR
4104 && rhs_code != BIT_AND_EXPR
4105 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4106 return NULL;
4107
4108 rhs2 = gimple_assign_rhs2 (last_stmt);
4109
4110 rhs1_type = search_type_for_mask (rhs1, vinfo);
4111 rhs2_type = search_type_for_mask (rhs2, vinfo);
4112
4113 if (!rhs1_type || !rhs2_type
4114 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4115 return NULL;
4116
4117 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4118 {
4119 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4120 if (!vectype1)
4121 return NULL;
4122 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4123 }
4124 else
4125 {
4126 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4127 if (!vectype1)
4128 return NULL;
4129 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4130 }
4131
4132 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4133 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4134
4135 *type_out = vectype1;
4136 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4137
4138 return pattern_stmt;
4139 }
4140
4141 /* STMT_INFO is a load or store. If the load or store is conditional, return
4142 the boolean condition under which it occurs, otherwise return null. */
4143
4144 static tree
4145 vect_get_load_store_mask (stmt_vec_info stmt_info)
4146 {
4147 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4148 {
4149 gcc_assert (gimple_assign_single_p (def_assign));
4150 return NULL_TREE;
4151 }
4152
4153 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4154 {
4155 internal_fn ifn = gimple_call_internal_fn (def_call);
4156 int mask_index = internal_fn_mask_index (ifn);
4157 return gimple_call_arg (def_call, mask_index);
4158 }
4159
4160 gcc_unreachable ();
4161 }
4162
4163 /* Return the scalar offset type that an internal gather/scatter function
4164 should use. GS_INFO describes the gather/scatter operation. */
4165
4166 static tree
4167 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4168 {
4169 tree offset_type = TREE_TYPE (gs_info->offset);
4170 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4171
4172 /* Enforced by vect_check_gather_scatter. */
4173 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4174 gcc_assert (element_bits >= offset_bits);
4175
4176 /* If the offset is narrower than the elements, extend it according
4177 to its sign. */
4178 if (element_bits > offset_bits)
4179 return build_nonstandard_integer_type (element_bits,
4180 TYPE_UNSIGNED (offset_type));
4181
4182 return offset_type;
4183 }
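/* Illustrative sketch (an assumption, not from the original sources):
   for a gather such as

     double x[]; int idx[];
     ... = x[idx[i]];

   the elements are 64 bits wide and the offsets 32 bits, so the function
   above returns a signed 64-bit integer type and the offsets are
   sign-extended before the internal gather call is built.  */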
4184
4185 /* Return MASK if MASK is suitable for masking an operation on vectors
4186 of type VECTYPE, otherwise convert it into such a form and return
4187 the result. Associate any conversion statements with STMT_INFO's
4188 pattern. */
4189
4190 static tree
4191 vect_convert_mask_for_vectype (tree mask, tree vectype,
4192 stmt_vec_info stmt_info, vec_info *vinfo)
4193 {
4194 tree mask_type = search_type_for_mask (mask, vinfo);
4195 if (mask_type)
4196 {
4197 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4198 if (mask_vectype
4199 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4200 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4201 mask = build_mask_conversion (mask, vectype, stmt_info);
4202 }
4203 return mask;
4204 }
4205
4206 /* Return the equivalent of:
4207
4208 fold_convert (TYPE, VALUE)
4209
4210 with the expectation that the operation will be vectorized.
4211 If new statements are needed, add them as pattern statements
4212 to STMT_INFO. */
4213
4214 static tree
4215 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4216 {
4217 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4218 return value;
4219
4220 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4221 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4222 append_pattern_def_seq (stmt_info, conversion,
4223 get_vectype_for_scalar_type (type));
4224 return new_value;
4225 }
4226
4227 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4228 internal function. Return the final statement on success and set
4229 *TYPE_OUT to the vector type being loaded or stored.
4230
4231 This function only handles gathers and scatters that were recognized
4232 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
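/* For illustration only (an assumption, not part of the original file):
   a conditional gather such as

     for (int i = 0; i < n; i++)
       if (c[i])
         out[i] = x[idx[i]];

   that was recognized as a gather from the outset would be rewritten into
   a single internal-function call (gs_info.ifn) taking the invariant base
   of X, the converted IDX offsets, the element scale and the converted
   mask built from the condition on C[i].  */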
4233
4234 static gimple *
4235 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4236 {
4237 /* Currently we only support this for loop vectorization. */
4238 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4239 if (!loop_vinfo)
4240 return NULL;
4241
4242 /* Make sure that we're looking at a gather load or scatter store. */
4243 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4244 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4245 return NULL;
4246
4247 /* Get the boolean that controls whether the load or store happens.
4248 This is null if the operation is unconditional. */
4249 tree mask = vect_get_load_store_mask (stmt_info);
4250
4251 /* Make sure that the target supports an appropriate internal
4252 function for the gather/scatter operation. */
4253 gather_scatter_info gs_info;
4254 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4255 || gs_info.decl)
4256 return NULL;
4257
4258 /* Convert the mask to the right form. */
4259 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4260 if (mask)
4261 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4262 loop_vinfo);
4263
4264 /* Get the invariant base and non-invariant offset, converting the
4265 latter to the same width as the vector elements. */
4266 tree base = gs_info.base;
4267 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4268 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4269 stmt_info);
4270
4271 /* Build the new pattern statement. */
4272 tree scale = size_int (gs_info.scale);
4273 gcall *pattern_stmt;
4274 if (DR_IS_READ (dr))
4275 {
4276 if (mask != NULL)
4277 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4278 offset, scale, mask);
4279 else
4280 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4281 offset, scale);
4282 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4283 gimple_call_set_lhs (pattern_stmt, load_lhs);
4284 }
4285 else
4286 {
4287 tree rhs = vect_get_store_rhs (stmt_info);
4288 if (mask != NULL)
4289 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4290 base, offset, scale, rhs,
4291 mask);
4292 else
4293 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4294 base, offset, scale, rhs);
4295 }
4296 gimple_call_set_nothrow (pattern_stmt, true);
4297
4298 /* Copy across relevant vectorization info and associate DR with the
4299 new pattern statement instead of the original statement. */
4300 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4301 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4302
4303 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4304 *type_out = vectype;
4305 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4306
4307 return pattern_stmt;
4308 }
4309
4310 /* Return true if TYPE is a non-boolean integer type. These are the types
4311 that we want to consider for narrowing. */
4312
4313 static bool
4314 vect_narrowable_type_p (tree type)
4315 {
4316 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4317 }
4318
4319 /* Return true if the operation given by CODE can be truncated to N bits
4320 when only N bits of the output are needed. This is only true if the bits
4321 of the inputs above the low N bits have no effect on the low N bits of the result. */
4322
4323 static bool
4324 vect_truncatable_operation_p (tree_code code)
4325 {
4326 switch (code)
4327 {
4328 case PLUS_EXPR:
4329 case MINUS_EXPR:
4330 case MULT_EXPR:
4331 case BIT_AND_EXPR:
4332 case BIT_IOR_EXPR:
4333 case BIT_XOR_EXPR:
4334 case COND_EXPR:
4335 return true;
4336
4337 default:
4338 return false;
4339 }
4340 }
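/* Worked example (illustration only): PLUS_EXPR is truncatable because,
   for any unsigned values a and b,

     (a + b) & 0xff == ((a & 0xff) + (b & 0xff)) & 0xff

   whereas RSHIFT_EXPR and TRUNC_DIV_EXPR are not, since high input bits
   can change the low bits of their results.  */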
4341
4342 /* Record that STMT_INFO could be changed from operating on TYPE to
4343 operating on a type with the precision and sign given by PRECISION
4344 and SIGN respectively. PRECISION is an arbitrary bit precision;
4345 it might not be a whole number of bytes. */
4346
4347 static void
4348 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4349 unsigned int precision, signop sign)
4350 {
4351 /* Round the precision up to a whole number of bytes. */
4352 precision = vect_element_precision (precision);
4353 if (precision < TYPE_PRECISION (type)
4354 && (!stmt_info->operation_precision
4355 || stmt_info->operation_precision > precision))
4356 {
4357 stmt_info->operation_precision = precision;
4358 stmt_info->operation_sign = sign;
4359 }
4360 }
4361
4362 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4363 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4364 is an arbitrary bit precision; it might not be a whole number of bytes. */
4365
4366 static void
4367 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4368 unsigned int min_input_precision)
4369 {
4370 /* This operation in isolation only requires the inputs to have
4371 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4372 that MIN_INPUT_PRECISION is a natural precision for the chain
4373 as a whole. E.g. consider something like:
4374
4375 unsigned short *x, *y;
4376 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4377
4378 The right shift can be done on unsigned chars, and only requires the
4379 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4380 approach would mean turning a natural chain of single-vector unsigned
4381 short operations into one that truncates "*x" and then extends
4382 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4383 operation and one vector for each unsigned char operation.
4384 This would be a significant pessimization.
4385
4386 Instead only propagate the maximum of this precision and the precision
4387 required by the users of the result. This means that we don't pessimize
4388 the case above but continue to optimize things like:
4389
4390 unsigned char *y;
4391 unsigned short *x;
4392 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4393
4394 Here we would truncate two vectors of *x to a single vector of
4395 unsigned chars and use single-vector unsigned char operations for
4396 everything else, rather than doing two unsigned short copies of
4397 "(*x & 0xf0) >> 4" and then truncating the result. */
4398 min_input_precision = MAX (min_input_precision,
4399 stmt_info->min_output_precision);
4400
4401 if (min_input_precision < TYPE_PRECISION (type)
4402 && (!stmt_info->min_input_precision
4403 || stmt_info->min_input_precision > min_input_precision))
4404 stmt_info->min_input_precision = min_input_precision;
4405 }
4406
4407 /* Subroutine of vect_determine_min_output_precision. Return true if
4408 we can calculate a reduced number of output bits for STMT_INFO,
4409 whose result is LHS. */
4410
4411 static bool
4412 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4413 {
4414 /* Take the maximum precision required by users of the result. */
4415 vec_info *vinfo = stmt_info->vinfo;
4416 unsigned int precision = 0;
4417 imm_use_iterator iter;
4418 use_operand_p use;
4419 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4420 {
4421 gimple *use_stmt = USE_STMT (use);
4422 if (is_gimple_debug (use_stmt))
4423 continue;
4424 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4425 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4426 return false;
4427 /* The input precision recorded for COND_EXPRs applies only to the
4428 "then" and "else" values. */
4429 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4430 if (assign
4431 && gimple_assign_rhs_code (assign) == COND_EXPR
4432 && use->use != gimple_assign_rhs2_ptr (assign)
4433 && use->use != gimple_assign_rhs3_ptr (assign))
4434 return false;
4435 precision = MAX (precision, use_stmt_info->min_input_precision);
4436 }
4437
4438 if (dump_enabled_p ())
4439 dump_printf_loc (MSG_NOTE, vect_location,
4440 "only the low %d bits of %T are significant\n",
4441 precision, lhs);
4442 stmt_info->min_output_precision = precision;
4443 return true;
4444 }
4445
4446 /* Calculate min_output_precision for STMT_INFO. */
4447
4448 static void
4449 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4450 {
4451 /* We're only interested in statements with a narrowable result. */
4452 tree lhs = gimple_get_lhs (stmt_info->stmt);
4453 if (!lhs
4454 || TREE_CODE (lhs) != SSA_NAME
4455 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4456 return;
4457
4458 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4459 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4460 }
4461
4462 /* Use range information to decide whether STMT (described by STMT_INFO)
4463 could be done in a narrower type. This is effectively a forward
4464 propagation, since it uses context-independent information that applies
4465 to all users of an SSA name. */
4466
4467 static void
4468 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4469 {
4470 tree lhs = gimple_assign_lhs (stmt);
4471 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4472 return;
4473
4474 tree type = TREE_TYPE (lhs);
4475 if (!vect_narrowable_type_p (type))
4476 return;
4477
4478 /* First see whether we have any useful range information for the result. */
4479 unsigned int precision = TYPE_PRECISION (type);
4480 signop sign = TYPE_SIGN (type);
4481 wide_int min_value, max_value;
4482 if (!vect_get_range_info (lhs, &min_value, &max_value))
4483 return;
4484
4485 tree_code code = gimple_assign_rhs_code (stmt);
4486 unsigned int nops = gimple_num_ops (stmt);
4487
4488 if (!vect_truncatable_operation_p (code))
4489 /* Check that all relevant input operands are compatible, and update
4490 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4491 for (unsigned int i = 1; i < nops; ++i)
4492 {
4493 tree op = gimple_op (stmt, i);
4494 if (TREE_CODE (op) == INTEGER_CST)
4495 {
4496 /* Don't require the integer to have type TYPE (which it might
4497 not for things like shift amounts, etc.), but do require its
4498 value to fit TYPE. */
4499 if (!int_fits_type_p (op, type))
4500 return;
4501
4502 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4503 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4504 }
4505 else if (TREE_CODE (op) == SSA_NAME)
4506 {
4507 /* Ignore codes that don't take uniform arguments. */
4508 if (!types_compatible_p (TREE_TYPE (op), type))
4509 return;
4510
4511 wide_int op_min_value, op_max_value;
4512 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4513 return;
4514
4515 min_value = wi::min (min_value, op_min_value, sign);
4516 max_value = wi::max (max_value, op_max_value, sign);
4517 }
4518 else
4519 return;
4520 }
4521
4522 /* Try to switch signed types for unsigned types if we can.
4523 This is better for two reasons. First, unsigned ops tend
4524 to be cheaper than signed ops. Second, it means that we can
4525 handle things like:
4526
4527 signed char c;
4528 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4529
4530 as:
4531
4532 signed char c;
4533 unsigned short res_1 = (unsigned short) c & 0xff00;
4534 int res = (int) res_1;
4535
4536 where the intermediate result res_1 has unsigned rather than
4537 signed type. */
4538 if (sign == SIGNED && !wi::neg_p (min_value))
4539 sign = UNSIGNED;
4540
4541 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4542 unsigned int precision1 = wi::min_precision (min_value, sign);
4543 unsigned int precision2 = wi::min_precision (max_value, sign);
4544 unsigned int value_precision = MAX (precision1, precision2);
4545 if (value_precision >= precision)
4546 return;
4547
4548 if (dump_enabled_p ())
4549 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4550 " without loss of precision: %G",
4551 sign == SIGNED ? "signed" : "unsigned",
4552 value_precision, stmt);
4553
4554 vect_set_operation_type (stmt_info, type, value_precision, sign);
4555 vect_set_min_input_precision (stmt_info, type, value_precision);
4556 }
4557
4558 /* Use information about the users of STMT's result to decide whether
4559 STMT (described by STMT_INFO) could be done in a narrower type.
4560 This is effectively a backward propagation. */
4561
4562 static void
4563 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4564 {
4565 tree_code code = gimple_assign_rhs_code (stmt);
4566 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4567 tree type = TREE_TYPE (gimple_op (stmt, opno));
4568 if (!vect_narrowable_type_p (type))
4569 return;
4570
4571 unsigned int precision = TYPE_PRECISION (type);
4572 unsigned int operation_precision, min_input_precision;
4573 switch (code)
4574 {
4575 CASE_CONVERT:
4576 /* Only the bits that contribute to the output matter. Don't change
4577 the precision of the operation itself. */
4578 operation_precision = precision;
4579 min_input_precision = stmt_info->min_output_precision;
4580 break;
4581
4582 case LSHIFT_EXPR:
4583 case RSHIFT_EXPR:
4584 {
4585 tree shift = gimple_assign_rhs2 (stmt);
4586 if (TREE_CODE (shift) != INTEGER_CST
4587 || !wi::ltu_p (wi::to_widest (shift), precision))
4588 return;
4589 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4590 if (code == LSHIFT_EXPR)
4591 {
4592 /* We need CONST_SHIFT fewer bits of the input. */
4593 operation_precision = stmt_info->min_output_precision;
4594 min_input_precision = (MAX (operation_precision, const_shift)
4595 - const_shift);
4596 }
4597 else
4598 {
4599 /* We need CONST_SHIFT extra bits to do the operation. */
4600 operation_precision = (stmt_info->min_output_precision
4601 + const_shift);
4602 min_input_precision = operation_precision;
4603 }
4604 break;
4605 }
4606
4607 default:
4608 if (vect_truncatable_operation_p (code))
4609 {
4610 /* Input bit N has no effect on output bits N-1 and lower. */
4611 operation_precision = stmt_info->min_output_precision;
4612 min_input_precision = operation_precision;
4613 break;
4614 }
4615 return;
4616 }
4617
4618 if (operation_precision < precision)
4619 {
4620 if (dump_enabled_p ())
4621 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4622 " without affecting users: %G",
4623 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4624 operation_precision, stmt);
4625 vect_set_operation_type (stmt_info, type, operation_precision,
4626 TYPE_SIGN (type));
4627 }
4628 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4629 }
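/* Worked example (illustration only): if only the low 16 bits of the
   result of "y = x << 4" are needed, the LSHIFT_EXPR case above sets
   operation_precision to 16 and min_input_precision to 16 - 4 = 12,
   since bits of X above bit 11 can only influence bits of Y above
   bit 15.  */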
4630
4631 /* Handle vect_determine_precisions for STMT_INFO, given that we
4632 have already done so for the users of its result. */
4633
4634 void
4635 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4636 {
4637 vect_determine_min_output_precision (stmt_info);
4638 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4639 {
4640 vect_determine_precisions_from_range (stmt_info, stmt);
4641 vect_determine_precisions_from_users (stmt_info, stmt);
4642 }
4643 }
4644
4645 /* Walk backwards through the vectorizable region to determine the
4646 values of these fields:
4647
4648 - min_output_precision
4649 - min_input_precision
4650 - operation_precision
4651 - operation_sign. */
4652
4653 void
4654 vect_determine_precisions (vec_info *vinfo)
4655 {
4656 DUMP_VECT_SCOPE ("vect_determine_precisions");
4657
4658 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4659 {
4660 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4662 unsigned int nbbs = loop->num_nodes;
4663
4664 for (unsigned int i = 0; i < nbbs; i++)
4665 {
4666 basic_block bb = bbs[nbbs - i - 1];
4667 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4668 !gsi_end_p (si); gsi_prev (&si))
4669 vect_determine_stmt_precisions
4670 (vinfo->lookup_stmt (gsi_stmt (si)));
4671 }
4672 }
4673 else
4674 {
4675 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4676 gimple_stmt_iterator si = bb_vinfo->region_end;
4677 gimple *stmt;
4678 do
4679 {
4680 if (!gsi_stmt (si))
4681 si = gsi_last_bb (bb_vinfo->bb);
4682 else
4683 gsi_prev (&si);
4684 stmt = gsi_stmt (si);
4685 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4686 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4687 vect_determine_stmt_precisions (stmt_info);
4688 }
4689 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4690 }
4691 }
4692
4693 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4694
4695 struct vect_recog_func
4696 {
4697 vect_recog_func_ptr fn;
4698 const char *name;
4699 };
4700
4701 /* Note that ordering matters - the first pattern matching on a stmt is
4702 taken, which means that usually the more complex patterns need to precede
4703 the less complex ones (widen_sum only after dot_prod or sad, for example). */
4704 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4705 { vect_recog_over_widening_pattern, "over_widening" },
4706 /* Must come after over_widening, which narrows the shift as much as
4707 possible beforehand. */
4708 { vect_recog_average_pattern, "average" },
4709 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4710 { vect_recog_widen_mult_pattern, "widen_mult" },
4711 { vect_recog_dot_prod_pattern, "dot_prod" },
4712 { vect_recog_sad_pattern, "sad" },
4713 { vect_recog_widen_sum_pattern, "widen_sum" },
4714 { vect_recog_pow_pattern, "pow" },
4715 { vect_recog_widen_shift_pattern, "widen_shift" },
4716 { vect_recog_rotate_pattern, "rotate" },
4717 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4718 { vect_recog_divmod_pattern, "divmod" },
4719 { vect_recog_mult_pattern, "mult" },
4720 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4721 { vect_recog_bool_pattern, "bool" },
4722 /* This must come before mask conversion, and includes the parts
4723 of mask conversion that are needed for gather and scatter
4724 internal functions. */
4725 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4726 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4727 };
4728
4729 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4730
4731 /* Mark statements that are involved in a pattern. */
4732
4733 static inline void
4734 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4735 tree pattern_vectype)
4736 {
4737 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4738
4739 gimple *orig_pattern_stmt = NULL;
4740 if (is_pattern_stmt_p (orig_stmt_info))
4741 {
4742 /* We're replacing a statement in an existing pattern definition
4743 sequence. */
4744 orig_pattern_stmt = orig_stmt_info->stmt;
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_NOTE, vect_location,
4747 "replacing earlier pattern %G", orig_pattern_stmt);
4748
4749 /* To keep the book-keeping simple, just swap the lhs of the
4750 old and new statements, so that the old one has a valid but
4751 unused lhs. */
4752 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4753 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4754 gimple_set_lhs (pattern_stmt, old_lhs);
4755
4756 if (dump_enabled_p ())
4757 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4758
4759 /* Switch to the statement that ORIG replaces. */
4760 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4761
4762 /* We shouldn't be replacing the main pattern statement. */
4763 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4764 != orig_pattern_stmt);
4765 }
4766
4767 if (def_seq)
4768 for (gimple_stmt_iterator si = gsi_start (def_seq);
4769 !gsi_end_p (si); gsi_next (&si))
4770 {
4771 stmt_vec_info pattern_stmt_info
4772 = vect_init_pattern_stmt (gsi_stmt (si),
4773 orig_stmt_info, pattern_vectype);
4774 /* Stmts in the def sequence are not vectorizable cycle or
4775 induction defs; instead they should all be vect_internal_def,
4776 feeding the main pattern stmt, which retains this def type. */
4777 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
4778 }
4779
4780 if (orig_pattern_stmt)
4781 {
4782 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4783
4784 /* Insert all the new pattern statements before the original one. */
4785 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4786 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4787 orig_def_seq);
4788 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4789 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4790
4791 /* Remove the pattern statement that this new pattern replaces. */
4792 gsi_remove (&gsi, false);
4793 }
4794 else
4795 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4796 }
4797
4798 /* Function vect_pattern_recog_1
4799
4800 Input:
4801 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4802 computation pattern.
4803 STMT_INFO: A stmt from which the pattern search should start.
4804
4805 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4806 a sequence of statements that has the same functionality and can be
4807 used to replace STMT_INFO. It returns the last statement in the sequence
4808 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4809 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4810 statement, having first checked that the target supports the new operation
4811 in that type.
4812
4813 This function also does some bookkeeping, as explained in the documentation
4814 for vect_pattern_recog. */
4815
4816 static void
4817 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4818 {
4819 vec_info *vinfo = stmt_info->vinfo;
4820 gimple *pattern_stmt;
4821 loop_vec_info loop_vinfo;
4822 tree pattern_vectype;
4823
4824 /* If this statement has already been replaced with pattern statements,
4825 leave the original statement alone, since the first match wins.
4826 Instead try to match against the definition statements that feed
4827 the main pattern statement. */
4828 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4829 {
4830 gimple_stmt_iterator gsi;
4831 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4832 !gsi_end_p (gsi); gsi_next (&gsi))
4833 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4834 return;
4835 }
4836
4837 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4838 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4839 if (!pattern_stmt)
4840 {
4841 /* Clear any half-formed pattern definition sequence. */
4842 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4843 return;
4844 }
4845
4846 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4847 gcc_assert (pattern_vectype);
4848
4849 /* Found a vectorizable pattern. */
4850 if (dump_enabled_p ())
4851 dump_printf_loc (MSG_NOTE, vect_location,
4852 "%s pattern recognized: %G",
4853 recog_func->name, pattern_stmt);
4854
4855 /* Mark the stmts that are involved in the pattern. */
4856 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4857
4858 /* Patterns cannot be vectorized using SLP, because they change the order of
4859 computation. */
4860 if (loop_vinfo)
4861 {
4862 unsigned ix, ix2;
4863 stmt_vec_info *elem_ptr;
4864 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4865 elem_ptr, *elem_ptr == stmt_info);
4866 }
4867 }
4868
4869
4870 /* Function vect_pattern_recog
4871
4872 Input:
4873 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4874 computation idioms.
4875
4876 Output - for each computation idiom that is detected we create a new stmt
4877 that provides the same functionality and that can be vectorized. We
4878 also record some information in the struct_stmt_info of the relevant
4879 stmts, as explained below:
4880
4881 At the entry to this function we have the following stmts, with the
4882 following initial value in the STMT_VINFO fields:
4883
4884 stmt in_pattern_p related_stmt vec_stmt
4885 S1: a_i = .... - - -
4886 S2: a_2 = ..use(a_i).. - - -
4887 S3: a_1 = ..use(a_2).. - - -
4888 S4: a_0 = ..use(a_1).. - - -
4889 S5: ... = ..use(a_0).. - - -
4890
4891 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4892 represented by a single stmt. We then:
4893 - create a new stmt S6 equivalent to the pattern (the stmt is not
4894 inserted into the code)
4895 - fill in the STMT_VINFO fields as follows:
4896
4897 in_pattern_p related_stmt vec_stmt
4898 S1: a_i = .... - - -
4899 S2: a_2 = ..use(a_i).. - - -
4900 S3: a_1 = ..use(a_2).. - - -
4901 S4: a_0 = ..use(a_1).. true S6 -
4902 '---> S6: a_new = .... - S4 -
4903 S5: ... = ..use(a_0).. - - -
4904
4905 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4906 to each other through the RELATED_STMT field).
4907
4908 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4909 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4910 remain irrelevant unless used by stmts other than S4.
4911
4912 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4913 (because they are marked as irrelevant). It will vectorize S6, and record
4914 a pointer to the new vector stmt VS6 from S6 (as usual).
4915 S4 will be skipped, and S5 will be vectorized as usual:
4916
4917 in_pattern_p related_stmt vec_stmt
4918 S1: a_i = .... - - -
4919 S2: a_2 = ..use(a_i).. - - -
4920 S3: a_1 = ..use(a_2).. - - -
4921 > VS6: va_new = .... - - -
4922 S4: a_0 = ..use(a_1).. true S6 VS6
4923 '---> S6: a_new = .... - S4 VS6
4924 > VS5: ... = ..vuse(va_new).. - - -
4925 S5: ... = ..use(a_0).. - - -
4926
4927 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4928 elsewhere), and we'll end up with:
4929
4930 VS6: va_new = ....
4931 VS5: ... = ..vuse(va_new)..
4932
4933 In case of more than one pattern statements, e.g., widen-mult with
4934 intermediate type:
4935
4936 S1 a_t = ;
4937 S2 a_T = (TYPE) a_t;
4938 '--> S3: a_it = (interm_type) a_t;
4939 S4 prod_T = a_T * CONST;
4940 '--> S5: prod_T' = a_it w* CONST;
4941
4942 there may be other users of a_T outside the pattern. In that case S2 will
4943 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4944 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4945 be recorded in S3. */
4946
4947 void
4948 vect_pattern_recog (vec_info *vinfo)
4949 {
4950 struct loop *loop;
4951 basic_block *bbs;
4952 unsigned int nbbs;
4953 gimple_stmt_iterator si;
4954 unsigned int i, j;
4955
4956 vect_determine_precisions (vinfo);
4957
4958 DUMP_VECT_SCOPE ("vect_pattern_recog");
4959
4960 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4961 {
4962 loop = LOOP_VINFO_LOOP (loop_vinfo);
4963 bbs = LOOP_VINFO_BBS (loop_vinfo);
4964 nbbs = loop->num_nodes;
4965
4966 /* Scan through the loop stmts, applying the pattern recognition
4967 functions starting at each stmt visited: */
4968 for (i = 0; i < nbbs; i++)
4969 {
4970 basic_block bb = bbs[i];
4971 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4972 {
4973 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4974 /* Scan over all generic vect_recog_xxx_pattern functions. */
4975 for (j = 0; j < NUM_PATTERNS; j++)
4976 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4977 stmt_info);
4978 }
4979 }
4980 }
4981 else
4982 {
4983 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4984 for (si = bb_vinfo->region_begin;
4985 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4986 {
4987 gimple *stmt = gsi_stmt (si);
4988 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4989 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4990 continue;
4991
4992 /* Scan over all generic vect_recog_xxx_pattern functions. */
4993 for (j = 0; j < NUM_PATTERNS; j++)
4994 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
4995 }
4996 }
4997 }