1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "insn-codes.h"
64 #include "optabs.h"
65 #include "diagnostic-core.h"
66 #include "tree-vectorizer.h"
68 #include "hash-map.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "builtins.h"
73
74 /* For lang_hooks.types.type_for_mode. */
75 #include "langhooks.h"
76
77 /* Return the vectorized type for the given statement. */
78
79 tree
80 stmt_vectype (struct _stmt_vec_info *stmt_info)
81 {
82 return STMT_VINFO_VECTYPE (stmt_info);
83 }
84
85 /* Return TRUE iff the given statement is in an inner loop relative to
86 the loop being vectorized. */
87 bool
88 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
89 {
90 gimple stmt = STMT_VINFO_STMT (stmt_info);
91 basic_block bb = gimple_bb (stmt);
92 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
93 struct loop* loop;
94
95 if (!loop_vinfo)
96 return false;
97
98 loop = LOOP_VINFO_LOOP (loop_vinfo);
99
100 return (bb->loop_father == loop->inner);
101 }
102
103 /* Record the cost of a statement, either by directly informing the
104 target model or by saving it in a vector for later processing.
105 Return a preliminary estimate of the statement's cost. */
106
107 unsigned
108 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
109 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
110 int misalign, enum vect_cost_model_location where)
111 {
112 if (body_cost_vec)
113 {
114 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
115 add_stmt_info_to_vec (body_cost_vec, count, kind,
116 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
117 misalign);
118 return (unsigned)
119 (builtin_vectorization_cost (kind, vectype, misalign) * count);
120
121 }
122 else
123 {
124 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
125 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
126 void *target_cost_data;
127
128 if (loop_vinfo)
129 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
130 else
131 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
132
133 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
134 misalign, where);
135 }
136 }
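/* For illustration, callers in this file typically accumulate the returned
   estimate into a running cost, e.g. (a sketch based on
   vect_model_simple_cost below):

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   The costs are either queued in BODY_COST_VEC for later processing or
   passed straight to the target via add_stmt_cost.  */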
137
138 /* Return a variable of type ELEM_TYPE[NELEMS]. */
139
140 static tree
141 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
142 {
143 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
144 "vect_array");
145 }
146
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Return an SSA_NAME for the vector in index N. The reference
149 is part of the vectorization of STMT and the vector is associated
150 with scalar destination SCALAR_DEST. */
151
152 static tree
153 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
154 tree array, unsigned HOST_WIDE_INT n)
155 {
156 tree vect_type, vect, vect_name, array_ref;
157 gimple new_stmt;
158
159 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
160 vect_type = TREE_TYPE (TREE_TYPE (array));
161 vect = vect_create_destination_var (scalar_dest, vect_type);
162 array_ref = build4 (ARRAY_REF, vect_type, array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
165
166 new_stmt = gimple_build_assign (vect, array_ref);
167 vect_name = make_ssa_name (vect, new_stmt);
168 gimple_assign_set_lhs (new_stmt, vect_name);
169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
170
171 return vect_name;
172 }
173
174 /* ARRAY is an array of vectors created by create_vector_array.
175 Emit code to store SSA_NAME VECT in index N of the array.
176 The store is part of the vectorization of STMT. */
177
178 static void
179 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
180 tree array, unsigned HOST_WIDE_INT n)
181 {
182 tree array_ref;
183 gimple new_stmt;
184
185 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
186 build_int_cst (size_type_node, n),
187 NULL_TREE, NULL_TREE);
188
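/* Emit "ARRAY[N] = VECT"; e.g. for N == 2 this is vect_array[2] = vect.  */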
189 new_stmt = gimple_build_assign (array_ref, vect);
190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
191 }
192
193 /* PTR is a pointer to an array of type TYPE. Return a representation
194 of *PTR. The memory reference replaces those in FIRST_DR
195 (and its group). */
196
197 static tree
198 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
199 {
200 tree mem_ref, alias_ptr_type;
201
202 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
203 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
204 /* Arrays have the same alignment as their type. */
205 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
206 return mem_ref;
207 }
208
209 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
210
211 /* Function vect_mark_relevant.
212
213 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
214
215 static void
216 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
217 enum vect_relevant relevant, bool live_p,
218 bool used_in_pattern)
219 {
220 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
221 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
222 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
223 gimple pattern_stmt;
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "mark relevant %d, live %d.\n", relevant, live_p);
228
229 /* If this stmt is an original stmt in a pattern, we might need to mark its
230 related pattern stmt instead of the original stmt. However, such stmts
231 may have their own uses that are not in any pattern; in such cases the
232 stmt itself should be marked. */
233 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
234 {
235 bool found = false;
236 if (!used_in_pattern)
237 {
238 imm_use_iterator imm_iter;
239 use_operand_p use_p;
240 gimple use_stmt;
241 tree lhs;
242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
244
245 if (is_gimple_assign (stmt))
246 lhs = gimple_assign_lhs (stmt);
247 else
248 lhs = gimple_call_lhs (stmt);
249
250 /* This use is outside the pattern. If LHS has other uses that are
251 pattern uses, we should mark the stmt itself, and not the pattern
252 stmt. */
253 if (lhs && TREE_CODE (lhs) == SSA_NAME)
254 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
255 {
256 if (is_gimple_debug (USE_STMT (use_p)))
257 continue;
258 use_stmt = USE_STMT (use_p);
259
260 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
261 continue;
262
263 if (vinfo_for_stmt (use_stmt)
264 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
265 {
266 found = true;
267 break;
268 }
269 }
270 }
271
272 if (!found)
273 {
274 /* This is the last stmt in a sequence that was detected as a
275 pattern that can potentially be vectorized. Don't mark the stmt
276 as relevant/live because it's not going to be vectorized.
277 Instead mark the pattern-stmt that replaces it. */
278
279 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
280
281 if (dump_enabled_p ())
282 dump_printf_loc (MSG_NOTE, vect_location,
283 "last stmt in pattern. don't mark"
284 " relevant/live.\n");
285 stmt_info = vinfo_for_stmt (pattern_stmt);
286 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
287 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
288 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
289 stmt = pattern_stmt;
290 }
291 }
292
293 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
294 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
295 STMT_VINFO_RELEVANT (stmt_info) = relevant;
296
297 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
298 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
299 {
300 if (dump_enabled_p ())
301 dump_printf_loc (MSG_NOTE, vect_location,
302 "already marked relevant/live.\n");
303 return;
304 }
305
306 worklist->safe_push (stmt);
307 }
308
309
310 /* Function vect_stmt_relevant_p.
311
312 Return true if STMT, in the loop represented by LOOP_VINFO, is
313 "relevant for vectorization".
314
315 A stmt is considered "relevant for vectorization" if:
316 - it has uses outside the loop.
317 - it has vdefs (it alters memory).
318 - it is a control stmt in the loop (other than the loop exit condition).
319
320 CHECKME: what other side effects would the vectorizer allow? */
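/* For example, a store "a[i] = x" inside the loop is relevant because it
   has a vdef, and a stmt whose result is used by code after the loop is
   marked live.  */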
321
322 static bool
323 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
324 enum vect_relevant *relevant, bool *live_p)
325 {
326 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
327 ssa_op_iter op_iter;
328 imm_use_iterator imm_iter;
329 use_operand_p use_p;
330 def_operand_p def_p;
331
332 *relevant = vect_unused_in_scope;
333 *live_p = false;
334
335 /* cond stmt other than loop exit cond. */
336 if (is_ctrl_stmt (stmt)
337 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
338 != loop_exit_ctrl_vec_info_type)
339 *relevant = vect_used_in_scope;
340
341 /* changing memory. */
342 if (gimple_code (stmt) != GIMPLE_PHI)
343 if (gimple_vdef (stmt)
344 && !gimple_clobber_p (stmt))
345 {
346 if (dump_enabled_p ())
347 dump_printf_loc (MSG_NOTE, vect_location,
348 "vec_stmt_relevant_p: stmt has vdefs.\n");
349 *relevant = vect_used_in_scope;
350 }
351
352 /* uses outside the loop. */
353 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
354 {
355 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
356 {
357 basic_block bb = gimple_bb (USE_STMT (use_p));
358 if (!flow_bb_inside_loop_p (loop, bb))
359 {
360 if (dump_enabled_p ())
361 dump_printf_loc (MSG_NOTE, vect_location,
362 "vec_stmt_relevant_p: used out of loop.\n");
363
364 if (is_gimple_debug (USE_STMT (use_p)))
365 continue;
366
367 /* We expect all such uses to be in the loop exit phis
368 (because of loop-closed SSA form).  */
369 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
370 gcc_assert (bb == single_exit (loop)->dest);
371
372 *live_p = true;
373 }
374 }
375 }
376
377 return (*live_p || *relevant);
378 }
379
380
381 /* Function exist_non_indexing_operands_for_use_p
382
383 USE is one of the uses attached to STMT. Check if USE is
384 used in STMT for anything other than indexing an array. */
385
386 static bool
387 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
388 {
389 tree operand;
390 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
391
392 /* USE corresponds to some operand in STMT. If there is no data
393 reference in STMT, then any operand that corresponds to USE
394 is not indexing an array. */
395 if (!STMT_VINFO_DATA_REF (stmt_info))
396 return true;
397
398 /* STMT has a data_ref. FORNOW this means that it is one of
399 the following forms:
400 -1- ARRAY_REF = var
401 -2- var = ARRAY_REF
402 (This should have been verified in analyze_data_refs).
403
404 'var' in the second case corresponds to a def, not a use,
405 so USE cannot correspond to any operands that are not used
406 for array indexing.
407
408 Therefore, all we need to check is if STMT falls into the
409 first case, and whether var corresponds to USE. */
410
411 if (!gimple_assign_copy_p (stmt))
412 {
413 if (is_gimple_call (stmt)
414 && gimple_call_internal_p (stmt))
415 switch (gimple_call_internal_fn (stmt))
416 {
417 case IFN_MASK_STORE:
418 operand = gimple_call_arg (stmt, 3);
419 if (operand == use)
420 return true;
421 /* FALLTHRU */
422 case IFN_MASK_LOAD:
423 operand = gimple_call_arg (stmt, 2);
424 if (operand == use)
425 return true;
426 break;
427 default:
428 break;
429 }
430 return false;
431 }
432
433 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
434 return false;
435 operand = gimple_assign_rhs1 (stmt);
436 if (TREE_CODE (operand) != SSA_NAME)
437 return false;
438
439 if (operand == use)
440 return true;
441
442 return false;
443 }
444
445
446 /*
447 Function process_use.
448
449 Inputs:
450 - a USE in STMT in a loop represented by LOOP_VINFO
451 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
452 that defined USE. This is done by calling mark_relevant and passing it
453 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
454 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
455 be performed.
456
457 Outputs:
458 Generally, LIVE_P and RELEVANT are used to define the liveness and
459 relevance info of the DEF_STMT of this USE:
460 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
461 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
462 Exceptions:
463 - case 1: If USE is used only for address computations (e.g. array indexing),
464 which does not need to be directly vectorized, then the liveness/relevance
465 of the respective DEF_STMT is left unchanged.
466 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
467 skip DEF_STMT because it has already been processed.
468 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
469 be modified accordingly.
470
471 Return true if everything is as expected. Return false otherwise. */
472
473 static bool
474 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
475 enum vect_relevant relevant, vec<gimple> *worklist,
476 bool force)
477 {
478 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
479 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
480 stmt_vec_info dstmt_vinfo;
481 basic_block bb, def_bb;
482 tree def;
483 gimple def_stmt;
484 enum vect_def_type dt;
485
486 /* case 1: we are only interested in uses that need to be vectorized. Uses
487 that are used for address computation are not considered relevant. */
488 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
489 return true;
490
491 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
492 {
493 if (dump_enabled_p ())
494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
495 "not vectorized: unsupported use in stmt.\n");
496 return false;
497 }
498
499 if (!def_stmt || gimple_nop_p (def_stmt))
500 return true;
501
502 def_bb = gimple_bb (def_stmt);
503 if (!flow_bb_inside_loop_p (loop, def_bb))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
507 return true;
508 }
509
510 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
511 DEF_STMT must have already been processed, because this should be the
512 only way that STMT, which is a reduction-phi, was put in the worklist,
513 as there should be no other uses for DEF_STMT in the loop. So we just
514 check that everything is as expected, and we are done. */
515 dstmt_vinfo = vinfo_for_stmt (def_stmt);
516 bb = gimple_bb (stmt);
517 if (gimple_code (stmt) == GIMPLE_PHI
518 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
519 && gimple_code (def_stmt) != GIMPLE_PHI
520 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
521 && bb->loop_father == def_bb->loop_father)
522 {
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location,
525 "reduc-stmt defining reduc-phi in the same nest.\n");
526 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
527 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
528 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
529 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
530 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
531 return true;
532 }
533
534 /* case 3a: outer-loop stmt defining an inner-loop stmt:
535 outer-loop-header-bb:
536 d = def_stmt
537 inner-loop:
538 stmt # use (d)
539 outer-loop-tail-bb:
540 ... */
541 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "outer-loop def-stmt defining inner-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
551 vect_used_in_scope : vect_unused_in_scope;
552 break;
553
554 case vect_used_in_outer_by_reduction:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_by_reduction;
557 break;
558
559 case vect_used_in_outer:
560 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
561 relevant = vect_used_in_scope;
562 break;
563
564 case vect_used_in_scope:
565 break;
566
567 default:
568 gcc_unreachable ();
569 }
570 }
571
572 /* case 3b: inner-loop stmt defining an outer-loop stmt:
573 outer-loop-header-bb:
574 ...
575 inner-loop:
576 d = def_stmt
577 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
578 stmt # use (d) */
579 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
580 {
581 if (dump_enabled_p ())
582 dump_printf_loc (MSG_NOTE, vect_location,
583 "inner-loop def-stmt defining outer-loop stmt.\n");
584
585 switch (relevant)
586 {
587 case vect_unused_in_scope:
588 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
589 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
590 vect_used_in_outer_by_reduction : vect_unused_in_scope;
591 break;
592
593 case vect_used_by_reduction:
594 relevant = vect_used_in_outer_by_reduction;
595 break;
596
597 case vect_used_in_scope:
598 relevant = vect_used_in_outer;
599 break;
600
601 default:
602 gcc_unreachable ();
603 }
604 }
605
606 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
607 is_pattern_stmt_p (stmt_vinfo));
608 return true;
609 }
610
611
612 /* Function vect_mark_stmts_to_be_vectorized.
613
614 Not all stmts in the loop need to be vectorized. For example:
615
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
620
621 3. j = j + 1
622
623 Stmts 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
625
626 This pass detects such stmts. */
627
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630 {
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple phi;
640 bool live_p;
641 enum vect_relevant relevant, tmp_relevant;
642 enum vect_def_type def_type;
643
644 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE, vect_location,
646 "=== vect_mark_stmts_to_be_vectorized ===\n");
647
648 auto_vec<gimple, 64> worklist;
649
650 /* 1. Init worklist. */
651 for (i = 0; i < nbbs; i++)
652 {
653 bb = bbs[i];
654 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
655 {
656 phi = gsi_stmt (si);
657 if (dump_enabled_p ())
658 {
659 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
660 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
661 dump_printf (MSG_NOTE, "\n");
662 }
663
664 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
665 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
666 }
667 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
668 {
669 stmt = gsi_stmt (si);
670 if (dump_enabled_p ())
671 {
672 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
673 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
674 dump_printf (MSG_NOTE, "\n");
675 }
676
677 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
678 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
679 }
680 }
681
682 /* 2. Process_worklist */
683 while (worklist.length () > 0)
684 {
685 use_operand_p use_p;
686 ssa_op_iter iter;
687
688 stmt = worklist.pop ();
689 if (dump_enabled_p ())
690 {
691 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
692 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
693 dump_printf (MSG_NOTE, "\n");
694 }
695
696 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
697 (DEF_STMT) as relevant/irrelevant and live/dead according to the
698 liveness and relevance properties of STMT. */
699 stmt_vinfo = vinfo_for_stmt (stmt);
700 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
701 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
702
703 /* Generally, the liveness and relevance properties of STMT are
704 propagated as is to the DEF_STMTs of its USEs:
705 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
706 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
707
708 One exception is when STMT has been identified as defining a reduction
709 variable; in this case we set the liveness/relevance as follows:
710 live_p = false
711 relevant = vect_used_by_reduction
712 This is because we distinguish between two kinds of relevant stmts -
713 those that are used by a reduction computation, and those that are
714 (also) used by a regular computation. This allows us later on to
715 identify stmts that are used solely by a reduction, for which the
716 order of the results they produce does not have to be kept. */
717
718 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
719 tmp_relevant = relevant;
720 switch (def_type)
721 {
722 case vect_reduction_def:
723 switch (tmp_relevant)
724 {
725 case vect_unused_in_scope:
726 relevant = vect_used_by_reduction;
727 break;
728
729 case vect_used_by_reduction:
730 if (gimple_code (stmt) == GIMPLE_PHI)
731 break;
732 /* fall through */
733
734 default:
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "unsupported use of reduction.\n");
738 return false;
739 }
740
741 live_p = false;
742 break;
743
744 case vect_nested_cycle:
745 if (tmp_relevant != vect_unused_in_scope
746 && tmp_relevant != vect_used_in_outer_by_reduction
747 && tmp_relevant != vect_used_in_outer)
748 {
749 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "unsupported use of nested cycle.\n");
752
753 return false;
754 }
755
756 live_p = false;
757 break;
758
759 case vect_double_reduction_def:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_by_reduction)
762 {
763 if (dump_enabled_p ())
764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
765 "unsupported use of double reduction.\n");
766
767 return false;
768 }
769
770 live_p = false;
771 break;
772
773 default:
774 break;
775 }
776
777 if (is_pattern_stmt_p (stmt_vinfo))
778 {
779 /* Pattern statements are not inserted into the code, so
780 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
781 have to scan the RHS or function arguments instead. */
782 if (is_gimple_assign (stmt))
783 {
784 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
785 tree op = gimple_assign_rhs1 (stmt);
786
787 i = 1;
788 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
789 {
790 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
791 live_p, relevant, &worklist, false)
792 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
793 live_p, relevant, &worklist, false))
794 return false;
795 i = 2;
796 }
797 for (; i < gimple_num_ops (stmt); i++)
798 {
799 op = gimple_op (stmt, i);
800 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
801 &worklist, false))
802 return false;
803 }
804 }
805 else if (is_gimple_call (stmt))
806 {
807 for (i = 0; i < gimple_call_num_args (stmt); i++)
808 {
809 tree arg = gimple_call_arg (stmt, i);
810 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
811 &worklist, false))
812 return false;
813 }
814 }
815 }
816 else
817 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
818 {
819 tree op = USE_FROM_PTR (use_p);
820 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
821 &worklist, false))
822 return false;
823 }
824
825 if (STMT_VINFO_GATHER_P (stmt_vinfo))
826 {
827 tree off;
828 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
829 gcc_assert (decl);
830 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
831 &worklist, true))
832 return false;
833 }
834 } /* while worklist */
835
836 return true;
837 }
838
839
840 /* Function vect_model_simple_cost.
841
842 Models cost for simple operations, i.e. those that only emit ncopies of a
843 single op. Right now, this does not account for multiple insns that could
844 be generated for the single vector op. We will handle that shortly. */
845
846 void
847 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
848 enum vect_def_type *dt,
849 stmt_vector_for_cost *prologue_cost_vec,
850 stmt_vector_for_cost *body_cost_vec)
851 {
852 int i;
853 int inside_cost = 0, prologue_cost = 0;
854
855 /* The SLP costs were already calculated during SLP tree build. */
856 if (PURE_SLP_STMT (stmt_info))
857 return;
858
859 /* FORNOW: Assuming a maximum of 2 args per stmt. */
860 for (i = 0; i < 2; i++)
861 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
862 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
863 stmt_info, 0, vect_prologue);
864
865 /* Pass the inside-of-loop statements to the target-specific cost model. */
866 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
867 stmt_info, 0, vect_body);
868
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE, vect_location,
871 "vect_model_simple_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 }
874
875
876 /* Model cost for type demotion and promotion operations. PWR is normally
877 zero for single-step promotions and demotions. It will be one if
878 two-step promotion/demotion is required, and so on. Each additional
879 step doubles the number of instructions required. */
880
881 static void
882 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
883 enum vect_def_type *dt, int pwr)
884 {
885 int i, tmp;
886 int inside_cost = 0, prologue_cost = 0;
887 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
888 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
889 void *target_cost_data;
890
891 /* The SLP costs were already calculated during SLP tree build. */
892 if (PURE_SLP_STMT (stmt_info))
893 return;
894
895 if (loop_vinfo)
896 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
897 else
898 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
899
900 for (i = 0; i < pwr + 1; i++)
901 {
902 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
903 (i + 1) : i;
904 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
905 vec_promote_demote, stmt_info, 0,
906 vect_body);
907 }
908
909 /* FORNOW: Assuming a maximum of 2 args per stmt. */
910 for (i = 0; i < 2; i++)
911 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
912 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
913 stmt_info, 0, vect_prologue);
914
915 if (dump_enabled_p ())
916 dump_printf_loc (MSG_NOTE, vect_location,
917 "vect_model_promotion_demotion_cost: inside_cost = %d, "
918 "prologue_cost = %d .\n", inside_cost, prologue_cost);
919 }
920
921 /* Function vect_cost_group_size
922
923 For grouped load or store, return the group_size only if it is the first
924 load or store of a group, else return 1. This ensures that group size is
925 only returned once per group. */
926
927 static int
928 vect_cost_group_size (stmt_vec_info stmt_info)
929 {
930 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
931
932 if (first_stmt == STMT_VINFO_STMT (stmt_info))
933 return GROUP_SIZE (stmt_info);
934
935 return 1;
936 }
937
938
939 /* Function vect_model_store_cost
940
941 Models cost for stores. In the case of grouped accesses, one access
942 has the overhead of the grouped access attributed to it. */
943
944 void
945 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
946 bool store_lanes_p, enum vect_def_type dt,
947 slp_tree slp_node,
948 stmt_vector_for_cost *prologue_cost_vec,
949 stmt_vector_for_cost *body_cost_vec)
950 {
951 int group_size;
952 unsigned int inside_cost = 0, prologue_cost = 0;
953 struct data_reference *first_dr;
954 gimple first_stmt;
955
956 /* The SLP costs were already calculated during SLP tree build. */
957 if (PURE_SLP_STMT (stmt_info))
958 return;
959
960 if (dt == vect_constant_def || dt == vect_external_def)
961 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
963
964 /* Grouped access? */
965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
966 {
967 if (slp_node)
968 {
969 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
970 group_size = 1;
971 }
972 else
973 {
974 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
975 group_size = vect_cost_group_size (stmt_info);
976 }
977
978 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
979 }
980 /* Not a grouped access. */
981 else
982 {
983 group_size = 1;
984 first_dr = STMT_VINFO_DATA_REF (stmt_info);
985 }
986
987 /* We assume that the cost of a single store-lanes instruction is
988 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
989 access is instead being provided by a permute-and-store operation,
990 include the cost of the permutes. */
991 if (!store_lanes_p && group_size > 1)
992 {
993 /* Uses high and low interleave or shuffle operations for each
994 needed permute. */
995 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
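/* E.g. for a group of 4 stores and ncopies == 1 this is
   1 * ceil_log2 (4) * 4 = 8 permute stmts.  */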
996 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
997 stmt_info, 0, vect_body);
998
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE, vect_location,
1001 "vect_model_store_cost: strided group_size = %d .\n",
1002 group_size);
1003 }
1004
1005 /* Costs of the stores. */
1006 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1007
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE, vect_location,
1010 "vect_model_store_cost: inside_cost = %d, "
1011 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1012 }
1013
1014
1015 /* Calculate cost of DR's memory access. */
1016 void
1017 vect_get_store_cost (struct data_reference *dr, int ncopies,
1018 unsigned int *inside_cost,
1019 stmt_vector_for_cost *body_cost_vec)
1020 {
1021 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1022 gimple stmt = DR_STMT (dr);
1023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1024
1025 switch (alignment_support_scheme)
1026 {
1027 case dr_aligned:
1028 {
1029 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1030 vector_store, stmt_info, 0,
1031 vect_body);
1032
1033 if (dump_enabled_p ())
1034 dump_printf_loc (MSG_NOTE, vect_location,
1035 "vect_model_store_cost: aligned.\n");
1036 break;
1037 }
1038
1039 case dr_unaligned_supported:
1040 {
1041 /* Here, we assign an additional cost for the unaligned store. */
1042 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1043 unaligned_store, stmt_info,
1044 DR_MISALIGNMENT (dr), vect_body);
1045 if (dump_enabled_p ())
1046 dump_printf_loc (MSG_NOTE, vect_location,
1047 "vect_model_store_cost: unaligned supported by "
1048 "hardware.\n");
1049 break;
1050 }
1051
1052 case dr_unaligned_unsupported:
1053 {
1054 *inside_cost = VECT_MAX_COST;
1055
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1058 "vect_model_store_cost: unsupported access.\n");
1059 break;
1060 }
1061
1062 default:
1063 gcc_unreachable ();
1064 }
1065 }
1066
1067
1068 /* Function vect_model_load_cost
1069
1070 Models cost for loads. In the case of grouped accesses, the last access
1071 has the overhead of the grouped access attributed to it. Since unaligned
1072 accesses are supported for loads, we also account for the costs of the
1073 access scheme chosen. */
1074
1075 void
1076 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1077 bool load_lanes_p, slp_tree slp_node,
1078 stmt_vector_for_cost *prologue_cost_vec,
1079 stmt_vector_for_cost *body_cost_vec)
1080 {
1081 int group_size;
1082 gimple first_stmt;
1083 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1084 unsigned int inside_cost = 0, prologue_cost = 0;
1085
1086 /* The SLP costs were already calculated during SLP tree build. */
1087 if (PURE_SLP_STMT (stmt_info))
1088 return;
1089
1090 /* Grouped accesses? */
1091 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1092 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1093 {
1094 group_size = vect_cost_group_size (stmt_info);
1095 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1096 }
1097 /* Not a grouped access. */
1098 else
1099 {
1100 group_size = 1;
1101 first_dr = dr;
1102 }
1103
1104 /* We assume that the cost of a single load-lanes instruction is
1105 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1106 access is instead being provided by a load-and-permute operation,
1107 include the cost of the permutes. */
1108 if (!load_lanes_p && group_size > 1)
1109 {
1110 /* Uses even and odd extract operations or shuffle operations
1111 for each needed permute. */
1112 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
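/* E.g. for a group of 8 loads and ncopies == 1 this is
   1 * ceil_log2 (8) * 8 = 24 permute stmts.  */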
1113 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1114 stmt_info, 0, vect_body);
1115
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: strided group_size = %d .\n",
1119 group_size);
1120 }
1121
1122 /* The loads themselves. */
1123 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1124 {
1125 /* N scalar loads plus gathering them into a vector. */
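/* E.g. for a 4-element vector type and ncopies == 2 this amounts to
   8 scalar_load costs plus 2 vec_construct costs.  */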
1126 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1127 inside_cost += record_stmt_cost (body_cost_vec,
1128 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1129 scalar_load, stmt_info, 0, vect_body);
1130 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1131 stmt_info, 0, vect_body);
1132 }
1133 else
1134 vect_get_load_cost (first_dr, ncopies,
1135 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1136 || group_size > 1 || slp_node),
1137 &inside_cost, &prologue_cost,
1138 prologue_cost_vec, body_cost_vec, true);
1139
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE, vect_location,
1142 "vect_model_load_cost: inside_cost = %d, "
1143 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1144 }
1145
1146
1147 /* Calculate cost of DR's memory access. */
1148 void
1149 vect_get_load_cost (struct data_reference *dr, int ncopies,
1150 bool add_realign_cost, unsigned int *inside_cost,
1151 unsigned int *prologue_cost,
1152 stmt_vector_for_cost *prologue_cost_vec,
1153 stmt_vector_for_cost *body_cost_vec,
1154 bool record_prologue_costs)
1155 {
1156 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1157 gimple stmt = DR_STMT (dr);
1158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1159
1160 switch (alignment_support_scheme)
1161 {
1162 case dr_aligned:
1163 {
1164 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1165 stmt_info, 0, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: aligned.\n");
1170
1171 break;
1172 }
1173 case dr_unaligned_supported:
1174 {
1175 /* Here, we assign an additional cost for the unaligned load. */
1176 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1177 unaligned_load, stmt_info,
1178 DR_MISALIGNMENT (dr), vect_body);
1179
1180 if (dump_enabled_p ())
1181 dump_printf_loc (MSG_NOTE, vect_location,
1182 "vect_model_load_cost: unaligned supported by "
1183 "hardware.\n");
1184
1185 break;
1186 }
1187 case dr_explicit_realign:
1188 {
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1190 vector_load, stmt_info, 0, vect_body);
1191 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1192 vec_perm, stmt_info, 0, vect_body);
1193
1194 /* FIXME: If the misalignment remains fixed across the iterations of
1195 the containing loop, the following cost should be added to the
1196 prologue costs. */
1197 if (targetm.vectorize.builtin_mask_for_load)
1198 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1199 stmt_info, 0, vect_body);
1200
1201 if (dump_enabled_p ())
1202 dump_printf_loc (MSG_NOTE, vect_location,
1203 "vect_model_load_cost: explicit realign\n");
1204
1205 break;
1206 }
1207 case dr_explicit_realign_optimized:
1208 {
1209 if (dump_enabled_p ())
1210 dump_printf_loc (MSG_NOTE, vect_location,
1211 "vect_model_load_cost: unaligned software "
1212 "pipelined.\n");
1213
1214 /* An unaligned software pipeline involves a load of an address, an
1215 initial load, and possibly a mask operation to "prime" the loop.
1216 However, if this is an access in a group of loads that together
1217 provide a grouped access, then the above cost should only be
1218 considered for one access in the group. Inside the loop, there is
1219 a load op and a realignment op. */
1220
1221 if (add_realign_cost && record_prologue_costs)
1222 {
1223 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1224 vector_stmt, stmt_info,
1225 0, vect_prologue);
1226 if (targetm.vectorize.builtin_mask_for_load)
1227 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1228 vector_stmt, stmt_info,
1229 0, vect_prologue);
1230 }
1231
1232 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1233 stmt_info, 0, vect_body);
1234 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1235 stmt_info, 0, vect_body);
1236
1237 if (dump_enabled_p ())
1238 dump_printf_loc (MSG_NOTE, vect_location,
1239 "vect_model_load_cost: explicit realign optimized"
1240 "\n");
1241
1242 break;
1243 }
1244
1245 case dr_unaligned_unsupported:
1246 {
1247 *inside_cost = VECT_MAX_COST;
1248
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1251 "vect_model_load_cost: unsupported access.\n");
1252 break;
1253 }
1254
1255 default:
1256 gcc_unreachable ();
1257 }
1258 }
1259
1260 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1261 the loop preheader for the vectorized stmt STMT. */
1262
1263 static void
1264 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1265 {
1266 if (gsi)
1267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1268 else
1269 {
1270 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1271 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1272
1273 if (loop_vinfo)
1274 {
1275 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1276 basic_block new_bb;
1277 edge pe;
1278
1279 if (nested_in_vect_loop_p (loop, stmt))
1280 loop = loop->inner;
1281
1282 pe = loop_preheader_edge (loop);
1283 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1284 gcc_assert (!new_bb);
1285 }
1286 else
1287 {
1288 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1289 basic_block bb;
1290 gimple_stmt_iterator gsi_bb_start;
1291
1292 gcc_assert (bb_vinfo);
1293 bb = BB_VINFO_BB (bb_vinfo);
1294 gsi_bb_start = gsi_after_labels (bb);
1295 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1296 }
1297 }
1298
1299 if (dump_enabled_p ())
1300 {
1301 dump_printf_loc (MSG_NOTE, vect_location,
1302 "created new init_stmt: ");
1303 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1304 dump_printf (MSG_NOTE, "\n");
1305 }
1306 }
1307
1308 /* Function vect_init_vector.
1309
1310 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1311 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1312 a vector type, a vector with all elements equal to VAL is created first.
1313 Place the initialization at GSI if it is not NULL. Otherwise, place the
1314 initialization at the loop preheader.
1315 Return the DEF of INIT_STMT.
1316 It will be used in the vectorization of STMT. */
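/* For example, initializing a four-element integer vector type with
   VAL == 5 creates "vect_cst_ = { 5, 5, 5, 5 }" (placed in the loop
   preheader when GSI is NULL) and returns the resulting SSA name.  */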
1317
1318 tree
1319 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1320 {
1321 tree new_var;
1322 gimple init_stmt;
1323 tree vec_oprnd;
1324 tree new_temp;
1325
1326 if (TREE_CODE (type) == VECTOR_TYPE
1327 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1328 {
1329 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1330 {
1331 if (CONSTANT_CLASS_P (val))
1332 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1333 else
1334 {
1335 new_temp = make_ssa_name (TREE_TYPE (type));
1336 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1337 vect_init_vector_1 (stmt, init_stmt, gsi);
1338 val = new_temp;
1339 }
1340 }
1341 val = build_vector_from_val (type, val);
1342 }
1343
1344 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1345 init_stmt = gimple_build_assign (new_var, val);
1346 new_temp = make_ssa_name (new_var, init_stmt);
1347 gimple_assign_set_lhs (init_stmt, new_temp);
1348 vect_init_vector_1 (stmt, init_stmt, gsi);
1349 vec_oprnd = gimple_assign_lhs (init_stmt);
1350 return vec_oprnd;
1351 }
1352
1353
1354 /* Function vect_get_vec_def_for_operand.
1355
1356 OP is an operand in STMT. This function returns a (vector) def that will be
1357 used in the vectorized stmt for STMT.
1358
1359 In the case that OP is an SSA_NAME which is defined in the loop, then
1360 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1361
1362 In case OP is an invariant or constant, a new stmt that creates a vector def
1363 needs to be introduced. */
1364
1365 tree
1366 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1367 {
1368 tree vec_oprnd;
1369 gimple vec_stmt;
1370 gimple def_stmt;
1371 stmt_vec_info def_stmt_info = NULL;
1372 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1373 unsigned int nunits;
1374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1375 tree def;
1376 enum vect_def_type dt;
1377 bool is_simple_use;
1378 tree vector_type;
1379
1380 if (dump_enabled_p ())
1381 {
1382 dump_printf_loc (MSG_NOTE, vect_location,
1383 "vect_get_vec_def_for_operand: ");
1384 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1385 dump_printf (MSG_NOTE, "\n");
1386 }
1387
1388 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1389 &def_stmt, &def, &dt);
1390 gcc_assert (is_simple_use);
1391 if (dump_enabled_p ())
1392 {
1393 int loc_printed = 0;
1394 if (def)
1395 {
1396 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1397 loc_printed = 1;
1398 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1399 dump_printf (MSG_NOTE, "\n");
1400 }
1401 if (def_stmt)
1402 {
1403 if (loc_printed)
1404 dump_printf (MSG_NOTE, " def_stmt = ");
1405 else
1406 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1407 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1408 dump_printf (MSG_NOTE, "\n");
1409 }
1410 }
1411
1412 switch (dt)
1413 {
1414 /* Case 1: operand is a constant. */
1415 case vect_constant_def:
1416 {
1417 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1418 gcc_assert (vector_type);
1419 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1420
1421 if (scalar_def)
1422 *scalar_def = op;
1423
1424 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1425 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "Create vector_cst. nunits = %d\n", nunits);
1428
1429 return vect_init_vector (stmt, op, vector_type, NULL);
1430 }
1431
1432 /* Case 2: operand is defined outside the loop - loop invariant. */
1433 case vect_external_def:
1434 {
1435 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1436 gcc_assert (vector_type);
1437
1438 if (scalar_def)
1439 *scalar_def = def;
1440
1441 /* Create 'vec_inv = {inv,inv,..,inv}' */
1442 if (dump_enabled_p ())
1443 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1444
1445 return vect_init_vector (stmt, def, vector_type, NULL);
1446 }
1447
1448 /* Case 3: operand is defined inside the loop. */
1449 case vect_internal_def:
1450 {
1451 if (scalar_def)
1452 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1453
1454 /* Get the def from the vectorized stmt. */
1455 def_stmt_info = vinfo_for_stmt (def_stmt);
1456
1457 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1458 /* Get vectorized pattern statement. */
1459 if (!vec_stmt
1460 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1461 && !STMT_VINFO_RELEVANT (def_stmt_info))
1462 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1463 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1464 gcc_assert (vec_stmt);
1465 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1466 vec_oprnd = PHI_RESULT (vec_stmt);
1467 else if (is_gimple_call (vec_stmt))
1468 vec_oprnd = gimple_call_lhs (vec_stmt);
1469 else
1470 vec_oprnd = gimple_assign_lhs (vec_stmt);
1471 return vec_oprnd;
1472 }
1473
1474 /* Case 4: operand is defined by a loop header phi - reduction */
1475 case vect_reduction_def:
1476 case vect_double_reduction_def:
1477 case vect_nested_cycle:
1478 {
1479 struct loop *loop;
1480
1481 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1482 loop = (gimple_bb (def_stmt))->loop_father;
1483
1484 /* Get the def before the loop */
1485 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1486 return get_initial_def_for_reduction (stmt, op, scalar_def);
1487 }
1488
1489 /* Case 5: operand is defined by loop-header phi - induction. */
1490 case vect_induction_def:
1491 {
1492 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1493
1494 /* Get the def from the vectorized stmt. */
1495 def_stmt_info = vinfo_for_stmt (def_stmt);
1496 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1497 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1498 vec_oprnd = PHI_RESULT (vec_stmt);
1499 else
1500 vec_oprnd = gimple_get_lhs (vec_stmt);
1501 return vec_oprnd;
1502 }
1503
1504 default:
1505 gcc_unreachable ();
1506 }
1507 }
1508
1509
1510 /* Function vect_get_vec_def_for_stmt_copy
1511
1512 Return a vector-def for an operand. This function is used when the
1513 vectorized stmt to be created (by the caller to this function) is a "copy"
1514 created in case the vectorized result cannot fit in one vector, and several
1515 copies of the vector-stmt are required. In this case the vector-def is
1516 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1517 of the stmt that defines VEC_OPRND.
1518 DT is the type of the vector def VEC_OPRND.
1519
1520 Context:
1521 In case the vectorization factor (VF) is bigger than the number
1522 of elements that can fit in a vectype (nunits), we have to generate
1523 more than one vector stmt to vectorize the scalar stmt. This situation
1524 arises when there are multiple data-types operated upon in the loop; the
1525 smallest data-type determines the VF, and as a result, when vectorizing
1526 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1527 vector stmt (each computing a vector of 'nunits' results, and together
1528 computing 'VF' results in each iteration). This function is called when
1529 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1530 which VF=16 and nunits=4, so the number of copies required is 4):
1531
1532 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1533
1534 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1535 VS1.1: vx.1 = memref1 VS1.2
1536 VS1.2: vx.2 = memref2 VS1.3
1537 VS1.3: vx.3 = memref3
1538
1539 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1540 VSnew.1: vz1 = vx.1 + ... VSnew.2
1541 VSnew.2: vz2 = vx.2 + ... VSnew.3
1542 VSnew.3: vz3 = vx.3 + ...
1543
1544 The vectorization of S1 is explained in vectorizable_load.
1545 The vectorization of S2:
1546 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1547 the function 'vect_get_vec_def_for_operand' is called to
1548 get the relevant vector-def for each operand of S2. For operand x it
1549 returns the vector-def 'vx.0'.
1550
1551 To create the remaining copies of the vector-stmt (VSnew.j), this
1552 function is called to get the relevant vector-def for each operand. It is
1553 obtained from the respective VS1.j stmt, which is recorded in the
1554 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1555
1556 For example, to obtain the vector-def 'vx.1' in order to create the
1557 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1558 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1559 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1560 and return its def ('vx.1').
1561 Overall, to create the above sequence this function will be called 3 times:
1562 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1563 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1564 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1565
1566 tree
1567 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1568 {
1569 gimple vec_stmt_for_operand;
1570 stmt_vec_info def_stmt_info;
1571
1572 /* Do nothing; can reuse same def. */
1573 if (dt == vect_external_def || dt == vect_constant_def )
1574 return vec_oprnd;
1575
1576 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1577 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1578 gcc_assert (def_stmt_info);
1579 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1580 gcc_assert (vec_stmt_for_operand);
1581 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1582 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1583 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1584 else
1585 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1586 return vec_oprnd;
1587 }
1588
1589
1590 /* Get vectorized definitions for the operands to create a copy of an original
1591 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1592
1593 static void
1594 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1595 vec<tree> *vec_oprnds0,
1596 vec<tree> *vec_oprnds1)
1597 {
1598 tree vec_oprnd = vec_oprnds0->pop ();
1599
1600 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1601 vec_oprnds0->quick_push (vec_oprnd);
1602
1603 if (vec_oprnds1 && vec_oprnds1->length ())
1604 {
1605 vec_oprnd = vec_oprnds1->pop ();
1606 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1607 vec_oprnds1->quick_push (vec_oprnd);
1608 }
1609 }
1610
1611
1612 /* Get vectorized definitions for OP0 and OP1.
1613 REDUC_INDEX is the index of the reduction operand in case of reduction,
1614 and -1 otherwise. */
1615
1616 void
1617 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1618 vec<tree> *vec_oprnds0,
1619 vec<tree> *vec_oprnds1,
1620 slp_tree slp_node, int reduc_index)
1621 {
1622 if (slp_node)
1623 {
1624 int nops = (op1 == NULL_TREE) ? 1 : 2;
1625 auto_vec<tree> ops (nops);
1626 auto_vec<vec<tree> > vec_defs (nops);
1627
1628 ops.quick_push (op0);
1629 if (op1)
1630 ops.quick_push (op1);
1631
1632 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1633
1634 *vec_oprnds0 = vec_defs[0];
1635 if (op1)
1636 *vec_oprnds1 = vec_defs[1];
1637 }
1638 else
1639 {
1640 tree vec_oprnd;
1641
1642 vec_oprnds0->create (1);
1643 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1644 vec_oprnds0->quick_push (vec_oprnd);
1645
1646 if (op1)
1647 {
1648 vec_oprnds1->create (1);
1649 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1650 vec_oprnds1->quick_push (vec_oprnd);
1651 }
1652 }
1653 }
1654
1655
1656 /* Function vect_finish_stmt_generation.
1657
1658 Insert the new stmt VEC_STMT, generated for scalar stmt STMT, before GSI. */
1659
1660 void
1661 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1662 gimple_stmt_iterator *gsi)
1663 {
1664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1665 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1666 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1667
1668 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1669
1670 if (!gsi_end_p (*gsi)
1671 && gimple_has_mem_ops (vec_stmt))
1672 {
1673 gimple at_stmt = gsi_stmt (*gsi);
1674 tree vuse = gimple_vuse (at_stmt);
1675 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1676 {
1677 tree vdef = gimple_vdef (at_stmt);
1678 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1679 /* If we have an SSA vuse and insert a store, update virtual
1680 SSA form to avoid triggering the renamer. Do so only
1681 if we can easily see all uses - which is what almost always
1682 happens with the way vectorized stmts are inserted. */
1683 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1684 && ((is_gimple_assign (vec_stmt)
1685 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1686 || (is_gimple_call (vec_stmt)
1687 && !(gimple_call_flags (vec_stmt)
1688 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1689 {
1690 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1691 gimple_set_vdef (vec_stmt, new_vdef);
1692 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1693 }
1694 }
1695 }
1696 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1697
1698 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1699 bb_vinfo));
1700
1701 if (dump_enabled_p ())
1702 {
1703 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1705 dump_printf (MSG_NOTE, "\n");
1706 }
1707
1708 gimple_set_location (vec_stmt, gimple_location (stmt));
1709
1710 /* While EH edges will generally prevent vectorization, stmt might
1711 e.g. be in a must-not-throw region. Ensure newly created stmts
1712 that could throw are part of the same region. */
1713 int lp_nr = lookup_stmt_eh_lp (stmt);
1714 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1715 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1716 }
1717
1718 /* Checks if CALL can be vectorized with vector types VECTYPE_OUT and VECTYPE_IN. Returns
1719 a function declaration if the target has a vectorized version
1720 of the function, or NULL_TREE if the function cannot be vectorized. */
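/* For instance, on targets implementing the builtin_vectorized_function
   hook, a const builtin such as sqrt may be mapped to a vector sqrt for
   the given vector types; the mapping is entirely target-specific and is
   mentioned here only as an illustration.  */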
1721
1722 tree
1723 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1724 {
1725 tree fndecl = gimple_call_fndecl (call);
1726
1727 /* We only handle functions that do not read or clobber memory -- i.e.
1728 const or novops ones. */
1729 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1730 return NULL_TREE;
1731
1732 if (!fndecl
1733 || TREE_CODE (fndecl) != FUNCTION_DECL
1734 || !DECL_BUILT_IN (fndecl))
1735 return NULL_TREE;
1736
1737 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1738 vectype_in);
1739 }
1740
1741
1742 static tree permute_vec_elements (tree, tree, tree, gimple,
1743 gimple_stmt_iterator *);
1744
1745
1746 /* Function vectorizable_mask_load_store.
1747
1748 Check if STMT performs a conditional load or store that can be vectorized.
1749 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1750 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1751 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1752
1753 static bool
1754 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1755 gimple *vec_stmt, slp_tree slp_node)
1756 {
1757 tree vec_dest = NULL;
1758 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1759 stmt_vec_info prev_stmt_info;
1760 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1761 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1762 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1763 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1764 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1765 tree elem_type;
1766 gimple new_stmt;
1767 tree dummy;
1768 tree dataref_ptr = NULL_TREE;
1769 gimple ptr_incr;
1770 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1771 int ncopies;
1772 int i, j;
1773 bool inv_p;
1774 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1775 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1776 int gather_scale = 1;
1777 enum vect_def_type gather_dt = vect_unknown_def_type;
1778 bool is_store;
1779 tree mask;
1780 gimple def_stmt;
1781 tree def;
1782 enum vect_def_type dt;
1783
1784 if (slp_node != NULL)
1785 return false;
1786
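  /* One vector statement handles NUNITS scalar iterations, so VF / NUNITS
     copies are needed to cover the whole vectorization factor.  */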
1787 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1788 gcc_assert (ncopies >= 1);
1789
1790 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1791 mask = gimple_call_arg (stmt, 2);
1792 if (TYPE_PRECISION (TREE_TYPE (mask))
1793 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1794 return false;
1795
1796 /* FORNOW. This restriction should be relaxed. */
1797 if (nested_in_vect_loop && ncopies > 1)
1798 {
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "multiple types in nested loop.");
1802 return false;
1803 }
1804
1805 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1806 return false;
1807
1808 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1809 return false;
1810
1811 if (!STMT_VINFO_DATA_REF (stmt_info))
1812 return false;
1813
1814 elem_type = TREE_TYPE (vectype);
1815
1816 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1817 return false;
1818
1819 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1820 return false;
1821
1822 if (STMT_VINFO_GATHER_P (stmt_info))
1823 {
1824 gimple def_stmt;
1825 tree def;
1826 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1827 &gather_off, &gather_scale);
1828 gcc_assert (gather_decl);
1829 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1830 &def_stmt, &def, &gather_dt,
1831 &gather_off_vectype))
1832 {
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "gather index use not simple.");
1836 return false;
1837 }
1838
1839 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1840 tree masktype
1841 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1842 if (TREE_CODE (masktype) == INTEGER_TYPE)
1843 {
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "masked gather with integer mask not supported.");
1847 return false;
1848 }
1849 }
1850 else if (tree_int_cst_compare (nested_in_vect_loop
1851 ? STMT_VINFO_DR_STEP (stmt_info)
1852 : DR_STEP (dr), size_zero_node) <= 0)
1853 return false;
1854 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1855 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1856 return false;
1857
1858 if (TREE_CODE (mask) != SSA_NAME)
1859 return false;
1860
1861 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1862 &def_stmt, &def, &dt))
1863 return false;
1864
1865 if (is_store)
1866 {
1867 tree rhs = gimple_call_arg (stmt, 3);
1868 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1869 &def_stmt, &def, &dt))
1870 return false;
1871 }
1872
1873 if (!vec_stmt) /* transformation not required. */
1874 {
1875 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1876 if (is_store)
1877 vect_model_store_cost (stmt_info, ncopies, false, dt,
1878 NULL, NULL, NULL);
1879 else
1880 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1881 return true;
1882 }
1883
1884 /** Transform. **/
1885
1886 if (STMT_VINFO_GATHER_P (stmt_info))
1887 {
1888 tree vec_oprnd0 = NULL_TREE, op;
1889 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1890 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1891 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1892 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1893 tree mask_perm_mask = NULL_TREE;
1894 edge pe = loop_preheader_edge (loop);
1895 gimple_seq seq;
1896 basic_block new_bb;
1897 enum { NARROW, NONE, WIDEN } modifier;
1898 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1899
1900 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1901 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1902 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1903 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1904 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1905 scaletype = TREE_VALUE (arglist);
1906 gcc_checking_assert (types_compatible_p (srctype, rettype)
1907 && types_compatible_p (srctype, masktype));
1908
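      /* The gather builtin consumes one offset vector per call.  If the
	 offset vector has twice as many elements as the data vector (WIDEN),
	 its two halves feed two gather calls, the odd copies using the
	 permutation built below; if it has half as many (NARROW), two gather
	 results are combined into one data vector by a permutation and
	 NCOPIES is doubled.  */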
1909 if (nunits == gather_off_nunits)
1910 modifier = NONE;
1911 else if (nunits == gather_off_nunits / 2)
1912 {
1913 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1914 modifier = WIDEN;
1915
1916 for (i = 0; i < gather_off_nunits; ++i)
1917 sel[i] = i | nunits;
1918
1919 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1920 }
1921 else if (nunits == gather_off_nunits * 2)
1922 {
1923 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1924 modifier = NARROW;
1925
1926 for (i = 0; i < nunits; ++i)
1927 sel[i] = i < gather_off_nunits
1928 ? i : i + nunits - gather_off_nunits;
1929
1930 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1931 ncopies *= 2;
1932 for (i = 0; i < nunits; ++i)
1933 sel[i] = i | gather_off_nunits;
1934 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1935 }
1936 else
1937 gcc_unreachable ();
1938
1939 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1940
1941 ptr = fold_convert (ptrtype, gather_base);
1942 if (!is_gimple_min_invariant (ptr))
1943 {
1944 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1945 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1946 gcc_assert (!new_bb);
1947 }
1948
1949 scale = build_int_cst (scaletype, gather_scale);
1950
1951 prev_stmt_info = NULL;
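      /* For each copy: build (or permute) the offset and mask vectors,
	 view-convert them to the builtin's argument types if necessary,
	 emit the masked gather call, and view-convert its result back to
	 the statement's vector type.  */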
1952 for (j = 0; j < ncopies; ++j)
1953 {
1954 if (modifier == WIDEN && (j & 1))
1955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1956 perm_mask, stmt, gsi);
1957 else if (j == 0)
1958 op = vec_oprnd0
1959 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1960 else
1961 op = vec_oprnd0
1962 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1963
1964 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1965 {
1966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1967 == TYPE_VECTOR_SUBPARTS (idxtype));
1968 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1969 var = make_ssa_name (var);
1970 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1971 new_stmt
1972 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1974 op = var;
1975 }
1976
1977 if (mask_perm_mask && (j & 1))
1978 mask_op = permute_vec_elements (mask_op, mask_op,
1979 mask_perm_mask, stmt, gsi);
1980 else
1981 {
1982 if (j == 0)
1983 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1984 else
1985 {
1986 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1987 &def_stmt, &def, &dt);
1988 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1989 }
1990
1991 mask_op = vec_mask;
1992 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1993 {
1994 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1995 == TYPE_VECTOR_SUBPARTS (masktype));
1996 var = vect_get_new_vect_var (masktype, vect_simple_var,
1997 NULL);
1998 var = make_ssa_name (var);
1999 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2000 new_stmt
2001 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2002 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2003 mask_op = var;
2004 }
2005 }
2006
2007 new_stmt
2008 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2009 scale);
2010
2011 if (!useless_type_conversion_p (vectype, rettype))
2012 {
2013 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2014 == TYPE_VECTOR_SUBPARTS (rettype));
2015 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2016 op = make_ssa_name (var, new_stmt);
2017 gimple_call_set_lhs (new_stmt, op);
2018 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2019 var = make_ssa_name (vec_dest);
2020 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2021 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2022 }
2023 else
2024 {
2025 var = make_ssa_name (vec_dest, new_stmt);
2026 gimple_call_set_lhs (new_stmt, var);
2027 }
2028
2029 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2030
2031 if (modifier == NARROW)
2032 {
2033 if ((j & 1) == 0)
2034 {
2035 prev_res = var;
2036 continue;
2037 }
2038 var = permute_vec_elements (prev_res, var,
2039 perm_mask, stmt, gsi);
2040 new_stmt = SSA_NAME_DEF_STMT (var);
2041 }
2042
2043 if (prev_stmt_info == NULL)
2044 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2045 else
2046 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2047 prev_stmt_info = vinfo_for_stmt (new_stmt);
2048 }
2049
2050 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2051 from the IL. */
2052 tree lhs = gimple_call_lhs (stmt);
2053 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2054 set_vinfo_for_stmt (new_stmt, stmt_info);
2055 set_vinfo_for_stmt (stmt, NULL);
2056 STMT_VINFO_STMT (stmt_info) = new_stmt;
2057 gsi_replace (gsi, new_stmt, true);
2058 return true;
2059 }
2060 else if (is_store)
2061 {
2062 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2063 prev_stmt_info = NULL;
2064 for (i = 0; i < ncopies; i++)
2065 {
2066 unsigned align, misalign;
2067
2068 if (i == 0)
2069 {
2070 tree rhs = gimple_call_arg (stmt, 3);
2071 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2072 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2073 /* We should have caught mismatched types earlier. */
2074 gcc_assert (useless_type_conversion_p (vectype,
2075 TREE_TYPE (vec_rhs)));
2076 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2077 NULL_TREE, &dummy, gsi,
2078 &ptr_incr, false, &inv_p);
2079 gcc_assert (!inv_p);
2080 }
2081 else
2082 {
2083 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2084 &def, &dt);
2085 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2086 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2087 &def, &dt);
2088 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2089 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2090 TYPE_SIZE_UNIT (vectype));
2091 }
2092
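	  /* Record what is known about the pointer's alignment: full vector
	     alignment for a known-aligned access, element alignment when the
	     misalignment is unknown (-1), otherwise the known misalignment.  */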
2093 align = TYPE_ALIGN_UNIT (vectype);
2094 if (aligned_access_p (dr))
2095 misalign = 0;
2096 else if (DR_MISALIGNMENT (dr) == -1)
2097 {
2098 align = TYPE_ALIGN_UNIT (elem_type);
2099 misalign = 0;
2100 }
2101 else
2102 misalign = DR_MISALIGNMENT (dr);
2103 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2104 misalign);
2105 new_stmt
2106 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2107 gimple_call_arg (stmt, 1),
2108 vec_mask, vec_rhs);
2109 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2110 if (i == 0)
2111 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2112 else
2113 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2114 prev_stmt_info = vinfo_for_stmt (new_stmt);
2115 }
2116 }
2117 else
2118 {
2119 tree vec_mask = NULL_TREE;
2120 prev_stmt_info = NULL;
2121 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2122 for (i = 0; i < ncopies; i++)
2123 {
2124 unsigned align, misalign;
2125
2126 if (i == 0)
2127 {
2128 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2129 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2130 NULL_TREE, &dummy, gsi,
2131 &ptr_incr, false, &inv_p);
2132 gcc_assert (!inv_p);
2133 }
2134 else
2135 {
2136 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2137 &def, &dt);
2138 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2139 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2140 TYPE_SIZE_UNIT (vectype));
2141 }
2142
2143 align = TYPE_ALIGN_UNIT (vectype);
2144 if (aligned_access_p (dr))
2145 misalign = 0;
2146 else if (DR_MISALIGNMENT (dr) == -1)
2147 {
2148 align = TYPE_ALIGN_UNIT (elem_type);
2149 misalign = 0;
2150 }
2151 else
2152 misalign = DR_MISALIGNMENT (dr);
2153 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2154 misalign);
2155 new_stmt
2156 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2157 gimple_call_arg (stmt, 1),
2158 vec_mask);
2159 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2160 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2161 if (i == 0)
2162 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2163 else
2164 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2165 prev_stmt_info = vinfo_for_stmt (new_stmt);
2166 }
2167 }
2168
2169 if (!is_store)
2170 {
2171 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2172 from the IL. */
2173 tree lhs = gimple_call_lhs (stmt);
2174 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2175 set_vinfo_for_stmt (new_stmt, stmt_info);
2176 set_vinfo_for_stmt (stmt, NULL);
2177 STMT_VINFO_STMT (stmt_info) = new_stmt;
2178 gsi_replace (gsi, new_stmt, true);
2179 }
2180
2181 return true;
2182 }
2183
2184
2185 /* Function vectorizable_call.
2186
2187 Check if GS performs a function call that can be vectorized.
2188 If VEC_STMT is also passed, vectorize GS: create a vectorized
2189 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2190 Return FALSE if GS is not a vectorizable call, TRUE otherwise. */
2191
2192 static bool
2193 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2194 slp_tree slp_node)
2195 {
2196 gcall *stmt;
2197 tree vec_dest;
2198 tree scalar_dest;
2199 tree op, type;
2200 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2201 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2202 tree vectype_out, vectype_in;
2203 int nunits_in;
2204 int nunits_out;
2205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2206 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2207 tree fndecl, new_temp, def, rhs_type;
2208 gimple def_stmt;
2209 enum vect_def_type dt[3]
2210 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2211 gimple new_stmt = NULL;
2212 int ncopies, j;
2213 vec<tree> vargs = vNULL;
2214 enum { NARROW, NONE, WIDEN } modifier;
2215 size_t i, nargs;
2216 tree lhs;
2217
2218 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2219 return false;
2220
2221 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2222 return false;
2223
2224 /* Is GS a vectorizable call? */
2225 stmt = dyn_cast <gcall *> (gs);
2226 if (!stmt)
2227 return false;
2228
2229 if (gimple_call_internal_p (stmt)
2230 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2231 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2232 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2233 slp_node);
2234
2235 if (gimple_call_lhs (stmt) == NULL_TREE
2236 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2237 return false;
2238
2239 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2240
2241 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2242
2243 /* Process function arguments. */
2244 rhs_type = NULL_TREE;
2245 vectype_in = NULL_TREE;
2246 nargs = gimple_call_num_args (stmt);
2247
2248 /* Bail out if the function has more than three arguments; we do not have
2249 interesting builtin functions to vectorize with more than two arguments
2250 except for fma. Calls with no arguments are not handled either. */
2251 if (nargs == 0 || nargs > 3)
2252 return false;
2253
2254 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2255 if (gimple_call_internal_p (stmt)
2256 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2257 {
2258 nargs = 0;
2259 rhs_type = unsigned_type_node;
2260 }
2261
2262 for (i = 0; i < nargs; i++)
2263 {
2264 tree opvectype;
2265
2266 op = gimple_call_arg (stmt, i);
2267
2268 /* We can only handle calls with arguments of the same type. */
2269 if (rhs_type
2270 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2271 {
2272 if (dump_enabled_p ())
2273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2274 "argument types differ.\n");
2275 return false;
2276 }
2277 if (!rhs_type)
2278 rhs_type = TREE_TYPE (op);
2279
2280 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2281 &def_stmt, &def, &dt[i], &opvectype))
2282 {
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2285 "use not simple.\n");
2286 return false;
2287 }
2288
2289 if (!vectype_in)
2290 vectype_in = opvectype;
2291 else if (opvectype
2292 && opvectype != vectype_in)
2293 {
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "argument vector types differ.\n");
2297 return false;
2298 }
2299 }
2300 /* If all arguments are external or constant defs, use a vector type with
2301 the same size as the output vector type. */
2302 if (!vectype_in)
2303 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2304 if (vec_stmt)
2305 gcc_assert (vectype_in);
2306 if (!vectype_in)
2307 {
2308 if (dump_enabled_p ())
2309 {
2310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2311 "no vectype for scalar type ");
2312 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2313 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2314 }
2315
2316 return false;
2317 }
2318
2319 /* FORNOW */
2320 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2321 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
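  /* NARROW here means the output vector holds twice as many elements as
     each input vector, so every vectorized call takes two vector defs per
     scalar argument; WIDEN would be the opposite but is rejected below
     because no current target implements it.  */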
2322 if (nunits_in == nunits_out / 2)
2323 modifier = NARROW;
2324 else if (nunits_out == nunits_in)
2325 modifier = NONE;
2326 else if (nunits_out == nunits_in / 2)
2327 modifier = WIDEN;
2328 else
2329 return false;
2330
2331 /* For now, we only vectorize functions if a target specific builtin
2332 is available. TODO -- in some cases, it might be profitable to
2333 insert the calls for pieces of the vector, in order to be able
2334 to vectorize other operations in the loop. */
2335 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2336 if (fndecl == NULL_TREE)
2337 {
2338 if (gimple_call_internal_p (stmt)
2339 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2340 && !slp_node
2341 && loop_vinfo
2342 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2343 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2344 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2345 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2346 {
2347 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2348 { 0, 1, 2, ... vf - 1 } vector. */
2349 gcc_assert (nargs == 0);
2350 }
2351 else
2352 {
2353 if (dump_enabled_p ())
2354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2355 "function is not vectorizable.\n");
2356 return false;
2357 }
2358 }
2359
2360 gcc_assert (!gimple_vuse (stmt));
2361
2362 if (slp_node || PURE_SLP_STMT (stmt_info))
2363 ncopies = 1;
2364 else if (modifier == NARROW)
2365 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2366 else
2367 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2368
2369 /* Sanity check: make sure that at least one copy of the vectorized stmt
2370 needs to be generated. */
2371 gcc_assert (ncopies >= 1);
2372
2373 if (!vec_stmt) /* transformation not required. */
2374 {
2375 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2378 "\n");
2379 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2380 return true;
2381 }
2382
2383 /** Transform. **/
2384
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2387
2388 /* Handle def. */
2389 scalar_dest = gimple_call_lhs (stmt);
2390 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2391
2392 prev_stmt_info = NULL;
2393 switch (modifier)
2394 {
2395 case NONE:
2396 for (j = 0; j < ncopies; ++j)
2397 {
2398 /* Build argument list for the vectorized call. */
2399 if (j == 0)
2400 vargs.create (nargs);
2401 else
2402 vargs.truncate (0);
2403
2404 if (slp_node)
2405 {
2406 auto_vec<vec<tree> > vec_defs (nargs);
2407 vec<tree> vec_oprnds0;
2408
2409 for (i = 0; i < nargs; i++)
2410 vargs.quick_push (gimple_call_arg (stmt, i));
2411 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2412 vec_oprnds0 = vec_defs[0];
2413
2414 /* Arguments are ready. Create the new vector stmt. */
2415 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2416 {
2417 size_t k;
2418 for (k = 0; k < nargs; k++)
2419 {
2420 vec<tree> vec_oprndsk = vec_defs[k];
2421 vargs[k] = vec_oprndsk[i];
2422 }
2423 new_stmt = gimple_build_call_vec (fndecl, vargs);
2424 new_temp = make_ssa_name (vec_dest, new_stmt);
2425 gimple_call_set_lhs (new_stmt, new_temp);
2426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2427 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2428 }
2429
2430 for (i = 0; i < nargs; i++)
2431 {
2432 vec<tree> vec_oprndsi = vec_defs[i];
2433 vec_oprndsi.release ();
2434 }
2435 continue;
2436 }
2437
2438 for (i = 0; i < nargs; i++)
2439 {
2440 op = gimple_call_arg (stmt, i);
2441 if (j == 0)
2442 vec_oprnd0
2443 = vect_get_vec_def_for_operand (op, stmt, NULL);
2444 else
2445 {
2446 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2447 vec_oprnd0
2448 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2449 }
2450
2451 vargs.quick_push (vec_oprnd0);
2452 }
2453
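	  /* For IFN_GOMP_SIMD_LANE materialize the lane-number vector
	     { j * nunits_out, ..., j * nunits_out + nunits_out - 1 }
	     for this copy instead of emitting a call.  */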
2454 if (gimple_call_internal_p (stmt)
2455 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2456 {
2457 tree *v = XALLOCAVEC (tree, nunits_out);
2458 int k;
2459 for (k = 0; k < nunits_out; ++k)
2460 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2461 tree cst = build_vector (vectype_out, v);
2462 tree new_var
2463 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2464 gimple init_stmt = gimple_build_assign (new_var, cst);
2465 new_temp = make_ssa_name (new_var, init_stmt);
2466 gimple_assign_set_lhs (init_stmt, new_temp);
2467 vect_init_vector_1 (stmt, init_stmt, NULL);
2468 new_temp = make_ssa_name (vec_dest);
2469 new_stmt = gimple_build_assign (new_temp,
2470 gimple_assign_lhs (init_stmt));
2471 }
2472 else
2473 {
2474 new_stmt = gimple_build_call_vec (fndecl, vargs);
2475 new_temp = make_ssa_name (vec_dest, new_stmt);
2476 gimple_call_set_lhs (new_stmt, new_temp);
2477 }
2478 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2479
2480 if (j == 0)
2481 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2482 else
2483 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2484
2485 prev_stmt_info = vinfo_for_stmt (new_stmt);
2486 }
2487
2488 break;
2489
2490 case NARROW:
2491 for (j = 0; j < ncopies; ++j)
2492 {
2493 /* Build argument list for the vectorized call. */
2494 if (j == 0)
2495 vargs.create (nargs * 2);
2496 else
2497 vargs.truncate (0);
2498
2499 if (slp_node)
2500 {
2501 auto_vec<vec<tree> > vec_defs (nargs);
2502 vec<tree> vec_oprnds0;
2503
2504 for (i = 0; i < nargs; i++)
2505 vargs.quick_push (gimple_call_arg (stmt, i));
2506 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2507 vec_oprnds0 = vec_defs[0];
2508
2509 /* Arguments are ready. Create the new vector stmt. */
2510 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2511 {
2512 size_t k;
2513 vargs.truncate (0);
2514 for (k = 0; k < nargs; k++)
2515 {
2516 vec<tree> vec_oprndsk = vec_defs[k];
2517 vargs.quick_push (vec_oprndsk[i]);
2518 vargs.quick_push (vec_oprndsk[i + 1]);
2519 }
2520 new_stmt = gimple_build_call_vec (fndecl, vargs);
2521 new_temp = make_ssa_name (vec_dest, new_stmt);
2522 gimple_call_set_lhs (new_stmt, new_temp);
2523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2524 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2525 }
2526
2527 for (i = 0; i < nargs; i++)
2528 {
2529 vec<tree> vec_oprndsi = vec_defs[i];
2530 vec_oprndsi.release ();
2531 }
2532 continue;
2533 }
2534
2535 for (i = 0; i < nargs; i++)
2536 {
2537 op = gimple_call_arg (stmt, i);
2538 if (j == 0)
2539 {
2540 vec_oprnd0
2541 = vect_get_vec_def_for_operand (op, stmt, NULL);
2542 vec_oprnd1
2543 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2544 }
2545 else
2546 {
2547 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2548 vec_oprnd0
2549 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2550 vec_oprnd1
2551 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2552 }
2553
2554 vargs.quick_push (vec_oprnd0);
2555 vargs.quick_push (vec_oprnd1);
2556 }
2557
2558 new_stmt = gimple_build_call_vec (fndecl, vargs);
2559 new_temp = make_ssa_name (vec_dest, new_stmt);
2560 gimple_call_set_lhs (new_stmt, new_temp);
2561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2562
2563 if (j == 0)
2564 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2565 else
2566 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2567
2568 prev_stmt_info = vinfo_for_stmt (new_stmt);
2569 }
2570
2571 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2572
2573 break;
2574
2575 case WIDEN:
2576 /* No current target implements this case. */
2577 return false;
2578 }
2579
2580 vargs.release ();
2581
2582 /* The call in STMT might prevent it from being removed in dce.
2583 We however cannot remove it here, due to the way the ssa name
2584 it defines is mapped to the new definition. So just replace the
2585 rhs of the statement with something harmless. */
2586
2587 if (slp_node)
2588 return true;
2589
2590 type = TREE_TYPE (scalar_dest);
2591 if (is_pattern_stmt_p (stmt_info))
2592 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2593 else
2594 lhs = gimple_call_lhs (stmt);
2595 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2596 set_vinfo_for_stmt (new_stmt, stmt_info);
2597 set_vinfo_for_stmt (stmt, NULL);
2598 STMT_VINFO_STMT (stmt_info) = new_stmt;
2599 gsi_replace (gsi, new_stmt, false);
2600
2601 return true;
2602 }
2603
2604
2605 struct simd_call_arg_info
2606 {
2607 tree vectype;
2608 tree op;
2609 enum vect_def_type dt;
2610 HOST_WIDE_INT linear_step;
2611 unsigned int align;
2612 };
2613
2614 /* Function vectorizable_simd_clone_call.
2615
2616 Check if STMT performs a function call that can be vectorized
2617 by calling a simd clone of the function.
2618 If VEC_STMT is also passed, vectorize STMT: create a vectorized
2619 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2620 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2621
2622 static bool
2623 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2624 gimple *vec_stmt, slp_tree slp_node)
2625 {
2626 tree vec_dest;
2627 tree scalar_dest;
2628 tree op, type;
2629 tree vec_oprnd0 = NULL_TREE;
2630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2631 tree vectype;
2632 unsigned int nunits;
2633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2634 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2635 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2636 tree fndecl, new_temp, def;
2637 gimple def_stmt;
2638 gimple new_stmt = NULL;
2639 int ncopies, j;
2640 vec<simd_call_arg_info> arginfo = vNULL;
2641 vec<tree> vargs = vNULL;
2642 size_t i, nargs;
2643 tree lhs, rtype, ratype;
2644 vec<constructor_elt, va_gc> *ret_ctor_elts;
2645
2646 /* Is STMT a vectorizable call? */
2647 if (!is_gimple_call (stmt))
2648 return false;
2649
2650 fndecl = gimple_call_fndecl (stmt);
2651 if (fndecl == NULL_TREE)
2652 return false;
2653
2654 struct cgraph_node *node = cgraph_node::get (fndecl);
2655 if (node == NULL || node->simd_clones == NULL)
2656 return false;
2657
2658 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2659 return false;
2660
2661 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2662 return false;
2663
2664 if (gimple_call_lhs (stmt)
2665 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2666 return false;
2667
2668 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2669
2670 vectype = STMT_VINFO_VECTYPE (stmt_info);
2671
2672 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2673 return false;
2674
2675 /* FORNOW */
2676 if (slp_node || PURE_SLP_STMT (stmt_info))
2677 return false;
2678
2679 /* Process function arguments. */
2680 nargs = gimple_call_num_args (stmt);
2681
2682 /* Bail out if the function has zero arguments. */
2683 if (nargs == 0)
2684 return false;
2685
2686 arginfo.create (nargs);
2687
2688 for (i = 0; i < nargs; i++)
2689 {
2690 simd_call_arg_info thisarginfo;
2691 affine_iv iv;
2692
2693 thisarginfo.linear_step = 0;
2694 thisarginfo.align = 0;
2695 thisarginfo.op = NULL_TREE;
2696
2697 op = gimple_call_arg (stmt, i);
2698 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2699 &def_stmt, &def, &thisarginfo.dt,
2700 &thisarginfo.vectype)
2701 || thisarginfo.dt == vect_uninitialized_def)
2702 {
2703 if (dump_enabled_p ())
2704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2705 "use not simple.\n");
2706 arginfo.release ();
2707 return false;
2708 }
2709
2710 if (thisarginfo.dt == vect_constant_def
2711 || thisarginfo.dt == vect_external_def)
2712 gcc_assert (thisarginfo.vectype == NULL_TREE);
2713 else
2714 gcc_assert (thisarginfo.vectype != NULL_TREE);
2715
2716 /* For linear arguments, the analyze phase should have saved
2717 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2718 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2719 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2720 {
2721 gcc_assert (vec_stmt);
2722 thisarginfo.linear_step
2723 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2724 thisarginfo.op
2725 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2726 /* If the loop has been peeled for alignment, adjust the base accordingly. */
2727 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2728 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2729 if (n1 != n2)
2730 {
2731 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2732 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2733 tree opt = TREE_TYPE (thisarginfo.op);
2734 bias = fold_convert (TREE_TYPE (step), bias);
2735 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2736 thisarginfo.op
2737 = fold_build2 (POINTER_TYPE_P (opt)
2738 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2739 thisarginfo.op, bias);
2740 }
2741 }
2742 else if (!vec_stmt
2743 && thisarginfo.dt != vect_constant_def
2744 && thisarginfo.dt != vect_external_def
2745 && loop_vinfo
2746 && TREE_CODE (op) == SSA_NAME
2747 && simple_iv (loop, loop_containing_stmt (stmt), op,
2748 &iv, false)
2749 && tree_fits_shwi_p (iv.step))
2750 {
2751 thisarginfo.linear_step = tree_to_shwi (iv.step);
2752 thisarginfo.op = iv.base;
2753 }
2754 else if ((thisarginfo.dt == vect_constant_def
2755 || thisarginfo.dt == vect_external_def)
2756 && POINTER_TYPE_P (TREE_TYPE (op)))
2757 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2758
2759 arginfo.quick_push (thisarginfo);
2760 }
2761
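  /* Choose the best simd clone for this call: clones whose simdlen exceeds
     the vectorization factor, inbranch clones (FORNOW) and clones whose
     arguments cannot be matched are skipped; the remaining candidates are
     ranked by a badness score (shorter simdlen, target preferences,
     argument kinds and alignment) and the lowest score wins.  */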
2762 unsigned int badness = 0;
2763 struct cgraph_node *bestn = NULL;
2764 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2765 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2766 else
2767 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2768 n = n->simdclone->next_clone)
2769 {
2770 unsigned int this_badness = 0;
2771 if (n->simdclone->simdlen
2772 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2773 || n->simdclone->nargs != nargs)
2774 continue;
2775 if (n->simdclone->simdlen
2776 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2777 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2778 - exact_log2 (n->simdclone->simdlen)) * 1024;
2779 if (n->simdclone->inbranch)
2780 this_badness += 2048;
2781 int target_badness = targetm.simd_clone.usable (n);
2782 if (target_badness < 0)
2783 continue;
2784 this_badness += target_badness * 512;
2785 /* FORNOW: Have to add code to add the mask argument. */
2786 if (n->simdclone->inbranch)
2787 continue;
2788 for (i = 0; i < nargs; i++)
2789 {
2790 switch (n->simdclone->args[i].arg_type)
2791 {
2792 case SIMD_CLONE_ARG_TYPE_VECTOR:
2793 if (!useless_type_conversion_p
2794 (n->simdclone->args[i].orig_type,
2795 TREE_TYPE (gimple_call_arg (stmt, i))))
2796 i = -1;
2797 else if (arginfo[i].dt == vect_constant_def
2798 || arginfo[i].dt == vect_external_def
2799 || arginfo[i].linear_step)
2800 this_badness += 64;
2801 break;
2802 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2803 if (arginfo[i].dt != vect_constant_def
2804 && arginfo[i].dt != vect_external_def)
2805 i = -1;
2806 break;
2807 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2808 if (arginfo[i].dt == vect_constant_def
2809 || arginfo[i].dt == vect_external_def
2810 || (arginfo[i].linear_step
2811 != n->simdclone->args[i].linear_step))
2812 i = -1;
2813 break;
2814 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2815 /* FORNOW */
2816 i = -1;
2817 break;
2818 case SIMD_CLONE_ARG_TYPE_MASK:
2819 gcc_unreachable ();
2820 }
2821 if (i == (size_t) -1)
2822 break;
2823 if (n->simdclone->args[i].alignment > arginfo[i].align)
2824 {
2825 i = -1;
2826 break;
2827 }
2828 if (arginfo[i].align)
2829 this_badness += (exact_log2 (arginfo[i].align)
2830 - exact_log2 (n->simdclone->args[i].alignment));
2831 }
2832 if (i == (size_t) -1)
2833 continue;
2834 if (bestn == NULL || this_badness < badness)
2835 {
2836 bestn = n;
2837 badness = this_badness;
2838 }
2839 }
2840
2841 if (bestn == NULL)
2842 {
2843 arginfo.release ();
2844 return false;
2845 }
2846
2847 for (i = 0; i < nargs; i++)
2848 if ((arginfo[i].dt == vect_constant_def
2849 || arginfo[i].dt == vect_external_def)
2850 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2851 {
2852 arginfo[i].vectype
2853 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2854 i)));
2855 if (arginfo[i].vectype == NULL
2856 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2857 > bestn->simdclone->simdlen))
2858 {
2859 arginfo.release ();
2860 return false;
2861 }
2862 }
2863
2864 fndecl = bestn->decl;
2865 nunits = bestn->simdclone->simdlen;
2866 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2867
2868 /* If the function isn't const, only allow it in simd loops where the
2869 user has asserted that at least nunits consecutive iterations can be
2870 performed using SIMD instructions. */
2871 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2872 && gimple_vuse (stmt))
2873 {
2874 arginfo.release ();
2875 return false;
2876 }
2877
2878 /* Sanity check: make sure that at least one copy of the vectorized stmt
2879 needs to be generated. */
2880 gcc_assert (ncopies >= 1);
2881
2882 if (!vec_stmt) /* transformation not required. */
2883 {
2884 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2885 for (i = 0; i < nargs; i++)
2886 if (bestn->simdclone->args[i].arg_type
2887 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2888 {
2889 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2890 + 1);
2891 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2892 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2893 ? size_type_node : TREE_TYPE (arginfo[i].op);
2894 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2896 }
2897 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2898 if (dump_enabled_p ())
2899 dump_printf_loc (MSG_NOTE, vect_location,
2900 "=== vectorizable_simd_clone_call ===\n");
2901 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2902 arginfo.release ();
2903 return true;
2904 }
2905
2906 /** Transform. **/
2907
2908 if (dump_enabled_p ())
2909 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2910
2911 /* Handle def. */
2912 scalar_dest = gimple_call_lhs (stmt);
2913 vec_dest = NULL_TREE;
2914 rtype = NULL_TREE;
2915 ratype = NULL_TREE;
2916 if (scalar_dest)
2917 {
2918 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2919 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2920 if (TREE_CODE (rtype) == ARRAY_TYPE)
2921 {
2922 ratype = rtype;
2923 rtype = TREE_TYPE (ratype);
2924 }
2925 }
2926
2927 prev_stmt_info = NULL;
2928 for (j = 0; j < ncopies; ++j)
2929 {
2930 /* Build argument list for the vectorized call. */
2931 if (j == 0)
2932 vargs.create (nargs);
2933 else
2934 vargs.truncate (0);
2935
2936 for (i = 0; i < nargs; i++)
2937 {
2938 unsigned int k, l, m, o;
2939 tree atype;
2940 op = gimple_call_arg (stmt, i);
2941 switch (bestn->simdclone->args[i].arg_type)
2942 {
2943 case SIMD_CLONE_ARG_TYPE_VECTOR:
2944 atype = bestn->simdclone->args[i].vector_type;
2945 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
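	      /* The clone may take this argument in a narrower or wider
		 vector type than the operand's vector type: extract
		 sub-vectors with BIT_FIELD_REF in the first case, or glue
		 several vector defs together with a CONSTRUCTOR in the
		 second.  */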
2946 for (m = j * o; m < (j + 1) * o; m++)
2947 {
2948 if (TYPE_VECTOR_SUBPARTS (atype)
2949 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2950 {
2951 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2952 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2953 / TYPE_VECTOR_SUBPARTS (atype));
2954 gcc_assert ((k & (k - 1)) == 0);
2955 if (m == 0)
2956 vec_oprnd0
2957 = vect_get_vec_def_for_operand (op, stmt, NULL);
2958 else
2959 {
2960 vec_oprnd0 = arginfo[i].op;
2961 if ((m & (k - 1)) == 0)
2962 vec_oprnd0
2963 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2964 vec_oprnd0);
2965 }
2966 arginfo[i].op = vec_oprnd0;
2967 vec_oprnd0
2968 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2969 size_int (prec),
2970 bitsize_int ((m & (k - 1)) * prec));
2971 new_stmt
2972 = gimple_build_assign (make_ssa_name (atype),
2973 vec_oprnd0);
2974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2975 vargs.safe_push (gimple_assign_lhs (new_stmt));
2976 }
2977 else
2978 {
2979 k = (TYPE_VECTOR_SUBPARTS (atype)
2980 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2981 gcc_assert ((k & (k - 1)) == 0);
2982 vec<constructor_elt, va_gc> *ctor_elts;
2983 if (k != 1)
2984 vec_alloc (ctor_elts, k);
2985 else
2986 ctor_elts = NULL;
2987 for (l = 0; l < k; l++)
2988 {
2989 if (m == 0 && l == 0)
2990 vec_oprnd0
2991 = vect_get_vec_def_for_operand (op, stmt, NULL);
2992 else
2993 vec_oprnd0
2994 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2995 arginfo[i].op);
2996 arginfo[i].op = vec_oprnd0;
2997 if (k == 1)
2998 break;
2999 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3000 vec_oprnd0);
3001 }
3002 if (k == 1)
3003 vargs.safe_push (vec_oprnd0);
3004 else
3005 {
3006 vec_oprnd0 = build_constructor (atype, ctor_elts);
3007 new_stmt
3008 = gimple_build_assign (make_ssa_name (atype),
3009 vec_oprnd0);
3010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3011 vargs.safe_push (gimple_assign_lhs (new_stmt));
3012 }
3013 }
3014 }
3015 break;
3016 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3017 vargs.safe_push (op);
3018 break;
3019 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
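	      /* A linear argument advances by a constant step per scalar
		 iteration: on the first copy create a PHI for it that is
		 incremented by LINEAR_STEP * NCOPIES * NUNITS per loop
		 iteration; later copies just add LINEAR_STEP * J * NUNITS
		 to the PHI result.  */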
3020 if (j == 0)
3021 {
3022 gimple_seq stmts;
3023 arginfo[i].op
3024 = force_gimple_operand (arginfo[i].op, &stmts, true,
3025 NULL_TREE);
3026 if (stmts != NULL)
3027 {
3028 basic_block new_bb;
3029 edge pe = loop_preheader_edge (loop);
3030 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3031 gcc_assert (!new_bb);
3032 }
3033 tree phi_res = copy_ssa_name (op);
3034 gphi *new_phi = create_phi_node (phi_res, loop->header);
3035 set_vinfo_for_stmt (new_phi,
3036 new_stmt_vec_info (new_phi, loop_vinfo,
3037 NULL));
3038 add_phi_arg (new_phi, arginfo[i].op,
3039 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3040 enum tree_code code
3041 = POINTER_TYPE_P (TREE_TYPE (op))
3042 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3043 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3044 ? sizetype : TREE_TYPE (op);
3045 widest_int cst
3046 = wi::mul (bestn->simdclone->args[i].linear_step,
3047 ncopies * nunits);
3048 tree tcst = wide_int_to_tree (type, cst);
3049 tree phi_arg = copy_ssa_name (op);
3050 new_stmt
3051 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3052 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3053 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3054 set_vinfo_for_stmt (new_stmt,
3055 new_stmt_vec_info (new_stmt, loop_vinfo,
3056 NULL));
3057 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3058 UNKNOWN_LOCATION);
3059 arginfo[i].op = phi_res;
3060 vargs.safe_push (phi_res);
3061 }
3062 else
3063 {
3064 enum tree_code code
3065 = POINTER_TYPE_P (TREE_TYPE (op))
3066 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3067 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3068 ? sizetype : TREE_TYPE (op);
3069 widest_int cst
3070 = wi::mul (bestn->simdclone->args[i].linear_step,
3071 j * nunits);
3072 tree tcst = wide_int_to_tree (type, cst);
3073 new_temp = make_ssa_name (TREE_TYPE (op));
3074 new_stmt = gimple_build_assign (new_temp, code,
3075 arginfo[i].op, tcst);
3076 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3077 vargs.safe_push (new_temp);
3078 }
3079 break;
3080 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3081 default:
3082 gcc_unreachable ();
3083 }
3084 }
3085
3086 new_stmt = gimple_build_call_vec (fndecl, vargs);
3087 if (vec_dest)
3088 {
3089 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3090 if (ratype)
3091 new_temp = create_tmp_var (ratype);
3092 else if (TYPE_VECTOR_SUBPARTS (vectype)
3093 == TYPE_VECTOR_SUBPARTS (rtype))
3094 new_temp = make_ssa_name (vec_dest, new_stmt);
3095 else
3096 new_temp = make_ssa_name (rtype, new_stmt);
3097 gimple_call_set_lhs (new_stmt, new_temp);
3098 }
3099 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3100
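	  /* Distribute the clone's return value: split it into several
	     VECTYPE stmts if it is wider than VECTYPE (via BIT_FIELD_REF or
	     loads from the returned array), or collect the results of
	     several calls into a CONSTRUCTOR if it is narrower.  */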
3101 if (vec_dest)
3102 {
3103 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3104 {
3105 unsigned int k, l;
3106 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3107 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3108 gcc_assert ((k & (k - 1)) == 0);
3109 for (l = 0; l < k; l++)
3110 {
3111 tree t;
3112 if (ratype)
3113 {
3114 t = build_fold_addr_expr (new_temp);
3115 t = build2 (MEM_REF, vectype, t,
3116 build_int_cst (TREE_TYPE (t),
3117 l * prec / BITS_PER_UNIT));
3118 }
3119 else
3120 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3121 size_int (prec), bitsize_int (l * prec));
3122 new_stmt
3123 = gimple_build_assign (make_ssa_name (vectype), t);
3124 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3125 if (j == 0 && l == 0)
3126 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3127 else
3128 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3129
3130 prev_stmt_info = vinfo_for_stmt (new_stmt);
3131 }
3132
3133 if (ratype)
3134 {
3135 tree clobber = build_constructor (ratype, NULL);
3136 TREE_THIS_VOLATILE (clobber) = 1;
3137 new_stmt = gimple_build_assign (new_temp, clobber);
3138 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3139 }
3140 continue;
3141 }
3142 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3143 {
3144 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3145 / TYPE_VECTOR_SUBPARTS (rtype));
3146 gcc_assert ((k & (k - 1)) == 0);
3147 if ((j & (k - 1)) == 0)
3148 vec_alloc (ret_ctor_elts, k);
3149 if (ratype)
3150 {
3151 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3152 for (m = 0; m < o; m++)
3153 {
3154 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3155 size_int (m), NULL_TREE, NULL_TREE);
3156 new_stmt
3157 = gimple_build_assign (make_ssa_name (rtype), tem);
3158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3159 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3160 gimple_assign_lhs (new_stmt));
3161 }
3162 tree clobber = build_constructor (ratype, NULL);
3163 TREE_THIS_VOLATILE (clobber) = 1;
3164 new_stmt = gimple_build_assign (new_temp, clobber);
3165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3166 }
3167 else
3168 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3169 if ((j & (k - 1)) != k - 1)
3170 continue;
3171 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3172 new_stmt
3173 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3175
3176 if ((unsigned) j == k - 1)
3177 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3178 else
3179 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3180
3181 prev_stmt_info = vinfo_for_stmt (new_stmt);
3182 continue;
3183 }
3184 else if (ratype)
3185 {
3186 tree t = build_fold_addr_expr (new_temp);
3187 t = build2 (MEM_REF, vectype, t,
3188 build_int_cst (TREE_TYPE (t), 0));
3189 new_stmt
3190 = gimple_build_assign (make_ssa_name (vec_dest), t);
3191 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3192 tree clobber = build_constructor (ratype, NULL);
3193 TREE_THIS_VOLATILE (clobber) = 1;
3194 vect_finish_stmt_generation (stmt,
3195 gimple_build_assign (new_temp,
3196 clobber), gsi);
3197 }
3198 }
3199
3200 if (j == 0)
3201 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3202 else
3203 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3204
3205 prev_stmt_info = vinfo_for_stmt (new_stmt);
3206 }
3207
3208 vargs.release ();
3209
3210 /* The call in STMT might prevent it from being removed in dce.
3211 We however cannot remove it here, due to the way the ssa name
3212 it defines is mapped to the new definition. So just replace the
3213 rhs of the statement with something harmless. */
3214
3215 if (slp_node)
3216 return true;
3217
3218 if (scalar_dest)
3219 {
3220 type = TREE_TYPE (scalar_dest);
3221 if (is_pattern_stmt_p (stmt_info))
3222 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3223 else
3224 lhs = gimple_call_lhs (stmt);
3225 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3226 }
3227 else
3228 new_stmt = gimple_build_nop ();
3229 set_vinfo_for_stmt (new_stmt, stmt_info);
3230 set_vinfo_for_stmt (stmt, NULL);
3231 STMT_VINFO_STMT (stmt_info) = new_stmt;
3232 gsi_replace (gsi, new_stmt, true);
3233 unlink_stmt_vdef (stmt);
3234
3235 return true;
3236 }
3237
3238
3239 /* Function vect_gen_widened_results_half
3240
3241 Create a vector stmt whose code, operand type and result variable
3242 are CODE, OP_TYPE and VEC_DEST respectively, and whose arguments are
3243 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3244 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3245 needs to be created (DECL is a function-decl of a target-builtin).
3246 STMT is the original scalar stmt that we are vectorizing. */
3247
3248 static gimple
3249 vect_gen_widened_results_half (enum tree_code code,
3250 tree decl,
3251 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3252 tree vec_dest, gimple_stmt_iterator *gsi,
3253 gimple stmt)
3254 {
3255 gimple new_stmt;
3256 tree new_temp;
3257
3258 /* Generate half of the widened result: */
3259 if (code == CALL_EXPR)
3260 {
3261 /* Target specific support */
3262 if (op_type == binary_op)
3263 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3264 else
3265 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3266 new_temp = make_ssa_name (vec_dest, new_stmt);
3267 gimple_call_set_lhs (new_stmt, new_temp);
3268 }
3269 else
3270 {
3271 /* Generic support */
3272 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3273 if (op_type != binary_op)
3274 vec_oprnd1 = NULL;
3275 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3276 new_temp = make_ssa_name (vec_dest, new_stmt);
3277 gimple_assign_set_lhs (new_stmt, new_temp);
3278 }
3279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3280
3281 return new_stmt;
3282 }
3283
3284
3285 /* Get vectorized definitions for loop-based vectorization. For the first
3286 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3287 the scalar operand), and for the rest we get a copy with
3288 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3289 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3290 The vectors are collected into VEC_OPRNDS. */
3291
3292 static void
3293 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3294 vec<tree> *vec_oprnds, int multi_step_cvt)
3295 {
3296 tree vec_oprnd;
3297
3298 /* Get the first vector operand. All the vector operands except the
3299 very first one (which is defined from the scalar operand) are
3300 stmt copies. */
3301 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3302 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3303 else
3304 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3305
3306 vec_oprnds->quick_push (vec_oprnd);
3307
3308 /* Get second vector operand. */
3309 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3310 vec_oprnds->quick_push (vec_oprnd);
3311
3312 *oprnd = vec_oprnd;
3313
3314 /* For conversion in multiple steps, continue to get operands
3315 recursively. */
3316 if (multi_step_cvt)
3317 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3318 }
3319
3320
3321 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3322 For multi-step conversions store the resulting vectors and call the function
3323 recursively. */
3324
3325 static void
3326 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3327 int multi_step_cvt, gimple stmt,
3328 vec<tree> vec_dsts,
3329 gimple_stmt_iterator *gsi,
3330 slp_tree slp_node, enum tree_code code,
3331 stmt_vec_info *prev_stmt_info)
3332 {
3333 unsigned int i;
3334 tree vop0, vop1, new_tmp, vec_dest;
3335 gimple new_stmt;
3336 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3337
3338 vec_dest = vec_dsts.pop ();
3339
3340 for (i = 0; i < vec_oprnds->length (); i += 2)
3341 {
3342 /* Create demotion operation. */
3343 vop0 = (*vec_oprnds)[i];
3344 vop1 = (*vec_oprnds)[i + 1];
3345 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3346 new_tmp = make_ssa_name (vec_dest, new_stmt);
3347 gimple_assign_set_lhs (new_stmt, new_tmp);
3348 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3349
3350 if (multi_step_cvt)
3351 /* Store the resulting vector for next recursive call. */
3352 (*vec_oprnds)[i/2] = new_tmp;
3353 else
3354 {
3355 /* This is the last step of the conversion sequence. Store the
3356 vectors in SLP_NODE or in vector info of the scalar statement
3357 (or in STMT_VINFO_RELATED_STMT chain). */
3358 if (slp_node)
3359 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3360 else
3361 {
3362 if (!*prev_stmt_info)
3363 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3364 else
3365 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3366
3367 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3368 }
3369 }
3370 }
3371
3372 /* For multi-step demotion operations we first generate demotion operations
3373 from the source type to the intermediate types, and then combine the
3374 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3375 type. */
3376 if (multi_step_cvt)
3377 {
3378 /* At each level of recursion we have half of the operands we had at the
3379 previous level. */
3380 vec_oprnds->truncate ((i+1)/2);
3381 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3382 stmt, vec_dsts, gsi, slp_node,
3383 VEC_PACK_TRUNC_EXPR,
3384 prev_stmt_info);
3385 }
3386
3387 vec_dsts.quick_push (vec_dest);
3388 }
3389
3390
3391 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3392 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3393 the resulting vectors and call the function recursively. */
3394
3395 static void
3396 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3397 vec<tree> *vec_oprnds1,
3398 gimple stmt, tree vec_dest,
3399 gimple_stmt_iterator *gsi,
3400 enum tree_code code1,
3401 enum tree_code code2, tree decl1,
3402 tree decl2, int op_type)
3403 {
3404 int i;
3405 tree vop0, vop1, new_tmp1, new_tmp2;
3406 gimple new_stmt1, new_stmt2;
3407 vec<tree> vec_tmp = vNULL;
3408
3409 vec_tmp.create (vec_oprnds0->length () * 2);
3410 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3411 {
3412 if (op_type == binary_op)
3413 vop1 = (*vec_oprnds1)[i];
3414 else
3415 vop1 = NULL_TREE;
3416
3417 /* Generate the two halves of promotion operation. */
3418 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3419 op_type, vec_dest, gsi, stmt);
3420 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3421 op_type, vec_dest, gsi, stmt);
3422 if (is_gimple_call (new_stmt1))
3423 {
3424 new_tmp1 = gimple_call_lhs (new_stmt1);
3425 new_tmp2 = gimple_call_lhs (new_stmt2);
3426 }
3427 else
3428 {
3429 new_tmp1 = gimple_assign_lhs (new_stmt1);
3430 new_tmp2 = gimple_assign_lhs (new_stmt2);
3431 }
3432
3433 /* Store the results for the next step. */
3434 vec_tmp.quick_push (new_tmp1);
3435 vec_tmp.quick_push (new_tmp2);
3436 }
3437
3438 vec_oprnds0->release ();
3439 *vec_oprnds0 = vec_tmp;
3440 }
3441
3442
3443 /* Check if STMT performs a conversion operation, that can be vectorized.
3444 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3445 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3446 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3447
3448 static bool
3449 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3450 gimple *vec_stmt, slp_tree slp_node)
3451 {
3452 tree vec_dest;
3453 tree scalar_dest;
3454 tree op0, op1 = NULL_TREE;
3455 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3456 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3457 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3458 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3459 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3460 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3461 tree new_temp;
3462 tree def;
3463 gimple def_stmt;
3464 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3465 gimple new_stmt = NULL;
3466 stmt_vec_info prev_stmt_info;
3467 int nunits_in;
3468 int nunits_out;
3469 tree vectype_out, vectype_in;
3470 int ncopies, i, j;
3471 tree lhs_type, rhs_type;
3472 enum { NARROW, NONE, WIDEN } modifier;
3473 vec<tree> vec_oprnds0 = vNULL;
3474 vec<tree> vec_oprnds1 = vNULL;
3475 tree vop0;
3476 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3477 int multi_step_cvt = 0;
3478 vec<tree> vec_dsts = vNULL;
3479 vec<tree> interm_types = vNULL;
3480 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3481 int op_type;
3482 machine_mode rhs_mode;
3483 unsigned short fltsz;
3484
3485 /* Is STMT a vectorizable conversion? */
3486
3487 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3488 return false;
3489
3490 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3491 return false;
3492
3493 if (!is_gimple_assign (stmt))
3494 return false;
3495
3496 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3497 return false;
3498
3499 code = gimple_assign_rhs_code (stmt);
3500 if (!CONVERT_EXPR_CODE_P (code)
3501 && code != FIX_TRUNC_EXPR
3502 && code != FLOAT_EXPR
3503 && code != WIDEN_MULT_EXPR
3504 && code != WIDEN_LSHIFT_EXPR)
3505 return false;
3506
3507 op_type = TREE_CODE_LENGTH (code);
3508
3509 /* Check types of lhs and rhs. */
3510 scalar_dest = gimple_assign_lhs (stmt);
3511 lhs_type = TREE_TYPE (scalar_dest);
3512 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3513
3514 op0 = gimple_assign_rhs1 (stmt);
3515 rhs_type = TREE_TYPE (op0);
3516
3517 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3518 && !((INTEGRAL_TYPE_P (lhs_type)
3519 && INTEGRAL_TYPE_P (rhs_type))
3520 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3521 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3522 return false;
3523
3524 if ((INTEGRAL_TYPE_P (lhs_type)
3525 && (TYPE_PRECISION (lhs_type)
3526 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3527 || (INTEGRAL_TYPE_P (rhs_type)
3528 && (TYPE_PRECISION (rhs_type)
3529 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3530 {
3531 if (dump_enabled_p ())
3532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3533 "type conversion to/from bit-precision unsupported."
3534 "\n");
3535 return false;
3536 }
3537
3538 /* Check the operands of the operation. */
3539 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3540 &def_stmt, &def, &dt[0], &vectype_in))
3541 {
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "use not simple.\n");
3545 return false;
3546 }
3547 if (op_type == binary_op)
3548 {
3549 bool ok;
3550
3551 op1 = gimple_assign_rhs2 (stmt);
3552 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3553 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3554 OP1. */
3555 if (CONSTANT_CLASS_P (op0))
3556 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3557 &def_stmt, &def, &dt[1], &vectype_in);
3558 else
3559 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3560 &def, &dt[1]);
3561
3562 if (!ok)
3563 {
3564 if (dump_enabled_p ())
3565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3566 "use not simple.\n");
3567 return false;
3568 }
3569 }
3570
3571 /* If op0 is an external or constant def, use a vector type of
3572 the same size as the output vector type. */
3573 if (!vectype_in)
3574 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3575 if (vec_stmt)
3576 gcc_assert (vectype_in);
3577 if (!vectype_in)
3578 {
3579 if (dump_enabled_p ())
3580 {
3581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3582 "no vectype for scalar type ");
3583 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3584 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3585 }
3586
3587 return false;
3588 }
3589
3590 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3591 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3592 if (nunits_in < nunits_out)
3593 modifier = NARROW;
3594 else if (nunits_out == nunits_in)
3595 modifier = NONE;
3596 else
3597 modifier = WIDEN;
3598
3599 /* Multiple types in SLP are handled by creating the appropriate number of
3600 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3601 case of SLP. */
3602 if (slp_node || PURE_SLP_STMT (stmt_info))
3603 ncopies = 1;
3604 else if (modifier == NARROW)
3605 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3606 else
3607 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3608
3609 /* Sanity check: make sure that at least one copy of the vectorized stmt
3610 needs to be generated. */
3611 gcc_assert (ncopies >= 1);
3612
3613 /* Supportable by target? */
3614 switch (modifier)
3615 {
3616 case NONE:
3617 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3618 return false;
3619 if (supportable_convert_operation (code, vectype_out, vectype_in,
3620 &decl1, &code1))
3621 break;
3622 /* FALLTHRU */
3623 unsupported:
3624 if (dump_enabled_p ())
3625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3626 "conversion not supported by target.\n");
3627 return false;
3628
3629 case WIDEN:
3630 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3631 &code1, &code2, &multi_step_cvt,
3632 &interm_types))
3633 {
3634 /* A binary widening operation can only be supported directly by the
3635 architecture. */
3636 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3637 break;
3638 }
3639
3640 if (code != FLOAT_EXPR
3641 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3642 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3643 goto unsupported;
3644
3645 rhs_mode = TYPE_MODE (rhs_type);
3646 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3647 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3648 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3649 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3650 {
3651 cvt_type
3652 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3653 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3654 if (cvt_type == NULL_TREE)
3655 goto unsupported;
3656
3657 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3658 {
3659 if (!supportable_convert_operation (code, vectype_out,
3660 cvt_type, &decl1, &codecvt1))
3661 goto unsupported;
3662 }
3663 else if (!supportable_widening_operation (code, stmt, vectype_out,
3664 cvt_type, &codecvt1,
3665 &codecvt2, &multi_step_cvt,
3666 &interm_types))
3667 continue;
3668 else
3669 gcc_assert (multi_step_cvt == 0);
3670
3671 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3672 vectype_in, &code1, &code2,
3673 &multi_step_cvt, &interm_types))
3674 break;
3675 }
3676
3677 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3678 goto unsupported;
3679
3680 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3681 codecvt2 = ERROR_MARK;
3682 else
3683 {
3684 multi_step_cvt++;
3685 interm_types.safe_push (cvt_type);
3686 cvt_type = NULL_TREE;
3687 }
3688 break;
3689
3690 case NARROW:
3691 gcc_assert (op_type == unary_op);
3692 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3693 &code1, &multi_step_cvt,
3694 &interm_types))
3695 break;
3696
3697 if (code != FIX_TRUNC_EXPR
3698 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3699 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3700 goto unsupported;
3701
3702 rhs_mode = TYPE_MODE (rhs_type);
3703 cvt_type
3704 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3705 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3706 if (cvt_type == NULL_TREE)
3707 goto unsupported;
3708 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3709 &decl1, &codecvt1))
3710 goto unsupported;
3711 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3712 &code1, &multi_step_cvt,
3713 &interm_types))
3714 break;
3715 goto unsupported;
3716
3717 default:
3718 gcc_unreachable ();
3719 }
3720
3721 if (!vec_stmt) /* transformation not required. */
3722 {
3723 if (dump_enabled_p ())
3724 dump_printf_loc (MSG_NOTE, vect_location,
3725 "=== vectorizable_conversion ===\n");
3726 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3727 {
3728 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3729 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3730 }
3731 else if (modifier == NARROW)
3732 {
3733 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3734 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3735 }
3736 else
3737 {
3738 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3739 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3740 }
3741 interm_types.release ();
3742 return true;
3743 }
3744
3745 /** Transform. **/
3746 if (dump_enabled_p ())
3747 dump_printf_loc (MSG_NOTE, vect_location,
3748 "transform conversion. ncopies = %d.\n", ncopies);
3749
3750 if (op_type == binary_op)
3751 {
3752 if (CONSTANT_CLASS_P (op0))
3753 op0 = fold_convert (TREE_TYPE (op1), op0);
3754 else if (CONSTANT_CLASS_P (op1))
3755 op1 = fold_convert (TREE_TYPE (op0), op1);
3756 }
3757
3758 /* In case of multi-step conversion, we first generate conversion operations
3759 to the intermediate types, and then from those types to the final one.
3760 We create vector destinations for the intermediate type (TYPES) received
3761 from supportable_*_operation, and store them in the correct order
3762 for future use in vect_create_vectorized_*_stmts (). */
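   /* As an illustration (the concrete types are only an example, not taken
      from this function): a WIDEN conversion from a vector of chars to a
      vector of ints is typically done in two steps, char -> short -> int.
      In that case INTERM_TYPES holds the short vector type, MULTI_STEP_CVT
      is 1, and the loop below creates a destination variable for the
      intermediate type in addition to the final one.  */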
3763 vec_dsts.create (multi_step_cvt + 1);
3764 vec_dest = vect_create_destination_var (scalar_dest,
3765 (cvt_type && modifier == WIDEN)
3766 ? cvt_type : vectype_out);
3767 vec_dsts.quick_push (vec_dest);
3768
3769 if (multi_step_cvt)
3770 {
3771 for (i = interm_types.length () - 1;
3772 interm_types.iterate (i, &intermediate_type); i--)
3773 {
3774 vec_dest = vect_create_destination_var (scalar_dest,
3775 intermediate_type);
3776 vec_dsts.quick_push (vec_dest);
3777 }
3778 }
3779
3780 if (cvt_type)
3781 vec_dest = vect_create_destination_var (scalar_dest,
3782 modifier == WIDEN
3783 ? vectype_out : cvt_type);
3784
3785 if (!slp_node)
3786 {
3787 if (modifier == WIDEN)
3788 {
3789 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3790 if (op_type == binary_op)
3791 vec_oprnds1.create (1);
3792 }
3793 else if (modifier == NARROW)
3794 vec_oprnds0.create (
3795 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3796 }
3797 else if (code == WIDEN_LSHIFT_EXPR)
3798 vec_oprnds1.create (slp_node->vec_stmts_size);
3799
3800 last_oprnd = op0;
3801 prev_stmt_info = NULL;
3802 switch (modifier)
3803 {
3804 case NONE:
3805 for (j = 0; j < ncopies; j++)
3806 {
3807 if (j == 0)
3808 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3809 -1);
3810 else
3811 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3812
3813 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3814 {
3815 /* Arguments are ready, create the new vector stmt. */
3816 if (code1 == CALL_EXPR)
3817 {
3818 new_stmt = gimple_build_call (decl1, 1, vop0);
3819 new_temp = make_ssa_name (vec_dest, new_stmt);
3820 gimple_call_set_lhs (new_stmt, new_temp);
3821 }
3822 else
3823 {
3824 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3825 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3826 new_temp = make_ssa_name (vec_dest, new_stmt);
3827 gimple_assign_set_lhs (new_stmt, new_temp);
3828 }
3829
3830 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3831 if (slp_node)
3832 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3833 }
3834
3835 if (j == 0)
3836 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3837 else
3838 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3839 prev_stmt_info = vinfo_for_stmt (new_stmt);
3840 }
3841 break;
3842
3843 case WIDEN:
3844 /* In case the vectorization factor (VF) is bigger than the number
3845 of elements that we can fit in a vectype (nunits), we have to
3846 generate more than one vector stmt - i.e - we need to "unroll"
3847 the vector stmt by a factor VF/nunits. */
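      /* A hedged example of the unrolling above (numbers are illustrative
         only): with VF = 32 and 16 chars per input vector, NCOPIES is
         32 / 16 = 2, and each copy of a widening conversion produces a
         low/high pair of vectors of the wider type via CODE1/CODE2.  */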
3848 for (j = 0; j < ncopies; j++)
3849 {
3850 /* Handle uses. */
3851 if (j == 0)
3852 {
3853 if (slp_node)
3854 {
3855 if (code == WIDEN_LSHIFT_EXPR)
3856 {
3857 unsigned int k;
3858
3859 vec_oprnd1 = op1;
3860 /* Store vec_oprnd1 for every vector stmt to be created
3861 for SLP_NODE. We check during the analysis that all
3862 the shift arguments are the same. */
3863 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3864 vec_oprnds1.quick_push (vec_oprnd1);
3865
3866 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3867 slp_node, -1);
3868 }
3869 else
3870 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3871 &vec_oprnds1, slp_node, -1);
3872 }
3873 else
3874 {
3875 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3876 vec_oprnds0.quick_push (vec_oprnd0);
3877 if (op_type == binary_op)
3878 {
3879 if (code == WIDEN_LSHIFT_EXPR)
3880 vec_oprnd1 = op1;
3881 else
3882 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3883 NULL);
3884 vec_oprnds1.quick_push (vec_oprnd1);
3885 }
3886 }
3887 }
3888 else
3889 {
3890 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3891 vec_oprnds0.truncate (0);
3892 vec_oprnds0.quick_push (vec_oprnd0);
3893 if (op_type == binary_op)
3894 {
3895 if (code == WIDEN_LSHIFT_EXPR)
3896 vec_oprnd1 = op1;
3897 else
3898 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3899 vec_oprnd1);
3900 vec_oprnds1.truncate (0);
3901 vec_oprnds1.quick_push (vec_oprnd1);
3902 }
3903 }
3904
3905 /* Arguments are ready. Create the new vector stmts. */
3906 for (i = multi_step_cvt; i >= 0; i--)
3907 {
3908 tree this_dest = vec_dsts[i];
3909 enum tree_code c1 = code1, c2 = code2;
3910 if (i == 0 && codecvt2 != ERROR_MARK)
3911 {
3912 c1 = codecvt1;
3913 c2 = codecvt2;
3914 }
3915 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3916 &vec_oprnds1,
3917 stmt, this_dest, gsi,
3918 c1, c2, decl1, decl2,
3919 op_type);
3920 }
3921
3922 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3923 {
3924 if (cvt_type)
3925 {
3926 if (codecvt1 == CALL_EXPR)
3927 {
3928 new_stmt = gimple_build_call (decl1, 1, vop0);
3929 new_temp = make_ssa_name (vec_dest, new_stmt);
3930 gimple_call_set_lhs (new_stmt, new_temp);
3931 }
3932 else
3933 {
3934 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3935 new_temp = make_ssa_name (vec_dest);
3936 new_stmt = gimple_build_assign (new_temp, codecvt1,
3937 vop0);
3938 }
3939
3940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3941 }
3942 else
3943 new_stmt = SSA_NAME_DEF_STMT (vop0);
3944
3945 if (slp_node)
3946 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3947 else
3948 {
3949 if (!prev_stmt_info)
3950 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3951 else
3952 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3953 prev_stmt_info = vinfo_for_stmt (new_stmt);
3954 }
3955 }
3956 }
3957
3958 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3959 break;
3960
3961 case NARROW:
3962 /* In case the vectorization factor (VF) is bigger than the number
3963 of elements that we can fit in a vectype (nunits), we have to
3964 generate more than one vector stmt - i.e - we need to "unroll"
3965 the vector stmt by a factor VF/nunits. */
3966 for (j = 0; j < ncopies; j++)
3967 {
3968 /* Handle uses. */
3969 if (slp_node)
3970 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3971 slp_node, -1);
3972 else
3973 {
3974 vec_oprnds0.truncate (0);
3975 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3976 vect_pow2 (multi_step_cvt) - 1);
3977 }
3978
3979 /* Arguments are ready. Create the new vector stmts. */
3980 if (cvt_type)
3981 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3982 {
3983 if (codecvt1 == CALL_EXPR)
3984 {
3985 new_stmt = gimple_build_call (decl1, 1, vop0);
3986 new_temp = make_ssa_name (vec_dest, new_stmt);
3987 gimple_call_set_lhs (new_stmt, new_temp);
3988 }
3989 else
3990 {
3991 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3992 new_temp = make_ssa_name (vec_dest);
3993 new_stmt = gimple_build_assign (new_temp, codecvt1,
3994 vop0);
3995 }
3996
3997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3998 vec_oprnds0[i] = new_temp;
3999 }
4000
4001 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4002 stmt, vec_dsts, gsi,
4003 slp_node, code1,
4004 &prev_stmt_info);
4005 }
4006
4007 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4008 break;
4009 }
4010
4011 vec_oprnds0.release ();
4012 vec_oprnds1.release ();
4013 vec_dsts.release ();
4014 interm_types.release ();
4015
4016 return true;
4017 }
4018
4019
4020 /* Function vectorizable_assignment.
4021
4022 Check if STMT performs an assignment (copy) that can be vectorized.
4023 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4024 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4025 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
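/* For illustration only: a simple copy such as

     S1: x_2 = y_1;

   is vectorized here into

     VS1: vx_2 = vy_1;

   and a same-sized conversion (NOP_EXPR or VIEW_CONVERT_EXPR) is handled
   by wrapping the operand in a VIEW_CONVERT_EXPR to the destination
   vector type, as done in the transformation loop below.  */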
4026
4027 static bool
4028 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4029 gimple *vec_stmt, slp_tree slp_node)
4030 {
4031 tree vec_dest;
4032 tree scalar_dest;
4033 tree op;
4034 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4036 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4037 tree new_temp;
4038 tree def;
4039 gimple def_stmt;
4040 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4041 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4042 int ncopies;
4043 int i, j;
4044 vec<tree> vec_oprnds = vNULL;
4045 tree vop;
4046 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4047 gimple new_stmt = NULL;
4048 stmt_vec_info prev_stmt_info = NULL;
4049 enum tree_code code;
4050 tree vectype_in;
4051
4052 /* Multiple types in SLP are handled by creating the appropriate number of
4053 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4054 case of SLP. */
4055 if (slp_node || PURE_SLP_STMT (stmt_info))
4056 ncopies = 1;
4057 else
4058 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4059
4060 gcc_assert (ncopies >= 1);
4061
4062 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4063 return false;
4064
4065 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4066 return false;
4067
4068 /* Is vectorizable assignment? */
4069 if (!is_gimple_assign (stmt))
4070 return false;
4071
4072 scalar_dest = gimple_assign_lhs (stmt);
4073 if (TREE_CODE (scalar_dest) != SSA_NAME)
4074 return false;
4075
4076 code = gimple_assign_rhs_code (stmt);
4077 if (gimple_assign_single_p (stmt)
4078 || code == PAREN_EXPR
4079 || CONVERT_EXPR_CODE_P (code))
4080 op = gimple_assign_rhs1 (stmt);
4081 else
4082 return false;
4083
4084 if (code == VIEW_CONVERT_EXPR)
4085 op = TREE_OPERAND (op, 0);
4086
4087 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4088 &def_stmt, &def, &dt[0], &vectype_in))
4089 {
4090 if (dump_enabled_p ())
4091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4092 "use not simple.\n");
4093 return false;
4094 }
4095
4096 /* We can handle NOP_EXPR conversions that do not change the number
4097 of elements or the vector size. */
4098 if ((CONVERT_EXPR_CODE_P (code)
4099 || code == VIEW_CONVERT_EXPR)
4100 && (!vectype_in
4101 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4102 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4103 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4104 return false;
4105
4106 /* We do not handle bit-precision changes. */
4107 if ((CONVERT_EXPR_CODE_P (code)
4108 || code == VIEW_CONVERT_EXPR)
4109 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4110 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4111 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4112 || ((TYPE_PRECISION (TREE_TYPE (op))
4113 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4114 /* But a conversion that does not change the bit-pattern is ok. */
4115 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4116 > TYPE_PRECISION (TREE_TYPE (op)))
4117 && TYPE_UNSIGNED (TREE_TYPE (op))))
4118 {
4119 if (dump_enabled_p ())
4120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4121 "type conversion to/from bit-precision "
4122 "unsupported.\n");
4123 return false;
4124 }
4125
4126 if (!vec_stmt) /* transformation not required. */
4127 {
4128 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4129 if (dump_enabled_p ())
4130 dump_printf_loc (MSG_NOTE, vect_location,
4131 "=== vectorizable_assignment ===\n");
4132 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4133 return true;
4134 }
4135
4136 /** Transform. **/
4137 if (dump_enabled_p ())
4138 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4139
4140 /* Handle def. */
4141 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4142
4143 /* Handle use. */
4144 for (j = 0; j < ncopies; j++)
4145 {
4146 /* Handle uses. */
4147 if (j == 0)
4148 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4149 else
4150 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4151
4152 /* Arguments are ready. Create the new vector stmt. */
4153 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4154 {
4155 if (CONVERT_EXPR_CODE_P (code)
4156 || code == VIEW_CONVERT_EXPR)
4157 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4158 new_stmt = gimple_build_assign (vec_dest, vop);
4159 new_temp = make_ssa_name (vec_dest, new_stmt);
4160 gimple_assign_set_lhs (new_stmt, new_temp);
4161 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4162 if (slp_node)
4163 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4164 }
4165
4166 if (slp_node)
4167 continue;
4168
4169 if (j == 0)
4170 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4171 else
4172 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4173
4174 prev_stmt_info = vinfo_for_stmt (new_stmt);
4175 }
4176
4177 vec_oprnds.release ();
4178 return true;
4179 }
4180
4181
4182 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4183 either as shift by a scalar or by a vector. */
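/* A hedged usage note: this predicate only asks whether some vector form
   of CODE exists for SCALAR_TYPE, trying the vector-shifted-by-scalar
   optab first and the vector-shifted-by-vector optab second; callers
   (e.g. pattern recognition) can use it before committing to emit a
   shift, for instance vect_supportable_shift (RSHIFT_EXPR, type).  */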
4184
4185 bool
4186 vect_supportable_shift (enum tree_code code, tree scalar_type)
4187 {
4188
4189 machine_mode vec_mode;
4190 optab optab;
4191 int icode;
4192 tree vectype;
4193
4194 vectype = get_vectype_for_scalar_type (scalar_type);
4195 if (!vectype)
4196 return false;
4197
4198 optab = optab_for_tree_code (code, vectype, optab_scalar);
4199 if (!optab
4200 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4201 {
4202 optab = optab_for_tree_code (code, vectype, optab_vector);
4203 if (!optab
4204 || (optab_handler (optab, TYPE_MODE (vectype))
4205 == CODE_FOR_nothing))
4206 return false;
4207 }
4208
4209 vec_mode = TYPE_MODE (vectype);
4210 icode = (int) optab_handler (optab, vec_mode);
4211 if (icode == CODE_FOR_nothing)
4212 return false;
4213
4214 return true;
4215 }
4216
4217
4218 /* Function vectorizable_shift.
4219
4220 Check if STMT performs a shift operation that can be vectorized.
4221 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4222 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4223 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
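/* For illustration only: both forms of shift count are handled below,

     S1: x_3 = y_2 << 3;     (scalar/invariant count)
     S2: x_3 = y_2 << z_1;   (count varying in the loop)

   the first preferring the vector-shifted-by-scalar optab, the second
   requiring the vector-shifted-by-vector optab.  */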
4224
4225 static bool
4226 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4227 gimple *vec_stmt, slp_tree slp_node)
4228 {
4229 tree vec_dest;
4230 tree scalar_dest;
4231 tree op0, op1 = NULL;
4232 tree vec_oprnd1 = NULL_TREE;
4233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4234 tree vectype;
4235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4236 enum tree_code code;
4237 machine_mode vec_mode;
4238 tree new_temp;
4239 optab optab;
4240 int icode;
4241 machine_mode optab_op2_mode;
4242 tree def;
4243 gimple def_stmt;
4244 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4245 gimple new_stmt = NULL;
4246 stmt_vec_info prev_stmt_info;
4247 int nunits_in;
4248 int nunits_out;
4249 tree vectype_out;
4250 tree op1_vectype;
4251 int ncopies;
4252 int j, i;
4253 vec<tree> vec_oprnds0 = vNULL;
4254 vec<tree> vec_oprnds1 = vNULL;
4255 tree vop0, vop1;
4256 unsigned int k;
4257 bool scalar_shift_arg = true;
4258 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4259 int vf;
4260
4261 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4262 return false;
4263
4264 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4265 return false;
4266
4267 /* Is STMT a vectorizable binary/unary operation? */
4268 if (!is_gimple_assign (stmt))
4269 return false;
4270
4271 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4272 return false;
4273
4274 code = gimple_assign_rhs_code (stmt);
4275
4276 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4277 || code == RROTATE_EXPR))
4278 return false;
4279
4280 scalar_dest = gimple_assign_lhs (stmt);
4281 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4282 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4283 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4284 {
4285 if (dump_enabled_p ())
4286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4287 "bit-precision shifts not supported.\n");
4288 return false;
4289 }
4290
4291 op0 = gimple_assign_rhs1 (stmt);
4292 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4293 &def_stmt, &def, &dt[0], &vectype))
4294 {
4295 if (dump_enabled_p ())
4296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4297 "use not simple.\n");
4298 return false;
4299 }
4300 /* If op0 is an external or constant def, use a vector type with
4301 the same size as the output vector type. */
4302 if (!vectype)
4303 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4304 if (vec_stmt)
4305 gcc_assert (vectype);
4306 if (!vectype)
4307 {
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4310 "no vectype for scalar type\n");
4311 return false;
4312 }
4313
4314 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4315 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4316 if (nunits_out != nunits_in)
4317 return false;
4318
4319 op1 = gimple_assign_rhs2 (stmt);
4320 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4321 &def, &dt[1], &op1_vectype))
4322 {
4323 if (dump_enabled_p ())
4324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4325 "use not simple.\n");
4326 return false;
4327 }
4328
4329 if (loop_vinfo)
4330 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4331 else
4332 vf = 1;
4333
4334 /* Multiple types in SLP are handled by creating the appropriate number of
4335 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4336 case of SLP. */
4337 if (slp_node || PURE_SLP_STMT (stmt_info))
4338 ncopies = 1;
4339 else
4340 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4341
4342 gcc_assert (ncopies >= 1);
4343
4344 /* Determine whether the shift amount is a vector or a scalar. If the
4345 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4346
4347 if (dt[1] == vect_internal_def && !slp_node)
4348 scalar_shift_arg = false;
4349 else if (dt[1] == vect_constant_def
4350 || dt[1] == vect_external_def
4351 || dt[1] == vect_internal_def)
4352 {
4353 /* In SLP, we need to check whether the shift count is the same
4354 for all statements; in loops, if it is a constant or invariant,
4355 it is always a scalar shift. */
4356 if (slp_node)
4357 {
4358 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4359 gimple slpstmt;
4360
4361 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4362 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4363 scalar_shift_arg = false;
4364 }
4365 }
4366 else
4367 {
4368 if (dump_enabled_p ())
4369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4370 "operand mode requires invariant argument.\n");
4371 return false;
4372 }
4373
4374 /* Vector shifted by vector. */
4375 if (!scalar_shift_arg)
4376 {
4377 optab = optab_for_tree_code (code, vectype, optab_vector);
4378 if (dump_enabled_p ())
4379 dump_printf_loc (MSG_NOTE, vect_location,
4380 "vector/vector shift/rotate found.\n");
4381
4382 if (!op1_vectype)
4383 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4384 if (op1_vectype == NULL_TREE
4385 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4386 {
4387 if (dump_enabled_p ())
4388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4389 "unusable type for last operand in"
4390 " vector/vector shift/rotate.\n");
4391 return false;
4392 }
4393 }
4394 /* See if the machine has a vector shifted by scalar insn and if not
4395 then see if it has a vector shifted by vector insn. */
4396 else
4397 {
4398 optab = optab_for_tree_code (code, vectype, optab_scalar);
4399 if (optab
4400 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4401 {
4402 if (dump_enabled_p ())
4403 dump_printf_loc (MSG_NOTE, vect_location,
4404 "vector/scalar shift/rotate found.\n");
4405 }
4406 else
4407 {
4408 optab = optab_for_tree_code (code, vectype, optab_vector);
4409 if (optab
4410 && (optab_handler (optab, TYPE_MODE (vectype))
4411 != CODE_FOR_nothing))
4412 {
4413 scalar_shift_arg = false;
4414
4415 if (dump_enabled_p ())
4416 dump_printf_loc (MSG_NOTE, vect_location,
4417 "vector/vector shift/rotate found.\n");
4418
4419 /* Unlike the other binary operators, shifts/rotates have
4420 an rhs of type int rather than the same type as the lhs,
4421 so make sure the scalar has the right type when we are
4422 dealing with vectors of long long/long/short/char. */
4423 if (dt[1] == vect_constant_def)
4424 op1 = fold_convert (TREE_TYPE (vectype), op1);
4425 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4426 TREE_TYPE (op1)))
4427 {
4428 if (slp_node
4429 && TYPE_MODE (TREE_TYPE (vectype))
4430 != TYPE_MODE (TREE_TYPE (op1)))
4431 {
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4434 "unusable type for last operand in"
4435 " vector/vector shift/rotate.\n");
4436 return false;
4437 }
4438 if (vec_stmt && !slp_node)
4439 {
4440 op1 = fold_convert (TREE_TYPE (vectype), op1);
4441 op1 = vect_init_vector (stmt, op1,
4442 TREE_TYPE (vectype), NULL);
4443 }
4444 }
4445 }
4446 }
4447 }
4448
4449 /* Supportable by target? */
4450 if (!optab)
4451 {
4452 if (dump_enabled_p ())
4453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4454 "no optab.\n");
4455 return false;
4456 }
4457 vec_mode = TYPE_MODE (vectype);
4458 icode = (int) optab_handler (optab, vec_mode);
4459 if (icode == CODE_FOR_nothing)
4460 {
4461 if (dump_enabled_p ())
4462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4463 "op not supported by target.\n");
4464 /* Check only during analysis. */
4465 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4466 || (vf < vect_min_worthwhile_factor (code)
4467 && !vec_stmt))
4468 return false;
4469 if (dump_enabled_p ())
4470 dump_printf_loc (MSG_NOTE, vect_location,
4471 "proceeding using word mode.\n");
4472 }
4473
4474 /* Worthwhile without SIMD support? Check only during analysis. */
4475 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4476 && vf < vect_min_worthwhile_factor (code)
4477 && !vec_stmt)
4478 {
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "not worthwhile without SIMD support.\n");
4482 return false;
4483 }
4484
4485 if (!vec_stmt) /* transformation not required. */
4486 {
4487 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_NOTE, vect_location,
4490 "=== vectorizable_shift ===\n");
4491 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4492 return true;
4493 }
4494
4495 /** Transform. **/
4496
4497 if (dump_enabled_p ())
4498 dump_printf_loc (MSG_NOTE, vect_location,
4499 "transform binary/unary operation.\n");
4500
4501 /* Handle def. */
4502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4503
4504 prev_stmt_info = NULL;
4505 for (j = 0; j < ncopies; j++)
4506 {
4507 /* Handle uses. */
4508 if (j == 0)
4509 {
4510 if (scalar_shift_arg)
4511 {
4512 /* Vector shl and shr insn patterns can be defined with scalar
4513 operand 2 (shift operand). In this case, use constant or loop
4514 invariant op1 directly, without extending it to vector mode
4515 first. */
4516 optab_op2_mode = insn_data[icode].operand[2].mode;
4517 if (!VECTOR_MODE_P (optab_op2_mode))
4518 {
4519 if (dump_enabled_p ())
4520 dump_printf_loc (MSG_NOTE, vect_location,
4521 "operand 1 using scalar mode.\n");
4522 vec_oprnd1 = op1;
4523 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4524 vec_oprnds1.quick_push (vec_oprnd1);
4525 if (slp_node)
4526 {
4527 /* Store vec_oprnd1 for every vector stmt to be created
4528 for SLP_NODE. We check during the analysis that all
4529 the shift arguments are the same.
4530 TODO: Allow different constants for different vector
4531 stmts generated for an SLP instance. */
4532 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4533 vec_oprnds1.quick_push (vec_oprnd1);
4534 }
4535 }
4536 }
4537
4538 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4539 (a special case for certain kinds of vector shifts); otherwise,
4540 operand 1 should be of a vector type (the usual case). */
4541 if (vec_oprnd1)
4542 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4543 slp_node, -1);
4544 else
4545 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4546 slp_node, -1);
4547 }
4548 else
4549 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4550
4551 /* Arguments are ready. Create the new vector stmt. */
4552 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4553 {
4554 vop1 = vec_oprnds1[i];
4555 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4556 new_temp = make_ssa_name (vec_dest, new_stmt);
4557 gimple_assign_set_lhs (new_stmt, new_temp);
4558 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4559 if (slp_node)
4560 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4561 }
4562
4563 if (slp_node)
4564 continue;
4565
4566 if (j == 0)
4567 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4568 else
4569 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4570 prev_stmt_info = vinfo_for_stmt (new_stmt);
4571 }
4572
4573 vec_oprnds0.release ();
4574 vec_oprnds1.release ();
4575
4576 return true;
4577 }
4578
4579
4580 /* Function vectorizable_operation.
4581
4582 Check if STMT performs a binary, unary or ternary operation that can
4583 be vectorized.
4584 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4585 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4586 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
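/* For illustration only: a statement such as

     S1: z_3 = x_1 + y_2;

   becomes a vector add on the corresponding vector defs; POINTER_PLUS_EXPR
   is mapped to a plain PLUS_EXPR below, and shifts are deliberately
   rejected here because vectorizable_shift handles them.  */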
4587
4588 static bool
4589 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4590 gimple *vec_stmt, slp_tree slp_node)
4591 {
4592 tree vec_dest;
4593 tree scalar_dest;
4594 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4596 tree vectype;
4597 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4598 enum tree_code code;
4599 machine_mode vec_mode;
4600 tree new_temp;
4601 int op_type;
4602 optab optab;
4603 int icode;
4604 tree def;
4605 gimple def_stmt;
4606 enum vect_def_type dt[3]
4607 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4608 gimple new_stmt = NULL;
4609 stmt_vec_info prev_stmt_info;
4610 int nunits_in;
4611 int nunits_out;
4612 tree vectype_out;
4613 int ncopies;
4614 int j, i;
4615 vec<tree> vec_oprnds0 = vNULL;
4616 vec<tree> vec_oprnds1 = vNULL;
4617 vec<tree> vec_oprnds2 = vNULL;
4618 tree vop0, vop1, vop2;
4619 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4620 int vf;
4621
4622 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4623 return false;
4624
4625 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4626 return false;
4627
4628 /* Is STMT a vectorizable binary/unary operation? */
4629 if (!is_gimple_assign (stmt))
4630 return false;
4631
4632 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4633 return false;
4634
4635 code = gimple_assign_rhs_code (stmt);
4636
4637 /* For pointer addition, we should use the normal plus for
4638 the vector addition. */
4639 if (code == POINTER_PLUS_EXPR)
4640 code = PLUS_EXPR;
4641
4642 /* Support only unary, binary or ternary operations. */
4643 op_type = TREE_CODE_LENGTH (code);
4644 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4645 {
4646 if (dump_enabled_p ())
4647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4648 "num. args = %d (not unary/binary/ternary op).\n",
4649 op_type);
4650 return false;
4651 }
4652
4653 scalar_dest = gimple_assign_lhs (stmt);
4654 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4655
4656 /* Most operations cannot handle bit-precision types without extra
4657 truncations. */
4658 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4659 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4660 /* Exceptions are bitwise binary operations. */
4661 && code != BIT_IOR_EXPR
4662 && code != BIT_XOR_EXPR
4663 && code != BIT_AND_EXPR)
4664 {
4665 if (dump_enabled_p ())
4666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4667 "bit-precision arithmetic not supported.\n");
4668 return false;
4669 }
4670
4671 op0 = gimple_assign_rhs1 (stmt);
4672 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4673 &def_stmt, &def, &dt[0], &vectype))
4674 {
4675 if (dump_enabled_p ())
4676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4677 "use not simple.\n");
4678 return false;
4679 }
4680 /* If op0 is an external or constant def, use a vector type with
4681 the same size as the output vector type. */
4682 if (!vectype)
4683 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4684 if (vec_stmt)
4685 gcc_assert (vectype);
4686 if (!vectype)
4687 {
4688 if (dump_enabled_p ())
4689 {
4690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4691 "no vectype for scalar type ");
4692 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4693 TREE_TYPE (op0));
4694 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4695 }
4696
4697 return false;
4698 }
4699
4700 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4701 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4702 if (nunits_out != nunits_in)
4703 return false;
4704
4705 if (op_type == binary_op || op_type == ternary_op)
4706 {
4707 op1 = gimple_assign_rhs2 (stmt);
4708 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4709 &def, &dt[1]))
4710 {
4711 if (dump_enabled_p ())
4712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4713 "use not simple.\n");
4714 return false;
4715 }
4716 }
4717 if (op_type == ternary_op)
4718 {
4719 op2 = gimple_assign_rhs3 (stmt);
4720 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4721 &def, &dt[2]))
4722 {
4723 if (dump_enabled_p ())
4724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4725 "use not simple.\n");
4726 return false;
4727 }
4728 }
4729
4730 if (loop_vinfo)
4731 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4732 else
4733 vf = 1;
4734
4735 /* Multiple types in SLP are handled by creating the appropriate number of
4736 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4737 case of SLP. */
4738 if (slp_node || PURE_SLP_STMT (stmt_info))
4739 ncopies = 1;
4740 else
4741 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4742
4743 gcc_assert (ncopies >= 1);
4744
4745 /* Shifts are handled in vectorizable_shift (). */
4746 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4747 || code == RROTATE_EXPR)
4748 return false;
4749
4750 /* Supportable by target? */
4751
4752 vec_mode = TYPE_MODE (vectype);
4753 if (code == MULT_HIGHPART_EXPR)
4754 {
4755 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4756 icode = LAST_INSN_CODE;
4757 else
4758 icode = CODE_FOR_nothing;
4759 }
4760 else
4761 {
4762 optab = optab_for_tree_code (code, vectype, optab_default);
4763 if (!optab)
4764 {
4765 if (dump_enabled_p ())
4766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4767 "no optab.\n");
4768 return false;
4769 }
4770 icode = (int) optab_handler (optab, vec_mode);
4771 }
4772
4773 if (icode == CODE_FOR_nothing)
4774 {
4775 if (dump_enabled_p ())
4776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4777 "op not supported by target.\n");
4778 /* Check only during analysis. */
4779 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4780 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4781 return false;
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_NOTE, vect_location,
4784 "proceeding using word mode.\n");
4785 }
4786
4787 /* Worthwhile without SIMD support? Check only during analysis. */
4788 if (!VECTOR_MODE_P (vec_mode)
4789 && !vec_stmt
4790 && vf < vect_min_worthwhile_factor (code))
4791 {
4792 if (dump_enabled_p ())
4793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4794 "not worthwhile without SIMD support.\n");
4795 return false;
4796 }
4797
4798 if (!vec_stmt) /* transformation not required. */
4799 {
4800 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4801 if (dump_enabled_p ())
4802 dump_printf_loc (MSG_NOTE, vect_location,
4803 "=== vectorizable_operation ===\n");
4804 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4805 return true;
4806 }
4807
4808 /** Transform. **/
4809
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_NOTE, vect_location,
4812 "transform binary/unary operation.\n");
4813
4814 /* Handle def. */
4815 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4816
4817 /* In case the vectorization factor (VF) is bigger than the number
4818 of elements that we can fit in a vectype (nunits), we have to generate
4819 more than one vector stmt - i.e - we need to "unroll" the
4820 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4821 from one copy of the vector stmt to the next, in the field
4822 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4823 stages to find the correct vector defs to be used when vectorizing
4824 stmts that use the defs of the current stmt. The example below
4825 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4826 we need to create 4 vectorized stmts):
4827
4828 before vectorization:
4829 RELATED_STMT VEC_STMT
4830 S1: x = memref - -
4831 S2: z = x + 1 - -
4832
4833 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4834 there):
4835 RELATED_STMT VEC_STMT
4836 VS1_0: vx0 = memref0 VS1_1 -
4837 VS1_1: vx1 = memref1 VS1_2 -
4838 VS1_2: vx2 = memref2 VS1_3 -
4839 VS1_3: vx3 = memref3 - -
4840 S1: x = load - VS1_0
4841 S2: z = x + 1 - -
4842
4843 step2: vectorize stmt S2 (done here):
4844 To vectorize stmt S2 we first need to find the relevant vector
4845 def for the first operand 'x'. This is, as usual, obtained from
4846 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4847 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4848 relevant vector def 'vx0'. Having found 'vx0' we can generate
4849 the vector stmt VS2_0, and as usual, record it in the
4850 STMT_VINFO_VEC_STMT of stmt S2.
4851 When creating the second copy (VS2_1), we obtain the relevant vector
4852 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4853 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4854 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4855 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4856 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4857 chain of stmts and pointers:
4858 RELATED_STMT VEC_STMT
4859 VS1_0: vx0 = memref0 VS1_1 -
4860 VS1_1: vx1 = memref1 VS1_2 -
4861 VS1_2: vx2 = memref2 VS1_3 -
4862 VS1_3: vx3 = memref3 - -
4863 S1: x = load - VS1_0
4864 VS2_0: vz0 = vx0 + v1 VS2_1 -
4865 VS2_1: vz1 = vx1 + v1 VS2_2 -
4866 VS2_2: vz2 = vx2 + v1 VS2_3 -
4867 VS2_3: vz3 = vx3 + v1 - -
4868 S2: z = x + 1 - VS2_0 */
4869
4870 prev_stmt_info = NULL;
4871 for (j = 0; j < ncopies; j++)
4872 {
4873 /* Handle uses. */
4874 if (j == 0)
4875 {
4876 if (op_type == binary_op || op_type == ternary_op)
4877 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4878 slp_node, -1);
4879 else
4880 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4881 slp_node, -1);
4882 if (op_type == ternary_op)
4883 {
4884 vec_oprnds2.create (1);
4885 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4886 stmt,
4887 NULL));
4888 }
4889 }
4890 else
4891 {
4892 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4893 if (op_type == ternary_op)
4894 {
4895 tree vec_oprnd = vec_oprnds2.pop ();
4896 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4897 vec_oprnd));
4898 }
4899 }
4900
4901 /* Arguments are ready. Create the new vector stmt. */
4902 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4903 {
4904 vop1 = ((op_type == binary_op || op_type == ternary_op)
4905 ? vec_oprnds1[i] : NULL_TREE);
4906 vop2 = ((op_type == ternary_op)
4907 ? vec_oprnds2[i] : NULL_TREE);
4908 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4909 new_temp = make_ssa_name (vec_dest, new_stmt);
4910 gimple_assign_set_lhs (new_stmt, new_temp);
4911 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4912 if (slp_node)
4913 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4914 }
4915
4916 if (slp_node)
4917 continue;
4918
4919 if (j == 0)
4920 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4921 else
4922 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4923 prev_stmt_info = vinfo_for_stmt (new_stmt);
4924 }
4925
4926 vec_oprnds0.release ();
4927 vec_oprnds1.release ();
4928 vec_oprnds2.release ();
4929
4930 return true;
4931 }
4932
4933 /* A helper function to ensure data reference DR's base alignment
4934 for STMT_INFO. */
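/* A hedged note: BASE_MISALIGNED is expected to have been set during
   alignment analysis when vectorization relies on raising the alignment
   of the base declaration; applying DECL_ALIGN here, before any vector
   code is emitted, makes that promise hold.  */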
4935
4936 static void
4937 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4938 {
4939 if (!dr->aux)
4940 return;
4941
4942 if (((dataref_aux *)dr->aux)->base_misaligned)
4943 {
4944 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4945 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4946
4947 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4948 DECL_USER_ALIGN (base_decl) = 1;
4949 ((dataref_aux *)dr->aux)->base_misaligned = false;
4950 }
4951 }
4952
4953
4954 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4955 reversal of the vector elements. If that is impossible to do,
4956 returns NULL. */
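/* For example, for a four-element vector the mask built below is
   {3, 2, 1, 0}, which VEC_PERM_EXPR interprets as "take element 3,
   then 2, then 1, then 0" of the input.  */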
4957
4958 static tree
4959 perm_mask_for_reverse (tree vectype)
4960 {
4961 int i, nunits;
4962 unsigned char *sel;
4963
4964 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4965 sel = XALLOCAVEC (unsigned char, nunits);
4966
4967 for (i = 0; i < nunits; ++i)
4968 sel[i] = nunits - 1 - i;
4969
4970 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4971 return NULL_TREE;
4972 return vect_gen_perm_mask_checked (vectype, sel);
4973 }
4974
4975 /* Function vectorizable_store.
4976
4977 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4978 can be vectorized.
4979 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4980 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4981 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
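/* For illustration only: for a loop like

     for (i = 0; i < n; i++)
       a[i] = x;

   each copy below emits one vector store of the form

     MEM_REF[vectorized pointer] = vx;

   grouped (interleaved) stores additionally go through
   vect_permute_store_chain, as described in the comment further down.  */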
4982
4983 static bool
4984 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4985 slp_tree slp_node)
4986 {
4987 tree scalar_dest;
4988 tree data_ref;
4989 tree op;
4990 tree vec_oprnd = NULL_TREE;
4991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4992 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4993 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4994 tree elem_type;
4995 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4996 struct loop *loop = NULL;
4997 machine_mode vec_mode;
4998 tree dummy;
4999 enum dr_alignment_support alignment_support_scheme;
5000 tree def;
5001 gimple def_stmt;
5002 enum vect_def_type dt;
5003 stmt_vec_info prev_stmt_info = NULL;
5004 tree dataref_ptr = NULL_TREE;
5005 tree dataref_offset = NULL_TREE;
5006 gimple ptr_incr = NULL;
5007 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5008 int ncopies;
5009 int j;
5010 gimple next_stmt, first_stmt = NULL;
5011 bool grouped_store = false;
5012 bool store_lanes_p = false;
5013 unsigned int group_size, i;
5014 vec<tree> dr_chain = vNULL;
5015 vec<tree> oprnds = vNULL;
5016 vec<tree> result_chain = vNULL;
5017 bool inv_p;
5018 bool negative = false;
5019 tree offset = NULL_TREE;
5020 vec<tree> vec_oprnds = vNULL;
5021 bool slp = (slp_node != NULL);
5022 unsigned int vec_num;
5023 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5024 tree aggr_type;
5025
5026 if (loop_vinfo)
5027 loop = LOOP_VINFO_LOOP (loop_vinfo);
5028
5029 /* Multiple types in SLP are handled by creating the appropriate number of
5030 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5031 case of SLP. */
5032 if (slp || PURE_SLP_STMT (stmt_info))
5033 ncopies = 1;
5034 else
5035 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5036
5037 gcc_assert (ncopies >= 1);
5038
5039 /* FORNOW. This restriction should be relaxed. */
5040 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5041 {
5042 if (dump_enabled_p ())
5043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5044 "multiple types in nested loop.\n");
5045 return false;
5046 }
5047
5048 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5049 return false;
5050
5051 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5052 return false;
5053
5054 /* Is vectorizable store? */
5055
5056 if (!is_gimple_assign (stmt))
5057 return false;
5058
5059 scalar_dest = gimple_assign_lhs (stmt);
5060 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5061 && is_pattern_stmt_p (stmt_info))
5062 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5063 if (TREE_CODE (scalar_dest) != ARRAY_REF
5064 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5065 && TREE_CODE (scalar_dest) != INDIRECT_REF
5066 && TREE_CODE (scalar_dest) != COMPONENT_REF
5067 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5068 && TREE_CODE (scalar_dest) != REALPART_EXPR
5069 && TREE_CODE (scalar_dest) != MEM_REF)
5070 return false;
5071
5072 gcc_assert (gimple_assign_single_p (stmt));
5073 op = gimple_assign_rhs1 (stmt);
5074 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5075 &def, &dt))
5076 {
5077 if (dump_enabled_p ())
5078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5079 "use not simple.\n");
5080 return false;
5081 }
5082
5083 elem_type = TREE_TYPE (vectype);
5084 vec_mode = TYPE_MODE (vectype);
5085
5086 /* FORNOW. In some cases we can vectorize even if the data-type is not
5087 supported (e.g. array initialization with 0). */
5088 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5089 return false;
5090
5091 if (!STMT_VINFO_DATA_REF (stmt_info))
5092 return false;
5093
5094 negative =
5095 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5096 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5097 size_zero_node) < 0;
5098 if (negative && ncopies > 1)
5099 {
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5102 "multiple types with negative step.\n");
5103 return false;
5104 }
5105
5106 if (negative)
5107 {
5108 gcc_assert (!grouped_store);
5109 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5110 if (alignment_support_scheme != dr_aligned
5111 && alignment_support_scheme != dr_unaligned_supported)
5112 {
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "negative step but alignment required.\n");
5116 return false;
5117 }
5118 if (dt != vect_constant_def
5119 && dt != vect_external_def
5120 && !perm_mask_for_reverse (vectype))
5121 {
5122 if (dump_enabled_p ())
5123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5124 "negative step and reversing not supported.\n");
5125 return false;
5126 }
5127 }
5128
5129 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5130 {
5131 grouped_store = true;
5132 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5133 if (!slp && !PURE_SLP_STMT (stmt_info))
5134 {
5135 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5136 if (vect_store_lanes_supported (vectype, group_size))
5137 store_lanes_p = true;
5138 else if (!vect_grouped_store_supported (vectype, group_size))
5139 return false;
5140 }
5141
5142 if (first_stmt == stmt)
5143 {
5144 /* STMT is the leader of the group. Check the operands of all the
5145 stmts of the group. */
5146 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5147 while (next_stmt)
5148 {
5149 gcc_assert (gimple_assign_single_p (next_stmt));
5150 op = gimple_assign_rhs1 (next_stmt);
5151 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5152 &def_stmt, &def, &dt))
5153 {
5154 if (dump_enabled_p ())
5155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5156 "use not simple.\n");
5157 return false;
5158 }
5159 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5160 }
5161 }
5162 }
5163
5164 if (!vec_stmt) /* transformation not required. */
5165 {
5166 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5167 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5168 NULL, NULL, NULL);
5169 return true;
5170 }
5171
5172 /** Transform. **/
5173
5174 ensure_base_align (stmt_info, dr);
5175
5176 if (grouped_store)
5177 {
5178 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5179 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5180
5181 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5182
5183 /* FORNOW */
5184 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5185
5186 /* We vectorize all the stmts of the interleaving group when we
5187 reach the last stmt in the group. */
5188 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5189 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5190 && !slp)
5191 {
5192 *vec_stmt = NULL;
5193 return true;
5194 }
5195
5196 if (slp)
5197 {
5198 grouped_store = false;
5199 /* VEC_NUM is the number of vect stmts to be created for this
5200 group. */
5201 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5202 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5203 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5204 op = gimple_assign_rhs1 (first_stmt);
5205 }
5206 else
5207 /* VEC_NUM is the number of vect stmts to be created for this
5208 group. */
5209 vec_num = group_size;
5210 }
5211 else
5212 {
5213 first_stmt = stmt;
5214 first_dr = dr;
5215 group_size = vec_num = 1;
5216 }
5217
5218 if (dump_enabled_p ())
5219 dump_printf_loc (MSG_NOTE, vect_location,
5220 "transform store. ncopies = %d\n", ncopies);
5221
5222 dr_chain.create (group_size);
5223 oprnds.create (group_size);
5224
5225 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5226 gcc_assert (alignment_support_scheme);
5227 /* Targets with store-lane instructions must not require explicit
5228 realignment. */
5229 gcc_assert (!store_lanes_p
5230 || alignment_support_scheme == dr_aligned
5231 || alignment_support_scheme == dr_unaligned_supported);
5232
5233 if (negative)
5234 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5235
5236 if (store_lanes_p)
5237 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5238 else
5239 aggr_type = vectype;
5240
5241 /* In case the vectorization factor (VF) is bigger than the number
5242 of elements that we can fit in a vectype (nunits), we have to generate
5243 more than one vector stmt - i.e - we need to "unroll" the
5244 vector stmt by a factor VF/nunits. For more details see documentation in
5245 vect_get_vec_def_for_copy_stmt. */
5246
5247 /* In case of interleaving (non-unit grouped access):
5248
5249 S1: &base + 2 = x2
5250 S2: &base = x0
5251 S3: &base + 1 = x1
5252 S4: &base + 3 = x3
5253
5254 We create vectorized stores starting from the base address (the access of
5255 the first stmt in the chain, S2 in the above example), when the last store
5256 stmt of the chain (S4) is reached:
5257
5258 VS1: &base = vx2
5259 VS2: &base + vec_size*1 = vx0
5260 VS3: &base + vec_size*2 = vx1
5261 VS4: &base + vec_size*3 = vx3
5262
5263 Then permutation statements are generated:
5264
5265 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5266 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5267 ...
5268
5269 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5270 (the order of the data-refs in the output of vect_permute_store_chain
5271 corresponds to the order of scalar stmts in the interleaving chain - see
5272 the documentation of vect_permute_store_chain()).
5273
5274 In case of both multiple types and interleaving, above vector stores and
5275 permutation stmts are created for every copy. The result vector stmts are
5276 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5277 STMT_VINFO_RELATED_STMT for the next copies.
5278 */
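     /* A clarifying note on the masks above: in a VEC_PERM_EXPR of two
        N-element vectors, mask indices 0..N-1 select from the first
        operand and N..2*N-1 from the second, so for N = 8 the mask
        {0, 8, 1, 9, 2, 10, 3, 11} interleaves the low halves of the two
        inputs.  */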
5279
5280 prev_stmt_info = NULL;
5281 for (j = 0; j < ncopies; j++)
5282 {
5283 gimple new_stmt;
5284
5285 if (j == 0)
5286 {
5287 if (slp)
5288 {
5289 /* Get vectorized arguments for SLP_NODE. */
5290 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5291 NULL, slp_node, -1);
5292
5293 vec_oprnd = vec_oprnds[0];
5294 }
5295 else
5296 {
5297 /* For interleaved stores we collect vectorized defs for all the
5298 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5299 used as an input to vect_permute_store_chain(), and OPRNDS as
5300 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5301
5302 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5303 OPRNDS are of size 1. */
5304 next_stmt = first_stmt;
5305 for (i = 0; i < group_size; i++)
5306 {
5307 /* Since gaps are not supported for interleaved stores,
5308 GROUP_SIZE is the exact number of stmts in the chain.
5309 Therefore, NEXT_STMT can't be NULL_TREE. If there is
5310 no interleaving, GROUP_SIZE is 1, and only one
5311 iteration of the loop will be executed. */
5312 gcc_assert (next_stmt
5313 && gimple_assign_single_p (next_stmt));
5314 op = gimple_assign_rhs1 (next_stmt);
5315
5316 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5317 NULL);
5318 dr_chain.quick_push (vec_oprnd);
5319 oprnds.quick_push (vec_oprnd);
5320 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5321 }
5322 }
5323
5324 /* We should have caught mismatched types earlier. */
5325 gcc_assert (useless_type_conversion_p (vectype,
5326 TREE_TYPE (vec_oprnd)));
5327 bool simd_lane_access_p
5328 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5329 if (simd_lane_access_p
5330 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5331 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5332 && integer_zerop (DR_OFFSET (first_dr))
5333 && integer_zerop (DR_INIT (first_dr))
5334 && alias_sets_conflict_p (get_alias_set (aggr_type),
5335 get_alias_set (DR_REF (first_dr))))
5336 {
5337 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5338 dataref_offset = build_int_cst (reference_alias_ptr_type
5339 (DR_REF (first_dr)), 0);
5340 inv_p = false;
5341 }
5342 else
5343 dataref_ptr
5344 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5345 simd_lane_access_p ? loop : NULL,
5346 offset, &dummy, gsi, &ptr_incr,
5347 simd_lane_access_p, &inv_p);
5348 gcc_assert (bb_vinfo || !inv_p);
5349 }
5350 else
5351 {
5352 /* For interleaved stores we created vectorized defs for all the
5353 defs stored in OPRNDS in the previous iteration (previous copy).
5354 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5355 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5356 next copy.
5357 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5358 OPRNDS are of size 1. */
5359 for (i = 0; i < group_size; i++)
5360 {
5361 op = oprnds[i];
5362 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5363 &def, &dt);
5364 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5365 dr_chain[i] = vec_oprnd;
5366 oprnds[i] = vec_oprnd;
5367 }
5368 if (dataref_offset)
5369 dataref_offset
5370 = int_const_binop (PLUS_EXPR, dataref_offset,
5371 TYPE_SIZE_UNIT (aggr_type));
5372 else
5373 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5374 TYPE_SIZE_UNIT (aggr_type));
5375 }
5376
5377 if (store_lanes_p)
5378 {
5379 tree vec_array;
5380
5381 /* Combine all the vectors into an array. */
5382 vec_array = create_vector_array (vectype, vec_num);
5383 for (i = 0; i < vec_num; i++)
5384 {
5385 vec_oprnd = dr_chain[i];
5386 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5387 }
5388
5389 /* Emit:
5390 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5391 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5392 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5393 gimple_call_set_lhs (new_stmt, data_ref);
5394 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5395 }
5396 else
5397 {
5398 new_stmt = NULL;
5399 if (grouped_store)
5400 {
5401 if (j == 0)
5402 result_chain.create (group_size);
5403 /* Permute. */
5404 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5405 &result_chain);
5406 }
5407
5408 next_stmt = first_stmt;
5409 for (i = 0; i < vec_num; i++)
5410 {
5411 unsigned align, misalign;
5412
5413 if (i > 0)
5414 /* Bump the vector pointer. */
5415 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5416 stmt, NULL_TREE);
5417
5418 if (slp)
5419 vec_oprnd = vec_oprnds[i];
5420 else if (grouped_store)
5421 /* For grouped stores vectorized defs are interleaved in
5422 vect_permute_store_chain(). */
5423 vec_oprnd = result_chain[i];
5424
5425 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5426 dataref_offset
5427 ? dataref_offset
5428 : build_int_cst (reference_alias_ptr_type
5429 (DR_REF (first_dr)), 0));
5430 align = TYPE_ALIGN_UNIT (vectype);
5431 if (aligned_access_p (first_dr))
5432 misalign = 0;
5433 else if (DR_MISALIGNMENT (first_dr) == -1)
5434 {
5435 TREE_TYPE (data_ref)
5436 = build_aligned_type (TREE_TYPE (data_ref),
5437 TYPE_ALIGN (elem_type));
5438 align = TYPE_ALIGN_UNIT (elem_type);
5439 misalign = 0;
5440 }
5441 else
5442 {
5443 TREE_TYPE (data_ref)
5444 = build_aligned_type (TREE_TYPE (data_ref),
5445 TYPE_ALIGN (elem_type));
5446 misalign = DR_MISALIGNMENT (first_dr);
5447 }
5448 if (dataref_offset == NULL_TREE)
5449 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5450 misalign);
5451
5452 if (negative
5453 && dt != vect_constant_def
5454 && dt != vect_external_def)
5455 {
5456 tree perm_mask = perm_mask_for_reverse (vectype);
5457 tree perm_dest
5458 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5459 vectype);
5460 tree new_temp = make_ssa_name (perm_dest);
5461
5462 /* Generate the permute statement. */
5463 gimple perm_stmt
5464 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5465 vec_oprnd, perm_mask);
5466 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5467
5468 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5469 vec_oprnd = new_temp;
5470 }
5471
5472 /* Arguments are ready. Create the new vector stmt. */
5473 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5475
5476 if (slp)
5477 continue;
5478
5479 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5480 if (!next_stmt)
5481 break;
5482 }
5483 }
5484 if (!slp)
5485 {
5486 if (j == 0)
5487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5488 else
5489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5490 prev_stmt_info = vinfo_for_stmt (new_stmt);
5491 }
5492 }
5493
5494 dr_chain.release ();
5495 oprnds.release ();
5496 result_chain.release ();
5497 vec_oprnds.release ();
5498
5499 return true;
5500 }
5501
5502 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5503 VECTOR_CST mask. No checks are made that the target platform supports the
5504 mask, so callers may wish to test can_vec_perm_p separately, or use
5505 vect_gen_perm_mask_checked. */
5506
5507 tree
5508 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5509 {
5510 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5511 int i, nunits;
5512
5513 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5514
5515 mask_elt_type = lang_hooks.types.type_for_mode
5516 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5517 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5518
5519 mask_elts = XALLOCAVEC (tree, nunits);
5520 for (i = nunits - 1; i >= 0; i--)
5521 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5522 mask_vec = build_vector (mask_type, mask_elts);
5523
5524 return mask_vec;
5525 }
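/* A minimal usage sketch (mirroring what perm_mask_for_reverse does
   elsewhere in this file): to build a mask that reverses the elements of
   VECTYPE, fill SEL with descending indices and, assuming the target
   supports the permutation, use the checked variant below:

     int i, nunits = TYPE_VECTOR_SUBPARTS (vectype);
     unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
     for (i = 0; i < nunits; i++)
       sel[i] = nunits - 1 - i;
     mask = vect_gen_perm_mask_checked (vectype, sel);

   e.g. for a four-element vector SEL ends up as {3, 2, 1, 0}.  */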
5526
5527 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5528 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5529
5530 tree
5531 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5532 {
5533 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5534 return vect_gen_perm_mask_any (vectype, sel);
5535 }
5536
5537 /* Given vector variables X and Y that were generated for the scalar
5538    STMT, generate instructions to permute the vector elements of X and Y
5539    using permutation mask MASK_VEC, insert them at *GSI and return the
5540    permuted vector variable.  */
5541
5542 static tree
5543 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5544 gimple_stmt_iterator *gsi)
5545 {
5546 tree vectype = TREE_TYPE (x);
5547 tree perm_dest, data_ref;
5548 gimple perm_stmt;
5549
5550 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5551 data_ref = make_ssa_name (perm_dest);
5552
5553 /* Generate the permute statement. */
5554 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5555 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5556
5557 return data_ref;
5558 }
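/* As a sketch of the emitted GIMPLE (SSA names are illustrative only),
   the negative-step load and store paths in this file call this with
   X == Y and a reversal mask, producing roughly:

     vect_perm_5 = VEC_PERM_EXPR <vx_1, vx_1, { 3, 2, 1, 0 }>;

   for a four-element vector.  */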
5559
5560 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5561    inserting them on the loop's preheader edge.  Returns true if we
5562    were successful in doing so (and thus STMT can then be moved),
5563 otherwise returns false. */
5564
5565 static bool
5566 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5567 {
5568 ssa_op_iter i;
5569 tree op;
5570 bool any = false;
5571
5572 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5573 {
5574 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5575 if (!gimple_nop_p (def_stmt)
5576 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5577 {
5578 	  /* Make sure we don't need to recurse.  While we could do
5579 	     so in simple cases, for more complex use webs
5580 	     we don't have an easy way to preserve stmt order to fulfil
5581 dependencies within them. */
5582 tree op2;
5583 ssa_op_iter i2;
5584 if (gimple_code (def_stmt) == GIMPLE_PHI)
5585 return false;
5586 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5587 {
5588 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5589 if (!gimple_nop_p (def_stmt2)
5590 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5591 return false;
5592 }
5593 any = true;
5594 }
5595 }
5596
5597 if (!any)
5598 return true;
5599
5600 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5601 {
5602 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5603 if (!gimple_nop_p (def_stmt)
5604 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5605 {
5606 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5607 gsi_remove (&gsi, false);
5608 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5609 }
5610 }
5611
5612 return true;
5613 }
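/* A minimal sketch of the effect (hypothetical GIMPLE): for an invariant
   load "x_3 = MEM[off_1]" whose address "off_1 = base_2 + 16" is defined
   inside the loop, the defining statement is moved onto the preheader edge:

     preheader:
       off_1 = base_2 + 16;
     loop:
       x_3 = MEM[off_1];

   The load itself is not moved here; once this function succeeds,
   vectorizable_load emits the invariant load on the preheader edge.  */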
5614
5615 /* vectorizable_load.
5616
5617    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5618 can be vectorized.
5619 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5620 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5621 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5622
5623 static bool
5624 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5625 slp_tree slp_node, slp_instance slp_node_instance)
5626 {
5627 tree scalar_dest;
5628 tree vec_dest = NULL;
5629 tree data_ref = NULL;
5630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5631 stmt_vec_info prev_stmt_info;
5632 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5633 struct loop *loop = NULL;
5634 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5635 bool nested_in_vect_loop = false;
5636 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5637 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5638 tree elem_type;
5639 tree new_temp;
5640 machine_mode mode;
5641 gimple new_stmt = NULL;
5642 tree dummy;
5643 enum dr_alignment_support alignment_support_scheme;
5644 tree dataref_ptr = NULL_TREE;
5645 tree dataref_offset = NULL_TREE;
5646 gimple ptr_incr = NULL;
5647 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5648 int ncopies;
5649 int i, j, group_size, group_gap;
5650 tree msq = NULL_TREE, lsq;
5651 tree offset = NULL_TREE;
5652 tree byte_offset = NULL_TREE;
5653 tree realignment_token = NULL_TREE;
5654 gphi *phi = NULL;
5655 vec<tree> dr_chain = vNULL;
5656 bool grouped_load = false;
5657 bool load_lanes_p = false;
5658 gimple first_stmt;
5659 bool inv_p;
5660 bool negative = false;
5661 bool compute_in_loop = false;
5662 struct loop *at_loop;
5663 int vec_num;
5664 bool slp = (slp_node != NULL);
5665 bool slp_perm = false;
5666 enum tree_code code;
5667 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5668 int vf;
5669 tree aggr_type;
5670 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5671 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5672 int gather_scale = 1;
5673 enum vect_def_type gather_dt = vect_unknown_def_type;
5674
5675 if (loop_vinfo)
5676 {
5677 loop = LOOP_VINFO_LOOP (loop_vinfo);
5678 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5679 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5680 }
5681 else
5682 vf = 1;
5683
5684 /* Multiple types in SLP are handled by creating the appropriate number of
5685 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5686 case of SLP. */
5687 if (slp || PURE_SLP_STMT (stmt_info))
5688 ncopies = 1;
5689 else
5690 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5691
5692 gcc_assert (ncopies >= 1);
5693
5694 /* FORNOW. This restriction should be relaxed. */
5695 if (nested_in_vect_loop && ncopies > 1)
5696 {
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5699 "multiple types in nested loop.\n");
5700 return false;
5701 }
5702
5703 /* Invalidate assumptions made by dependence analysis when vectorization
5704 on the unrolled body effectively re-orders stmts. */
5705 if (ncopies > 1
5706 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5707 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5708 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5709 {
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5712 "cannot perform implicit CSE when unrolling "
5713 "with negative dependence distance\n");
5714 return false;
5715 }
5716
5717 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5718 return false;
5719
5720 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5721 return false;
5722
5723 /* Is vectorizable load? */
5724 if (!is_gimple_assign (stmt))
5725 return false;
5726
5727 scalar_dest = gimple_assign_lhs (stmt);
5728 if (TREE_CODE (scalar_dest) != SSA_NAME)
5729 return false;
5730
5731 code = gimple_assign_rhs_code (stmt);
5732 if (code != ARRAY_REF
5733 && code != BIT_FIELD_REF
5734 && code != INDIRECT_REF
5735 && code != COMPONENT_REF
5736 && code != IMAGPART_EXPR
5737 && code != REALPART_EXPR
5738 && code != MEM_REF
5739 && TREE_CODE_CLASS (code) != tcc_declaration)
5740 return false;
5741
5742 if (!STMT_VINFO_DATA_REF (stmt_info))
5743 return false;
5744
5745 elem_type = TREE_TYPE (vectype);
5746 mode = TYPE_MODE (vectype);
5747
5748   /* FORNOW. In some cases we can vectorize even if the data-type is not supported
5749 (e.g. - data copies). */
5750 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5751 {
5752 if (dump_enabled_p ())
5753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5754 "Aligned load, but unsupported type.\n");
5755 return false;
5756 }
5757
5758 /* Check if the load is a part of an interleaving chain. */
5759 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5760 {
5761 grouped_load = true;
5762 /* FORNOW */
5763 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5764
5765 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5766 if (!slp && !PURE_SLP_STMT (stmt_info))
5767 {
5768 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5769 if (vect_load_lanes_supported (vectype, group_size))
5770 load_lanes_p = true;
5771 else if (!vect_grouped_load_supported (vectype, group_size))
5772 return false;
5773 }
5774
5775 /* Invalidate assumptions made by dependence analysis when vectorization
5776 on the unrolled body effectively re-orders stmts. */
5777 if (!PURE_SLP_STMT (stmt_info)
5778 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5779 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5780 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5781 {
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5784 "cannot perform implicit CSE when performing "
5785 "group loads with negative dependence distance\n");
5786 return false;
5787 }
5788 }
5789
5790
5791 if (STMT_VINFO_GATHER_P (stmt_info))
5792 {
5793 gimple def_stmt;
5794 tree def;
5795 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5796 &gather_off, &gather_scale);
5797 gcc_assert (gather_decl);
5798 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5799 &def_stmt, &def, &gather_dt,
5800 &gather_off_vectype))
5801 {
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804 "gather index use not simple.\n");
5805 return false;
5806 }
5807 }
5808 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5809 ;
5810 else
5811 {
5812 negative = tree_int_cst_compare (nested_in_vect_loop
5813 ? STMT_VINFO_DR_STEP (stmt_info)
5814 : DR_STEP (dr),
5815 size_zero_node) < 0;
5816 if (negative && ncopies > 1)
5817 {
5818 if (dump_enabled_p ())
5819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5820 "multiple types with negative step.\n");
5821 return false;
5822 }
5823
5824 if (negative)
5825 {
5826 if (grouped_load)
5827 {
5828 if (dump_enabled_p ())
5829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5830 "negative step for group load not supported"
5831 "\n");
5832 return false;
5833 }
5834 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5835 if (alignment_support_scheme != dr_aligned
5836 && alignment_support_scheme != dr_unaligned_supported)
5837 {
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5840 "negative step but alignment required.\n");
5841 return false;
5842 }
5843 if (!perm_mask_for_reverse (vectype))
5844 {
5845 if (dump_enabled_p ())
5846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5847 "negative step and reversing not supported."
5848 "\n");
5849 return false;
5850 }
5851 }
5852 }
5853
5854 if (!vec_stmt) /* transformation not required. */
5855 {
5856 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5857 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5858 return true;
5859 }
5860
5861 if (dump_enabled_p ())
5862 dump_printf_loc (MSG_NOTE, vect_location,
5863 "transform load. ncopies = %d\n", ncopies);
5864
5865 /** Transform. **/
5866
5867 ensure_base_align (stmt_info, dr);
5868
5869 if (STMT_VINFO_GATHER_P (stmt_info))
5870 {
5871 tree vec_oprnd0 = NULL_TREE, op;
5872 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5873 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5874 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5875 edge pe = loop_preheader_edge (loop);
5876 gimple_seq seq;
5877 basic_block new_bb;
5878 enum { NARROW, NONE, WIDEN } modifier;
5879 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5880
5881 if (nunits == gather_off_nunits)
5882 modifier = NONE;
5883 else if (nunits == gather_off_nunits / 2)
5884 {
5885 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5886 modifier = WIDEN;
5887
5888 for (i = 0; i < gather_off_nunits; ++i)
5889 sel[i] = i | nunits;
5890
5891 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5892 }
5893 else if (nunits == gather_off_nunits * 2)
5894 {
5895 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5896 modifier = NARROW;
5897
5898 for (i = 0; i < nunits; ++i)
5899 sel[i] = i < gather_off_nunits
5900 ? i : i + nunits - gather_off_nunits;
5901
5902 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5903 ncopies *= 2;
5904 }
5905 else
5906 gcc_unreachable ();
5907
5908 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5909 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5910 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5911 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5912 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5913 scaletype = TREE_VALUE (arglist);
5914 gcc_checking_assert (types_compatible_p (srctype, rettype));
5915
5916 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5917
5918 ptr = fold_convert (ptrtype, gather_base);
5919 if (!is_gimple_min_invariant (ptr))
5920 {
5921 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5922 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5923 gcc_assert (!new_bb);
5924 }
5925
5926 /* Currently we support only unconditional gather loads,
5927 so mask should be all ones. */
5928 if (TREE_CODE (masktype) == INTEGER_TYPE)
5929 mask = build_int_cst (masktype, -1);
5930 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5931 {
5932 mask = build_int_cst (TREE_TYPE (masktype), -1);
5933 mask = build_vector_from_val (masktype, mask);
5934 mask = vect_init_vector (stmt, mask, masktype, NULL);
5935 }
5936 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5937 {
5938 REAL_VALUE_TYPE r;
5939 long tmp[6];
5940 for (j = 0; j < 6; ++j)
5941 tmp[j] = -1;
5942 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5943 mask = build_real (TREE_TYPE (masktype), r);
5944 mask = build_vector_from_val (masktype, mask);
5945 mask = vect_init_vector (stmt, mask, masktype, NULL);
5946 }
5947 else
5948 gcc_unreachable ();
5949
5950 scale = build_int_cst (scaletype, gather_scale);
5951
5952 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5953 merge = build_int_cst (TREE_TYPE (rettype), 0);
5954 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5955 {
5956 REAL_VALUE_TYPE r;
5957 long tmp[6];
5958 for (j = 0; j < 6; ++j)
5959 tmp[j] = 0;
5960 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5961 merge = build_real (TREE_TYPE (rettype), r);
5962 }
5963 else
5964 gcc_unreachable ();
5965 merge = build_vector_from_val (rettype, merge);
5966 merge = vect_init_vector (stmt, merge, rettype, NULL);
5967
5968 prev_stmt_info = NULL;
5969 for (j = 0; j < ncopies; ++j)
5970 {
5971 if (modifier == WIDEN && (j & 1))
5972 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5973 perm_mask, stmt, gsi);
5974 else if (j == 0)
5975 op = vec_oprnd0
5976 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5977 else
5978 op = vec_oprnd0
5979 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5980
5981 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5982 {
5983 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5984 == TYPE_VECTOR_SUBPARTS (idxtype));
5985 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5986 var = make_ssa_name (var);
5987 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5988 new_stmt
5989 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5990 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5991 op = var;
5992 }
5993
5994 new_stmt
5995 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5996
5997 if (!useless_type_conversion_p (vectype, rettype))
5998 {
5999 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6000 == TYPE_VECTOR_SUBPARTS (rettype));
6001 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6002 op = make_ssa_name (var, new_stmt);
6003 gimple_call_set_lhs (new_stmt, op);
6004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6005 var = make_ssa_name (vec_dest);
6006 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6007 new_stmt
6008 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6009 }
6010 else
6011 {
6012 var = make_ssa_name (vec_dest, new_stmt);
6013 gimple_call_set_lhs (new_stmt, var);
6014 }
6015
6016 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6017
6018 if (modifier == NARROW)
6019 {
6020 if ((j & 1) == 0)
6021 {
6022 prev_res = var;
6023 continue;
6024 }
6025 var = permute_vec_elements (prev_res, var,
6026 perm_mask, stmt, gsi);
6027 new_stmt = SSA_NAME_DEF_STMT (var);
6028 }
6029
6030 if (prev_stmt_info == NULL)
6031 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6032 else
6033 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6034 prev_stmt_info = vinfo_for_stmt (new_stmt);
6035 }
6036 return true;
6037 }
6038 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6039 {
6040 gimple_stmt_iterator incr_gsi;
6041 bool insert_after;
6042 gimple incr;
6043 tree offvar;
6044 tree ivstep;
6045 tree running_off;
6046 vec<constructor_elt, va_gc> *v = NULL;
6047 gimple_seq stmts = NULL;
6048 tree stride_base, stride_step, alias_off;
6049
6050 gcc_assert (!nested_in_vect_loop);
6051
6052 stride_base
6053 = fold_build_pointer_plus
6054 (unshare_expr (DR_BASE_ADDRESS (dr)),
6055 size_binop (PLUS_EXPR,
6056 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6057 convert_to_ptrofftype (DR_INIT (dr))));
6058 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6059
6060 /* For a load with loop-invariant (but other than power-of-2)
6061 stride (i.e. not a grouped access) like so:
6062
6063 for (i = 0; i < n; i += stride)
6064 ... = array[i];
6065
6066 we generate a new induction variable and new accesses to
6067 form a new vector (or vectors, depending on ncopies):
6068
6069 for (j = 0; ; j += VF*stride)
6070 tmp1 = array[j];
6071 tmp2 = array[j + stride];
6072 ...
6073 vectemp = {tmp1, tmp2, ...}
6074 */
6075
6076 ivstep = stride_step;
6077 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6078 build_int_cst (TREE_TYPE (ivstep), vf));
6079
6080 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6081
6082 create_iv (stride_base, ivstep, NULL,
6083 loop, &incr_gsi, insert_after,
6084 &offvar, NULL);
6085 incr = gsi_stmt (incr_gsi);
6086 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6087
6088 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6089 if (stmts)
6090 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6091
6092 prev_stmt_info = NULL;
6093 running_off = offvar;
6094 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6095 for (j = 0; j < ncopies; j++)
6096 {
6097 tree vec_inv;
6098
6099 vec_alloc (v, nunits);
6100 for (i = 0; i < nunits; i++)
6101 {
6102 tree newref, newoff;
6103 gimple incr;
6104 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6105 running_off, alias_off);
6106
6107 newref = force_gimple_operand_gsi (gsi, newref, true,
6108 NULL_TREE, true,
6109 GSI_SAME_STMT);
6110 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6111 newoff = copy_ssa_name (running_off);
6112 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6113 running_off, stride_step);
6114 vect_finish_stmt_generation (stmt, incr, gsi);
6115
6116 running_off = newoff;
6117 }
6118
6119 vec_inv = build_constructor (vectype, v);
6120 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6121 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6122
6123 if (j == 0)
6124 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6125 else
6126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6127 prev_stmt_info = vinfo_for_stmt (new_stmt);
6128 }
6129 return true;
6130 }
6131
6132 if (grouped_load)
6133 {
6134 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6135 if (slp
6136 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6137 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6138 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6139
6140 /* Check if the chain of loads is already vectorized. */
6141 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6142 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6143 ??? But we can only do so if there is exactly one
6144 as we have no way to get at the rest. Leave the CSE
6145 opportunity alone.
6146 ??? With the group load eventually participating
6147 in multiple different permutations (having multiple
6148 slp nodes which refer to the same group) the CSE
6149 is even wrong code. See PR56270. */
6150 && !slp)
6151 {
6152 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6153 return true;
6154 }
6155 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6156 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6157
6158 /* VEC_NUM is the number of vect stmts to be created for this group. */
6159 if (slp)
6160 {
6161 grouped_load = false;
6162 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6163 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6164 slp_perm = true;
6165 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6166 }
6167 else
6168 {
6169 vec_num = group_size;
6170 group_gap = 0;
6171 }
6172 }
6173 else
6174 {
6175 first_stmt = stmt;
6176 first_dr = dr;
6177 group_size = vec_num = 1;
6178 group_gap = 0;
6179 }
6180
6181 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6182 gcc_assert (alignment_support_scheme);
6183 /* Targets with load-lane instructions must not require explicit
6184 realignment. */
6185 gcc_assert (!load_lanes_p
6186 || alignment_support_scheme == dr_aligned
6187 || alignment_support_scheme == dr_unaligned_supported);
6188
6189 /* In case the vectorization factor (VF) is bigger than the number
6190 of elements that we can fit in a vectype (nunits), we have to generate
6191 more than one vector stmt - i.e - we need to "unroll" the
6192 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6193 from one copy of the vector stmt to the next, in the field
6194 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6195 stages to find the correct vector defs to be used when vectorizing
6196 stmts that use the defs of the current stmt. The example below
6197 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6198 need to create 4 vectorized stmts):
6199
6200 before vectorization:
6201 RELATED_STMT VEC_STMT
6202 S1: x = memref - -
6203 S2: z = x + 1 - -
6204
6205 step 1: vectorize stmt S1:
6206 We first create the vector stmt VS1_0, and, as usual, record a
6207 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6208 Next, we create the vector stmt VS1_1, and record a pointer to
6209 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6210 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6211 stmts and pointers:
6212 RELATED_STMT VEC_STMT
6213 VS1_0: vx0 = memref0 VS1_1 -
6214 VS1_1: vx1 = memref1 VS1_2 -
6215 VS1_2: vx2 = memref2 VS1_3 -
6216 VS1_3: vx3 = memref3 - -
6217 S1: x = load - VS1_0
6218 S2: z = x + 1 - -
6219
6220 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6221 information we recorded in RELATED_STMT field is used to vectorize
6222 stmt S2. */
6223
6224 /* In case of interleaving (non-unit grouped access):
6225
6226 S1: x2 = &base + 2
6227 S2: x0 = &base
6228 S3: x1 = &base + 1
6229 S4: x3 = &base + 3
6230
6231 Vectorized loads are created in the order of memory accesses
6232 starting from the access of the first stmt of the chain:
6233
6234 VS1: vx0 = &base
6235 VS2: vx1 = &base + vec_size*1
6236 VS3: vx3 = &base + vec_size*2
6237 VS4: vx4 = &base + vec_size*3
6238
6239 Then permutation statements are generated:
6240
6241 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6242 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6243 ...
6244
6245 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6246 (the order of the data-refs in the output of vect_permute_load_chain
6247 corresponds to the order of scalar stmts in the interleaving chain - see
6248 the documentation of vect_permute_load_chain()).
6249 The generation of permutation stmts and recording them in
6250 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6251
6252 In case of both multiple types and interleaving, the vector loads and
6253 permutation stmts above are created for every copy. The result vector
6254 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6255 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6256
6257 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6258 on a target that supports unaligned accesses (dr_unaligned_supported)
6259 we generate the following code:
6260 p = initial_addr;
6261 indx = 0;
6262 loop {
6263 p = p + indx * vectype_size;
6264 vec_dest = *(p);
6265 indx = indx + 1;
6266 }
6267
6268 Otherwise, the data reference is potentially unaligned on a target that
6269 does not support unaligned accesses (dr_explicit_realign_optimized) -
6270 then generate the following code, in which the data in each iteration is
6271 obtained by two vector loads, one from the previous iteration, and one
6272 from the current iteration:
6273 p1 = initial_addr;
6274 msq_init = *(floor(p1))
6275 p2 = initial_addr + VS - 1;
6276 realignment_token = call target_builtin;
6277 indx = 0;
6278 loop {
6279 p2 = p2 + indx * vectype_size
6280 lsq = *(floor(p2))
6281 vec_dest = realign_load (msq, lsq, realignment_token)
6282 indx = indx + 1;
6283 msq = lsq;
6284 } */
6285
6286 /* If the misalignment remains the same throughout the execution of the
6287 loop, we can create the init_addr and permutation mask at the loop
6288 preheader. Otherwise, it needs to be created inside the loop.
6289 This can only occur when vectorizing memory accesses in the inner-loop
6290 nested within an outer-loop that is being vectorized. */
6291
6292 if (nested_in_vect_loop
6293 && (TREE_INT_CST_LOW (DR_STEP (dr))
6294 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6295 {
6296 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6297 compute_in_loop = true;
6298 }
6299
6300 if ((alignment_support_scheme == dr_explicit_realign_optimized
6301 || alignment_support_scheme == dr_explicit_realign)
6302 && !compute_in_loop)
6303 {
6304 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6305 alignment_support_scheme, NULL_TREE,
6306 &at_loop);
6307 if (alignment_support_scheme == dr_explicit_realign_optimized)
6308 {
6309 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6310 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6311 size_one_node);
6312 }
6313 }
6314 else
6315 at_loop = loop;
6316
6317 if (negative)
6318 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6319
6320 if (load_lanes_p)
6321 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6322 else
6323 aggr_type = vectype;
6324
6325 prev_stmt_info = NULL;
6326 for (j = 0; j < ncopies; j++)
6327 {
6328 /* 1. Create the vector or array pointer update chain. */
6329 if (j == 0)
6330 {
6331 bool simd_lane_access_p
6332 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6333 if (simd_lane_access_p
6334 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6335 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6336 && integer_zerop (DR_OFFSET (first_dr))
6337 && integer_zerop (DR_INIT (first_dr))
6338 && alias_sets_conflict_p (get_alias_set (aggr_type),
6339 get_alias_set (DR_REF (first_dr)))
6340 && (alignment_support_scheme == dr_aligned
6341 || alignment_support_scheme == dr_unaligned_supported))
6342 {
6343 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6344 dataref_offset = build_int_cst (reference_alias_ptr_type
6345 (DR_REF (first_dr)), 0);
6346 inv_p = false;
6347 }
6348 else
6349 dataref_ptr
6350 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6351 offset, &dummy, gsi, &ptr_incr,
6352 simd_lane_access_p, &inv_p,
6353 byte_offset);
6354 }
6355 else if (dataref_offset)
6356 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6357 TYPE_SIZE_UNIT (aggr_type));
6358 else
6359 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6360 TYPE_SIZE_UNIT (aggr_type));
6361
6362 if (grouped_load || slp_perm)
6363 dr_chain.create (vec_num);
6364
6365 if (load_lanes_p)
6366 {
6367 tree vec_array;
6368
6369 vec_array = create_vector_array (vectype, vec_num);
6370
6371 /* Emit:
6372 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6373 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6374 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6375 gimple_call_set_lhs (new_stmt, vec_array);
6376 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6377
6378 /* Extract each vector into an SSA_NAME. */
6379 for (i = 0; i < vec_num; i++)
6380 {
6381 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6382 vec_array, i);
6383 dr_chain.quick_push (new_temp);
6384 }
6385
6386 /* Record the mapping between SSA_NAMEs and statements. */
6387 vect_record_grouped_load_vectors (stmt, dr_chain);
6388 }
6389 else
6390 {
6391 for (i = 0; i < vec_num; i++)
6392 {
6393 if (i > 0)
6394 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6395 stmt, NULL_TREE);
6396
6397 /* 2. Create the vector-load in the loop. */
6398 switch (alignment_support_scheme)
6399 {
6400 case dr_aligned:
6401 case dr_unaligned_supported:
6402 {
6403 unsigned int align, misalign;
6404
6405 data_ref
6406 = build2 (MEM_REF, vectype, dataref_ptr,
6407 dataref_offset
6408 ? dataref_offset
6409 : build_int_cst (reference_alias_ptr_type
6410 (DR_REF (first_dr)), 0));
6411 align = TYPE_ALIGN_UNIT (vectype);
6412 if (alignment_support_scheme == dr_aligned)
6413 {
6414 gcc_assert (aligned_access_p (first_dr));
6415 misalign = 0;
6416 }
6417 else if (DR_MISALIGNMENT (first_dr) == -1)
6418 {
6419 TREE_TYPE (data_ref)
6420 = build_aligned_type (TREE_TYPE (data_ref),
6421 TYPE_ALIGN (elem_type));
6422 align = TYPE_ALIGN_UNIT (elem_type);
6423 misalign = 0;
6424 }
6425 else
6426 {
6427 TREE_TYPE (data_ref)
6428 = build_aligned_type (TREE_TYPE (data_ref),
6429 TYPE_ALIGN (elem_type));
6430 misalign = DR_MISALIGNMENT (first_dr);
6431 }
6432 if (dataref_offset == NULL_TREE)
6433 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6434 align, misalign);
6435 break;
6436 }
6437 case dr_explicit_realign:
6438 {
6439 tree ptr, bump;
6440 tree vs_minus_1;
6441
6442 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6443
6444 if (compute_in_loop)
6445 msq = vect_setup_realignment (first_stmt, gsi,
6446 &realignment_token,
6447 dr_explicit_realign,
6448 dataref_ptr, NULL);
6449
6450 ptr = copy_ssa_name (dataref_ptr);
6451 new_stmt = gimple_build_assign
6452 (ptr, BIT_AND_EXPR, dataref_ptr,
6453 build_int_cst
6454 (TREE_TYPE (dataref_ptr),
6455 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6456 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6457 data_ref
6458 = build2 (MEM_REF, vectype, ptr,
6459 build_int_cst (reference_alias_ptr_type
6460 (DR_REF (first_dr)), 0));
6461 vec_dest = vect_create_destination_var (scalar_dest,
6462 vectype);
6463 new_stmt = gimple_build_assign (vec_dest, data_ref);
6464 new_temp = make_ssa_name (vec_dest, new_stmt);
6465 gimple_assign_set_lhs (new_stmt, new_temp);
6466 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6467 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6468 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6469 msq = new_temp;
6470
6471 bump = size_binop (MULT_EXPR, vs_minus_1,
6472 TYPE_SIZE_UNIT (elem_type));
6473 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6474 new_stmt = gimple_build_assign
6475 (NULL_TREE, BIT_AND_EXPR, ptr,
6476 build_int_cst
6477 (TREE_TYPE (ptr),
6478 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6479 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6480 gimple_assign_set_lhs (new_stmt, ptr);
6481 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6482 data_ref
6483 = build2 (MEM_REF, vectype, ptr,
6484 build_int_cst (reference_alias_ptr_type
6485 (DR_REF (first_dr)), 0));
6486 break;
6487 }
6488 case dr_explicit_realign_optimized:
6489 new_temp = copy_ssa_name (dataref_ptr);
6490 new_stmt = gimple_build_assign
6491 (new_temp, BIT_AND_EXPR, dataref_ptr,
6492 build_int_cst
6493 (TREE_TYPE (dataref_ptr),
6494 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6496 data_ref
6497 = build2 (MEM_REF, vectype, new_temp,
6498 build_int_cst (reference_alias_ptr_type
6499 (DR_REF (first_dr)), 0));
6500 break;
6501 default:
6502 gcc_unreachable ();
6503 }
6504 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6505 new_stmt = gimple_build_assign (vec_dest, data_ref);
6506 new_temp = make_ssa_name (vec_dest, new_stmt);
6507 gimple_assign_set_lhs (new_stmt, new_temp);
6508 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6509
6510 /* 3. Handle explicit realignment if necessary/supported.
6511 Create in loop:
6512 vec_dest = realign_load (msq, lsq, realignment_token) */
6513 if (alignment_support_scheme == dr_explicit_realign_optimized
6514 || alignment_support_scheme == dr_explicit_realign)
6515 {
6516 lsq = gimple_assign_lhs (new_stmt);
6517 if (!realignment_token)
6518 realignment_token = dataref_ptr;
6519 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6520 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6521 msq, lsq, realignment_token);
6522 new_temp = make_ssa_name (vec_dest, new_stmt);
6523 gimple_assign_set_lhs (new_stmt, new_temp);
6524 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6525
6526 if (alignment_support_scheme == dr_explicit_realign_optimized)
6527 {
6528 gcc_assert (phi);
6529 if (i == vec_num - 1 && j == ncopies - 1)
6530 add_phi_arg (phi, lsq,
6531 loop_latch_edge (containing_loop),
6532 UNKNOWN_LOCATION);
6533 msq = lsq;
6534 }
6535 }
6536
6537 /* 4. Handle invariant-load. */
6538 if (inv_p && !bb_vinfo)
6539 {
6540 gcc_assert (!grouped_load);
6541 /* If we have versioned for aliasing or the loop doesn't
6542 have any data dependencies that would preclude this,
6543 then we are sure this is a loop invariant load and
6544 thus we can insert it on the preheader edge. */
6545 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6546 && !nested_in_vect_loop
6547 && hoist_defs_of_uses (stmt, loop))
6548 {
6549 if (dump_enabled_p ())
6550 {
6551 dump_printf_loc (MSG_NOTE, vect_location,
6552 "hoisting out of the vectorized "
6553 "loop: ");
6554 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6555 dump_printf (MSG_NOTE, "\n");
6556 }
6557 tree tem = copy_ssa_name (scalar_dest);
6558 gsi_insert_on_edge_immediate
6559 (loop_preheader_edge (loop),
6560 gimple_build_assign (tem,
6561 unshare_expr
6562 (gimple_assign_rhs1 (stmt))));
6563 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6564 }
6565 else
6566 {
6567 gimple_stmt_iterator gsi2 = *gsi;
6568 gsi_next (&gsi2);
6569 new_temp = vect_init_vector (stmt, scalar_dest,
6570 vectype, &gsi2);
6571 }
6572 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6573 set_vinfo_for_stmt (new_stmt,
6574 new_stmt_vec_info (new_stmt, loop_vinfo,
6575 bb_vinfo));
6576 }
6577
6578 if (negative)
6579 {
6580 tree perm_mask = perm_mask_for_reverse (vectype);
6581 new_temp = permute_vec_elements (new_temp, new_temp,
6582 perm_mask, stmt, gsi);
6583 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6584 }
6585
6586 /* Collect vector loads and later create their permutation in
6587 vect_transform_grouped_load (). */
6588 if (grouped_load || slp_perm)
6589 dr_chain.quick_push (new_temp);
6590
6591 /* Store vector loads in the corresponding SLP_NODE. */
6592 if (slp && !slp_perm)
6593 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6594 }
6595 /* Bump the vector pointer to account for a gap. */
6596 if (slp && group_gap != 0)
6597 {
6598 tree bump = size_binop (MULT_EXPR,
6599 TYPE_SIZE_UNIT (elem_type),
6600 size_int (group_gap));
6601 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6602 stmt, bump);
6603 }
6604 }
6605
6606 if (slp && !slp_perm)
6607 continue;
6608
6609 if (slp_perm)
6610 {
6611 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6612 slp_node_instance, false))
6613 {
6614 dr_chain.release ();
6615 return false;
6616 }
6617 }
6618 else
6619 {
6620 if (grouped_load)
6621 {
6622 if (!load_lanes_p)
6623 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6624 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6625 }
6626 else
6627 {
6628 if (j == 0)
6629 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6630 else
6631 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6632 prev_stmt_info = vinfo_for_stmt (new_stmt);
6633 }
6634 }
6635 dr_chain.release ();
6636 }
6637
6638 return true;
6639 }
6640
6641 /* Function vect_is_simple_cond.
6642
6643 Input:
6644 LOOP - the loop that is being vectorized.
6645 COND - Condition that is checked for simple use.
6646
6647 Output:
6648 *COMP_VECTYPE - the vector type for the comparison.
6649
6650 Returns whether a COND can be vectorized. Checks whether
6651    condition operands are supportable using vect_is_simple_use.  */
6652
6653 static bool
6654 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6655 bb_vec_info bb_vinfo, tree *comp_vectype)
6656 {
6657 tree lhs, rhs;
6658 tree def;
6659 enum vect_def_type dt;
6660 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6661
6662 if (!COMPARISON_CLASS_P (cond))
6663 return false;
6664
6665 lhs = TREE_OPERAND (cond, 0);
6666 rhs = TREE_OPERAND (cond, 1);
6667
6668 if (TREE_CODE (lhs) == SSA_NAME)
6669 {
6670 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6671 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6672 &lhs_def_stmt, &def, &dt, &vectype1))
6673 return false;
6674 }
6675 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6676 && TREE_CODE (lhs) != FIXED_CST)
6677 return false;
6678
6679 if (TREE_CODE (rhs) == SSA_NAME)
6680 {
6681 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6682 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6683 &rhs_def_stmt, &def, &dt, &vectype2))
6684 return false;
6685 }
6686 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6687 && TREE_CODE (rhs) != FIXED_CST)
6688 return false;
6689
6690 *comp_vectype = vectype1 ? vectype1 : vectype2;
6691 return true;
6692 }
6693
6694 /* vectorizable_condition.
6695
6696    Check if STMT is a conditional modify expression that can be vectorized.
6697 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6698 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6699 at GSI.
6700
6701    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6702    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6703    the else clause if it is 2).
6704
6705 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
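/* As an illustrative sketch (SSA names are hypothetical), a scalar
   statement of the form

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced below by a single vector statement

     vect_x_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;

   where the embedded comparison is built with VEC_CMP_TYPE and the
   VEC_COND_EXPR itself with VECTYPE.  */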
6706
6707 bool
6708 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6709 gimple *vec_stmt, tree reduc_def, int reduc_index,
6710 slp_tree slp_node)
6711 {
6712 tree scalar_dest = NULL_TREE;
6713 tree vec_dest = NULL_TREE;
6714 tree cond_expr, then_clause, else_clause;
6715 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6716 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6717 tree comp_vectype = NULL_TREE;
6718 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6719 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6720 tree vec_compare, vec_cond_expr;
6721 tree new_temp;
6722 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6723 tree def;
6724 enum vect_def_type dt, dts[4];
6725 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6726 int ncopies;
6727 enum tree_code code;
6728 stmt_vec_info prev_stmt_info = NULL;
6729 int i, j;
6730 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6731 vec<tree> vec_oprnds0 = vNULL;
6732 vec<tree> vec_oprnds1 = vNULL;
6733 vec<tree> vec_oprnds2 = vNULL;
6734 vec<tree> vec_oprnds3 = vNULL;
6735 tree vec_cmp_type;
6736
6737 if (slp_node || PURE_SLP_STMT (stmt_info))
6738 ncopies = 1;
6739 else
6740 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6741
6742 gcc_assert (ncopies >= 1);
6743 if (reduc_index && ncopies > 1)
6744 return false; /* FORNOW */
6745
6746 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6747 return false;
6748
6749 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6750 return false;
6751
6752 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6753 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6754 && reduc_def))
6755 return false;
6756
6757 /* FORNOW: not yet supported. */
6758 if (STMT_VINFO_LIVE_P (stmt_info))
6759 {
6760 if (dump_enabled_p ())
6761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6762 "value used after loop.\n");
6763 return false;
6764 }
6765
6766 /* Is vectorizable conditional operation? */
6767 if (!is_gimple_assign (stmt))
6768 return false;
6769
6770 code = gimple_assign_rhs_code (stmt);
6771
6772 if (code != COND_EXPR)
6773 return false;
6774
6775 cond_expr = gimple_assign_rhs1 (stmt);
6776 then_clause = gimple_assign_rhs2 (stmt);
6777 else_clause = gimple_assign_rhs3 (stmt);
6778
6779 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6780 &comp_vectype)
6781 || !comp_vectype)
6782 return false;
6783
6784 if (TREE_CODE (then_clause) == SSA_NAME)
6785 {
6786 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6787 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6788 &then_def_stmt, &def, &dt))
6789 return false;
6790 }
6791 else if (TREE_CODE (then_clause) != INTEGER_CST
6792 && TREE_CODE (then_clause) != REAL_CST
6793 && TREE_CODE (then_clause) != FIXED_CST)
6794 return false;
6795
6796 if (TREE_CODE (else_clause) == SSA_NAME)
6797 {
6798 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6799 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6800 &else_def_stmt, &def, &dt))
6801 return false;
6802 }
6803 else if (TREE_CODE (else_clause) != INTEGER_CST
6804 && TREE_CODE (else_clause) != REAL_CST
6805 && TREE_CODE (else_clause) != FIXED_CST)
6806 return false;
6807
6808 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6809   /* The result of a vector comparison should have a signed integer type.  */
6810 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6811 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6812 if (vec_cmp_type == NULL_TREE)
6813 return false;
6814
6815 if (!vec_stmt)
6816 {
6817 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6818 return expand_vec_cond_expr_p (vectype, comp_vectype);
6819 }
6820
6821 /* Transform. */
6822
6823 if (!slp_node)
6824 {
6825 vec_oprnds0.create (1);
6826 vec_oprnds1.create (1);
6827 vec_oprnds2.create (1);
6828 vec_oprnds3.create (1);
6829 }
6830
6831 /* Handle def. */
6832 scalar_dest = gimple_assign_lhs (stmt);
6833 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6834
6835 /* Handle cond expr. */
6836 for (j = 0; j < ncopies; j++)
6837 {
6838 gassign *new_stmt = NULL;
6839 if (j == 0)
6840 {
6841 if (slp_node)
6842 {
6843 auto_vec<tree, 4> ops;
6844 auto_vec<vec<tree>, 4> vec_defs;
6845
6846 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6847 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6848 ops.safe_push (then_clause);
6849 ops.safe_push (else_clause);
6850 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6851 vec_oprnds3 = vec_defs.pop ();
6852 vec_oprnds2 = vec_defs.pop ();
6853 vec_oprnds1 = vec_defs.pop ();
6854 vec_oprnds0 = vec_defs.pop ();
6855
6856 ops.release ();
6857 vec_defs.release ();
6858 }
6859 else
6860 {
6861 gimple gtemp;
6862 vec_cond_lhs =
6863 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6864 stmt, NULL);
6865 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6866 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6867
6868 vec_cond_rhs =
6869 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6870 stmt, NULL);
6871 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6872 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6873 if (reduc_index == 1)
6874 vec_then_clause = reduc_def;
6875 else
6876 {
6877 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6878 stmt, NULL);
6879 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6880 NULL, &gtemp, &def, &dts[2]);
6881 }
6882 if (reduc_index == 2)
6883 vec_else_clause = reduc_def;
6884 else
6885 {
6886 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6887 stmt, NULL);
6888 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6889 NULL, &gtemp, &def, &dts[3]);
6890 }
6891 }
6892 }
6893 else
6894 {
6895 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6896 vec_oprnds0.pop ());
6897 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6898 vec_oprnds1.pop ());
6899 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6900 vec_oprnds2.pop ());
6901 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6902 vec_oprnds3.pop ());
6903 }
6904
6905 if (!slp_node)
6906 {
6907 vec_oprnds0.quick_push (vec_cond_lhs);
6908 vec_oprnds1.quick_push (vec_cond_rhs);
6909 vec_oprnds2.quick_push (vec_then_clause);
6910 vec_oprnds3.quick_push (vec_else_clause);
6911 }
6912
6913 /* Arguments are ready. Create the new vector stmt. */
6914 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6915 {
6916 vec_cond_rhs = vec_oprnds1[i];
6917 vec_then_clause = vec_oprnds2[i];
6918 vec_else_clause = vec_oprnds3[i];
6919
6920 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6921 vec_cond_lhs, vec_cond_rhs);
6922 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6923 vec_compare, vec_then_clause, vec_else_clause);
6924
6925 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6926 new_temp = make_ssa_name (vec_dest, new_stmt);
6927 gimple_assign_set_lhs (new_stmt, new_temp);
6928 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6929 if (slp_node)
6930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6931 }
6932
6933 if (slp_node)
6934 continue;
6935
6936 if (j == 0)
6937 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6938 else
6939 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6940
6941 prev_stmt_info = vinfo_for_stmt (new_stmt);
6942 }
6943
6944 vec_oprnds0.release ();
6945 vec_oprnds1.release ();
6946 vec_oprnds2.release ();
6947 vec_oprnds3.release ();
6948
6949 return true;
6950 }
6951
6952
6953 /* Make sure the statement is vectorizable. */
6954
6955 bool
6956 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6957 {
6958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6959 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6960 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6961 bool ok;
6962 tree scalar_type, vectype;
6963 gimple pattern_stmt;
6964 gimple_seq pattern_def_seq;
6965
6966 if (dump_enabled_p ())
6967 {
6968 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6969 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6970 dump_printf (MSG_NOTE, "\n");
6971 }
6972
6973 if (gimple_has_volatile_ops (stmt))
6974 {
6975 if (dump_enabled_p ())
6976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6977 "not vectorized: stmt has volatile operands\n");
6978
6979 return false;
6980 }
6981
6982 /* Skip stmts that do not need to be vectorized. In loops this is expected
6983 to include:
6984 - the COND_EXPR which is the loop exit condition
6985 - any LABEL_EXPRs in the loop
6986 - computations that are used only for array indexing or loop control.
6987 In basic blocks we only analyze statements that are a part of some SLP
6988      instance; therefore, all the statements are relevant.
6989
6990      The pattern statement needs to be analyzed instead of the original
6991      statement if the original statement is not relevant.  Otherwise, we
6992      analyze both statements.  In basic blocks we are called from some SLP
6993      instance traversal; in that case don't analyze pattern stmts separately,
6994      as the pattern stmts will already be part of the SLP instance.  */
6995
6996 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6997 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6998 && !STMT_VINFO_LIVE_P (stmt_info))
6999 {
7000 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7001 && pattern_stmt
7002 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7003 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7004 {
7005 /* Analyze PATTERN_STMT instead of the original stmt. */
7006 stmt = pattern_stmt;
7007 stmt_info = vinfo_for_stmt (pattern_stmt);
7008 if (dump_enabled_p ())
7009 {
7010 dump_printf_loc (MSG_NOTE, vect_location,
7011 "==> examining pattern statement: ");
7012 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7013 dump_printf (MSG_NOTE, "\n");
7014 }
7015 }
7016 else
7017 {
7018 if (dump_enabled_p ())
7019 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7020
7021 return true;
7022 }
7023 }
7024 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7025 && node == NULL
7026 && pattern_stmt
7027 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7028 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7029 {
7030 /* Analyze PATTERN_STMT too. */
7031 if (dump_enabled_p ())
7032 {
7033 dump_printf_loc (MSG_NOTE, vect_location,
7034 "==> examining pattern statement: ");
7035 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7036 dump_printf (MSG_NOTE, "\n");
7037 }
7038
7039 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7040 return false;
7041 }
7042
7043 if (is_pattern_stmt_p (stmt_info)
7044 && node == NULL
7045 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7046 {
7047 gimple_stmt_iterator si;
7048
7049 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7050 {
7051 gimple pattern_def_stmt = gsi_stmt (si);
7052 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7053 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7054 {
7055 /* Analyze def stmt of STMT if it's a pattern stmt. */
7056 if (dump_enabled_p ())
7057 {
7058 dump_printf_loc (MSG_NOTE, vect_location,
7059 "==> examining pattern def statement: ");
7060 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7061 dump_printf (MSG_NOTE, "\n");
7062 }
7063
7064 if (!vect_analyze_stmt (pattern_def_stmt,
7065 need_to_vectorize, node))
7066 return false;
7067 }
7068 }
7069 }
7070
7071 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7072 {
7073 case vect_internal_def:
7074 break;
7075
7076 case vect_reduction_def:
7077 case vect_nested_cycle:
7078 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7079 || relevance == vect_used_in_outer_by_reduction
7080 || relevance == vect_unused_in_scope));
7081 break;
7082
7083 case vect_induction_def:
7084 case vect_constant_def:
7085 case vect_external_def:
7086 case vect_unknown_def_type:
7087 default:
7088 gcc_unreachable ();
7089 }
7090
7091 if (bb_vinfo)
7092 {
7093 gcc_assert (PURE_SLP_STMT (stmt_info));
7094
7095 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7096 if (dump_enabled_p ())
7097 {
7098 dump_printf_loc (MSG_NOTE, vect_location,
7099 "get vectype for scalar type: ");
7100 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7101 dump_printf (MSG_NOTE, "\n");
7102 }
7103
7104 vectype = get_vectype_for_scalar_type (scalar_type);
7105 if (!vectype)
7106 {
7107 if (dump_enabled_p ())
7108 {
7109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7110 "not SLPed: unsupported data-type ");
7111 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7112 scalar_type);
7113 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7114 }
7115 return false;
7116 }
7117
7118 if (dump_enabled_p ())
7119 {
7120 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7121 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7122 dump_printf (MSG_NOTE, "\n");
7123 }
7124
7125 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7126 }
7127
7128 if (STMT_VINFO_RELEVANT_P (stmt_info))
7129 {
7130 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7131 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7132 || (is_gimple_call (stmt)
7133 && gimple_call_lhs (stmt) == NULL_TREE));
7134 *need_to_vectorize = true;
7135 }
7136
7137 ok = true;
7138 if (!bb_vinfo
7139 && (STMT_VINFO_RELEVANT_P (stmt_info)
7140 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7141 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7142 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7143 || vectorizable_shift (stmt, NULL, NULL, NULL)
7144 || vectorizable_operation (stmt, NULL, NULL, NULL)
7145 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7146 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7147 || vectorizable_call (stmt, NULL, NULL, NULL)
7148 || vectorizable_store (stmt, NULL, NULL, NULL)
7149 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7150 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7151 else
7152 {
7153 if (bb_vinfo)
7154 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7155 || vectorizable_conversion (stmt, NULL, NULL, node)
7156 || vectorizable_shift (stmt, NULL, NULL, node)
7157 || vectorizable_operation (stmt, NULL, NULL, node)
7158 || vectorizable_assignment (stmt, NULL, NULL, node)
7159 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7160 || vectorizable_call (stmt, NULL, NULL, node)
7161 || vectorizable_store (stmt, NULL, NULL, node)
7162 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7163 }
7164
7165 if (!ok)
7166 {
7167 if (dump_enabled_p ())
7168 {
7169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7170 "not vectorized: relevant stmt not ");
7171 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7172 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7173 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7174 }
7175
7176 return false;
7177 }
7178
7179 if (bb_vinfo)
7180 return true;
7181
7182 /* Stmts that are (also) "live" (i.e., used outside the loop)
7183 need extra handling, except for vectorizable reductions. */
7184 if (STMT_VINFO_LIVE_P (stmt_info)
7185 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7186 ok = vectorizable_live_operation (stmt, NULL, NULL);
7187
7188 if (!ok)
7189 {
7190 if (dump_enabled_p ())
7191 {
7192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7193 "not vectorized: live stmt not ");
7194 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7195 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7196 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7197 }
7198
7199 return false;
7200 }
7201
7202 return true;
7203 }
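/* Illustrative sketch (editorial, not part of this file's code): the
   vectorizable_* routines called from vect_analyze_stmt above follow a
   two-phase contract.  During analysis they receive a NULL iterator and a
   NULL vec_stmt slot and only check feasibility; during transformation
   (vect_transform_stmt below) they receive a real iterator and a &vec_stmt
   slot and emit the vector code.  The helper below only demonstrates the
   two call forms and is not used anywhere.  */

bool
vect_sketch_analyze_then_transform (gimple stmt, gimple_stmt_iterator *gsi,
                                    slp_tree node)
{
  gimple vec_stmt = NULL;

  /* Analysis phase: nothing is emitted, only feasibility is checked.  */
  if (!vectorizable_operation (stmt, NULL, NULL, node))
    return false;

  /* Transformation phase: the vector stmt is created at GSI and returned
     through VEC_STMT.  */
  return vectorizable_operation (stmt, gsi, &vec_stmt, node);
}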
7204
7205
7206 /* Function vect_transform_stmt.
7207
7208 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7209
7210 bool
7211 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7212 bool *grouped_store, slp_tree slp_node,
7213 slp_instance slp_node_instance)
7214 {
7215 bool is_store = false;
7216 gimple vec_stmt = NULL;
7217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7218 bool done;
7219
7220 switch (STMT_VINFO_TYPE (stmt_info))
7221 {
7222 case type_demotion_vec_info_type:
7223 case type_promotion_vec_info_type:
7224 case type_conversion_vec_info_type:
7225 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7226 gcc_assert (done);
7227 break;
7228
7229 case induc_vec_info_type:
7230 gcc_assert (!slp_node);
7231 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7232 gcc_assert (done);
7233 break;
7234
7235 case shift_vec_info_type:
7236 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7237 gcc_assert (done);
7238 break;
7239
7240 case op_vec_info_type:
7241 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7242 gcc_assert (done);
7243 break;
7244
7245 case assignment_vec_info_type:
7246 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7247 gcc_assert (done);
7248 break;
7249
7250 case load_vec_info_type:
7251 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7252 slp_node_instance);
7253 gcc_assert (done);
7254 break;
7255
7256 case store_vec_info_type:
7257 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7258 gcc_assert (done);
7259 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7260 {
7261 /* In case of interleaving, the whole chain is vectorized when the
7262 last store in the chain is reached. Store stmts before the last
7263 one are skipped, and their stmt_vec_info shouldn't be freed
7264 in the meantime. */
7265 *grouped_store = true;
7266 if (STMT_VINFO_VEC_STMT (stmt_info))
7267 is_store = true;
7268 }
7269 else
7270 is_store = true;
7271 break;
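/* Illustrative example (editorial, with hypothetical array names): a
   grouped (interleaved) store chain in scalar source form.  Only when the
   last store of the group is reached is the whole chain vectorized, which
   is why the earlier members are skipped above:

     for (i = 0; i < n; i++)
       {
         a[2 * i]     = b[i];     skipped, group not yet complete
         a[2 * i + 1] = c[i];     last store, vectorizes the whole chain
       }
 */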
7272
7273 case condition_vec_info_type:
7274 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7275 gcc_assert (done);
7276 break;
7277
7278 case call_vec_info_type:
7279 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7280 stmt = gsi_stmt (*gsi);
7281 if (is_gimple_call (stmt)
7282 && gimple_call_internal_p (stmt)
7283 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7284 is_store = true;
7285 break;
7286
7287 case call_simd_clone_vec_info_type:
7288 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7289 stmt = gsi_stmt (*gsi);
7290 break;
7291
7292 case reduc_vec_info_type:
7293 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7294 gcc_assert (done);
7295 break;
7296
7297 default:
7298 if (!STMT_VINFO_LIVE_P (stmt_info))
7299 {
7300 if (dump_enabled_p ())
7301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7302 "stmt not supported.\n");
7303 gcc_unreachable ();
7304 }
7305 }
7306
7307 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7308 is being vectorized, but outside the immediately enclosing loop. */
7309 if (vec_stmt
7310 && STMT_VINFO_LOOP_VINFO (stmt_info)
7311 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7312 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7313 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7314 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7315 || STMT_VINFO_RELEVANT (stmt_info) ==
7316 vect_used_in_outer_by_reduction))
7317 {
7318 struct loop *innerloop = LOOP_VINFO_LOOP (
7319 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7320 imm_use_iterator imm_iter;
7321 use_operand_p use_p;
7322 tree scalar_dest;
7323 gimple exit_phi;
7324
7325 if (dump_enabled_p ())
7326 dump_printf_loc (MSG_NOTE, vect_location,
7327 "Record the vdef for outer-loop vectorization.\n");
7328
7329 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7330 (to be used when vectorizing outer-loop stmts that use the DEF of
7331 STMT). */
7332 if (gimple_code (stmt) == GIMPLE_PHI)
7333 scalar_dest = PHI_RESULT (stmt);
7334 else
7335 scalar_dest = gimple_assign_lhs (stmt);
7336
7337 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7338 {
7339 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7340 {
7341 exit_phi = USE_STMT (use_p);
7342 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7343 }
7344 }
7345 }
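/* Illustrative example (editorial): the outer-loop vectorization case
   handled above, where an inner-loop def is used by the outer loop:

     for (i = 0; i < N; i++)
       {
         s = 0;
         for (j = 0; j < M; j++)     inner loop
           s += a[i][j];             inner-loop def of s
         b[i] = s;                   outer-loop use of that def
       }

   The vector def of s is recorded on the inner-loop exit PHI so that
   outer-loop statements such as the store to b[i] can retrieve it when
   they are vectorized.  */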
7346
7347 /* Handle stmts whose DEF is used outside the loop-nest that is
7348 being vectorized. */
7349 if (STMT_VINFO_LIVE_P (stmt_info)
7350 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7351 {
7352 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7353 gcc_assert (done);
7354 }
7355
7356 if (vec_stmt)
7357 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7358
7359 return is_store;
7360 }
7361
7362
7363 /* Remove a group of stores (for SLP or interleaving) and free their
7364 stmt_vec_info. */
7365
7366 void
7367 vect_remove_stores (gimple first_stmt)
7368 {
7369 gimple next = first_stmt;
7370 gimple tmp;
7371 gimple_stmt_iterator next_si;
7372
7373 while (next)
7374 {
7375 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7376
7377 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7378 if (is_pattern_stmt_p (stmt_info))
7379 next = STMT_VINFO_RELATED_STMT (stmt_info);
7380 /* Free the attached stmt_vec_info and remove the stmt. */
7381 next_si = gsi_for_stmt (next);
7382 unlink_stmt_vdef (next);
7383 gsi_remove (&next_si, true);
7384 release_defs (next);
7385 free_stmt_vec_info (next);
7386 next = tmp;
7387 }
7388 }
7389
7390
7391 /* Function new_stmt_vec_info.
7392
7393 Create and initialize a new stmt_vec_info struct for STMT. */
7394
7395 stmt_vec_info
7396 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7397 bb_vec_info bb_vinfo)
7398 {
7399 stmt_vec_info res;
7400 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7401
7402 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7403 STMT_VINFO_STMT (res) = stmt;
7404 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7405 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7406 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7407 STMT_VINFO_LIVE_P (res) = false;
7408 STMT_VINFO_VECTYPE (res) = NULL;
7409 STMT_VINFO_VEC_STMT (res) = NULL;
7410 STMT_VINFO_VECTORIZABLE (res) = true;
7411 STMT_VINFO_IN_PATTERN_P (res) = false;
7412 STMT_VINFO_RELATED_STMT (res) = NULL;
7413 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7414 STMT_VINFO_DATA_REF (res) = NULL;
7415
7416 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7417 STMT_VINFO_DR_OFFSET (res) = NULL;
7418 STMT_VINFO_DR_INIT (res) = NULL;
7419 STMT_VINFO_DR_STEP (res) = NULL;
7420 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7421
7422 if (gimple_code (stmt) == GIMPLE_PHI
7423 && is_loop_header_bb_p (gimple_bb (stmt)))
7424 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7425 else
7426 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7427
7428 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7429 STMT_SLP_TYPE (res) = loop_vect;
7430 GROUP_FIRST_ELEMENT (res) = NULL;
7431 GROUP_NEXT_ELEMENT (res) = NULL;
7432 GROUP_SIZE (res) = 0;
7433 GROUP_STORE_COUNT (res) = 0;
7434 GROUP_GAP (res) = 0;
7435 GROUP_SAME_DR_STMT (res) = NULL;
7436
7437 return res;
7438 }
7439
7440
7441 /* Create the vector used to map statements to their stmt_vec_info. */
7442
7443 void
7444 init_stmt_vec_info_vec (void)
7445 {
7446 gcc_assert (!stmt_vec_info_vec.exists ());
7447 stmt_vec_info_vec.create (50);
7448 }
7449
7450
7451 /* Free the vector used to map statements to their stmt_vec_info. */
7452
7453 void
7454 free_stmt_vec_info_vec (void)
7455 {
7456 unsigned int i;
7457 vec_void_p info;
7458 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7459 if (info != NULL)
7460 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7461 gcc_assert (stmt_vec_info_vec.exists ());
7462 stmt_vec_info_vec.release ();
7463 }
7464
7465
7466 /* Free stmt vectorization related info. */
7467
7468 void
7469 free_stmt_vec_info (gimple stmt)
7470 {
7471 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7472
7473 if (!stmt_info)
7474 return;
7475
7476 /* Check if this statement has a related "pattern stmt"
7477 (introduced by the vectorizer during the pattern recognition
7478 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7479 too. */
7480 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7481 {
7482 stmt_vec_info patt_info
7483 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7484 if (patt_info)
7485 {
7486 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7487 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7488 gimple_set_bb (patt_stmt, NULL);
7489 tree lhs = gimple_get_lhs (patt_stmt);
7490 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7491 release_ssa_name (lhs);
7492 if (seq)
7493 {
7494 gimple_stmt_iterator si;
7495 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7496 {
7497 gimple seq_stmt = gsi_stmt (si);
7498 gimple_set_bb (seq_stmt, NULL);
7499 lhs = gimple_get_lhs (seq_stmt);
7500 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7501 release_ssa_name (lhs);
7502 free_stmt_vec_info (seq_stmt);
7503 }
7504 }
7505 free_stmt_vec_info (patt_stmt);
7506 }
7507 }
7508
7509 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7510 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7511 set_vinfo_for_stmt (stmt, NULL);
7512 free (stmt_info);
7513 }
7514
7515
7516 /* Function get_vectype_for_scalar_type_and_size.
7517
7518 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7519 by the target. */
7520
7521 static tree
7522 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7523 {
7524 machine_mode inner_mode = TYPE_MODE (scalar_type);
7525 machine_mode simd_mode;
7526 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7527 int nunits;
7528 tree vectype;
7529
7530 if (nbytes == 0)
7531 return NULL_TREE;
7532
7533 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7534 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7535 return NULL_TREE;
7536
7537 /* For vector types of elements whose mode precision doesn't
7538 match their type's precision we use an element type of mode
7539 precision. The vectorization routines will have to make sure
7540 they support the proper result truncation/extension.
7541 We also make sure to build vector types with INTEGER_TYPE
7542 component type only. */
7543 if (INTEGRAL_TYPE_P (scalar_type)
7544 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7545 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7546 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7547 TYPE_UNSIGNED (scalar_type));
7548
7549 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7550 When the component mode passes the above test simply use a type
7551 corresponding to that mode. The theory is that any use that
7552 would cause problems with this will disable vectorization anyway. */
7553 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7554 && !INTEGRAL_TYPE_P (scalar_type))
7555 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7556
7557 /* We can't build a vector type of elements with alignment bigger than
7558 their size. */
7559 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7560 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7561 TYPE_UNSIGNED (scalar_type));
7562
7563 /* If we fell back to using the mode, fail if there was
7564 no scalar type for it. */
7565 if (scalar_type == NULL_TREE)
7566 return NULL_TREE;
7567
7568 /* If no size was supplied use the mode the target prefers. Otherwise
7569 look up a vector mode of the specified size. */
7570 if (size == 0)
7571 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7572 else
7573 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7574 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7575 if (nunits <= 1)
7576 return NULL_TREE;
7577
7578 vectype = build_vector_type (scalar_type, nunits);
7579
7580 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7581 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7582 return NULL_TREE;
7583
7584 return vectype;
7585 }
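/* Worked example (editorial): for SCALAR_TYPE 'int' on a typical target,
   inner_mode is SImode and nbytes is 4.  With SIZE == 16 the lookup is
   mode_for_vector (SImode, 16 / 4), i.e. V4SImode, nunits is
   GET_MODE_SIZE (V4SImode) / 4 == 4, and build_vector_type returns a
   4 x int vector type.  With SIZE == 0 the target's preferred SIMD mode is
   used instead; get_vectype_for_scalar_type below then latches that size
   into current_vector_size.  */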
7586
7587 unsigned int current_vector_size;
7588
7589 /* Function get_vectype_for_scalar_type.
7590
7591 Returns the vector type corresponding to SCALAR_TYPE as supported
7592 by the target. */
7593
7594 tree
7595 get_vectype_for_scalar_type (tree scalar_type)
7596 {
7597 tree vectype;
7598 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7599 current_vector_size);
7600 if (vectype
7601 && current_vector_size == 0)
7602 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7603 return vectype;
7604 }
7605
7606 /* Function get_same_sized_vectype
7607
7608 Returns a vector type corresponding to SCALAR_TYPE of size
7609 VECTOR_TYPE if supported by the target. */
7610
7611 tree
7612 get_same_sized_vectype (tree scalar_type, tree vector_type)
7613 {
7614 return get_vectype_for_scalar_type_and_size
7615 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7616 }
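/* Worked example (editorial): for a widening operation whose input vector
   type is 8 x short (16 bytes), get_same_sized_vectype with SCALAR_TYPE
   'int' asks for a 16-byte vector of int and therefore returns a 4 x int
   type; this is why a widening operation produces two output vectors per
   input vector (see supportable_widening_operation below).  */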
7617
7618 /* Function vect_is_simple_use.
7619
7620 Input:
7621 LOOP_VINFO - the vect info of the loop that is being vectorized.
7622 BB_VINFO - the vect info of the basic block that is being vectorized.
7623 OPERAND - operand of STMT in the loop or bb.
7624 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7625
7626 Returns whether a stmt with OPERAND can be vectorized.
7627 For loops, supportable operands are constants, loop invariants, and operands
7628 that are defined by the current iteration of the loop. Unsupportable
7629 operands are those that are defined by a previous iteration of the loop (as
7630 is the case in reduction/induction computations).
7631 For basic blocks, supportable operands are constants and bb invariants.
7632 For now, operands defined outside the basic block are not supported. */
7633
7634 bool
7635 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7636 bb_vec_info bb_vinfo, gimple *def_stmt,
7637 tree *def, enum vect_def_type *dt)
7638 {
7639 basic_block bb;
7640 stmt_vec_info stmt_vinfo;
7641 struct loop *loop = NULL;
7642
7643 if (loop_vinfo)
7644 loop = LOOP_VINFO_LOOP (loop_vinfo);
7645
7646 *def_stmt = NULL;
7647 *def = NULL_TREE;
7648
7649 if (dump_enabled_p ())
7650 {
7651 dump_printf_loc (MSG_NOTE, vect_location,
7652 "vect_is_simple_use: operand ");
7653 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7654 dump_printf (MSG_NOTE, "\n");
7655 }
7656
7657 if (CONSTANT_CLASS_P (operand))
7658 {
7659 *dt = vect_constant_def;
7660 return true;
7661 }
7662
7663 if (is_gimple_min_invariant (operand))
7664 {
7665 *def = operand;
7666 *dt = vect_external_def;
7667 return true;
7668 }
7669
7670 if (TREE_CODE (operand) == PAREN_EXPR)
7671 {
7672 if (dump_enabled_p ())
7673 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7674 operand = TREE_OPERAND (operand, 0);
7675 }
7676
7677 if (TREE_CODE (operand) != SSA_NAME)
7678 {
7679 if (dump_enabled_p ())
7680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7681 "not ssa-name.\n");
7682 return false;
7683 }
7684
7685 *def_stmt = SSA_NAME_DEF_STMT (operand);
7686 if (*def_stmt == NULL)
7687 {
7688 if (dump_enabled_p ())
7689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7690 "no def_stmt.\n");
7691 return false;
7692 }
7693
7694 if (dump_enabled_p ())
7695 {
7696 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7697 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7698 dump_printf (MSG_NOTE, "\n");
7699 }
7700
7701 /* An empty stmt is expected only in the case of a function argument
7702 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7703 if (gimple_nop_p (*def_stmt))
7704 {
7705 *def = operand;
7706 *dt = vect_external_def;
7707 return true;
7708 }
7709
7710 bb = gimple_bb (*def_stmt);
7711
7712 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7713 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7714 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7715 *dt = vect_external_def;
7716 else
7717 {
7718 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7719 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7720 }
7721
7722 if (*dt == vect_unknown_def_type
7723 || (stmt
7724 && *dt == vect_double_reduction_def
7725 && gimple_code (stmt) != GIMPLE_PHI))
7726 {
7727 if (dump_enabled_p ())
7728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7729 "Unsupported pattern.\n");
7730 return false;
7731 }
7732
7733 if (dump_enabled_p ())
7734 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7735
7736 switch (gimple_code (*def_stmt))
7737 {
7738 case GIMPLE_PHI:
7739 *def = gimple_phi_result (*def_stmt);
7740 break;
7741
7742 case GIMPLE_ASSIGN:
7743 *def = gimple_assign_lhs (*def_stmt);
7744 break;
7745
7746 case GIMPLE_CALL:
7747 *def = gimple_call_lhs (*def_stmt);
7748 if (*def != NULL)
7749 break;
7750 /* FALLTHRU */
7751 default:
7752 if (dump_enabled_p ())
7753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7754 "unsupported defining stmt:\n");
7755 return false;
7756 }
7757
7758 return true;
7759 }
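/* Illustrative sketch (editorial, not used anywhere): a typical call to
   vect_is_simple_use when analyzing one operand of STMT inside a loop
   being vectorized, showing how the output parameters are consumed.  */

bool
vect_sketch_check_operand (tree op, gimple stmt, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  /* Constants and loop invariants need no vector def; internal defs are
     vectorized, and their vector type can be obtained with
     vect_is_simple_use_1.  */
  return (dt == vect_constant_def
          || dt == vect_external_def
          || dt == vect_internal_def);
}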
7760
7761 /* Function vect_is_simple_use_1.
7762
7763 Same as vect_is_simple_use but also determines the vector operand
7764 type of OPERAND and stores it to *VECTYPE. If the definition of
7765 OPERAND is vect_uninitialized_def, vect_constant_def or
7766 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7767 is responsible for computing the best suited vector type for the
7768 scalar operand. */
7769
7770 bool
7771 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7772 bb_vec_info bb_vinfo, gimple *def_stmt,
7773 tree *def, enum vect_def_type *dt, tree *vectype)
7774 {
7775 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7776 def, dt))
7777 return false;
7778
7779 /* Now get a vector type if the def is internal, otherwise supply
7780 NULL_TREE and leave it up to the caller to figure out a proper
7781 type for the use stmt. */
7782 if (*dt == vect_internal_def
7783 || *dt == vect_induction_def
7784 || *dt == vect_reduction_def
7785 || *dt == vect_double_reduction_def
7786 || *dt == vect_nested_cycle)
7787 {
7788 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7789
7790 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7791 && !STMT_VINFO_RELEVANT (stmt_info)
7792 && !STMT_VINFO_LIVE_P (stmt_info))
7793 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7794
7795 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7796 gcc_assert (*vectype != NULL_TREE);
7797 }
7798 else if (*dt == vect_uninitialized_def
7799 || *dt == vect_constant_def
7800 || *dt == vect_external_def)
7801 *vectype = NULL_TREE;
7802 else
7803 gcc_unreachable ();
7804
7805 return true;
7806 }
7807
7808
7809 /* Function supportable_widening_operation
7810
7811 Check whether an operation represented by the code CODE is a
7812 widening operation that is supported by the target platform in
7813 vector form (i.e., when operating on arguments of type VECTYPE_IN
7814 producing a result of type VECTYPE_OUT).
7815
7816 Widening operations we currently support are NOP (CONVERT), FLOAT
7817 and WIDEN_MULT. This function checks if these operations are supported
7818 by the target platform either directly (via vector tree-codes), or via
7819 target builtins.
7820
7821 Output:
7822 - CODE1 and CODE2 are codes of vector operations to be used when
7823 vectorizing the operation, if available.
7824 - MULTI_STEP_CVT determines the number of required intermediate steps in
7825 case of multi-step conversion (like char->short->int - in that case
7826 MULTI_STEP_CVT will be 1).
7827 - INTERM_TYPES contains the intermediate type required to perform the
7828 widening operation (short in the above example). */
7829
7830 bool
7831 supportable_widening_operation (enum tree_code code, gimple stmt,
7832 tree vectype_out, tree vectype_in,
7833 enum tree_code *code1, enum tree_code *code2,
7834 int *multi_step_cvt,
7835 vec<tree> *interm_types)
7836 {
7837 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7838 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7839 struct loop *vect_loop = NULL;
7840 machine_mode vec_mode;
7841 enum insn_code icode1, icode2;
7842 optab optab1, optab2;
7843 tree vectype = vectype_in;
7844 tree wide_vectype = vectype_out;
7845 enum tree_code c1, c2;
7846 int i;
7847 tree prev_type, intermediate_type;
7848 machine_mode intermediate_mode, prev_mode;
7849 optab optab3, optab4;
7850
7851 *multi_step_cvt = 0;
7852 if (loop_info)
7853 vect_loop = LOOP_VINFO_LOOP (loop_info);
7854
7855 switch (code)
7856 {
7857 case WIDEN_MULT_EXPR:
7858 /* The result of a vectorized widening operation usually requires
7859 two vectors (because the widened results do not fit into one vector).
7860 The generated vector results would normally be expected to be
7861 generated in the same order as in the original scalar computation,
7862 i.e. if 8 results are generated in each vector iteration, they are
7863 to be organized as follows:
7864 vect1: [res1,res2,res3,res4],
7865 vect2: [res5,res6,res7,res8].
7866
7867 However, in the special case that the result of the widening
7868 operation is used in a reduction computation only, the order doesn't
7869 matter (because when vectorizing a reduction we change the order of
7870 the computation). Some targets can take advantage of this and
7871 generate more efficient code. For example, targets like Altivec,
7872 that support widen_mult using a sequence of {mult_even,mult_odd}
7873 generate the following vectors:
7874 vect1: [res1,res3,res5,res7],
7875 vect2: [res2,res4,res6,res8].
7876
7877 When vectorizing outer-loops, we execute the inner-loop sequentially
7878 (each vectorized inner-loop iteration contributes to VF outer-loop
7879 iterations in parallel). We therefore don't allow changing the
7880 order of the computation in the inner-loop during outer-loop
7881 vectorization. */
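/* Illustrative example (editorial): the reduction-only case described
   above, where the even/odd order is acceptable because the widened
   products feed nothing but the reduction:

     for (i = 0; i < n; i++)
       s += (int) a[i] * (int) b[i];

   Here vect1/vect2 may hold [res1,res3,res5,res7] and
   [res2,res4,res6,res8] without changing the final sum.  */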
7882 /* TODO: Another case in which order doesn't *really* matter is when we
7883 widen and then contract again, e.g. (short)((int)x * y >> 8).
7884 Normally, pack_trunc performs an even/odd permute, whereas the
7885 repack from an even/odd expansion would be an interleave, which
7886 would be significantly simpler for e.g. AVX2. */
7887 /* In any case, in order to avoid duplicating the code below, recurse
7888 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7889 are properly set up for the caller. If we fail, we'll continue with
7890 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7891 if (vect_loop
7892 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7893 && !nested_in_vect_loop_p (vect_loop, stmt)
7894 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7895 stmt, vectype_out, vectype_in,
7896 code1, code2, multi_step_cvt,
7897 interm_types))
7898 {
7899 /* Elements in a vector with vect_used_by_reduction property cannot
7900 be reordered if the use chain with this property does not have the
7901 same operation. One such example is s += a * b, where elements
7902 in a and b cannot be reordered. Here we check if the vector defined
7903 by STMT is only directly used in the reduction statement. */
7904 tree lhs = gimple_assign_lhs (stmt);
7905 use_operand_p dummy;
7906 gimple use_stmt;
7907 stmt_vec_info use_stmt_info = NULL;
7908 if (single_imm_use (lhs, &dummy, &use_stmt)
7909 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7910 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7911 return true;
7912 }
7913 c1 = VEC_WIDEN_MULT_LO_EXPR;
7914 c2 = VEC_WIDEN_MULT_HI_EXPR;
7915 break;
7916
7917 case VEC_WIDEN_MULT_EVEN_EXPR:
7918 /* Support the recursion induced just above. */
7919 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7920 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7921 break;
7922
7923 case WIDEN_LSHIFT_EXPR:
7924 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7925 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7926 break;
7927
7928 CASE_CONVERT:
7929 c1 = VEC_UNPACK_LO_EXPR;
7930 c2 = VEC_UNPACK_HI_EXPR;
7931 break;
7932
7933 case FLOAT_EXPR:
7934 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7935 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7936 break;
7937
7938 case FIX_TRUNC_EXPR:
7939 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7940 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7941 computing the operation. */
7942 return false;
7943
7944 default:
7945 gcc_unreachable ();
7946 }
7947
7948 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7949 {
7950 enum tree_code ctmp = c1;
7951 c1 = c2;
7952 c2 = ctmp;
7953 }
7954
7955 if (code == FIX_TRUNC_EXPR)
7956 {
7957 /* The signedness is determined from output operand. */
7958 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7959 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7960 }
7961 else
7962 {
7963 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7964 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7965 }
7966
7967 if (!optab1 || !optab2)
7968 return false;
7969
7970 vec_mode = TYPE_MODE (vectype);
7971 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7972 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7973 return false;
7974
7975 *code1 = c1;
7976 *code2 = c2;
7977
7978 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7979 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7980 return true;
7981
7982 /* Check if it's a multi-step conversion that can be done using intermediate
7983 types. */
7984
7985 prev_type = vectype;
7986 prev_mode = vec_mode;
7987
7988 if (!CONVERT_EXPR_CODE_P (code))
7989 return false;
7990
7991 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7992 intermediate steps in the promotion sequence. We try up to
7993 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7994 not. */
7995 interm_types->create (MAX_INTERM_CVT_STEPS);
7996 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7997 {
7998 intermediate_mode = insn_data[icode1].operand[0].mode;
7999 intermediate_type
8000 = lang_hooks.types.type_for_mode (intermediate_mode,
8001 TYPE_UNSIGNED (prev_type));
8002 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8003 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8004
8005 if (!optab3 || !optab4
8006 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8007 || insn_data[icode1].operand[0].mode != intermediate_mode
8008 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8009 || insn_data[icode2].operand[0].mode != intermediate_mode
8010 || ((icode1 = optab_handler (optab3, intermediate_mode))
8011 == CODE_FOR_nothing)
8012 || ((icode2 = optab_handler (optab4, intermediate_mode))
8013 == CODE_FOR_nothing))
8014 break;
8015
8016 interm_types->quick_push (intermediate_type);
8017 (*multi_step_cvt)++;
8018
8019 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8020 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8021 return true;
8022
8023 prev_type = intermediate_type;
8024 prev_mode = intermediate_mode;
8025 }
8026
8027 interm_types->release ();
8028 return false;
8029 }
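/* Illustrative example (editorial): scalar source for which
   supportable_widening_operation above is queried with
   CODE == WIDEN_MULT_EXPR, VECTYPE_IN a vector of short and VECTYPE_OUT a
   vector of int.  A target supporting only the LO/HI forms answers with
   CODE1/CODE2 == VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR and
   MULTI_STEP_CVT == 0; a char -> int conversion (CASE_CONVERT) on a target
   that unpacks one step at a time instead reports MULTI_STEP_CVT == 1 with
   a vector of short pushed onto INTERM_TYPES.  The function below is plain
   user code, not part of the vectorizer.  */

void
widen_mult_example (int *r, const short *a, const short *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    r[i] = (int) a[i] * (int) b[i];
}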
8030
8031
8032 /* Function supportable_narrowing_operation
8033
8034 Check whether an operation represented by the code CODE is a
8035 narrowing operation that is supported by the target platform in
8036 vector form (i.e., when operating on arguments of type VECTYPE_IN
8037 and producing a result of type VECTYPE_OUT).
8038
8039 Narrowing operations we currently support are NOP (CONVERT) and
8040 FIX_TRUNC. This function checks if these operations are supported by
8041 the target platform directly via vector tree-codes.
8042
8043 Output:
8044 - CODE1 is the code of a vector operation to be used when
8045 vectorizing the operation, if available.
8046 - MULTI_STEP_CVT determines the number of required intermediate steps in
8047 case of multi-step conversion (like int->short->char - in that case
8048 MULTI_STEP_CVT will be 1).
8049 - INTERM_TYPES contains the intermediate type required to perform the
8050 narrowing operation (short in the above example). */
8051
8052 bool
8053 supportable_narrowing_operation (enum tree_code code,
8054 tree vectype_out, tree vectype_in,
8055 enum tree_code *code1, int *multi_step_cvt,
8056 vec<tree> *interm_types)
8057 {
8058 machine_mode vec_mode;
8059 enum insn_code icode1;
8060 optab optab1, interm_optab;
8061 tree vectype = vectype_in;
8062 tree narrow_vectype = vectype_out;
8063 enum tree_code c1;
8064 tree intermediate_type;
8065 machine_mode intermediate_mode, prev_mode;
8066 int i;
8067 bool uns;
8068
8069 *multi_step_cvt = 0;
8070 switch (code)
8071 {
8072 CASE_CONVERT:
8073 c1 = VEC_PACK_TRUNC_EXPR;
8074 break;
8075
8076 case FIX_TRUNC_EXPR:
8077 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8078 break;
8079
8080 case FLOAT_EXPR:
8081 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8082 tree code and optabs used for computing the operation. */
8083 return false;
8084
8085 default:
8086 gcc_unreachable ();
8087 }
8088
8089 if (code == FIX_TRUNC_EXPR)
8090 /* The signedness is determined from output operand. */
8091 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8092 else
8093 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8094
8095 if (!optab1)
8096 return false;
8097
8098 vec_mode = TYPE_MODE (vectype);
8099 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8100 return false;
8101
8102 *code1 = c1;
8103
8104 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8105 return true;
8106
8107 /* Check if it's a multi-step conversion that can be done using intermediate
8108 types. */
8109 prev_mode = vec_mode;
8110 if (code == FIX_TRUNC_EXPR)
8111 uns = TYPE_UNSIGNED (vectype_out);
8112 else
8113 uns = TYPE_UNSIGNED (vectype);
8114
8115 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8116 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8117 costly than signed. */
8118 if (code == FIX_TRUNC_EXPR && uns)
8119 {
8120 enum insn_code icode2;
8121
8122 intermediate_type
8123 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8124 interm_optab
8125 = optab_for_tree_code (c1, intermediate_type, optab_default);
8126 if (interm_optab != unknown_optab
8127 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8128 && insn_data[icode1].operand[0].mode
8129 == insn_data[icode2].operand[0].mode)
8130 {
8131 uns = false;
8132 optab1 = interm_optab;
8133 icode1 = icode2;
8134 }
8135 }
8136
8137 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8138 intermediate steps in the demotion sequence. We try up to
8139 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8140 interm_types->create (MAX_INTERM_CVT_STEPS);
8141 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8142 {
8143 intermediate_mode = insn_data[icode1].operand[0].mode;
8144 intermediate_type
8145 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8146 interm_optab
8147 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8148 optab_default);
8149 if (!interm_optab
8150 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8151 || insn_data[icode1].operand[0].mode != intermediate_mode
8152 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8153 == CODE_FOR_nothing))
8154 break;
8155
8156 interm_types->quick_push (intermediate_type);
8157 (*multi_step_cvt)++;
8158
8159 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8160 return true;
8161
8162 prev_mode = intermediate_mode;
8163 optab1 = interm_optab;
8164 }
8165
8166 interm_types->release ();
8167 return false;
8168 }
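/* Illustrative example (editorial): scalar source for which
   supportable_narrowing_operation above is queried with a conversion code,
   VECTYPE_IN a vector of int and VECTYPE_OUT a vector of char.  A target
   that packs one step at a time answers with CODE1 == VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT == 1 and a vector of short in INTERM_TYPES, matching the
   int->short->char example in the comment above.  The function below is
   plain user code, not part of the vectorizer.  */

void
narrow_convert_example (signed char *r, const int *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    r[i] = (signed char) a[i];
}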