gcc/omp-expand.c
1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5Copyright (C) 2005-2019 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "gomp-constants.h"
57#include "gimple-pretty-print.h"
58#include "hsa-common.h"
59#include "stringpool.h"
60#include "attribs.h"
61
62/* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66struct omp_region
67{
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
106};
107
108static struct omp_region *root_omp_region;
109static bool omp_any_child_fn_dumped;
110
111static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113static gphi *find_phi_with_arg_on_edge (tree, edge);
114static void expand_omp (struct omp_region *region);
115
116/* Return true if REGION is a combined parallel+workshare region. */
117
118static inline bool
119is_combined_parallel (struct omp_region *region)
120{
121 return region->is_combined_parallel;
122}
123
124/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
134
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
137
138 Is lowered into:
139
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
143
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
149
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
154
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
160
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
165
166static bool
167workshare_safe_to_combine_p (basic_block ws_entry_bb)
168{
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
174
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
178
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
185
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
196
197 return true;
198}
199
200/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
202
203static tree
204omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205{
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
208
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
212
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
218}
219
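/* Illustration (not from the original source): for a nonzero chunk size the
   function above builds the equivalent of

       chunk_size = (chunk_size + vf - 1) & -vf;

   so with an assumed vectorization factor vf = 8, a requested chunk of 5 is
   rounded up to 8, while a chunk of 0 (runtime default) is returned as-is.  */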
220/* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
223
224static vec<tree, va_gc> *
225get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226{
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
230
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 {
233 struct omp_for_data fd;
234 tree n1, n2;
235
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
239
240 if (gimple_omp_for_combined_into_p (for_stmt))
241 {
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
251 }
252
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
257
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
263
264 if (fd.chunk_size)
265 {
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
269 }
270
271 return ws_args;
272 }
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 {
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
283 }
284
285 gcc_unreachable ();
286}
287
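/* For a combined parallel+for, the extra arguments collected above are
   spliced into the GOMP_parallel_loop_* call that expand_parallel_call
   emits; roughly, as a sketch with illustrative names and the exact entry
   point depending on the schedule kind:

       #pragma omp parallel for schedule (dynamic, 4)
       for (i = n1; i < n2; i += step) ...

       GOMP_parallel_loop_dynamic (fn, &.omp_data_o, num_threads,
                                   n1, n2, step, chunk, flags);

   For a sections region the single extra argument is the section count.  */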
288/* Discover whether REGION is a combined parallel+workshare region. */
289
290static void
291determine_parallel_type (struct omp_region *region)
292{
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
295
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
300
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
306
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
313
314 /* Give up for task reductions on the parallel; while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
320
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
327 {
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
330
331 if (region->inner->type == GIMPLE_OMP_FOR)
332 {
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
349 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
350 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
351 return;
352 }
353 else if (region->inner->type == GIMPLE_OMP_SECTIONS
354 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
355 OMP_CLAUSE__REDUCTEMP_)
356 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
357 OMP_CLAUSE__CONDTEMP_)))
358 return;
359
360 region->is_combined_parallel = true;
361 region->inner->is_combined_parallel = true;
362 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
363 }
364}
365
366/* Debugging dumps for parallel regions. */
367void dump_omp_region (FILE *, struct omp_region *, int);
368void debug_omp_region (struct omp_region *);
369void debug_all_omp_regions (void);
370
371/* Dump the parallel region tree rooted at REGION. */
372
373void
374dump_omp_region (FILE *file, struct omp_region *region, int indent)
375{
376 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
377 gimple_code_name[region->type]);
378
379 if (region->inner)
380 dump_omp_region (file, region->inner, indent + 4);
381
382 if (region->cont)
383 {
384 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
385 region->cont->index);
386 }
387
388 if (region->exit)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
390 region->exit->index);
391 else
392 fprintf (file, "%*s[no exit marker]\n", indent, "");
393
394 if (region->next)
395 dump_omp_region (file, region->next, indent);
396}
397
398DEBUG_FUNCTION void
399debug_omp_region (struct omp_region *region)
400{
401 dump_omp_region (stderr, region, 0);
402}
403
404DEBUG_FUNCTION void
405debug_all_omp_regions (void)
406{
407 dump_omp_region (stderr, root_omp_region, 0);
408}
409
410/* Create a new parallel region starting at STMT inside region PARENT. */
411
412static struct omp_region *
413new_omp_region (basic_block bb, enum gimple_code type,
414 struct omp_region *parent)
415{
416 struct omp_region *region = XCNEW (struct omp_region);
417
418 region->outer = parent;
419 region->entry = bb;
420 region->type = type;
421
422 if (parent)
423 {
424 /* This is a nested region. Add it to the list of inner
425 regions in PARENT. */
426 region->next = parent->inner;
427 parent->inner = region;
428 }
429 else
430 {
431 /* This is a toplevel region. Add it to the list of toplevel
432 regions in ROOT_OMP_REGION. */
433 region->next = root_omp_region;
434 root_omp_region = region;
435 }
436
437 return region;
438}
439
440/* Release the memory associated with the region tree rooted at REGION. */
441
442static void
443free_omp_region_1 (struct omp_region *region)
444{
445 struct omp_region *i, *n;
446
447 for (i = region->inner; i ; i = n)
448 {
449 n = i->next;
450 free_omp_region_1 (i);
451 }
452
453 free (region);
454}
455
456/* Release the memory for the entire omp region tree. */
457
458void
459omp_free_regions (void)
460{
461 struct omp_region *r, *n;
462 for (r = root_omp_region; r ; r = n)
463 {
464 n = r->next;
465 free_omp_region_1 (r);
466 }
467 root_omp_region = NULL;
468}
469
470/* A convenience function to build an empty GIMPLE_COND with just the
471 condition. */
472
473static gcond *
474gimple_build_cond_empty (tree cond)
475{
476 enum tree_code pred_code;
477 tree lhs, rhs;
478
479 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
480 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
481}
482
483/* Return true if a parallel REGION is within a declare target function or
484 within a target region and is not a part of a gridified target. */
485
486static bool
487parallel_needs_hsa_kernel_p (struct omp_region *region)
488{
489 bool indirect = false;
490 for (region = region->outer; region; region = region->outer)
491 {
492 if (region->type == GIMPLE_OMP_PARALLEL)
493 indirect = true;
494 else if (region->type == GIMPLE_OMP_TARGET)
495 {
496 gomp_target *tgt_stmt
497 = as_a <gomp_target *> (last_stmt (region->entry));
498
499 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
500 OMP_CLAUSE__GRIDDIM_))
501 return indirect;
502 else
503 return true;
504 }
505 }
506
507 if (lookup_attribute ("omp declare target",
508 DECL_ATTRIBUTES (current_function_decl)))
509 return true;
510
511 return false;
512}
513
514/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
515 Add CHILD_FNDECL to decl chain of the supercontext of the block
516 ENTRY_BLOCK - this is the block which originally contained the
517 code from which CHILD_FNDECL was created.
518
519 Together, these actions ensure that the debug info for the outlined
520 function will be emitted with the correct lexical scope. */
521
522static void
523adjust_context_and_scope (struct omp_region *region, tree entry_block,
524 tree child_fndecl)
525{
526 tree parent_fndecl = NULL_TREE;
527 gimple *entry_stmt;
528 /* OMP expansion expands inner regions before outer ones, so if
529 we e.g. have explicit task region nested in parallel region, when
530 expanding the task region current_function_decl will be the original
531 source function, but we actually want to use as context the child
532 function of the parallel. */
533 for (region = region->outer;
534 region && parent_fndecl == NULL_TREE; region = region->outer)
535 switch (region->type)
536 {
537 case GIMPLE_OMP_PARALLEL:
538 case GIMPLE_OMP_TASK:
539 case GIMPLE_OMP_TEAMS:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
542 break;
543 case GIMPLE_OMP_TARGET:
544 entry_stmt = last_stmt (region->entry);
545 parent_fndecl
546 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
547 break;
548 default:
549 break;
550 }
551
552 if (parent_fndecl == NULL_TREE)
553 parent_fndecl = current_function_decl;
554 DECL_CONTEXT (child_fndecl) = parent_fndecl;
555
556 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
557 {
558 tree b = BLOCK_SUPERCONTEXT (entry_block);
559 if (TREE_CODE (b) == BLOCK)
560 {
561 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
562 BLOCK_VARS (b) = child_fndecl;
563 }
564 }
565}
566
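/* The situation described above arises for nestings such as (illustrative
   names, not actual dump output):

       void foo (void)
       {
       #pragma omp parallel        outlined into, say, foo._omp_fn.0
         {
       #pragma omp task            outlined first; its DECL_CONTEXT must be
           work ();                foo._omp_fn.0, not foo
         }
       }

   because the inner task region is expanded before the enclosing parallel.  */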
567/* Build the function calls to GOMP_parallel etc to actually
568 generate the parallel operation. REGION is the parallel region
569 being expanded. BB is the block where to insert the code. WS_ARGS
570 will be set if this is a call to a combined parallel+workshare
571 construct, it contains the list of additional arguments needed by
572 the workshare construct. */
573
574static void
575expand_parallel_call (struct omp_region *region, basic_block bb,
576 gomp_parallel *entry_stmt,
577 vec<tree, va_gc> *ws_args)
578{
579 tree t, t1, t2, val, cond, c, clauses, flags;
580 gimple_stmt_iterator gsi;
581 gimple *stmt;
582 enum built_in_function start_ix;
583 int start_ix2;
584 location_t clause_loc;
585 vec<tree, va_gc> *args;
586
587 clauses = gimple_omp_parallel_clauses (entry_stmt);
588
589 /* Determine what flavor of GOMP_parallel we will be
590 emitting. */
591 start_ix = BUILT_IN_GOMP_PARALLEL;
592 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
593 if (rtmp)
594 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
595 else if (is_combined_parallel (region))
596 {
597 switch (region->inner->type)
598 {
599 case GIMPLE_OMP_FOR:
600 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
601 switch (region->inner->sched_kind)
602 {
603 case OMP_CLAUSE_SCHEDULE_RUNTIME:
604 if ((region->inner->sched_modifiers
605 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
606 start_ix2 = 6;
607 else if ((region->inner->sched_modifiers
608 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
609 start_ix2 = 7;
610 else
611 start_ix2 = 3;
612 break;
613 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
614 case OMP_CLAUSE_SCHEDULE_GUIDED:
615 if ((region->inner->sched_modifiers
616 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
617 {
618 start_ix2 = 3 + region->inner->sched_kind;
619 break;
620 }
621 /* FALLTHRU */
622 default:
623 start_ix2 = region->inner->sched_kind;
624 break;
625 }
626 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
627 start_ix = (enum built_in_function) start_ix2;
628 break;
629 case GIMPLE_OMP_SECTIONS:
630 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
631 break;
632 default:
633 gcc_unreachable ();
634 }
635 }
636
637 /* By default, the value of NUM_THREADS is zero (selected at run time)
638 and there is no conditional. */
639 cond = NULL_TREE;
640 val = build_int_cst (unsigned_type_node, 0);
641 flags = build_int_cst (unsigned_type_node, 0);
642
643 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
644 if (c)
645 cond = OMP_CLAUSE_IF_EXPR (c);
646
647 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
648 if (c)
649 {
650 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
651 clause_loc = OMP_CLAUSE_LOCATION (c);
652 }
653 else
654 clause_loc = gimple_location (entry_stmt);
655
656 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
657 if (c)
658 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
659
660 /* Ensure 'val' is of the correct type. */
661 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
662
663 /* If we found the clause 'if (cond)', build either
664 (cond != 0) or (cond ? val : 1u). */
665 if (cond)
666 {
667 cond = gimple_boolify (cond);
668
669 if (integer_zerop (val))
670 val = fold_build2_loc (clause_loc,
671 EQ_EXPR, unsigned_type_node, cond,
672 build_int_cst (TREE_TYPE (cond), 0));
673 else
674 {
675 basic_block cond_bb, then_bb, else_bb;
676 edge e, e_then, e_else;
677 tree tmp_then, tmp_else, tmp_join, tmp_var;
678
679 tmp_var = create_tmp_var (TREE_TYPE (val));
680 if (gimple_in_ssa_p (cfun))
681 {
682 tmp_then = make_ssa_name (tmp_var);
683 tmp_else = make_ssa_name (tmp_var);
684 tmp_join = make_ssa_name (tmp_var);
685 }
686 else
687 {
688 tmp_then = tmp_var;
689 tmp_else = tmp_var;
690 tmp_join = tmp_var;
691 }
692
693 e = split_block_after_labels (bb);
694 cond_bb = e->src;
695 bb = e->dest;
696 remove_edge (e);
697
698 then_bb = create_empty_bb (cond_bb);
699 else_bb = create_empty_bb (then_bb);
700 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
701 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
702
703 stmt = gimple_build_cond_empty (cond);
704 gsi = gsi_start_bb (cond_bb);
705 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
706
707 gsi = gsi_start_bb (then_bb);
708 expand_omp_build_assign (&gsi, tmp_then, val, true);
709
710 gsi = gsi_start_bb (else_bb);
711 expand_omp_build_assign (&gsi, tmp_else,
712 build_int_cst (unsigned_type_node, 1),
713 true);
714
715 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
716 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
717 add_bb_to_loop (then_bb, cond_bb->loop_father);
718 add_bb_to_loop (else_bb, cond_bb->loop_father);
719 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
720 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
721
722 if (gimple_in_ssa_p (cfun))
723 {
724 gphi *phi = create_phi_node (tmp_join, bb);
725 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
726 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
727 }
728
729 val = tmp_join;
730 }
731
732 gsi = gsi_start_bb (bb);
733 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
734 false, GSI_CONTINUE_LINKING);
735 }
736
737 gsi = gsi_last_nondebug_bb (bb);
738 t = gimple_omp_parallel_data_arg (entry_stmt);
739 if (t == NULL)
740 t1 = null_pointer_node;
741 else
742 t1 = build_fold_addr_expr (t);
743 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
744 t2 = build_fold_addr_expr (child_fndecl);
745
746 vec_alloc (args, 4 + vec_safe_length (ws_args));
747 args->quick_push (t2);
748 args->quick_push (t1);
749 args->quick_push (val);
750 if (ws_args)
751 args->splice (*ws_args);
752 args->quick_push (flags);
753
754 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
755 builtin_decl_explicit (start_ix), args);
756
757 if (rtmp)
758 {
759 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
760 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
761 fold_convert (type,
762 fold_convert (pointer_sized_int_node, t)));
763 }
764 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
765 false, GSI_CONTINUE_LINKING);
766
767 if (hsa_gen_requested_p ()
768 && parallel_needs_hsa_kernel_p (region))
769 {
770 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
771 hsa_register_kernel (child_cnode);
772 }
773}
774
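/* As a sketch of the result (illustrative names, not actual dump output):
   a parallel such as

       #pragma omp parallel if (cond) num_threads (4)
         body;

   ends up as stores of the shared data into .omp_data_o followed by a
   single call of the form

       GOMP_parallel (foo._omp_fn.0, &.omp_data_o, cond ? 4 : 1, flags);

   where the conditional expression is what the cond/val handling above
   builds.  */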
775/* Build the function call to GOMP_task to actually
776 generate the task operation. BB is the block where to insert the code. */
777
778static void
779expand_task_call (struct omp_region *region, basic_block bb,
780 gomp_task *entry_stmt)
781{
782 tree t1, t2, t3;
783 gimple_stmt_iterator gsi;
784 location_t loc = gimple_location (entry_stmt);
785
786 tree clauses = gimple_omp_task_clauses (entry_stmt);
787
788 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
789 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
790 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
791 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
792 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
793 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
794
795 unsigned int iflags
796 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
797 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
798 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
799
800 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
801 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
802 tree num_tasks = NULL_TREE;
803 bool ull = false;
804 if (taskloop_p)
805 {
806 gimple *g = last_stmt (region->outer->entry);
807 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
808 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
809 struct omp_for_data fd;
810 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
811 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
812 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
813 OMP_CLAUSE__LOOPTEMP_);
814 startvar = OMP_CLAUSE_DECL (startvar);
815 endvar = OMP_CLAUSE_DECL (endvar);
816 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
817 if (fd.loop.cond_code == LT_EXPR)
818 iflags |= GOMP_TASK_FLAG_UP;
819 tree tclauses = gimple_omp_for_clauses (g);
820 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
821 if (num_tasks)
822 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
823 else
824 {
825 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
826 if (num_tasks)
827 {
828 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
829 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
830 }
831 else
832 num_tasks = integer_zero_node;
833 }
834 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
835 if (ifc == NULL_TREE)
836 iflags |= GOMP_TASK_FLAG_IF;
837 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
838 iflags |= GOMP_TASK_FLAG_NOGROUP;
839 ull = fd.iter_type == long_long_unsigned_type_node;
840 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
841 iflags |= GOMP_TASK_FLAG_REDUCTION;
842 }
843 else if (priority)
844 iflags |= GOMP_TASK_FLAG_PRIORITY;
845
846 tree flags = build_int_cst (unsigned_type_node, iflags);
847
848 tree cond = boolean_true_node;
849 if (ifc)
850 {
851 if (taskloop_p)
852 {
853 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
854 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
855 build_int_cst (unsigned_type_node,
856 GOMP_TASK_FLAG_IF),
857 build_int_cst (unsigned_type_node, 0));
858 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
859 flags, t);
860 }
861 else
862 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
863 }
864
865 if (finalc)
866 {
867 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
868 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
869 build_int_cst (unsigned_type_node,
870 GOMP_TASK_FLAG_FINAL),
871 build_int_cst (unsigned_type_node, 0));
872 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
873 }
874 if (depend)
875 depend = OMP_CLAUSE_DECL (depend);
876 else
877 depend = build_int_cst (ptr_type_node, 0);
878 if (priority)
879 priority = fold_convert (integer_type_node,
880 OMP_CLAUSE_PRIORITY_EXPR (priority));
881 else
882 priority = integer_zero_node;
883
884 gsi = gsi_last_nondebug_bb (bb);
885 tree t = gimple_omp_task_data_arg (entry_stmt);
886 if (t == NULL)
887 t2 = null_pointer_node;
888 else
889 t2 = build_fold_addr_expr_loc (loc, t);
890 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
891 t = gimple_omp_task_copy_fn (entry_stmt);
892 if (t == NULL)
893 t3 = null_pointer_node;
894 else
895 t3 = build_fold_addr_expr_loc (loc, t);
896
897 if (taskloop_p)
898 t = build_call_expr (ull
899 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
900 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
901 11, t1, t2, t3,
902 gimple_omp_task_arg_size (entry_stmt),
903 gimple_omp_task_arg_align (entry_stmt), flags,
904 num_tasks, priority, startvar, endvar, step);
905 else
906 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
907 9, t1, t2, t3,
908 gimple_omp_task_arg_size (entry_stmt),
909 gimple_omp_task_arg_align (entry_stmt), cond, flags,
910 depend, priority);
911
912 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
913 false, GSI_CONTINUE_LINKING);
914}
915
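/* Sketch of the call built for a plain (non-taskloop) task; the argument
   order mirrors the build_call_expr above, names are illustrative:

       GOMP_task (foo._omp_fn.1, &.omp_data_o, cpyfn,
                  arg_size, arg_align, if_cond, flags, depend, priority);

   For a taskloop the GOMP_taskloop{,_ull} variant is used instead and passes
   num_tasks, priority and the start/end/step temporaries of the outer
   loop.  */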
916/* Build the function call to GOMP_taskwait_depend to actually
917 generate the taskwait operation. BB is the block where to insert the
918 code. */
919
920static void
921expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
922{
923 tree clauses = gimple_omp_task_clauses (entry_stmt);
924 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
925 if (depend == NULL_TREE)
926 return;
927
928 depend = OMP_CLAUSE_DECL (depend);
929
930 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
931 tree t
932 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
933 1, depend);
934
935 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
936 false, GSI_CONTINUE_LINKING);
937}
938
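/* That is, a depend-only taskwait such as "#pragma omp taskwait depend (in: x)"
   collapses into a single call, roughly GOMP_taskwait_depend (depend), where
   the depend array was already materialized by lowering (sketch only).  */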
939/* Build the function call to GOMP_teams_reg to actually
940 generate the host teams operation. REGION is the teams region
941 being expanded. BB is the block where to insert the code. */
942
943static void
944expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
945{
946 tree clauses = gimple_omp_teams_clauses (entry_stmt);
947 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
948 if (num_teams == NULL_TREE)
949 num_teams = build_int_cst (unsigned_type_node, 0);
950 else
951 {
952 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
953 num_teams = fold_convert (unsigned_type_node, num_teams);
954 }
955 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
956 if (thread_limit == NULL_TREE)
957 thread_limit = build_int_cst (unsigned_type_node, 0);
958 else
959 {
960 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
961 thread_limit = fold_convert (unsigned_type_node, thread_limit);
962 }
963
964 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
965 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
966 if (t == NULL)
967 t1 = null_pointer_node;
968 else
969 t1 = build_fold_addr_expr (t);
970 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
971 tree t2 = build_fold_addr_expr (child_fndecl);
972
973 vec<tree, va_gc> *args;
974 vec_alloc (args, 5);
975 args->quick_push (t2);
976 args->quick_push (t1);
977 args->quick_push (num_teams);
978 args->quick_push (thread_limit);
979 /* For future extensibility. */
980 args->quick_push (build_zero_cst (unsigned_type_node));
981
982 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
983 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
984 args);
985
986 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
987 false, GSI_CONTINUE_LINKING);
988}
989
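/* Sketch: a host "#pragma omp teams num_teams (8) thread_limit (16)" region
   becomes

       GOMP_teams_reg (foo._omp_fn.2, &.omp_data_o, 8, 16, 0);

   with the trailing zero being the for-future-extensibility flags argument
   pushed above (names are illustrative).  */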
990/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
991
992static tree
993vec2chain (vec<tree, va_gc> *v)
994{
995 tree chain = NULL_TREE, t;
996 unsigned ix;
997
998 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
999 {
1000 DECL_CHAIN (t) = chain;
1001 chain = t;
1002 }
1003
1004 return chain;
1005}
1006
1007/* Remove barriers in REGION->EXIT's block. Note that this is only
1008 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1009 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1010 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1011 removed. */
1012
1013static void
1014remove_exit_barrier (struct omp_region *region)
1015{
1016 gimple_stmt_iterator gsi;
1017 basic_block exit_bb;
1018 edge_iterator ei;
1019 edge e;
1020 gimple *stmt;
1021 int any_addressable_vars = -1;
1022
1023 exit_bb = region->exit;
1024
1025 /* If the parallel region doesn't return, we don't have REGION->EXIT
1026 block at all. */
1027 if (! exit_bb)
1028 return;
1029
1030 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1031 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1032 statements that can appear in between are extremely limited -- no
1033 memory operations at all. Here, we allow nothing at all, so the
1034 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1035 gsi = gsi_last_nondebug_bb (exit_bb);
1036 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1037 gsi_prev_nondebug (&gsi);
1038 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1039 return;
1040
1041 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1042 {
1043 gsi = gsi_last_nondebug_bb (e->src);
1044 if (gsi_end_p (gsi))
1045 continue;
1046 stmt = gsi_stmt (gsi);
1047 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1048 && !gimple_omp_return_nowait_p (stmt))
1049 {
1050 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1051 in many cases. If there could be tasks queued, the barrier
1052 might be needed to let the tasks run before some local
1053 variable of the parallel that the task uses as shared
1054 runs out of scope. The task can be spawned either
1055 from within current function (this would be easy to check)
1056 or from some function it calls and gets passed an address
1057 of such a variable. */
1058 if (any_addressable_vars < 0)
1059 {
1060 gomp_parallel *parallel_stmt
1061 = as_a <gomp_parallel *> (last_stmt (region->entry));
1062 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1063 tree local_decls, block, decl;
1064 unsigned ix;
1065
1066 any_addressable_vars = 0;
1067 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1068 if (TREE_ADDRESSABLE (decl))
1069 {
1070 any_addressable_vars = 1;
1071 break;
1072 }
1073 for (block = gimple_block (stmt);
1074 !any_addressable_vars
1075 && block
1076 && TREE_CODE (block) == BLOCK;
1077 block = BLOCK_SUPERCONTEXT (block))
1078 {
1079 for (local_decls = BLOCK_VARS (block);
1080 local_decls;
1081 local_decls = DECL_CHAIN (local_decls))
1082 if (TREE_ADDRESSABLE (local_decls))
1083 {
1084 any_addressable_vars = 1;
1085 break;
1086 }
1087 if (block == gimple_block (parallel_stmt))
1088 break;
1089 }
1090 }
1091 if (!any_addressable_vars)
1092 gimple_omp_return_set_nowait (stmt);
1093 }
1094 }
1095}
1096
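/* The barrier redundancy involved is easiest to see at the source level
   (illustrative):

       #pragma omp parallel
       {
       #pragma omp for            implicit barrier at the end of the for ...
         for (i = 0; i < n; i++)
           a[i] = f (i);
       }                          ... immediately followed by the parallel's
                                  own end-of-region barrier

   so the inner barrier can be made nowait, unless queued tasks might still
   reference addressable locals, as checked above.  */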
1097static void
1098remove_exit_barriers (struct omp_region *region)
1099{
1100 if (region->type == GIMPLE_OMP_PARALLEL)
1101 remove_exit_barrier (region);
1102
1103 if (region->inner)
1104 {
1105 region = region->inner;
1106 remove_exit_barriers (region);
1107 while (region->next)
1108 {
1109 region = region->next;
1110 remove_exit_barriers (region);
1111 }
1112 }
1113}
1114
1115/* Optimize omp_get_thread_num () and omp_get_num_threads ()
1116 calls. These can't be declared as const functions, but
1117 within one parallel body they are constant, so they can be
1118 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1119 which are declared const. Similarly for task body, except
1120 that in untied task omp_get_thread_num () can change at any task
1121 scheduling point. */
1122
1123static void
1124optimize_omp_library_calls (gimple *entry_stmt)
1125{
1126 basic_block bb;
1127 gimple_stmt_iterator gsi;
1128 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1129 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1130 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1131 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1132 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1133 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1134 OMP_CLAUSE_UNTIED) != NULL);
1135
1136 FOR_EACH_BB_FN (bb, cfun)
1137 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1138 {
1139 gimple *call = gsi_stmt (gsi);
1140 tree decl;
1141
1142 if (is_gimple_call (call)
1143 && (decl = gimple_call_fndecl (call))
1144 && DECL_EXTERNAL (decl)
1145 && TREE_PUBLIC (decl)
1146 && DECL_INITIAL (decl) == NULL)
1147 {
1148 tree built_in;
1149
1150 if (DECL_NAME (decl) == thr_num_id)
1151 {
1152 /* In #pragma omp task untied omp_get_thread_num () can change
1153 during the execution of the task region. */
1154 if (untied_task)
1155 continue;
1156 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1157 }
1158 else if (DECL_NAME (decl) == num_thr_id)
1159 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1160 else
1161 continue;
1162
1163 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1164 || gimple_call_num_args (call) != 0)
1165 continue;
1166
1167 if (flag_exceptions && !TREE_NOTHROW (decl))
1168 continue;
1169
1170 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1171 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1172 TREE_TYPE (TREE_TYPE (built_in))))
1173 continue;
1174
1175 gimple_call_set_fndecl (call, built_in);
1176 }
1177 }
1178}
1179
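/* Effect of the fndecl replacement inside an outlined body (sketch):

       n = omp_get_num_threads ();          n = __builtin_omp_get_num_threads ();
       m = omp_get_num_threads ();    ==>   m = __builtin_omp_get_num_threads ();

   The builtin is declared const, so later passes can CSE the two calls,
   which is not possible for the external library function.  */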
1180/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1181 regimplified. */
1182
1183static tree
1184expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1185{
1186 tree t = *tp;
1187
1188 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1189 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1190 return t;
1191
1192 if (TREE_CODE (t) == ADDR_EXPR)
1193 recompute_tree_invariant_for_addr_expr (t);
1194
1195 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1196 return NULL_TREE;
1197}
1198
1199/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1200
1201static void
1202expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1203 bool after)
1204{
1205 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1206 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1207 !after, after ? GSI_CONTINUE_LINKING
1208 : GSI_SAME_STMT);
1209 gimple *stmt = gimple_build_assign (to, from);
1210 if (after)
1211 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1212 else
1213 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1214 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1215 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1216 {
1217 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1218 gimple_regimplify_operands (stmt, &gsi);
1219 }
1220}
1221
1222/* Expand the OpenMP parallel or task directive starting at REGION. */
1223
1224static void
1225expand_omp_taskreg (struct omp_region *region)
1226{
1227 basic_block entry_bb, exit_bb, new_bb;
1228 struct function *child_cfun;
1229 tree child_fn, block, t;
1230 gimple_stmt_iterator gsi;
1231 gimple *entry_stmt, *stmt;
1232 edge e;
1233 vec<tree, va_gc> *ws_args;
1234
1235 entry_stmt = last_stmt (region->entry);
1236 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1237 && gimple_omp_task_taskwait_p (entry_stmt))
1238 {
1239 new_bb = region->entry;
1240 gsi = gsi_last_nondebug_bb (region->entry);
1241 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1242 gsi_remove (&gsi, true);
1243 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1244 return;
1245 }
1246
1247 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1248 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1249
1250 entry_bb = region->entry;
1251 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1252 exit_bb = region->cont;
1253 else
1254 exit_bb = region->exit;
1255
1256 if (is_combined_parallel (region))
1257 ws_args = region->ws_args;
1258 else
1259 ws_args = NULL;
1260
1261 if (child_cfun->cfg)
1262 {
1263 /* Due to inlining, it may happen that we have already outlined
1264 the region, in which case all we need to do is make the
1265 sub-graph unreachable and emit the parallel call. */
1266 edge entry_succ_e, exit_succ_e;
1267
1268 entry_succ_e = single_succ_edge (entry_bb);
1269
1270 gsi = gsi_last_nondebug_bb (entry_bb);
1271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1272 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1273 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1274 gsi_remove (&gsi, true);
1275
1276 new_bb = entry_bb;
1277 if (exit_bb)
1278 {
1279 exit_succ_e = single_succ_edge (exit_bb);
1280 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1281 }
1282 remove_edge_and_dominated_blocks (entry_succ_e);
1283 }
1284 else
1285 {
1286 unsigned srcidx, dstidx, num;
1287
1288 /* If the parallel region needs data sent from the parent
1289 function, then the very first statement (except possible
1290 tree profile counter updates) of the parallel body
1291 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1292 &.OMP_DATA_O is passed as an argument to the child function,
1293 we need to replace it with the argument as seen by the child
1294 function.
1295
1296 In most cases, this will end up being the identity assignment
1297 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1298 a function call that has been inlined, the original PARM_DECL
1299 .OMP_DATA_I may have been converted into a different local
1300 variable. In which case, we need to keep the assignment. */
1301 if (gimple_omp_taskreg_data_arg (entry_stmt))
1302 {
1303 basic_block entry_succ_bb
1304 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1305 : FALLTHRU_EDGE (entry_bb)->dest;
1306 tree arg;
1307 gimple *parcopy_stmt = NULL;
1308
1309 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1310 {
1311 gimple *stmt;
1312
1313 gcc_assert (!gsi_end_p (gsi));
1314 stmt = gsi_stmt (gsi);
1315 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1316 continue;
1317
1318 if (gimple_num_ops (stmt) == 2)
1319 {
1320 tree arg = gimple_assign_rhs1 (stmt);
1321
1322 /* We're ignoring the subcode because we're
1323 effectively doing a STRIP_NOPS. */
1324
1325 if (TREE_CODE (arg) == ADDR_EXPR
1326 && (TREE_OPERAND (arg, 0)
1327 == gimple_omp_taskreg_data_arg (entry_stmt)))
1328 {
1329 parcopy_stmt = stmt;
1330 break;
1331 }
1332 }
1333 }
1334
1335 gcc_assert (parcopy_stmt != NULL);
1336 arg = DECL_ARGUMENTS (child_fn);
1337
1338 if (!gimple_in_ssa_p (cfun))
1339 {
1340 if (gimple_assign_lhs (parcopy_stmt) == arg)
1341 gsi_remove (&gsi, true);
1342 else
1343 {
1344 /* ?? Is setting the subcode really necessary ?? */
1345 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1346 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1347 }
1348 }
1349 else
1350 {
1351 tree lhs = gimple_assign_lhs (parcopy_stmt);
1352 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1353 /* We'd like to set the rhs to the default def in the child_fn,
1354 but it's too early to create ssa names in the child_fn.
1355 Instead, we set the rhs to the parm. In
1356 move_sese_region_to_fn, we introduce a default def for the
1357 parm, map the parm to its default def, and once we encounter
1358 this stmt, replace the parm with the default def. */
1359 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1360 update_stmt (parcopy_stmt);
1361 }
1362 }
1363
1364 /* Declare local variables needed in CHILD_CFUN. */
1365 block = DECL_INITIAL (child_fn);
1366 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1367 /* The gimplifier could record temporaries in parallel/task block
1368 rather than in containing function's local_decls chain,
1369 which would mean cgraph missed finalizing them. Do it now. */
1370 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1371 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1372 varpool_node::finalize_decl (t);
1373 DECL_SAVED_TREE (child_fn) = NULL;
1374 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1375 gimple_set_body (child_fn, NULL);
1376 TREE_USED (block) = 1;
1377
1378 /* Reset DECL_CONTEXT on function arguments. */
1379 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1380 DECL_CONTEXT (t) = child_fn;
1381
1382 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1383 so that it can be moved to the child function. */
1384 gsi = gsi_last_nondebug_bb (entry_bb);
1385 stmt = gsi_stmt (gsi);
1386 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1387 || gimple_code (stmt) == GIMPLE_OMP_TASK
1388 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1389 e = split_block (entry_bb, stmt);
1390 gsi_remove (&gsi, true);
1391 entry_bb = e->dest;
1392 edge e2 = NULL;
1393 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1394 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1395 else
1396 {
1397 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1398 gcc_assert (e2->dest == region->exit);
1399 remove_edge (BRANCH_EDGE (entry_bb));
1400 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1401 gsi = gsi_last_nondebug_bb (region->exit);
1402 gcc_assert (!gsi_end_p (gsi)
1403 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1404 gsi_remove (&gsi, true);
1405 }
1406
1407 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1408 if (exit_bb)
1409 {
1410 gsi = gsi_last_nondebug_bb (exit_bb);
1411 gcc_assert (!gsi_end_p (gsi)
1412 && (gimple_code (gsi_stmt (gsi))
1413 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1414 stmt = gimple_build_return (NULL);
1415 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1416 gsi_remove (&gsi, true);
1417 }
1418
1419 /* Move the parallel region into CHILD_CFUN. */
1420
1421 if (gimple_in_ssa_p (cfun))
1422 {
1423 init_tree_ssa (child_cfun);
1424 init_ssa_operands (child_cfun);
1425 child_cfun->gimple_df->in_ssa_p = true;
1426 block = NULL_TREE;
1427 }
1428 else
1429 block = gimple_block (entry_stmt);
1430
1431 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1432 if (exit_bb)
1433 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1434 if (e2)
1435 {
1436 basic_block dest_bb = e2->dest;
1437 if (!exit_bb)
1438 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1439 remove_edge (e2);
1440 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1441 }
1442 /* When the OMP expansion process cannot guarantee an up-to-date
1443 loop tree, arrange for the child function to fix up loops. */
1444 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1445 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1446
1447 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1448 num = vec_safe_length (child_cfun->local_decls);
1449 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1450 {
1451 t = (*child_cfun->local_decls)[srcidx];
1452 if (DECL_CONTEXT (t) == cfun->decl)
1453 continue;
1454 if (srcidx != dstidx)
1455 (*child_cfun->local_decls)[dstidx] = t;
1456 dstidx++;
1457 }
1458 if (dstidx != num)
1459 vec_safe_truncate (child_cfun->local_decls, dstidx);
1460
1461 /* Inform the callgraph about the new function. */
1462 child_cfun->curr_properties = cfun->curr_properties;
1463 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1464 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1465 cgraph_node *node = cgraph_node::get_create (child_fn);
1466 node->parallelized_function = 1;
1467 cgraph_node::add_new_function (child_fn, true);
1468
1469 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1470 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1471
1472 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1473 fixed in a following pass. */
1474 push_cfun (child_cfun);
1475 if (need_asm)
1476 assign_assembler_name_if_needed (child_fn);
1477
1478 if (optimize)
1479 optimize_omp_library_calls (entry_stmt);
1480 update_max_bb_count ();
1481 cgraph_edge::rebuild_edges ();
1482
1483 /* Some EH regions might become dead, see PR34608. If
1484 pass_cleanup_cfg isn't the first pass to happen with the
1485 new child, these dead EH edges might cause problems.
1486 Clean them up now. */
1487 if (flag_exceptions)
1488 {
1489 basic_block bb;
1490 bool changed = false;
1491
1492 FOR_EACH_BB_FN (bb, cfun)
1493 changed |= gimple_purge_dead_eh_edges (bb);
1494 if (changed)
1495 cleanup_tree_cfg ();
1496 }
1497 if (gimple_in_ssa_p (cfun))
1498 update_ssa (TODO_update_ssa);
1499 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1500 verify_loop_structure ();
1501 pop_cfun ();
1502
1503 if (dump_file && !gimple_in_ssa_p (cfun))
1504 {
1505 omp_any_child_fn_dumped = true;
1506 dump_function_header (dump_file, child_fn, dump_flags);
1507 dump_function_to_file (child_fn, dump_file, dump_flags);
1508 }
1509 }
1510
1511 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1512
1513 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1514 expand_parallel_call (region, new_bb,
1515 as_a <gomp_parallel *> (entry_stmt), ws_args);
1516 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1517 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1518 else
1519 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1520 if (gimple_in_ssa_p (cfun))
1521 update_ssa (TODO_update_ssa_only_virtuals);
1522}
1523
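/* Overall shape of the transformation for a parallel region, written as a
   sketch with plain C names; real child functions are named like
   foo._omp_fn.0 and take a pointer to the generated .omp_data_s record:

       struct omp_data_s { int i; };

       static void foo_omp_fn (struct omp_data_s *omp_data_i)
       {
         ... uses omp_data_i->i ...        region body moved here by
       }                                   move_sese_region_to_fn

       void foo (int i)
       {
         struct omp_data_s omp_data_o = { i };
         GOMP_parallel (foo_omp_fn, &omp_data_o, 0, 0);
       }
*/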
1524/* Information about members of an OpenACC collapsed loop nest. */
1525
1526struct oacc_collapse
1527{
1528 tree base; /* Base value. */
1529 tree iters; /* Number of steps. */
1530 tree step; /* Step size. */
1531 tree tile; /* Tile increment (if tiled). */
1532 tree outer; /* Tile iterator var. */
1533};
1534
1535/* Helper for expand_oacc_for. Determine collapsed loop information.
1536 Fill in COUNTS array. Emit any initialization code before GSI.
1537 Return the calculated outer loop bound of BOUND_TYPE. */
1538
1539static tree
1540expand_oacc_collapse_init (const struct omp_for_data *fd,
1541 gimple_stmt_iterator *gsi,
1542 oacc_collapse *counts, tree bound_type,
1543 location_t loc)
629b3d75 1544{
02889d23 1545 tree tiling = fd->tiling;
629b3d75
MJ
1546 tree total = build_int_cst (bound_type, 1);
1547 int ix;
1548
1549 gcc_assert (integer_onep (fd->loop.step));
1550 gcc_assert (integer_zerop (fd->loop.n1));
1551
1552 /* When tiling, the first operand of the tile clause applies to the
1553 innermost loop, and we work outwards from there. Seems
1554 backwards, but whatever. */
1555 for (ix = fd->collapse; ix--;)
1556 {
1557 const omp_for_data_loop *loop = &fd->loops[ix];
1558
1559 tree iter_type = TREE_TYPE (loop->v);
1560 tree diff_type = iter_type;
1561 tree plus_type = iter_type;
1562
1563 gcc_assert (loop->cond_code == fd->loop.cond_code);
1564
1565 if (POINTER_TYPE_P (iter_type))
1566 plus_type = sizetype;
1567 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1568 diff_type = signed_type_for (diff_type);
1569 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1570 diff_type = integer_type_node;
1571
1572 if (tiling)
1573 {
1574 tree num = build_int_cst (integer_type_node, fd->collapse);
1575 tree loop_no = build_int_cst (integer_type_node, ix);
1576 tree tile = TREE_VALUE (tiling);
1577 gcall *call
1578 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1579 /* gwv-outer=*/integer_zero_node,
1580 /* gwv-inner=*/integer_zero_node);
1581
1582 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1583 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1584 gimple_call_set_lhs (call, counts[ix].tile);
1585 gimple_set_location (call, loc);
1586 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1587
1588 tiling = TREE_CHAIN (tiling);
1589 }
1590 else
1591 {
1592 counts[ix].tile = NULL;
1593 counts[ix].outer = loop->v;
1594 }
1595
1596 tree b = loop->n1;
1597 tree e = loop->n2;
1598 tree s = loop->step;
1599 bool up = loop->cond_code == LT_EXPR;
1600 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1601 bool negating;
1602 tree expr;
1603
1604 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1605 true, GSI_SAME_STMT);
1606 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1607 true, GSI_SAME_STMT);
1608
1609 /* Convert the step, avoiding possible unsigned->signed overflow. */
1610 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1611 if (negating)
1612 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1613 s = fold_convert (diff_type, s);
1614 if (negating)
1615 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1616 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1617 true, GSI_SAME_STMT);
1618
1619 /* Determine the range, avoiding possible unsigned->signed overflow. */
1620 negating = !up && TYPE_UNSIGNED (iter_type);
1621 expr = fold_build2 (MINUS_EXPR, plus_type,
1622 fold_convert (plus_type, negating ? b : e),
1623 fold_convert (plus_type, negating ? e : b));
1624 expr = fold_convert (diff_type, expr);
1625 if (negating)
1626 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1627 tree range = force_gimple_operand_gsi
1628 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1629
1630 /* Determine number of iterations. */
1631 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1632 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1633 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1634
1635 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1636 true, GSI_SAME_STMT);
1637
1638 counts[ix].base = b;
1639 counts[ix].iters = iters;
1640 counts[ix].step = s;
1641
1642 total = fold_build2 (MULT_EXPR, bound_type, total,
1643 fold_convert (bound_type, iters));
1644 }
1645
1646 return total;
1647}
1648
1649/* Emit initializers for collapsed loop members. INNER is true if
1650 this is for the element loop of a TILE. IVAR is the outer
1651 loop iteration variable, from which collapsed loop iteration values
1652 are calculated. COUNTS array has been initialized by
1653 expand_oacc_collapse_init. */
1654
1655static void
1656expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1657 gimple_stmt_iterator *gsi,
1658 const oacc_collapse *counts, tree ivar)
1659{
1660 tree ivar_type = TREE_TYPE (ivar);
1661
1662 /* The most rapidly changing iteration variable is the innermost
1663 one. */
1664 for (int ix = fd->collapse; ix--;)
1665 {
1666 const omp_for_data_loop *loop = &fd->loops[ix];
1667 const oacc_collapse *collapse = &counts[ix];
1668 tree v = inner ? loop->v : collapse->outer;
1669 tree iter_type = TREE_TYPE (v);
1670 tree diff_type = TREE_TYPE (collapse->step);
1671 tree plus_type = iter_type;
1672 enum tree_code plus_code = PLUS_EXPR;
1673 tree expr;
1674
1675 if (POINTER_TYPE_P (iter_type))
1676 {
1677 plus_code = POINTER_PLUS_EXPR;
1678 plus_type = sizetype;
1679 }
1680
1681 expr = ivar;
1682 if (ix)
1683 {
1684 tree mod = fold_convert (ivar_type, collapse->iters);
1685 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1686 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1687 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1688 true, GSI_SAME_STMT);
1689 }
1690
1691 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1692 collapse->step);
1693 expr = fold_build2 (plus_code, iter_type,
1694 inner ? collapse->outer : collapse->base,
1695 fold_convert (plus_type, expr));
1696 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1697 true, GSI_SAME_STMT);
1698 gassign *ass = gimple_build_assign (v, expr);
1699 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1700 }
1701}
1702
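/* Continuing the illustrative collapse(2) nest above: with 4 inner
   iterations, an outer IVAR value of 7 is decomposed by the div/mod
   chain above into an inner index of 7 % 4 = 3 and an outer index of
   7 / 4 = 1, so the assignments emitted are j = 0 + 3 * 2 = 6 and
   i = 0 + 1 * 3 = 3.  This is only a worked example; the actual trees
   are built from the recorded .base, .iters and .step fields.  */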
1703/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1704 of the combined collapse > 1 loop constructs, generate code like:
1705 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1706 if (cond3 is <)
1707 adj = STEP3 - 1;
1708 else
1709 adj = STEP3 + 1;
1710 count3 = (adj + N32 - N31) / STEP3;
1711 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1712 if (cond2 is <)
1713 adj = STEP2 - 1;
1714 else
1715 adj = STEP2 + 1;
1716 count2 = (adj + N22 - N21) / STEP2;
1717 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1718 if (cond1 is <)
1719 adj = STEP1 - 1;
1720 else
1721 adj = STEP1 + 1;
1722 count1 = (adj + N12 - N11) / STEP1;
1723 count = count1 * count2 * count3;
1724 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1725 count = 0;
1726 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1727 of the combined loop constructs, just initialize COUNTS array
1728 from the _looptemp_ clauses. */
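/* A concrete, purely illustrative instance of the count formula above:
   for "for (i = 5; i < 17; i += 4)" the condition is <, so
   adj = STEP - 1 = 3 and count = (3 + 17 - 5) / 4 = 3, matching the
   three iterations i = 5, 9, 13.  */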
1729
1730/* NOTE: It *could* be better to moosh all of the BBs together,
1731 creating one larger BB with all the computation and the unexpected
1732 jump at the end. I.e.
1733
1734 bool zero3, zero2, zero1, zero;
1735
1736 zero3 = N32 c3 N31;
1737 count3 = (N32 - N31) /[cl] STEP3;
1738 zero2 = N22 c2 N21;
1739 count2 = (N22 - N21) /[cl] STEP2;
1740 zero1 = N12 c1 N11;
1741 count1 = (N12 - N11) /[cl] STEP1;
1742 zero = zero3 || zero2 || zero1;
1743 count = count1 * count2 * count3;
1744 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1745
 1746 After all, we expect zero to be false, and thus we expect to have to
1747 evaluate all of the comparison expressions, so short-circuiting
1748 oughtn't be a win. Since the condition isn't protecting a
1749 denominator, we're not concerned about divide-by-zero, so we can
1750 fully evaluate count even if a numerator turned out to be wrong.
1751
1752 It seems like putting this all together would create much better
1753 scheduling opportunities, and less pressure on the chip's branch
1754 predictor. */
1755
1756static void
1757expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1758 basic_block &entry_bb, tree *counts,
1759 basic_block &zero_iter1_bb, int &first_zero_iter1,
1760 basic_block &zero_iter2_bb, int &first_zero_iter2,
1761 basic_block &l2_dom_bb)
1762{
1763 tree t, type = TREE_TYPE (fd->loop.v);
1764 edge e, ne;
1765 int i;
1766
1767 /* Collapsed loops need work for expansion into SSA form. */
1768 gcc_assert (!gimple_in_ssa_p (cfun));
1769
1770 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1771 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1772 {
1773 gcc_assert (fd->ordered == 0);
1774 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1775 isn't supposed to be handled, as the inner loop doesn't
1776 use it. */
1777 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1778 OMP_CLAUSE__LOOPTEMP_);
1779 gcc_assert (innerc);
1780 for (i = 0; i < fd->collapse; i++)
1781 {
1782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1783 OMP_CLAUSE__LOOPTEMP_);
1784 gcc_assert (innerc);
1785 if (i)
1786 counts[i] = OMP_CLAUSE_DECL (innerc);
1787 else
1788 counts[0] = NULL_TREE;
1789 }
1790 return;
1791 }
1792
1793 for (i = fd->collapse; i < fd->ordered; i++)
1794 {
1795 tree itype = TREE_TYPE (fd->loops[i].v);
1796 counts[i] = NULL_TREE;
1797 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1798 fold_convert (itype, fd->loops[i].n1),
1799 fold_convert (itype, fd->loops[i].n2));
1800 if (t && integer_zerop (t))
1801 {
1802 for (i = fd->collapse; i < fd->ordered; i++)
1803 counts[i] = build_int_cst (type, 0);
1804 break;
1805 }
1806 }
1807 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1808 {
1809 tree itype = TREE_TYPE (fd->loops[i].v);
1810
1811 if (i >= fd->collapse && counts[i])
1812 continue;
1813 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1814 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1815 fold_convert (itype, fd->loops[i].n1),
1816 fold_convert (itype, fd->loops[i].n2)))
1817 == NULL_TREE || !integer_onep (t)))
1818 {
1819 gcond *cond_stmt;
1820 tree n1, n2;
1821 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1822 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1823 true, GSI_SAME_STMT);
1824 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1825 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1826 true, GSI_SAME_STMT);
1827 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1828 NULL_TREE, NULL_TREE);
1829 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1830 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1831 expand_omp_regimplify_p, NULL, NULL)
1832 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1833 expand_omp_regimplify_p, NULL, NULL))
1834 {
1835 *gsi = gsi_for_stmt (cond_stmt);
1836 gimple_regimplify_operands (cond_stmt, gsi);
1837 }
1838 e = split_block (entry_bb, cond_stmt);
1839 basic_block &zero_iter_bb
1840 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1841 int &first_zero_iter
1842 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1843 if (zero_iter_bb == NULL)
1844 {
1845 gassign *assign_stmt;
1846 first_zero_iter = i;
1847 zero_iter_bb = create_empty_bb (entry_bb);
1848 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1849 *gsi = gsi_after_labels (zero_iter_bb);
1850 if (i < fd->collapse)
1851 assign_stmt = gimple_build_assign (fd->loop.n2,
1852 build_zero_cst (type));
1853 else
1854 {
1855 counts[i] = create_tmp_reg (type, ".count");
1856 assign_stmt
1857 = gimple_build_assign (counts[i], build_zero_cst (type));
1858 }
1859 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1860 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1861 entry_bb);
1862 }
1863 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
357067f2 1864 ne->probability = profile_probability::very_unlikely ();
629b3d75 1865 e->flags = EDGE_TRUE_VALUE;
357067f2 1866 e->probability = ne->probability.invert ();
629b3d75
MJ
1867 if (l2_dom_bb == NULL)
1868 l2_dom_bb = entry_bb;
1869 entry_bb = e->dest;
65f4b875 1870 *gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
1871 }
1872
1873 if (POINTER_TYPE_P (itype))
1874 itype = signed_type_for (itype);
1875 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1876 ? -1 : 1));
1877 t = fold_build2 (PLUS_EXPR, itype,
1878 fold_convert (itype, fd->loops[i].step), t);
1879 t = fold_build2 (PLUS_EXPR, itype, t,
1880 fold_convert (itype, fd->loops[i].n2));
1881 t = fold_build2 (MINUS_EXPR, itype, t,
1882 fold_convert (itype, fd->loops[i].n1));
1883 /* ?? We could probably use CEIL_DIV_EXPR instead of
 1884 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1885 generate the same code in the end because generically we
1886 don't know that the values involved must be negative for
1887 GT?? */
1888 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1889 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1890 fold_build1 (NEGATE_EXPR, itype, t),
1891 fold_build1 (NEGATE_EXPR, itype,
1892 fold_convert (itype,
1893 fd->loops[i].step)));
1894 else
1895 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1896 fold_convert (itype, fd->loops[i].step));
1897 t = fold_convert (type, t);
1898 if (TREE_CODE (t) == INTEGER_CST)
1899 counts[i] = t;
1900 else
1901 {
1902 if (i < fd->collapse || i != first_zero_iter2)
1903 counts[i] = create_tmp_reg (type, ".count");
1904 expand_omp_build_assign (gsi, counts[i], t);
1905 }
1906 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1907 {
1908 if (i == 0)
1909 t = counts[0];
1910 else
1911 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1912 expand_omp_build_assign (gsi, fd->loop.n2, t);
1913 }
1914 }
1915}
1916
1917/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1918 T = V;
1919 V3 = N31 + (T % count3) * STEP3;
1920 T = T / count3;
1921 V2 = N21 + (T % count2) * STEP2;
1922 T = T / count2;
1923 V1 = N11 + T * STEP1;
1924 if this loop doesn't have an inner loop construct combined with it.
1925 If it does have an inner loop construct combined with it and the
1926 iteration count isn't known constant, store values from counts array
1927 into its _looptemp_ temporaries instead. */
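/* Worked example of the scheme above (illustrative values only): with
   count3 = 4, count2 = 3, N31 = 1, STEP3 = 2, N21 = 0, STEP2 = 5,
   N11 = 10, STEP1 = 1 and T = V = 17 the generated code computes
     V3 = 1 + (17 % 4) * 2 = 3,  T = 17 / 4 = 4,
     V2 = 0 + (4 % 3) * 5 = 5,   T = 4 / 3 = 1,
     V1 = 10 + 1 * 1 = 11,
   i.e. logical iteration 17 of the flattened space is iteration
   (1, 1, 1) of the original nest.  */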
1928
1929static void
1930expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1931 tree *counts, gimple *inner_stmt, tree startvar)
1932{
1933 int i;
1934 if (gimple_omp_for_combined_p (fd->for_stmt))
1935 {
1936 /* If fd->loop.n2 is constant, then no propagation of the counts
1937 is needed, they are constant. */
1938 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1939 return;
1940
1941 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1942 ? gimple_omp_taskreg_clauses (inner_stmt)
1943 : gimple_omp_for_clauses (inner_stmt);
1944 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1945 isn't supposed to be handled, as the inner loop doesn't
1946 use it. */
1947 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1948 gcc_assert (innerc);
1949 for (i = 0; i < fd->collapse; i++)
1950 {
1951 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1952 OMP_CLAUSE__LOOPTEMP_);
1953 gcc_assert (innerc);
1954 if (i)
1955 {
1956 tree tem = OMP_CLAUSE_DECL (innerc);
1957 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1958 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1959 false, GSI_CONTINUE_LINKING);
1960 gassign *stmt = gimple_build_assign (tem, t);
1961 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1962 }
1963 }
1964 return;
1965 }
1966
1967 tree type = TREE_TYPE (fd->loop.v);
1968 tree tem = create_tmp_reg (type, ".tem");
1969 gassign *stmt = gimple_build_assign (tem, startvar);
1970 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1971
1972 for (i = fd->collapse - 1; i >= 0; i--)
1973 {
1974 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1975 itype = vtype;
1976 if (POINTER_TYPE_P (vtype))
1977 itype = signed_type_for (vtype);
1978 if (i != 0)
1979 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1980 else
1981 t = tem;
1982 t = fold_convert (itype, t);
1983 t = fold_build2 (MULT_EXPR, itype, t,
1984 fold_convert (itype, fd->loops[i].step));
1985 if (POINTER_TYPE_P (vtype))
1986 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1987 else
1988 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1989 t = force_gimple_operand_gsi (gsi, t,
1990 DECL_P (fd->loops[i].v)
1991 && TREE_ADDRESSABLE (fd->loops[i].v),
1992 NULL_TREE, false,
1993 GSI_CONTINUE_LINKING);
1994 stmt = gimple_build_assign (fd->loops[i].v, t);
1995 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1996 if (i != 0)
1997 {
1998 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1999 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2000 false, GSI_CONTINUE_LINKING);
2001 stmt = gimple_build_assign (tem, t);
2002 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2003 }
2004 }
2005}
2006
2007/* Helper function for expand_omp_for_*. Generate code like:
2008 L10:
2009 V3 += STEP3;
2010 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2011 L11:
2012 V3 = N31;
2013 V2 += STEP2;
2014 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2015 L12:
2016 V2 = N21;
2017 V1 += STEP1;
2018 goto BODY_BB; */
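/* Viewed as a whole, the generated blocks step the collapsed nest like
   an odometer: the innermost variable is advanced first, and whenever
   it leaves its range it is reset to its lower bound and the next outer
   variable is advanced instead, with the outermost variable advanced
   unconditionally.  */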
2019
2020static basic_block
2021extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2022 basic_block body_bb)
2023{
2024 basic_block last_bb, bb, collapse_bb = NULL;
2025 int i;
2026 gimple_stmt_iterator gsi;
2027 edge e;
2028 tree t;
2029 gimple *stmt;
2030
2031 last_bb = cont_bb;
2032 for (i = fd->collapse - 1; i >= 0; i--)
2033 {
2034 tree vtype = TREE_TYPE (fd->loops[i].v);
2035
2036 bb = create_empty_bb (last_bb);
2037 add_bb_to_loop (bb, last_bb->loop_father);
2038 gsi = gsi_start_bb (bb);
2039
2040 if (i < fd->collapse - 1)
2041 {
2042 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
357067f2 2043 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75
MJ
2044
2045 t = fd->loops[i + 1].n1;
2046 t = force_gimple_operand_gsi (&gsi, t,
2047 DECL_P (fd->loops[i + 1].v)
2048 && TREE_ADDRESSABLE (fd->loops[i
2049 + 1].v),
2050 NULL_TREE, false,
2051 GSI_CONTINUE_LINKING);
2052 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2053 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2054 }
2055 else
2056 collapse_bb = bb;
2057
2058 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2059
2060 if (POINTER_TYPE_P (vtype))
2061 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2062 else
2063 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2064 t = force_gimple_operand_gsi (&gsi, t,
2065 DECL_P (fd->loops[i].v)
2066 && TREE_ADDRESSABLE (fd->loops[i].v),
2067 NULL_TREE, false, GSI_CONTINUE_LINKING);
2068 stmt = gimple_build_assign (fd->loops[i].v, t);
2069 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2070
2071 if (i > 0)
2072 {
2073 t = fd->loops[i].n2;
2074 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 tree v = fd->loops[i].v;
2077 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2078 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2079 false, GSI_CONTINUE_LINKING);
2080 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2081 stmt = gimple_build_cond_empty (t);
2082 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
d1ffbd43
JJ
2083 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2084 expand_omp_regimplify_p, NULL, NULL)
2085 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2086 expand_omp_regimplify_p, NULL, NULL))
2087 gimple_regimplify_operands (stmt, &gsi);
629b3d75 2088 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
357067f2 2089 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
629b3d75
MJ
2090 }
2091 else
2092 make_edge (bb, body_bb, EDGE_FALLTHRU);
2093 last_bb = bb;
2094 }
2095
2096 return collapse_bb;
2097}
2098
2099/* Expand #pragma omp ordered depend(source). */
2100
2101static void
2102expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2103 tree *counts, location_t loc)
2104{
2105 enum built_in_function source_ix
2106 = fd->iter_type == long_integer_type_node
2107 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2108 gimple *g
2109 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2110 build_fold_addr_expr (counts[fd->ordered]));
2111 gimple_set_location (g, loc);
2112 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2113}
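/* For instance, inside "#pragma omp for ordered(2)" a
   "#pragma omp ordered depend(source)" statement becomes, roughly,

     GOMP_doacross_post (&.orditera);

   where .orditera is the compiler-generated array of current iteration
   numbers kept in counts[fd->ordered] (the _ull_ variant is used when
   the iterator type is unsigned long long).  */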
2114
2115/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2116
2117static void
2118expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2119 tree *counts, tree c, location_t loc)
2120{
2121 auto_vec<tree, 10> args;
2122 enum built_in_function sink_ix
2123 = fd->iter_type == long_integer_type_node
2124 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2125 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2126 int i;
2127 gimple_stmt_iterator gsi2 = *gsi;
2128 bool warned_step = false;
2129
2130 for (i = 0; i < fd->ordered; i++)
2131 {
2132 tree step = NULL_TREE;
2133 off = TREE_PURPOSE (deps);
2134 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2135 {
2136 step = TREE_OPERAND (off, 1);
2137 off = TREE_OPERAND (off, 0);
2138 }
2139 if (!integer_zerop (off))
2140 {
2141 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2142 || fd->loops[i].cond_code == GT_EXPR);
2143 bool forward = fd->loops[i].cond_code == LT_EXPR;
2144 if (step)
2145 {
2146 /* Non-simple Fortran DO loops. If step is variable,
 2147 we don't know even the direction at compile time, so we
 2148 can't warn. */
2149 if (TREE_CODE (step) != INTEGER_CST)
2150 break;
2151 forward = tree_int_cst_sgn (step) != -1;
2152 }
2153 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
90a0bf4e
JJ
2154 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2155 "waiting for lexically later iteration");
629b3d75
MJ
2156 break;
2157 }
2158 deps = TREE_CHAIN (deps);
2159 }
2160 /* If all offsets corresponding to the collapsed loops are zero,
2161 this depend clause can be ignored. FIXME: but there is still a
2162 flush needed. We need to emit one __sync_synchronize () for it
2163 though (perhaps conditionally)? Solve this together with the
2164 conservative dependence folding optimization.
2165 if (i >= fd->collapse)
2166 return; */
2167
2168 deps = OMP_CLAUSE_DECL (c);
2169 gsi_prev (&gsi2);
2170 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2171 edge e2 = split_block_after_labels (e1->dest);
2172
2173 gsi2 = gsi_after_labels (e1->dest);
2174 *gsi = gsi_last_bb (e1->src);
2175 for (i = 0; i < fd->ordered; i++)
2176 {
2177 tree itype = TREE_TYPE (fd->loops[i].v);
2178 tree step = NULL_TREE;
2179 tree orig_off = NULL_TREE;
2180 if (POINTER_TYPE_P (itype))
2181 itype = sizetype;
2182 if (i)
2183 deps = TREE_CHAIN (deps);
2184 off = TREE_PURPOSE (deps);
2185 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2186 {
2187 step = TREE_OPERAND (off, 1);
2188 off = TREE_OPERAND (off, 0);
2189 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2190 && integer_onep (fd->loops[i].step)
2191 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2192 }
2193 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2194 if (step)
2195 {
2196 off = fold_convert_loc (loc, itype, off);
2197 orig_off = off;
2198 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2199 }
2200
2201 if (integer_zerop (off))
2202 t = boolean_true_node;
2203 else
2204 {
2205 tree a;
2206 tree co = fold_convert_loc (loc, itype, off);
2207 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2208 {
2209 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2210 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2211 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2212 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2213 co);
2214 }
2215 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2216 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2217 fd->loops[i].v, co);
2218 else
2219 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2220 fd->loops[i].v, co);
2221 if (step)
2222 {
2223 tree t1, t2;
2224 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2225 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2226 fd->loops[i].n1);
2227 else
2228 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2229 fd->loops[i].n2);
2230 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2231 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2232 fd->loops[i].n2);
2233 else
2234 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2235 fd->loops[i].n1);
2236 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2237 step, build_int_cst (TREE_TYPE (step), 0));
2238 if (TREE_CODE (step) != INTEGER_CST)
2239 {
2240 t1 = unshare_expr (t1);
2241 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2242 false, GSI_CONTINUE_LINKING);
2243 t2 = unshare_expr (t2);
2244 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2245 false, GSI_CONTINUE_LINKING);
2246 }
2247 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2248 t, t2, t1);
2249 }
2250 else if (fd->loops[i].cond_code == LT_EXPR)
2251 {
2252 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2253 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2254 fd->loops[i].n1);
2255 else
2256 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2257 fd->loops[i].n2);
2258 }
2259 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2260 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2261 fd->loops[i].n2);
2262 else
2263 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2264 fd->loops[i].n1);
2265 }
2266 if (cond)
2267 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2268 else
2269 cond = t;
2270
2271 off = fold_convert_loc (loc, itype, off);
2272
2273 if (step
2274 || (fd->loops[i].cond_code == LT_EXPR
2275 ? !integer_onep (fd->loops[i].step)
2276 : !integer_minus_onep (fd->loops[i].step)))
2277 {
2278 if (step == NULL_TREE
2279 && TYPE_UNSIGNED (itype)
2280 && fd->loops[i].cond_code == GT_EXPR)
2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2282 fold_build1_loc (loc, NEGATE_EXPR, itype,
2283 s));
2284 else
2285 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2286 orig_off ? orig_off : off, s);
2287 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2288 build_int_cst (itype, 0));
2289 if (integer_zerop (t) && !warned_step)
2290 {
90a0bf4e
JJ
2291 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2292 "refers to iteration never in the iteration "
2293 "space");
629b3d75
MJ
2294 warned_step = true;
2295 }
2296 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2297 cond, t);
2298 }
2299
2300 if (i <= fd->collapse - 1 && fd->collapse > 1)
2301 t = fd->loop.v;
2302 else if (counts[i])
2303 t = counts[i];
2304 else
2305 {
2306 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2307 fd->loops[i].v, fd->loops[i].n1);
2308 t = fold_convert_loc (loc, fd->iter_type, t);
2309 }
2310 if (step)
 2311 /* We have already divided OFF by STEP earlier. */;
2312 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2313 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2314 fold_build1_loc (loc, NEGATE_EXPR, itype,
2315 s));
2316 else
2317 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2318 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2319 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2320 off = fold_convert_loc (loc, fd->iter_type, off);
2321 if (i <= fd->collapse - 1 && fd->collapse > 1)
2322 {
2323 if (i)
2324 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2325 off);
2326 if (i < fd->collapse - 1)
2327 {
2328 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2329 counts[i]);
2330 continue;
2331 }
2332 }
2333 off = unshare_expr (off);
2334 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2335 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2336 true, GSI_SAME_STMT);
2337 args.safe_push (t);
2338 }
2339 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2340 gimple_set_location (g, loc);
2341 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2342
2343 cond = unshare_expr (cond);
2344 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2345 GSI_CONTINUE_LINKING);
2346 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2347 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
357067f2
JH
2348 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2349 e1->probability = e3->probability.invert ();
629b3d75
MJ
2350 e1->flags = EDGE_TRUE_VALUE;
2351 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2352
2353 *gsi = gsi_after_labels (e2->dest);
2354}
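/* As an illustration, for "#pragma omp ordered depend(sink: i-1, j+1)"
   inside "#pragma omp for ordered(2)" the code above translates the
   loop-variable offsets into iteration numbers, builds a runtime guard
   that the referenced iteration exists at all (and, for non-unit steps,
   that the offset is a multiple of the step), and emits roughly

     if (<guard>)
       GOMP_doacross_wait (<iteration number of i-1>,
                           <iteration number of j+1>);

   with the _ull_ variant used for unsigned long long iterators.  */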
2355
2356/* Expand all #pragma omp ordered depend(source) and
2357 #pragma omp ordered depend(sink:...) constructs in the current
2358 #pragma omp for ordered(n) region. */
2359
2360static void
2361expand_omp_ordered_source_sink (struct omp_region *region,
2362 struct omp_for_data *fd, tree *counts,
2363 basic_block cont_bb)
2364{
2365 struct omp_region *inner;
2366 int i;
2367 for (i = fd->collapse - 1; i < fd->ordered; i++)
2368 if (i == fd->collapse - 1 && fd->collapse > 1)
2369 counts[i] = NULL_TREE;
2370 else if (i >= fd->collapse && !cont_bb)
2371 counts[i] = build_zero_cst (fd->iter_type);
2372 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2373 && integer_onep (fd->loops[i].step))
2374 counts[i] = NULL_TREE;
2375 else
2376 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2377 tree atype
2378 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2379 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2380 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2381
2382 for (inner = region->inner; inner; inner = inner->next)
2383 if (inner->type == GIMPLE_OMP_ORDERED)
2384 {
2385 gomp_ordered *ord_stmt = inner->ord_stmt;
2386 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2387 location_t loc = gimple_location (ord_stmt);
2388 tree c;
2389 for (c = gimple_omp_ordered_clauses (ord_stmt);
2390 c; c = OMP_CLAUSE_CHAIN (c))
2391 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2392 break;
2393 if (c)
2394 expand_omp_ordered_source (&gsi, fd, counts, loc);
2395 for (c = gimple_omp_ordered_clauses (ord_stmt);
2396 c; c = OMP_CLAUSE_CHAIN (c))
2397 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2398 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2399 gsi_remove (&gsi, true);
2400 }
2401}
2402
2403/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2404 collapsed. */
2405
2406static basic_block
2407expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2408 basic_block cont_bb, basic_block body_bb,
2409 bool ordered_lastprivate)
2410{
2411 if (fd->ordered == fd->collapse)
2412 return cont_bb;
2413
2414 if (!cont_bb)
2415 {
2416 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2417 for (int i = fd->collapse; i < fd->ordered; i++)
2418 {
2419 tree type = TREE_TYPE (fd->loops[i].v);
2420 tree n1 = fold_convert (type, fd->loops[i].n1);
2421 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2422 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2423 size_int (i - fd->collapse + 1),
2424 NULL_TREE, NULL_TREE);
2425 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2426 }
2427 return NULL;
2428 }
2429
2430 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2431 {
2432 tree t, type = TREE_TYPE (fd->loops[i].v);
2433 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2434 expand_omp_build_assign (&gsi, fd->loops[i].v,
2435 fold_convert (type, fd->loops[i].n1));
2436 if (counts[i])
2437 expand_omp_build_assign (&gsi, counts[i],
2438 build_zero_cst (fd->iter_type));
2439 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2440 size_int (i - fd->collapse + 1),
2441 NULL_TREE, NULL_TREE);
2442 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2443 if (!gsi_end_p (gsi))
2444 gsi_prev (&gsi);
2445 else
2446 gsi = gsi_last_bb (body_bb);
2447 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2448 basic_block new_body = e1->dest;
2449 if (body_bb == cont_bb)
2450 cont_bb = new_body;
2451 edge e2 = NULL;
2452 basic_block new_header;
2453 if (EDGE_COUNT (cont_bb->preds) > 0)
2454 {
2455 gsi = gsi_last_bb (cont_bb);
2456 if (POINTER_TYPE_P (type))
2457 t = fold_build_pointer_plus (fd->loops[i].v,
2458 fold_convert (sizetype,
2459 fd->loops[i].step));
2460 else
2461 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2462 fold_convert (type, fd->loops[i].step));
2463 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2464 if (counts[i])
2465 {
2466 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2467 build_int_cst (fd->iter_type, 1));
2468 expand_omp_build_assign (&gsi, counts[i], t);
2469 t = counts[i];
2470 }
2471 else
2472 {
2473 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2474 fd->loops[i].v, fd->loops[i].n1);
2475 t = fold_convert (fd->iter_type, t);
2476 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2477 true, GSI_SAME_STMT);
2478 }
2479 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2480 size_int (i - fd->collapse + 1),
2481 NULL_TREE, NULL_TREE);
2482 expand_omp_build_assign (&gsi, aref, t);
2483 gsi_prev (&gsi);
2484 e2 = split_block (cont_bb, gsi_stmt (gsi));
2485 new_header = e2->dest;
2486 }
2487 else
2488 new_header = cont_bb;
2489 gsi = gsi_after_labels (new_header);
2490 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2491 true, GSI_SAME_STMT);
2492 tree n2
2493 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2494 true, NULL_TREE, true, GSI_SAME_STMT);
2495 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2496 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2497 edge e3 = split_block (new_header, gsi_stmt (gsi));
2498 cont_bb = e3->dest;
2499 remove_edge (e1);
2500 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2501 e3->flags = EDGE_FALSE_VALUE;
357067f2 2502 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75 2503 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
357067f2 2504 e1->probability = e3->probability.invert ();
629b3d75
MJ
2505
2506 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2507 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2508
2509 if (e2)
2510 {
2511 struct loop *loop = alloc_loop ();
2512 loop->header = new_header;
2513 loop->latch = e2->src;
2514 add_loop (loop, body_bb->loop_father);
2515 }
2516 }
2517
2518 /* If there are any lastprivate clauses and it is possible some loops
2519 might have zero iterations, ensure all the decls are initialized,
2520 otherwise we could crash evaluating C++ class iterators with lastprivate
2521 clauses. */
2522 bool need_inits = false;
2523 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2524 if (need_inits)
2525 {
2526 tree type = TREE_TYPE (fd->loops[i].v);
2527 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2528 expand_omp_build_assign (&gsi, fd->loops[i].v,
2529 fold_convert (type, fd->loops[i].n1));
2530 }
2531 else
2532 {
2533 tree type = TREE_TYPE (fd->loops[i].v);
2534 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2535 boolean_type_node,
2536 fold_convert (type, fd->loops[i].n1),
2537 fold_convert (type, fd->loops[i].n2));
2538 if (!integer_onep (this_cond))
2539 need_inits = true;
2540 }
2541
2542 return cont_bb;
2543}
2544
2545/* A subroutine of expand_omp_for. Generate code for a parallel
2546 loop with any schedule. Given parameters:
2547
2548 for (V = N1; V cond N2; V += STEP) BODY;
2549
2550 where COND is "<" or ">", we generate pseudocode
2551
2552 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2553 if (more) goto L0; else goto L3;
2554 L0:
2555 V = istart0;
2556 iend = iend0;
2557 L1:
2558 BODY;
2559 V += STEP;
2560 if (V cond iend) goto L1; else goto L2;
2561 L2:
2562 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2563 L3:
2564
2565 If this is a combined omp parallel loop, instead of the call to
2566 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2567 If this is gimple_omp_for_combined_p loop, then instead of assigning
2568 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2569 inner GIMPLE_OMP_FOR and V += STEP; and
2570 if (V cond iend) goto L1; else goto L2; are removed.
2571
2572 For collapsed loops, given parameters:
2573 collapse(3)
2574 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2575 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2576 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2577 BODY;
2578
2579 we generate pseudocode
2580
2581 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2582 if (cond3 is <)
2583 adj = STEP3 - 1;
2584 else
2585 adj = STEP3 + 1;
2586 count3 = (adj + N32 - N31) / STEP3;
2587 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2588 if (cond2 is <)
2589 adj = STEP2 - 1;
2590 else
2591 adj = STEP2 + 1;
2592 count2 = (adj + N22 - N21) / STEP2;
2593 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2594 if (cond1 is <)
2595 adj = STEP1 - 1;
2596 else
2597 adj = STEP1 + 1;
2598 count1 = (adj + N12 - N11) / STEP1;
2599 count = count1 * count2 * count3;
2600 goto Z1;
2601 Z0:
2602 count = 0;
2603 Z1:
2604 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2605 if (more) goto L0; else goto L3;
2606 L0:
2607 V = istart0;
2608 T = V;
2609 V3 = N31 + (T % count3) * STEP3;
2610 T = T / count3;
2611 V2 = N21 + (T % count2) * STEP2;
2612 T = T / count2;
2613 V1 = N11 + T * STEP1;
2614 iend = iend0;
2615 L1:
2616 BODY;
2617 V += 1;
2618 if (V < iend) goto L10; else goto L2;
2619 L10:
2620 V3 += STEP3;
2621 if (V3 cond3 N32) goto L1; else goto L11;
2622 L11:
2623 V3 = N31;
2624 V2 += STEP2;
2625 if (V2 cond2 N22) goto L1; else goto L12;
2626 L12:
2627 V2 = N21;
2628 V1 += STEP1;
2629 goto L1;
2630 L2:
2631 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2632 L3:
2633
2634 */
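/* As an illustrative mapping of the pseudocode above onto libgomp entry
   points: for a non-combined

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) ...

   GOMP_loop_foo_start would be a call such as
   GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0) (or its
   nonmonotonic variant, depending on schedule modifiers), and
   GOMP_loop_foo_next the matching GOMP_loop_dynamic_next (&istart0,
   &iend0); the corresponding GOMP_loop_end (or GOMP_loop_end_nowait)
   call is added at the region exit below.  */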
2635
2636static void
2637expand_omp_for_generic (struct omp_region *region,
2638 struct omp_for_data *fd,
2639 enum built_in_function start_fn,
2640 enum built_in_function next_fn,
28567c40 2641 tree sched_arg,
629b3d75
MJ
2642 gimple *inner_stmt)
2643{
2644 tree type, istart0, iend0, iend;
2645 tree t, vmain, vback, bias = NULL_TREE;
2646 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2647 basic_block l2_bb = NULL, l3_bb = NULL;
2648 gimple_stmt_iterator gsi;
2649 gassign *assign_stmt;
2650 bool in_combined_parallel = is_combined_parallel (region);
2651 bool broken_loop = region->cont == NULL;
2652 edge e, ne;
2653 tree *counts = NULL;
2654 int i;
2655 bool ordered_lastprivate = false;
2656
2657 gcc_assert (!broken_loop || !in_combined_parallel);
2658 gcc_assert (fd->iter_type == long_integer_type_node
2659 || !in_combined_parallel);
2660
2661 entry_bb = region->entry;
2662 cont_bb = region->cont;
2663 collapse_bb = NULL;
2664 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2665 gcc_assert (broken_loop
2666 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2667 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2668 l1_bb = single_succ (l0_bb);
2669 if (!broken_loop)
2670 {
2671 l2_bb = create_empty_bb (cont_bb);
2672 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2673 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2674 == l1_bb));
2675 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2676 }
2677 else
2678 l2_bb = NULL;
2679 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2680 exit_bb = region->exit;
2681
65f4b875 2682 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
2683
2684 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2685 if (fd->ordered
6c7ae8c5 2686 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
629b3d75
MJ
2687 OMP_CLAUSE_LASTPRIVATE))
 2688 ordered_lastprivate = true;
28567c40 2689 tree reductions = NULL_TREE;
6c7ae8c5
JJ
2690 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2691 tree memv = NULL_TREE;
8221c30b
JJ
2692 if (fd->lastprivate_conditional)
2693 {
2694 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2695 OMP_CLAUSE__CONDTEMP_);
2696 if (fd->have_pointer_condtemp)
2697 condtemp = OMP_CLAUSE_DECL (c);
2698 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2699 cond_var = OMP_CLAUSE_DECL (c);
2700 }
28567c40
JJ
2701 if (sched_arg)
2702 {
2703 if (fd->have_reductemp)
2704 {
6c7ae8c5 2705 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
28567c40
JJ
2706 OMP_CLAUSE__REDUCTEMP_);
2707 reductions = OMP_CLAUSE_DECL (c);
2708 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2709 gimple *g = SSA_NAME_DEF_STMT (reductions);
2710 reductions = gimple_assign_rhs1 (g);
2711 OMP_CLAUSE_DECL (c) = reductions;
2712 entry_bb = gimple_bb (g);
2713 edge e = split_block (entry_bb, g);
2714 if (region->entry == entry_bb)
2715 region->entry = e->dest;
2716 gsi = gsi_last_bb (entry_bb);
2717 }
2718 else
2719 reductions = null_pointer_node;
8221c30b 2720 if (fd->have_pointer_condtemp)
6c7ae8c5 2721 {
6c7ae8c5
JJ
2722 tree type = TREE_TYPE (condtemp);
2723 memv = create_tmp_var (type);
2724 TREE_ADDRESSABLE (memv) = 1;
2725 unsigned HOST_WIDE_INT sz
2726 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2727 sz *= fd->lastprivate_conditional;
2728 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2729 false);
2730 mem = build_fold_addr_expr (memv);
2731 }
2732 else
2733 mem = null_pointer_node;
28567c40 2734 }
629b3d75
MJ
2735 if (fd->collapse > 1 || fd->ordered)
2736 {
2737 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2738 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2739
2740 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2741 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2742 zero_iter1_bb, first_zero_iter1,
2743 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2744
2745 if (zero_iter1_bb)
2746 {
2747 /* Some counts[i] vars might be uninitialized if
2748 some loop has zero iterations. But the body shouldn't
2749 be executed in that case, so just avoid uninit warnings. */
2750 for (i = first_zero_iter1;
2751 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2752 if (SSA_VAR_P (counts[i]))
2753 TREE_NO_WARNING (counts[i]) = 1;
2754 gsi_prev (&gsi);
2755 e = split_block (entry_bb, gsi_stmt (gsi));
2756 entry_bb = e->dest;
2757 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 2758 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
2759 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2760 get_immediate_dominator (CDI_DOMINATORS,
2761 zero_iter1_bb));
2762 }
2763 if (zero_iter2_bb)
2764 {
2765 /* Some counts[i] vars might be uninitialized if
2766 some loop has zero iterations. But the body shouldn't
2767 be executed in that case, so just avoid uninit warnings. */
2768 for (i = first_zero_iter2; i < fd->ordered; i++)
2769 if (SSA_VAR_P (counts[i]))
2770 TREE_NO_WARNING (counts[i]) = 1;
2771 if (zero_iter1_bb)
2772 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2773 else
2774 {
2775 gsi_prev (&gsi);
2776 e = split_block (entry_bb, gsi_stmt (gsi));
2777 entry_bb = e->dest;
2778 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 2779 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
2780 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2781 get_immediate_dominator
2782 (CDI_DOMINATORS, zero_iter2_bb));
2783 }
2784 }
2785 if (fd->collapse == 1)
2786 {
2787 counts[0] = fd->loop.n2;
2788 fd->loop = fd->loops[0];
2789 }
2790 }
2791
2792 type = TREE_TYPE (fd->loop.v);
2793 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2794 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2795 TREE_ADDRESSABLE (istart0) = 1;
2796 TREE_ADDRESSABLE (iend0) = 1;
2797
2798 /* See if we need to bias by LLONG_MIN. */
2799 if (fd->iter_type == long_long_unsigned_type_node
2800 && TREE_CODE (type) == INTEGER_TYPE
2801 && !TYPE_UNSIGNED (type)
2802 && fd->ordered == 0)
2803 {
2804 tree n1, n2;
2805
2806 if (fd->loop.cond_code == LT_EXPR)
2807 {
2808 n1 = fd->loop.n1;
2809 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2810 }
2811 else
2812 {
2813 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2814 n2 = fd->loop.n1;
2815 }
2816 if (TREE_CODE (n1) != INTEGER_CST
2817 || TREE_CODE (n2) != INTEGER_CST
2818 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2819 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2820 }
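  /* Note: the bias maps the signed iteration range monotonically onto
     the unsigned long long range the runtime works with; adding the
     signed type's minimum value (reinterpreted in the unsigned iterator
     type) keeps the ordering of the start/end values intact, and the
     same bias is subtracted again when istart0/iend0 are converted back
     to the user's iteration variable below.  */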
2821
2822 gimple_stmt_iterator gsif = gsi;
2823 gsi_prev (&gsif);
2824
2825 tree arr = NULL_TREE;
2826 if (in_combined_parallel)
2827 {
2828 gcc_assert (fd->ordered == 0);
2829 /* In a combined parallel loop, emit a call to
2830 GOMP_loop_foo_next. */
2831 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2832 build_fold_addr_expr (istart0),
2833 build_fold_addr_expr (iend0));
2834 }
2835 else
2836 {
2837 tree t0, t1, t2, t3, t4;
2838 /* If this is not a combined parallel loop, emit a call to
2839 GOMP_loop_foo_start in ENTRY_BB. */
2840 t4 = build_fold_addr_expr (iend0);
2841 t3 = build_fold_addr_expr (istart0);
2842 if (fd->ordered)
2843 {
2844 t0 = build_int_cst (unsigned_type_node,
2845 fd->ordered - fd->collapse + 1);
2846 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2847 fd->ordered
2848 - fd->collapse + 1),
2849 ".omp_counts");
2850 DECL_NAMELESS (arr) = 1;
2851 TREE_ADDRESSABLE (arr) = 1;
2852 TREE_STATIC (arr) = 1;
2853 vec<constructor_elt, va_gc> *v;
2854 vec_alloc (v, fd->ordered - fd->collapse + 1);
2855 int idx;
2856
2857 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2858 {
2859 tree c;
2860 if (idx == 0 && fd->collapse > 1)
2861 c = fd->loop.n2;
2862 else
2863 c = counts[idx + fd->collapse - 1];
2864 tree purpose = size_int (idx);
2865 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2866 if (TREE_CODE (c) != INTEGER_CST)
2867 TREE_STATIC (arr) = 0;
2868 }
2869
2870 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2871 if (!TREE_STATIC (arr))
2872 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2873 void_type_node, arr),
2874 true, NULL_TREE, true, GSI_SAME_STMT);
2875 t1 = build_fold_addr_expr (arr);
2876 t2 = NULL_TREE;
2877 }
2878 else
2879 {
2880 t2 = fold_convert (fd->iter_type, fd->loop.step);
2881 t1 = fd->loop.n2;
2882 t0 = fd->loop.n1;
2883 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2884 {
2885 tree innerc
2886 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2887 OMP_CLAUSE__LOOPTEMP_);
2888 gcc_assert (innerc);
2889 t0 = OMP_CLAUSE_DECL (innerc);
2890 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2891 OMP_CLAUSE__LOOPTEMP_);
2892 gcc_assert (innerc);
2893 t1 = OMP_CLAUSE_DECL (innerc);
2894 }
2895 if (POINTER_TYPE_P (TREE_TYPE (t0))
2896 && TYPE_PRECISION (TREE_TYPE (t0))
2897 != TYPE_PRECISION (fd->iter_type))
2898 {
2899 /* Avoid casting pointers to integer of a different size. */
2900 tree itype = signed_type_for (type);
2901 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2902 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2903 }
2904 else
2905 {
2906 t1 = fold_convert (fd->iter_type, t1);
2907 t0 = fold_convert (fd->iter_type, t0);
2908 }
2909 if (bias)
2910 {
2911 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2912 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2913 }
2914 }
2915 if (fd->iter_type == long_integer_type_node || fd->ordered)
2916 {
2917 if (fd->chunk_size)
2918 {
2919 t = fold_convert (fd->iter_type, fd->chunk_size);
2920 t = omp_adjust_chunk_size (t, fd->simd_schedule);
28567c40
JJ
2921 if (sched_arg)
2922 {
2923 if (fd->ordered)
2924 t = build_call_expr (builtin_decl_explicit (start_fn),
2925 8, t0, t1, sched_arg, t, t3, t4,
2926 reductions, mem);
2927 else
2928 t = build_call_expr (builtin_decl_explicit (start_fn),
2929 9, t0, t1, t2, sched_arg, t, t3, t4,
2930 reductions, mem);
2931 }
2932 else if (fd->ordered)
629b3d75
MJ
2933 t = build_call_expr (builtin_decl_explicit (start_fn),
2934 5, t0, t1, t, t3, t4);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 6, t0, t1, t2, t, t3, t4);
2938 }
2939 else if (fd->ordered)
2940 t = build_call_expr (builtin_decl_explicit (start_fn),
2941 4, t0, t1, t3, t4);
2942 else
2943 t = build_call_expr (builtin_decl_explicit (start_fn),
2944 5, t0, t1, t2, t3, t4);
2945 }
2946 else
2947 {
2948 tree t5;
2949 tree c_bool_type;
2950 tree bfn_decl;
2951
2952 /* The GOMP_loop_ull_*start functions have additional boolean
2953 argument, true for < loops and false for > loops.
2954 In Fortran, the C bool type can be different from
2955 boolean_type_node. */
2956 bfn_decl = builtin_decl_explicit (start_fn);
2957 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2958 t5 = build_int_cst (c_bool_type,
2959 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2960 if (fd->chunk_size)
2961 {
2962 tree bfn_decl = builtin_decl_explicit (start_fn);
2963 t = fold_convert (fd->iter_type, fd->chunk_size);
2964 t = omp_adjust_chunk_size (t, fd->simd_schedule);
28567c40
JJ
2965 if (sched_arg)
2966 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2967 t, t3, t4, reductions, mem);
2968 else
2969 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
629b3d75
MJ
2970 }
2971 else
2972 t = build_call_expr (builtin_decl_explicit (start_fn),
2973 6, t5, t0, t1, t2, t3, t4);
2974 }
2975 }
2976 if (TREE_TYPE (t) != boolean_type_node)
2977 t = fold_build2 (NE_EXPR, boolean_type_node,
2978 t, build_int_cst (TREE_TYPE (t), 0));
2979 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
01914336 2980 true, GSI_SAME_STMT);
629b3d75
MJ
2981 if (arr && !TREE_STATIC (arr))
2982 {
2983 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2984 TREE_THIS_VOLATILE (clobber) = 1;
2985 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2986 GSI_SAME_STMT);
2987 }
8221c30b 2988 if (fd->have_pointer_condtemp)
6c7ae8c5 2989 expand_omp_build_assign (&gsi, condtemp, memv, false);
28567c40
JJ
2990 if (fd->have_reductemp)
2991 {
2992 gimple *g = gsi_stmt (gsi);
2993 gsi_remove (&gsi, true);
2994 release_ssa_name (gimple_assign_lhs (g));
2995
2996 entry_bb = region->entry;
2997 gsi = gsi_last_nondebug_bb (entry_bb);
2998
2999 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3000 }
629b3d75
MJ
3001 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3002
3003 /* Remove the GIMPLE_OMP_FOR statement. */
3004 gsi_remove (&gsi, true);
3005
3006 if (gsi_end_p (gsif))
3007 gsif = gsi_after_labels (gsi_bb (gsif));
3008 gsi_next (&gsif);
3009
3010 /* Iteration setup for sequential loop goes in L0_BB. */
3011 tree startvar = fd->loop.v;
3012 tree endvar = NULL_TREE;
3013
3014 if (gimple_omp_for_combined_p (fd->for_stmt))
3015 {
3016 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3017 && gimple_omp_for_kind (inner_stmt)
3018 == GF_OMP_FOR_KIND_SIMD);
3019 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3020 OMP_CLAUSE__LOOPTEMP_);
3021 gcc_assert (innerc);
3022 startvar = OMP_CLAUSE_DECL (innerc);
3023 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3024 OMP_CLAUSE__LOOPTEMP_);
3025 gcc_assert (innerc);
3026 endvar = OMP_CLAUSE_DECL (innerc);
3027 }
3028
3029 gsi = gsi_start_bb (l0_bb);
3030 t = istart0;
3031 if (fd->ordered && fd->collapse == 1)
3032 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3033 fold_convert (fd->iter_type, fd->loop.step));
3034 else if (bias)
3035 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3036 if (fd->ordered && fd->collapse == 1)
3037 {
3038 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3039 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3040 fd->loop.n1, fold_convert (sizetype, t));
3041 else
3042 {
3043 t = fold_convert (TREE_TYPE (startvar), t);
3044 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3045 fd->loop.n1, t);
3046 }
3047 }
3048 else
3049 {
3050 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3051 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3052 t = fold_convert (TREE_TYPE (startvar), t);
3053 }
3054 t = force_gimple_operand_gsi (&gsi, t,
3055 DECL_P (startvar)
3056 && TREE_ADDRESSABLE (startvar),
3057 NULL_TREE, false, GSI_CONTINUE_LINKING);
3058 assign_stmt = gimple_build_assign (startvar, t);
3059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6c7ae8c5
JJ
3060 if (cond_var)
3061 {
3062 tree itype = TREE_TYPE (cond_var);
3063 /* For lastprivate(conditional:) itervar, we need some iteration
3064 counter that starts at unsigned non-zero and increases.
3065 Prefer as few IVs as possible, so if we can use startvar
3066 itself, use that, or startvar + constant (those would be
 3067 incremented with step), and as a last resort use istart0 + 1,
 3068 incremented by 1 each iteration. */
3069 if ((fd->ordered && fd->collapse == 1)
3070 || bias
3071 || POINTER_TYPE_P (type)
3072 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3073 || fd->loop.cond_code != LT_EXPR)
3074 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3075 build_int_cst (itype, 1));
3076 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3077 t = fold_convert (itype, t);
3078 else
3079 {
3080 tree c = fold_convert (itype, fd->loop.n1);
3081 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3083 }
3084 t = force_gimple_operand_gsi (&gsi, t, false,
3085 NULL_TREE, false, GSI_CONTINUE_LINKING);
3086 assign_stmt = gimple_build_assign (cond_var, t);
3087 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3088 }
629b3d75
MJ
3089
3090 t = iend0;
3091 if (fd->ordered && fd->collapse == 1)
3092 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3093 fold_convert (fd->iter_type, fd->loop.step));
3094 else if (bias)
3095 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3096 if (fd->ordered && fd->collapse == 1)
3097 {
3098 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3099 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3100 fd->loop.n1, fold_convert (sizetype, t));
3101 else
3102 {
3103 t = fold_convert (TREE_TYPE (startvar), t);
3104 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3105 fd->loop.n1, t);
3106 }
3107 }
3108 else
3109 {
3110 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3111 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3112 t = fold_convert (TREE_TYPE (startvar), t);
3113 }
3114 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3115 false, GSI_CONTINUE_LINKING);
3116 if (endvar)
3117 {
3118 assign_stmt = gimple_build_assign (endvar, iend);
3119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3120 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3121 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3122 else
3123 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3124 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3125 }
3126 /* Handle linear clause adjustments. */
3127 tree itercnt = NULL_TREE;
3128 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3129 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3130 c; c = OMP_CLAUSE_CHAIN (c))
3131 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3132 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3133 {
3134 tree d = OMP_CLAUSE_DECL (c);
3135 bool is_ref = omp_is_reference (d);
3136 tree t = d, a, dest;
3137 if (is_ref)
3138 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3139 tree type = TREE_TYPE (t);
3140 if (POINTER_TYPE_P (type))
3141 type = sizetype;
3142 dest = unshare_expr (t);
3143 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3144 expand_omp_build_assign (&gsif, v, t);
3145 if (itercnt == NULL_TREE)
3146 {
3147 itercnt = startvar;
3148 tree n1 = fd->loop.n1;
3149 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3150 {
3151 itercnt
3152 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3153 itercnt);
3154 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3155 }
3156 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3157 itercnt, n1);
3158 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3159 itercnt, fd->loop.step);
3160 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3161 NULL_TREE, false,
3162 GSI_CONTINUE_LINKING);
3163 }
3164 a = fold_build2 (MULT_EXPR, type,
3165 fold_convert (type, itercnt),
3166 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3167 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3168 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3169 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3170 false, GSI_CONTINUE_LINKING);
3171 assign_stmt = gimple_build_assign (dest, t);
3172 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3173 }
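  /* For example, for a clause "linear(x:2)" the loop above saves the
     value x had on entry, computes the number of logical iterations
     preceding this thread's chunk, itercnt = (startvar - N1) / STEP,
     and stores x = <entry value> + itercnt * 2, i.e. the value x would
     have at this point in a sequential execution.  */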
3174 if (fd->collapse > 1)
3175 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3176
3177 if (fd->ordered)
3178 {
3179 /* Until now, counts array contained number of iterations or
3180 variable containing it for ith loop. From now on, we need
3181 those counts only for collapsed loops, and only for the 2nd
 3182 till the last collapsed one. Move those one element earlier;
3183 we'll use counts[fd->collapse - 1] for the first source/sink
3184 iteration counter and so on and counts[fd->ordered]
3185 as the array holding the current counter values for
3186 depend(source). */
3187 if (fd->collapse > 1)
3188 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3189 if (broken_loop)
3190 {
3191 int i;
3192 for (i = fd->collapse; i < fd->ordered; i++)
3193 {
3194 tree type = TREE_TYPE (fd->loops[i].v);
3195 tree this_cond
3196 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3197 fold_convert (type, fd->loops[i].n1),
3198 fold_convert (type, fd->loops[i].n2));
3199 if (!integer_onep (this_cond))
3200 break;
3201 }
3202 if (i < fd->ordered)
3203 {
3204 cont_bb
3205 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3206 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3207 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3208 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3209 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3210 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3211 make_edge (cont_bb, l1_bb, 0);
3212 l2_bb = create_empty_bb (cont_bb);
3213 broken_loop = false;
3214 }
3215 }
3216 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3217 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3218 ordered_lastprivate);
3219 if (counts[fd->collapse - 1])
3220 {
3221 gcc_assert (fd->collapse == 1);
3222 gsi = gsi_last_bb (l0_bb);
3223 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3224 istart0, true);
3225 gsi = gsi_last_bb (cont_bb);
3226 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3227 build_int_cst (fd->iter_type, 1));
3228 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3229 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3230 size_zero_node, NULL_TREE, NULL_TREE);
3231 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3232 t = counts[fd->collapse - 1];
3233 }
3234 else if (fd->collapse > 1)
3235 t = fd->loop.v;
3236 else
3237 {
3238 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3239 fd->loops[0].v, fd->loops[0].n1);
3240 t = fold_convert (fd->iter_type, t);
3241 }
3242 gsi = gsi_last_bb (l0_bb);
3243 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3244 size_zero_node, NULL_TREE, NULL_TREE);
3245 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3246 false, GSI_CONTINUE_LINKING);
3247 expand_omp_build_assign (&gsi, aref, t, true);
3248 }
3249
3250 if (!broken_loop)
3251 {
3252 /* Code to control the increment and predicate for the sequential
3253 loop goes in the CONT_BB. */
65f4b875 3254 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
3255 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3256 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3257 vmain = gimple_omp_continue_control_use (cont_stmt);
3258 vback = gimple_omp_continue_control_def (cont_stmt);
3259
3260 if (!gimple_omp_for_combined_p (fd->for_stmt))
3261 {
3262 if (POINTER_TYPE_P (type))
3263 t = fold_build_pointer_plus (vmain, fd->loop.step);
3264 else
3265 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3266 t = force_gimple_operand_gsi (&gsi, t,
3267 DECL_P (vback)
3268 && TREE_ADDRESSABLE (vback),
3269 NULL_TREE, true, GSI_SAME_STMT);
3270 assign_stmt = gimple_build_assign (vback, t);
3271 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3272
6c7ae8c5
JJ
3273 if (cond_var)
3274 {
3275 tree itype = TREE_TYPE (cond_var);
3276 tree t2;
3277 if ((fd->ordered && fd->collapse == 1)
3278 || bias
3279 || POINTER_TYPE_P (type)
3280 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3281 || fd->loop.cond_code != LT_EXPR)
3282 t2 = build_int_cst (itype, 1);
3283 else
3284 t2 = fold_convert (itype, fd->loop.step);
3285 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3286 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3287 NULL_TREE, true, GSI_SAME_STMT);
3288 assign_stmt = gimple_build_assign (cond_var, t2);
3289 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3290 }
3291
629b3d75
MJ
3292 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3293 {
d1ffbd43 3294 tree tem;
629b3d75 3295 if (fd->collapse > 1)
d1ffbd43 3296 tem = fd->loop.v;
629b3d75
MJ
3297 else
3298 {
d1ffbd43
JJ
3299 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3300 fd->loops[0].v, fd->loops[0].n1);
3301 tem = fold_convert (fd->iter_type, tem);
629b3d75
MJ
3302 }
3303 tree aref = build4 (ARRAY_REF, fd->iter_type,
3304 counts[fd->ordered], size_zero_node,
3305 NULL_TREE, NULL_TREE);
d1ffbd43
JJ
3306 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3307 true, GSI_SAME_STMT);
3308 expand_omp_build_assign (&gsi, aref, tem);
629b3d75
MJ
3309 }
3310
3311 t = build2 (fd->loop.cond_code, boolean_type_node,
3312 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3313 iend);
3314 gcond *cond_stmt = gimple_build_cond_empty (t);
3315 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3316 }
3317
3318 /* Remove GIMPLE_OMP_CONTINUE. */
3319 gsi_remove (&gsi, true);
3320
3321 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3322 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3323
3324 /* Emit code to get the next parallel iteration in L2_BB. */
3325 gsi = gsi_start_bb (l2_bb);
3326
3327 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3328 build_fold_addr_expr (istart0),
3329 build_fold_addr_expr (iend0));
3330 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3331 false, GSI_CONTINUE_LINKING);
3332 if (TREE_TYPE (t) != boolean_type_node)
3333 t = fold_build2 (NE_EXPR, boolean_type_node,
3334 t, build_int_cst (TREE_TYPE (t), 0));
3335 gcond *cond_stmt = gimple_build_cond_empty (t);
3336 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3337 }
3338
3339 /* Add the loop cleanup function. */
65f4b875 3340 gsi = gsi_last_nondebug_bb (exit_bb);
3341 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3342 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3343 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3344 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3345 else
3346 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3347 gcall *call_stmt = gimple_build_call (t, 0);
3348 if (fd->ordered)
3349 {
3350 tree arr = counts[fd->ordered];
3351 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3352 TREE_THIS_VOLATILE (clobber) = 1;
3353 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3354 GSI_SAME_STMT);
3355 }
3356 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3357 {
3358 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3359 if (fd->have_reductemp)
3360 {
3361 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3362 gimple_call_lhs (call_stmt));
3363 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3364 }
3365 }
3366 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3367 gsi_remove (&gsi, true);
3368
3369 /* Connect the new blocks. */
3370 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3371 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3372
3373 if (!broken_loop)
3374 {
3375 gimple_seq phis;
3376
3377 e = find_edge (cont_bb, l3_bb);
3378 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3379
3380 phis = phi_nodes (l3_bb);
3381 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3382 {
3383 gimple *phi = gsi_stmt (gsi);
3384 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3385 PHI_ARG_DEF_FROM_EDGE (phi, e));
3386 }
3387 remove_edge (e);
3388
3389 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3390 e = find_edge (cont_bb, l1_bb);
3391 if (e == NULL)
3392 {
3393 e = BRANCH_EDGE (cont_bb);
3394 gcc_assert (single_succ (e->dest) == l1_bb);
3395 }
3396 if (gimple_omp_for_combined_p (fd->for_stmt))
3397 {
3398 remove_edge (e);
3399 e = NULL;
3400 }
3401 else if (fd->collapse > 1)
3402 {
3403 remove_edge (e);
3404 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3405 }
3406 else
3407 e->flags = EDGE_TRUE_VALUE;
3408 if (e)
3409 {
3410 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3411 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3412 }
3413 else
3414 {
3415 e = find_edge (cont_bb, l2_bb);
3416 e->flags = EDGE_FALLTHRU;
3417 }
3418 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3419
3420 if (gimple_in_ssa_p (cfun))
3421 {
3422 /* Add phis to the outer loop that connect to the phis in the inner,
3423 original loop, and move the loop entry value of the inner phi to
3424 the loop entry value of the outer phi. */
3425 gphi_iterator psi;
3426 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3427 {
620e594b 3428 location_t locus;
3429 gphi *nphi;
3430 gphi *exit_phi = psi.phi ();
3431
3432 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3433 continue;
3434
3435 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3436 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3437
3438 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3439 edge latch_to_l1 = find_edge (latch, l1_bb);
3440 gphi *inner_phi
3441 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3442
3443 tree t = gimple_phi_result (exit_phi);
3444 tree new_res = copy_ssa_name (t, NULL);
3445 nphi = create_phi_node (new_res, l0_bb);
3446
3447 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3448 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3449 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3450 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3451 add_phi_arg (nphi, t, entry_to_l0, locus);
3452
3453 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3454 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3455
3456 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
164485b5 3457 }
3458 }
3459
3460 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3461 recompute_dominator (CDI_DOMINATORS, l2_bb));
3462 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3463 recompute_dominator (CDI_DOMINATORS, l3_bb));
3464 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3465 recompute_dominator (CDI_DOMINATORS, l0_bb));
3466 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3467 recompute_dominator (CDI_DOMINATORS, l1_bb));
3468
3469 /* We enter expand_omp_for_generic with a loop. This original loop may
3470 have its own loop struct, or it may be part of an outer loop struct
3471 (which may be the fake loop). */
3472 struct loop *outer_loop = entry_bb->loop_father;
3473 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3474
3475 add_bb_to_loop (l2_bb, outer_loop);
3476
3477 /* We've added a new loop around the original loop. Allocate the
3478 corresponding loop struct. */
3479 struct loop *new_loop = alloc_loop ();
3480 new_loop->header = l0_bb;
3481 new_loop->latch = l2_bb;
3482 add_loop (new_loop, outer_loop);
3483
3484 /* Allocate a loop structure for the original loop unless we already
3485 had one. */
3486 if (!orig_loop_has_loop_struct
3487 && !gimple_omp_for_combined_p (fd->for_stmt))
3488 {
3489 struct loop *orig_loop = alloc_loop ();
3490 orig_loop->header = l1_bb;
3491 /* The loop may have multiple latches. */
3492 add_loop (orig_loop, new_loop);
3493 }
3494 }
3495}
3496
3497/* A subroutine of expand_omp_for. Generate code for a parallel
3498 loop with static schedule and no specified chunk size. Given
3499 parameters:
3500
3501 for (V = N1; V cond N2; V += STEP) BODY;
3502
3503 where COND is "<" or ">", we generate pseudocode
3504
3505 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3506 if (cond is <)
3507 adj = STEP - 1;
3508 else
3509 adj = STEP + 1;
3510 if ((__typeof (V)) -1 > 0 && cond is >)
3511 n = -(adj + N2 - N1) / -STEP;
3512 else
3513 n = (adj + N2 - N1) / STEP;
3514 q = n / nthreads;
3515 tt = n % nthreads;
3516 if (threadid < tt) goto L3; else goto L4;
3517 L3:
3518 tt = 0;
3519 q = q + 1;
3520 L4:
3521 s0 = q * threadid + tt;
3522 e0 = s0 + q;
3523 V = s0 * STEP + N1;
3524 if (s0 >= e0) goto L2; else goto L0;
3525 L0:
3526 e = e0 * STEP + N1;
3527 L1:
3528 BODY;
3529 V += STEP;
3530 if (V cond e) goto L1;
3531 L2:
3532*/
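/* As an illustration, assuming a worksharing loop such as

     #pragma omp for schedule(static)
     for (i = 0; i < 103; i++) ...

   run with nthreads = 4: n = 103, so q = 25 and tt = 3.  Threads 0, 1 and 2
   satisfy threadid < tt and take q + 1 = 26 iterations each, while thread 3
   keeps q = 25.  The resulting [s0, e0) ranges are [0, 26), [26, 52),
   [52, 78) and [78, 103), covering every iteration exactly once.  */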
3533
3534static void
3535expand_omp_for_static_nochunk (struct omp_region *region,
3536 struct omp_for_data *fd,
3537 gimple *inner_stmt)
3538{
3539 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3540 tree type, itype, vmain, vback;
3541 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3542 basic_block body_bb, cont_bb, collapse_bb = NULL;
3543 basic_block fin_bb;
6c7ae8c5 3544 gimple_stmt_iterator gsi, gsip;
3545 edge ep;
3546 bool broken_loop = region->cont == NULL;
3547 tree *counts = NULL;
3548 tree n1, n2, step;
28567c40 3549 tree reductions = NULL_TREE;
8221c30b 3550 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3551
3552 itype = type = TREE_TYPE (fd->loop.v);
3553 if (POINTER_TYPE_P (type))
3554 itype = signed_type_for (type);
3555
3556 entry_bb = region->entry;
3557 cont_bb = region->cont;
3558 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3559 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3560 gcc_assert (broken_loop
3561 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3562 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3563 body_bb = single_succ (seq_start_bb);
3564 if (!broken_loop)
3565 {
3566 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3567 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3568 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3569 }
3570 exit_bb = region->exit;
3571
3572 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 3573 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 3574 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3575 gsip = gsi;
3576 gsi_prev (&gsip);
3577
3578 if (fd->collapse > 1)
3579 {
3580 int first_zero_iter = -1, dummy = -1;
3581 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3582
3583 counts = XALLOCAVEC (tree, fd->collapse);
3584 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3585 fin_bb, first_zero_iter,
3586 dummy_bb, dummy, l2_dom_bb);
3587 t = NULL_TREE;
3588 }
3589 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3590 t = integer_one_node;
3591 else
3592 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3593 fold_convert (type, fd->loop.n1),
3594 fold_convert (type, fd->loop.n2));
3595 if (fd->collapse == 1
3596 && TYPE_UNSIGNED (type)
3597 && (t == NULL_TREE || !integer_onep (t)))
3598 {
3599 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3600 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3601 true, GSI_SAME_STMT);
3602 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3603 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3604 true, GSI_SAME_STMT);
3605 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
6c7ae8c5 3606 NULL_TREE, NULL_TREE);
3607 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3608 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3609 expand_omp_regimplify_p, NULL, NULL)
3610 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3611 expand_omp_regimplify_p, NULL, NULL))
3612 {
3613 gsi = gsi_for_stmt (cond_stmt);
3614 gimple_regimplify_operands (cond_stmt, &gsi);
3615 }
3616 ep = split_block (entry_bb, cond_stmt);
3617 ep->flags = EDGE_TRUE_VALUE;
3618 entry_bb = ep->dest;
357067f2 3619 ep->probability = profile_probability::very_likely ();
629b3d75 3620 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 3621 ep->probability = profile_probability::very_unlikely ();
3622 if (gimple_in_ssa_p (cfun))
3623 {
3624 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3625 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3626 !gsi_end_p (gpi); gsi_next (&gpi))
3627 {
3628 gphi *phi = gpi.phi ();
3629 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3630 ep, UNKNOWN_LOCATION);
3631 }
3632 }
3633 gsi = gsi_last_bb (entry_bb);
3634 }
3635
3636 if (fd->lastprivate_conditional)
3637 {
3638 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3639 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3640 if (fd->have_pointer_condtemp)
3641 condtemp = OMP_CLAUSE_DECL (c);
3642 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3643 cond_var = OMP_CLAUSE_DECL (c);
3644 }
3645 if (fd->have_reductemp || fd->have_pointer_condtemp)
3646 {
3647 tree t1 = build_int_cst (long_integer_type_node, 0);
3648 tree t2 = build_int_cst (long_integer_type_node, 1);
3649 tree t3 = build_int_cstu (long_integer_type_node,
3650 (HOST_WIDE_INT_1U << 31) + 1);
3651 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3652 gimple_stmt_iterator gsi2 = gsi_none ();
3653 gimple *g = NULL;
3654 tree mem = null_pointer_node, memv = NULL_TREE;
3655 if (fd->have_reductemp)
3656 {
3657 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3658 reductions = OMP_CLAUSE_DECL (c);
3659 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3660 g = SSA_NAME_DEF_STMT (reductions);
3661 reductions = gimple_assign_rhs1 (g);
3662 OMP_CLAUSE_DECL (c) = reductions;
3663 gsi2 = gsi_for_stmt (g);
3664 }
3665 else
3666 {
3667 if (gsi_end_p (gsip))
3668 gsi2 = gsi_after_labels (region->entry);
3669 else
3670 gsi2 = gsip;
3671 reductions = null_pointer_node;
3672 }
8221c30b 3673 if (fd->have_pointer_condtemp)
6c7ae8c5 3674 {
3675 tree type = TREE_TYPE (condtemp);
3676 memv = create_tmp_var (type);
3677 TREE_ADDRESSABLE (memv) = 1;
3678 unsigned HOST_WIDE_INT sz
3679 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3680 sz *= fd->lastprivate_conditional;
3681 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
3682 false);
3683 mem = build_fold_addr_expr (memv);
3684 }
3685 tree t
3686 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3687 9, t1, t2, t2, t3, t1, null_pointer_node,
6c7ae8c5 3688 null_pointer_node, reductions, mem);
3689 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3690 true, GSI_SAME_STMT);
8221c30b 3691 if (fd->have_pointer_condtemp)
3692 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3693 if (fd->have_reductemp)
3694 {
3695 gsi_remove (&gsi2, true);
3696 release_ssa_name (gimple_assign_lhs (g));
3697 }
28567c40 3698 }
3699 switch (gimple_omp_for_kind (fd->for_stmt))
3700 {
3701 case GF_OMP_FOR_KIND_FOR:
3702 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3703 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3704 break;
3705 case GF_OMP_FOR_KIND_DISTRIBUTE:
3706 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3707 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3708 break;
3709 default:
3710 gcc_unreachable ();
3711 }
3712 nthreads = build_call_expr (nthreads, 0);
3713 nthreads = fold_convert (itype, nthreads);
3714 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3715 true, GSI_SAME_STMT);
3716 threadid = build_call_expr (threadid, 0);
3717 threadid = fold_convert (itype, threadid);
3718 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3719 true, GSI_SAME_STMT);
3720
3721 n1 = fd->loop.n1;
3722 n2 = fd->loop.n2;
3723 step = fd->loop.step;
3724 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3725 {
3726 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3727 OMP_CLAUSE__LOOPTEMP_);
3728 gcc_assert (innerc);
3729 n1 = OMP_CLAUSE_DECL (innerc);
3730 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3731 OMP_CLAUSE__LOOPTEMP_);
3732 gcc_assert (innerc);
3733 n2 = OMP_CLAUSE_DECL (innerc);
3734 }
3735 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3736 true, NULL_TREE, true, GSI_SAME_STMT);
3737 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3738 true, NULL_TREE, true, GSI_SAME_STMT);
3739 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3740 true, NULL_TREE, true, GSI_SAME_STMT);
3741
3742 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3743 t = fold_build2 (PLUS_EXPR, itype, step, t);
3744 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3745 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3746 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3747 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3748 fold_build1 (NEGATE_EXPR, itype, t),
3749 fold_build1 (NEGATE_EXPR, itype, step));
3750 else
3751 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3752 t = fold_convert (itype, t);
3753 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3754
3755 q = create_tmp_reg (itype, "q");
3756 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3757 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3758 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3759
3760 tt = create_tmp_reg (itype, "tt");
3761 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3762 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3763 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3764
3765 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3766 gcond *cond_stmt = gimple_build_cond_empty (t);
3767 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3768
3769 second_bb = split_block (entry_bb, cond_stmt)->dest;
65f4b875 3770 gsi = gsi_last_nondebug_bb (second_bb);
3771 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3772
3773 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3774 GSI_SAME_STMT);
3775 gassign *assign_stmt
3776 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3777 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3778
3779 third_bb = split_block (second_bb, assign_stmt)->dest;
65f4b875 3780 gsi = gsi_last_nondebug_bb (third_bb);
3781 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3782
3783 t = build2 (MULT_EXPR, itype, q, threadid);
3784 t = build2 (PLUS_EXPR, itype, t, tt);
3785 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3786
3787 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3788 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3789
3790 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3791 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3792
3793 /* Remove the GIMPLE_OMP_FOR statement. */
3794 gsi_remove (&gsi, true);
3795
3796 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3797 gsi = gsi_start_bb (seq_start_bb);
3798
3799 tree startvar = fd->loop.v;
3800 tree endvar = NULL_TREE;
3801
3802 if (gimple_omp_for_combined_p (fd->for_stmt))
3803 {
3804 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3805 ? gimple_omp_parallel_clauses (inner_stmt)
3806 : gimple_omp_for_clauses (inner_stmt);
3807 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3808 gcc_assert (innerc);
3809 startvar = OMP_CLAUSE_DECL (innerc);
3810 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3811 OMP_CLAUSE__LOOPTEMP_);
3812 gcc_assert (innerc);
3813 endvar = OMP_CLAUSE_DECL (innerc);
3814 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3815 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3816 {
3817 int i;
3818 for (i = 1; i < fd->collapse; i++)
3819 {
3820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3821 OMP_CLAUSE__LOOPTEMP_);
3822 gcc_assert (innerc);
3823 }
3824 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3825 OMP_CLAUSE__LOOPTEMP_);
3826 if (innerc)
3827 {
3828 /* If needed (distribute parallel for with lastprivate),
3829 propagate down the total number of iterations. */
3830 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3831 fd->loop.n2);
3832 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3833 GSI_CONTINUE_LINKING);
3834 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3835 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3836 }
3837 }
3838 }
3839 t = fold_convert (itype, s0);
3840 t = fold_build2 (MULT_EXPR, itype, t, step);
3841 if (POINTER_TYPE_P (type))
3842 {
3843 t = fold_build_pointer_plus (n1, t);
3844 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3845 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3846 t = fold_convert (signed_type_for (type), t);
3847 }
3848 else
3849 t = fold_build2 (PLUS_EXPR, type, t, n1);
3850 t = fold_convert (TREE_TYPE (startvar), t);
3851 t = force_gimple_operand_gsi (&gsi, t,
3852 DECL_P (startvar)
3853 && TREE_ADDRESSABLE (startvar),
3854 NULL_TREE, false, GSI_CONTINUE_LINKING);
3855 assign_stmt = gimple_build_assign (startvar, t);
3856 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3857 if (cond_var)
3858 {
3859 tree itype = TREE_TYPE (cond_var);
3860 /* For lastprivate(conditional:) itervar, we need some iteration
3861 counter that starts at unsigned non-zero and increases.
3862 Prefer as few IVs as possible, so if we can use startvar
3863 itself, use that, or startvar + constant (those would be
 3864 incremented with step), and as a last resort use s0 + 1,
 3865 incremented by 1 each iteration. */
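      /* For instance, for "for (i = 5; i < n; i++)" (positive constant
	 lower bound, LT condition) cond_var can simply mirror startvar,
	 which is already >= 1; for "for (i = -3; i < n; i++)" it becomes
	 startvar + 4; in the remaining cases it falls back to s0 + 1.  */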
3866 if (POINTER_TYPE_P (type)
3867 || TREE_CODE (n1) != INTEGER_CST
3868 || fd->loop.cond_code != LT_EXPR)
3869 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
3870 build_int_cst (itype, 1));
3871 else if (tree_int_cst_sgn (n1) == 1)
3872 t = fold_convert (itype, t);
3873 else
3874 {
3875 tree c = fold_convert (itype, n1);
3876 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3877 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3878 }
3879 t = force_gimple_operand_gsi (&gsi, t, false,
3880 NULL_TREE, false, GSI_CONTINUE_LINKING);
3881 assign_stmt = gimple_build_assign (cond_var, t);
3882 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3883 }
3884
3885 t = fold_convert (itype, e0);
3886 t = fold_build2 (MULT_EXPR, itype, t, step);
3887 if (POINTER_TYPE_P (type))
3888 {
3889 t = fold_build_pointer_plus (n1, t);
3890 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3891 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3892 t = fold_convert (signed_type_for (type), t);
3893 }
3894 else
3895 t = fold_build2 (PLUS_EXPR, type, t, n1);
3896 t = fold_convert (TREE_TYPE (startvar), t);
3897 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3898 false, GSI_CONTINUE_LINKING);
3899 if (endvar)
3900 {
3901 assign_stmt = gimple_build_assign (endvar, e);
3902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3903 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3904 assign_stmt = gimple_build_assign (fd->loop.v, e);
3905 else
3906 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3907 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3908 }
3909 /* Handle linear clause adjustments. */
3910 tree itercnt = NULL_TREE;
3911 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3912 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3913 c; c = OMP_CLAUSE_CHAIN (c))
3914 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3915 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3916 {
3917 tree d = OMP_CLAUSE_DECL (c);
3918 bool is_ref = omp_is_reference (d);
3919 tree t = d, a, dest;
3920 if (is_ref)
3921 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3922 if (itercnt == NULL_TREE)
3923 {
3924 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3925 {
3926 itercnt = fold_build2 (MINUS_EXPR, itype,
3927 fold_convert (itype, n1),
3928 fold_convert (itype, fd->loop.n1));
3929 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3930 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3931 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3932 NULL_TREE, false,
3933 GSI_CONTINUE_LINKING);
3934 }
3935 else
3936 itercnt = s0;
3937 }
3938 tree type = TREE_TYPE (t);
3939 if (POINTER_TYPE_P (type))
3940 type = sizetype;
3941 a = fold_build2 (MULT_EXPR, type,
3942 fold_convert (type, itercnt),
3943 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3944 dest = unshare_expr (t);
3945 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3946 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3947 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948 false, GSI_CONTINUE_LINKING);
3949 assign_stmt = gimple_build_assign (dest, t);
3950 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3951 }
3952 if (fd->collapse > 1)
3953 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3954
3955 if (!broken_loop)
3956 {
3957 /* The code controlling the sequential loop replaces the
3958 GIMPLE_OMP_CONTINUE. */
65f4b875 3959 gsi = gsi_last_nondebug_bb (cont_bb);
3960 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3961 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3962 vmain = gimple_omp_continue_control_use (cont_stmt);
3963 vback = gimple_omp_continue_control_def (cont_stmt);
3964
3965 if (!gimple_omp_for_combined_p (fd->for_stmt))
3966 {
3967 if (POINTER_TYPE_P (type))
3968 t = fold_build_pointer_plus (vmain, step);
3969 else
3970 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3971 t = force_gimple_operand_gsi (&gsi, t,
3972 DECL_P (vback)
3973 && TREE_ADDRESSABLE (vback),
3974 NULL_TREE, true, GSI_SAME_STMT);
3975 assign_stmt = gimple_build_assign (vback, t);
3976 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3977
3978 if (cond_var)
3979 {
3980 tree itype = TREE_TYPE (cond_var);
3981 tree t2;
3982 if (POINTER_TYPE_P (type)
3983 || TREE_CODE (n1) != INTEGER_CST
3984 || fd->loop.cond_code != LT_EXPR)
3985 t2 = build_int_cst (itype, 1);
3986 else
3987 t2 = fold_convert (itype, step);
3988 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3989 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3990 NULL_TREE, true, GSI_SAME_STMT);
3991 assign_stmt = gimple_build_assign (cond_var, t2);
3992 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3993 }
3994
3995 t = build2 (fd->loop.cond_code, boolean_type_node,
3996 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3997 ? t : vback, e);
3998 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3999 }
4000
4001 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4002 gsi_remove (&gsi, true);
4003
4004 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4005 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4006 }
4007
4008 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 4009 gsi = gsi_last_nondebug_bb (exit_bb);
4010 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4011 {
4012 t = gimple_omp_return_lhs (gsi_stmt (gsi));
8221c30b 4013 if (fd->have_reductemp || fd->have_pointer_condtemp)
4014 {
4015 tree fn;
4016 if (t)
4017 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4018 else
4019 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4020 gcall *g = gimple_build_call (fn, 0);
4021 if (t)
4022 {
4023 gimple_call_set_lhs (g, t);
4024 if (fd->have_reductemp)
4025 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4026 NOP_EXPR, t),
4027 GSI_SAME_STMT);
4028 }
4029 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4030 }
4031 else
4032 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4033 }
4034 gsi_remove (&gsi, true);
4035
4036 /* Connect all the blocks. */
4037 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
357067f2 4038 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4039 ep = find_edge (entry_bb, second_bb);
4040 ep->flags = EDGE_TRUE_VALUE;
357067f2 4041 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4042 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4043 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4044
4045 if (!broken_loop)
4046 {
4047 ep = find_edge (cont_bb, body_bb);
4048 if (ep == NULL)
4049 {
4050 ep = BRANCH_EDGE (cont_bb);
4051 gcc_assert (single_succ (ep->dest) == body_bb);
4052 }
4053 if (gimple_omp_for_combined_p (fd->for_stmt))
4054 {
4055 remove_edge (ep);
4056 ep = NULL;
4057 }
4058 else if (fd->collapse > 1)
4059 {
4060 remove_edge (ep);
4061 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4062 }
4063 else
4064 ep->flags = EDGE_TRUE_VALUE;
4065 find_edge (cont_bb, fin_bb)->flags
4066 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4067 }
4068
4069 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4070 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4071 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
4072
4073 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4074 recompute_dominator (CDI_DOMINATORS, body_bb));
4075 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4076 recompute_dominator (CDI_DOMINATORS, fin_bb));
4077
4078 struct loop *loop = body_bb->loop_father;
4079 if (loop != entry_bb->loop_father)
4080 {
4081 gcc_assert (broken_loop || loop->header == body_bb);
4082 gcc_assert (broken_loop
4083 || loop->latch == region->cont
4084 || single_pred (loop->latch) == region->cont);
4085 return;
4086 }
4087
4088 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4089 {
4090 loop = alloc_loop ();
4091 loop->header = body_bb;
4092 if (collapse_bb == NULL)
4093 loop->latch = cont_bb;
4094 add_loop (loop, body_bb->loop_father);
4095 }
4096}
4097
4098/* Return phi in E->DEST with ARG on edge E. */
4099
4100static gphi *
4101find_phi_with_arg_on_edge (tree arg, edge e)
4102{
4103 basic_block bb = e->dest;
4104
4105 for (gphi_iterator gpi = gsi_start_phis (bb);
4106 !gsi_end_p (gpi);
4107 gsi_next (&gpi))
4108 {
4109 gphi *phi = gpi.phi ();
4110 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4111 return phi;
4112 }
4113
4114 return NULL;
4115}
4116
4117/* A subroutine of expand_omp_for. Generate code for a parallel
4118 loop with static schedule and a specified chunk size. Given
4119 parameters:
4120
4121 for (V = N1; V cond N2; V += STEP) BODY;
4122
4123 where COND is "<" or ">", we generate pseudocode
4124
4125 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4126 if (cond is <)
4127 adj = STEP - 1;
4128 else
4129 adj = STEP + 1;
4130 if ((__typeof (V)) -1 > 0 && cond is >)
4131 n = -(adj + N2 - N1) / -STEP;
4132 else
4133 n = (adj + N2 - N1) / STEP;
4134 trip = 0;
4135 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4136 here so that V is defined
4137 if the loop is not entered
4138 L0:
4139 s0 = (trip * nthreads + threadid) * CHUNK;
01914336 4140 e0 = min (s0 + CHUNK, n);
4141 if (s0 < n) goto L1; else goto L4;
4142 L1:
4143 V = s0 * STEP + N1;
4144 e = e0 * STEP + N1;
4145 L2:
4146 BODY;
4147 V += STEP;
4148 if (V cond e) goto L2; else goto L3;
4149 L3:
4150 trip += 1;
4151 goto L0;
4152 L4:
4153*/
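/* As an illustration, assuming a loop such as

     #pragma omp for schedule(static, 3)
     for (i = 0; i < 10; i++) ...

   run with nthreads = 2: n = 10 and CHUNK = 3.  On trip 0, thread 0 gets
   s0 = 0, e0 = 3 and thread 1 gets s0 = 3, e0 = 6; on trip 1 they get
   [6, 9) and [9, 10); on trip 2 both threads compute s0 >= n and branch
   to L4.  */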
4154
4155static void
4156expand_omp_for_static_chunk (struct omp_region *region,
4157 struct omp_for_data *fd, gimple *inner_stmt)
4158{
4159 tree n, s0, e0, e, t;
4160 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4161 tree type, itype, vmain, vback, vextra;
4162 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4163 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
6c7ae8c5 4164 gimple_stmt_iterator gsi, gsip;
4165 edge se;
4166 bool broken_loop = region->cont == NULL;
4167 tree *counts = NULL;
4168 tree n1, n2, step;
28567c40 4169 tree reductions = NULL_TREE;
8221c30b 4170 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4171
4172 itype = type = TREE_TYPE (fd->loop.v);
4173 if (POINTER_TYPE_P (type))
4174 itype = signed_type_for (type);
4175
4176 entry_bb = region->entry;
4177 se = split_block (entry_bb, last_stmt (entry_bb));
4178 entry_bb = se->src;
4179 iter_part_bb = se->dest;
4180 cont_bb = region->cont;
4181 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4182 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4183 gcc_assert (broken_loop
4184 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4185 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4186 body_bb = single_succ (seq_start_bb);
4187 if (!broken_loop)
4188 {
4189 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4190 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4191 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4192 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4193 }
4194 exit_bb = region->exit;
4195
4196 /* Trip and adjustment setup goes in ENTRY_BB. */
65f4b875 4197 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 4198 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4199 gsip = gsi;
4200 gsi_prev (&gsip);
4201
4202 if (fd->collapse > 1)
4203 {
4204 int first_zero_iter = -1, dummy = -1;
4205 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4206
4207 counts = XALLOCAVEC (tree, fd->collapse);
4208 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4209 fin_bb, first_zero_iter,
4210 dummy_bb, dummy, l2_dom_bb);
4211 t = NULL_TREE;
4212 }
4213 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4214 t = integer_one_node;
4215 else
4216 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4217 fold_convert (type, fd->loop.n1),
4218 fold_convert (type, fd->loop.n2));
4219 if (fd->collapse == 1
4220 && TYPE_UNSIGNED (type)
4221 && (t == NULL_TREE || !integer_onep (t)))
4222 {
4223 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4224 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4225 true, GSI_SAME_STMT);
4226 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4227 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4228 true, GSI_SAME_STMT);
4229 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4230 NULL_TREE, NULL_TREE);
4231 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4232 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4233 expand_omp_regimplify_p, NULL, NULL)
4234 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4235 expand_omp_regimplify_p, NULL, NULL))
4236 {
4237 gsi = gsi_for_stmt (cond_stmt);
4238 gimple_regimplify_operands (cond_stmt, &gsi);
4239 }
4240 se = split_block (entry_bb, cond_stmt);
4241 se->flags = EDGE_TRUE_VALUE;
4242 entry_bb = se->dest;
357067f2 4243 se->probability = profile_probability::very_likely ();
629b3d75 4244 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 4245 se->probability = profile_probability::very_unlikely ();
4246 if (gimple_in_ssa_p (cfun))
4247 {
4248 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4249 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4250 !gsi_end_p (gpi); gsi_next (&gpi))
4251 {
4252 gphi *phi = gpi.phi ();
4253 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4254 se, UNKNOWN_LOCATION);
4255 }
4256 }
4257 gsi = gsi_last_bb (entry_bb);
4258 }
4259
4260 if (fd->lastprivate_conditional)
4261 {
4262 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4263 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4264 if (fd->have_pointer_condtemp)
4265 condtemp = OMP_CLAUSE_DECL (c);
4266 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4267 cond_var = OMP_CLAUSE_DECL (c);
4268 }
4269 if (fd->have_reductemp || fd->have_pointer_condtemp)
4270 {
4271 tree t1 = build_int_cst (long_integer_type_node, 0);
4272 tree t2 = build_int_cst (long_integer_type_node, 1);
4273 tree t3 = build_int_cstu (long_integer_type_node,
4274 (HOST_WIDE_INT_1U << 31) + 1);
4275 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4276 gimple_stmt_iterator gsi2 = gsi_none ();
4277 gimple *g = NULL;
4278 tree mem = null_pointer_node, memv = NULL_TREE;
4279 if (fd->have_reductemp)
4280 {
4281 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4282 reductions = OMP_CLAUSE_DECL (c);
4283 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4284 g = SSA_NAME_DEF_STMT (reductions);
4285 reductions = gimple_assign_rhs1 (g);
4286 OMP_CLAUSE_DECL (c) = reductions;
4287 gsi2 = gsi_for_stmt (g);
4288 }
4289 else
4290 {
4291 if (gsi_end_p (gsip))
4292 gsi2 = gsi_after_labels (region->entry);
4293 else
4294 gsi2 = gsip;
4295 reductions = null_pointer_node;
4296 }
8221c30b 4297 if (fd->have_pointer_condtemp)
6c7ae8c5 4298 {
4299 tree type = TREE_TYPE (condtemp);
4300 memv = create_tmp_var (type);
4301 TREE_ADDRESSABLE (memv) = 1;
4302 unsigned HOST_WIDE_INT sz
4303 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4304 sz *= fd->lastprivate_conditional;
4305 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4306 false);
4307 mem = build_fold_addr_expr (memv);
4308 }
4309 tree t
4310 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4311 9, t1, t2, t2, t3, t1, null_pointer_node,
6c7ae8c5 4312 null_pointer_node, reductions, mem);
4313 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4314 true, GSI_SAME_STMT);
8221c30b 4315 if (fd->have_pointer_condtemp)
4316 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4317 if (fd->have_reductemp)
4318 {
4319 gsi_remove (&gsi2, true);
4320 release_ssa_name (gimple_assign_lhs (g));
4321 }
28567c40 4322 }
4323 switch (gimple_omp_for_kind (fd->for_stmt))
4324 {
4325 case GF_OMP_FOR_KIND_FOR:
4326 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4327 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4328 break;
4329 case GF_OMP_FOR_KIND_DISTRIBUTE:
4330 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4331 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4332 break;
4333 default:
4334 gcc_unreachable ();
4335 }
4336 nthreads = build_call_expr (nthreads, 0);
4337 nthreads = fold_convert (itype, nthreads);
4338 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4339 true, GSI_SAME_STMT);
4340 threadid = build_call_expr (threadid, 0);
4341 threadid = fold_convert (itype, threadid);
4342 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4343 true, GSI_SAME_STMT);
4344
4345 n1 = fd->loop.n1;
4346 n2 = fd->loop.n2;
4347 step = fd->loop.step;
4348 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4349 {
4350 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4351 OMP_CLAUSE__LOOPTEMP_);
4352 gcc_assert (innerc);
4353 n1 = OMP_CLAUSE_DECL (innerc);
4354 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4355 OMP_CLAUSE__LOOPTEMP_);
4356 gcc_assert (innerc);
4357 n2 = OMP_CLAUSE_DECL (innerc);
4358 }
4359 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4360 true, NULL_TREE, true, GSI_SAME_STMT);
4361 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4362 true, NULL_TREE, true, GSI_SAME_STMT);
4363 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4364 true, NULL_TREE, true, GSI_SAME_STMT);
4365 tree chunk_size = fold_convert (itype, fd->chunk_size);
4366 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4367 chunk_size
4368 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4369 GSI_SAME_STMT);
4370
4371 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4372 t = fold_build2 (PLUS_EXPR, itype, step, t);
4373 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4374 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4375 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4376 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4377 fold_build1 (NEGATE_EXPR, itype, t),
4378 fold_build1 (NEGATE_EXPR, itype, step));
4379 else
4380 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4381 t = fold_convert (itype, t);
4382 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4383 true, GSI_SAME_STMT);
4384
4385 trip_var = create_tmp_reg (itype, ".trip");
4386 if (gimple_in_ssa_p (cfun))
4387 {
4388 trip_init = make_ssa_name (trip_var);
4389 trip_main = make_ssa_name (trip_var);
4390 trip_back = make_ssa_name (trip_var);
4391 }
4392 else
4393 {
4394 trip_init = trip_var;
4395 trip_main = trip_var;
4396 trip_back = trip_var;
4397 }
4398
4399 gassign *assign_stmt
4400 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4401 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4402
4403 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4404 t = fold_build2 (MULT_EXPR, itype, t, step);
4405 if (POINTER_TYPE_P (type))
4406 t = fold_build_pointer_plus (n1, t);
4407 else
4408 t = fold_build2 (PLUS_EXPR, type, t, n1);
4409 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4410 true, GSI_SAME_STMT);
4411
4412 /* Remove the GIMPLE_OMP_FOR. */
4413 gsi_remove (&gsi, true);
4414
4415 gimple_stmt_iterator gsif = gsi;
4416
4417 /* Iteration space partitioning goes in ITER_PART_BB. */
4418 gsi = gsi_last_bb (iter_part_bb);
4419
4420 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4421 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4422 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4423 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4424 false, GSI_CONTINUE_LINKING);
4425
4426 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4427 t = fold_build2 (MIN_EXPR, itype, t, n);
4428 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4429 false, GSI_CONTINUE_LINKING);
4430
4431 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4432 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4433
4434 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4435 gsi = gsi_start_bb (seq_start_bb);
4436
4437 tree startvar = fd->loop.v;
4438 tree endvar = NULL_TREE;
4439
4440 if (gimple_omp_for_combined_p (fd->for_stmt))
4441 {
4442 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4443 ? gimple_omp_parallel_clauses (inner_stmt)
4444 : gimple_omp_for_clauses (inner_stmt);
4445 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4446 gcc_assert (innerc);
4447 startvar = OMP_CLAUSE_DECL (innerc);
4448 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4449 OMP_CLAUSE__LOOPTEMP_);
4450 gcc_assert (innerc);
4451 endvar = OMP_CLAUSE_DECL (innerc);
4452 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4453 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4454 {
4455 int i;
4456 for (i = 1; i < fd->collapse; i++)
4457 {
4458 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4459 OMP_CLAUSE__LOOPTEMP_);
4460 gcc_assert (innerc);
4461 }
4462 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4463 OMP_CLAUSE__LOOPTEMP_);
4464 if (innerc)
4465 {
4466 /* If needed (distribute parallel for with lastprivate),
4467 propagate down the total number of iterations. */
4468 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4469 fd->loop.n2);
4470 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4471 GSI_CONTINUE_LINKING);
4472 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4473 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4474 }
4475 }
4476 }
4477
4478 t = fold_convert (itype, s0);
4479 t = fold_build2 (MULT_EXPR, itype, t, step);
4480 if (POINTER_TYPE_P (type))
4481 {
4482 t = fold_build_pointer_plus (n1, t);
4483 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4484 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4485 t = fold_convert (signed_type_for (type), t);
4486 }
4487 else
4488 t = fold_build2 (PLUS_EXPR, type, t, n1);
4489 t = fold_convert (TREE_TYPE (startvar), t);
4490 t = force_gimple_operand_gsi (&gsi, t,
4491 DECL_P (startvar)
4492 && TREE_ADDRESSABLE (startvar),
4493 NULL_TREE, false, GSI_CONTINUE_LINKING);
4494 assign_stmt = gimple_build_assign (startvar, t);
4495 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4496 if (cond_var)
4497 {
4498 tree itype = TREE_TYPE (cond_var);
4499 /* For lastprivate(conditional:) itervar, we need some iteration
4500 counter that starts at unsigned non-zero and increases.
4501 Prefer as few IVs as possible, so if we can use startvar
4502 itself, use that, or startvar + constant (those would be
 4503 incremented with step), and as a last resort use s0 + 1,
 4504 incremented by 1 each iteration. */
4505 if (POINTER_TYPE_P (type)
4506 || TREE_CODE (n1) != INTEGER_CST
4507 || fd->loop.cond_code != LT_EXPR)
4508 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4509 build_int_cst (itype, 1));
4510 else if (tree_int_cst_sgn (n1) == 1)
4511 t = fold_convert (itype, t);
4512 else
4513 {
4514 tree c = fold_convert (itype, n1);
4515 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4516 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4517 }
4518 t = force_gimple_operand_gsi (&gsi, t, false,
4519 NULL_TREE, false, GSI_CONTINUE_LINKING);
4520 assign_stmt = gimple_build_assign (cond_var, t);
4521 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4522 }
4523
4524 t = fold_convert (itype, e0);
4525 t = fold_build2 (MULT_EXPR, itype, t, step);
4526 if (POINTER_TYPE_P (type))
4527 {
4528 t = fold_build_pointer_plus (n1, t);
4529 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4530 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4531 t = fold_convert (signed_type_for (type), t);
4532 }
4533 else
4534 t = fold_build2 (PLUS_EXPR, type, t, n1);
4535 t = fold_convert (TREE_TYPE (startvar), t);
4536 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4537 false, GSI_CONTINUE_LINKING);
4538 if (endvar)
4539 {
4540 assign_stmt = gimple_build_assign (endvar, e);
4541 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4542 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4543 assign_stmt = gimple_build_assign (fd->loop.v, e);
4544 else
4545 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4546 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4547 }
4548 /* Handle linear clause adjustments. */
4549 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4550 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4551 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4552 c; c = OMP_CLAUSE_CHAIN (c))
4553 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4554 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4555 {
4556 tree d = OMP_CLAUSE_DECL (c);
4557 bool is_ref = omp_is_reference (d);
4558 tree t = d, a, dest;
4559 if (is_ref)
4560 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4561 tree type = TREE_TYPE (t);
4562 if (POINTER_TYPE_P (type))
4563 type = sizetype;
4564 dest = unshare_expr (t);
4565 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4566 expand_omp_build_assign (&gsif, v, t);
4567 if (itercnt == NULL_TREE)
4568 {
4569 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4570 {
4571 itercntbias
4572 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4573 fold_convert (itype, fd->loop.n1));
4574 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4575 itercntbias, step);
4576 itercntbias
4577 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4578 NULL_TREE, true,
4579 GSI_SAME_STMT);
4580 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4581 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4582 NULL_TREE, false,
4583 GSI_CONTINUE_LINKING);
4584 }
4585 else
4586 itercnt = s0;
4587 }
4588 a = fold_build2 (MULT_EXPR, type,
4589 fold_convert (type, itercnt),
4590 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4591 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4592 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4593 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4594 false, GSI_CONTINUE_LINKING);
4595 assign_stmt = gimple_build_assign (dest, t);
4596 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4597 }
4598 if (fd->collapse > 1)
4599 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4600
4601 if (!broken_loop)
4602 {
4603 /* The code controlling the sequential loop goes in CONT_BB,
4604 replacing the GIMPLE_OMP_CONTINUE. */
65f4b875 4605 gsi = gsi_last_nondebug_bb (cont_bb);
4606 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4607 vmain = gimple_omp_continue_control_use (cont_stmt);
4608 vback = gimple_omp_continue_control_def (cont_stmt);
4609
4610 if (!gimple_omp_for_combined_p (fd->for_stmt))
4611 {
4612 if (POINTER_TYPE_P (type))
4613 t = fold_build_pointer_plus (vmain, step);
4614 else
4615 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4616 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4617 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4618 true, GSI_SAME_STMT);
4619 assign_stmt = gimple_build_assign (vback, t);
4620 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4621
4622 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4623 t = build2 (EQ_EXPR, boolean_type_node,
4624 build_int_cst (itype, 0),
4625 build_int_cst (itype, 1));
4626 else
4627 t = build2 (fd->loop.cond_code, boolean_type_node,
4628 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4629 ? t : vback, e);
4630 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4631 }
4632
4633 /* Remove GIMPLE_OMP_CONTINUE. */
4634 gsi_remove (&gsi, true);
4635
4636 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4637 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4638
4639 /* Trip update code goes into TRIP_UPDATE_BB. */
4640 gsi = gsi_start_bb (trip_update_bb);
4641
4642 t = build_int_cst (itype, 1);
4643 t = build2 (PLUS_EXPR, itype, trip_main, t);
4644 assign_stmt = gimple_build_assign (trip_back, t);
4645 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4646 }
4647
4648 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 4649 gsi = gsi_last_nondebug_bb (exit_bb);
4650 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4651 {
4652 t = gimple_omp_return_lhs (gsi_stmt (gsi));
8221c30b 4653 if (fd->have_reductemp || fd->have_pointer_condtemp)
4654 {
4655 tree fn;
4656 if (t)
4657 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4658 else
4659 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4660 gcall *g = gimple_build_call (fn, 0);
4661 if (t)
4662 {
4663 gimple_call_set_lhs (g, t);
4664 if (fd->have_reductemp)
4665 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4666 NOP_EXPR, t),
4667 GSI_SAME_STMT);
4668 }
4669 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4670 }
4671 else
4672 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4673 }
4674 gsi_remove (&gsi, true);
4675
4676 /* Connect the new blocks. */
4677 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4678 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4679
4680 if (!broken_loop)
4681 {
4682 se = find_edge (cont_bb, body_bb);
4683 if (se == NULL)
4684 {
4685 se = BRANCH_EDGE (cont_bb);
4686 gcc_assert (single_succ (se->dest) == body_bb);
4687 }
4688 if (gimple_omp_for_combined_p (fd->for_stmt))
4689 {
4690 remove_edge (se);
4691 se = NULL;
4692 }
4693 else if (fd->collapse > 1)
4694 {
4695 remove_edge (se);
4696 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4697 }
4698 else
4699 se->flags = EDGE_TRUE_VALUE;
4700 find_edge (cont_bb, trip_update_bb)->flags
4701 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4702
4703 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4704 iter_part_bb);
4705 }
4706
4707 if (gimple_in_ssa_p (cfun))
4708 {
4709 gphi_iterator psi;
4710 gphi *phi;
4711 edge re, ene;
4712 edge_var_map *vm;
4713 size_t i;
4714
4715 gcc_assert (fd->collapse == 1 && !broken_loop);
4716
4717 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4718 remove arguments of the phi nodes in fin_bb. We need to create
4719 appropriate phi nodes in iter_part_bb instead. */
4720 se = find_edge (iter_part_bb, fin_bb);
4721 re = single_succ_edge (trip_update_bb);
4722 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4723 ene = single_succ_edge (entry_bb);
4724
4725 psi = gsi_start_phis (fin_bb);
4726 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4727 gsi_next (&psi), ++i)
4728 {
4729 gphi *nphi;
620e594b 4730 location_t locus;
4731
4732 phi = psi.phi ();
4733 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4734 redirect_edge_var_map_def (vm), 0))
4735 continue;
4736
4737 t = gimple_phi_result (phi);
4738 gcc_assert (t == redirect_edge_var_map_result (vm));
4739
4740 if (!single_pred_p (fin_bb))
4741 t = copy_ssa_name (t, phi);
4742
4743 nphi = create_phi_node (t, iter_part_bb);
4744
4745 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4746 locus = gimple_phi_arg_location_from_edge (phi, se);
4747
4748 /* A special case -- fd->loop.v is not yet computed in
 4749 iter_part_bb, so we need to use vextra instead. */
4750 if (t == fd->loop.v)
4751 t = vextra;
4752 add_phi_arg (nphi, t, ene, locus);
4753 locus = redirect_edge_var_map_location (vm);
4754 tree back_arg = redirect_edge_var_map_def (vm);
4755 add_phi_arg (nphi, back_arg, re, locus);
4756 edge ce = find_edge (cont_bb, body_bb);
4757 if (ce == NULL)
4758 {
4759 ce = BRANCH_EDGE (cont_bb);
4760 gcc_assert (single_succ (ce->dest) == body_bb);
4761 ce = single_succ_edge (ce->dest);
4762 }
4763 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4764 gcc_assert (inner_loop_phi != NULL);
4765 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4766 find_edge (seq_start_bb, body_bb), locus);
4767
4768 if (!single_pred_p (fin_bb))
4769 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4770 }
4771 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4772 redirect_edge_var_map_clear (re);
4773 if (single_pred_p (fin_bb))
4774 while (1)
4775 {
4776 psi = gsi_start_phis (fin_bb);
4777 if (gsi_end_p (psi))
4778 break;
4779 remove_phi_node (&psi, false);
4780 }
4781
4782 /* Make phi node for trip. */
4783 phi = create_phi_node (trip_main, iter_part_bb);
4784 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4785 UNKNOWN_LOCATION);
4786 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4787 UNKNOWN_LOCATION);
4788 }
4789
4790 if (!broken_loop)
4791 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4792 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4793 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4794 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4795 recompute_dominator (CDI_DOMINATORS, fin_bb));
4796 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4797 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4798 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4799 recompute_dominator (CDI_DOMINATORS, body_bb));
4800
4801 if (!broken_loop)
4802 {
4803 struct loop *loop = body_bb->loop_father;
4804 struct loop *trip_loop = alloc_loop ();
4805 trip_loop->header = iter_part_bb;
4806 trip_loop->latch = trip_update_bb;
4807 add_loop (trip_loop, iter_part_bb->loop_father);
4808
4809 if (loop != entry_bb->loop_father)
4810 {
4811 gcc_assert (loop->header == body_bb);
4812 gcc_assert (loop->latch == region->cont
4813 || single_pred (loop->latch) == region->cont);
4814 trip_loop->inner = loop;
4815 return;
4816 }
4817
4818 if (!gimple_omp_for_combined_p (fd->for_stmt))
4819 {
4820 loop = alloc_loop ();
4821 loop->header = body_bb;
4822 if (collapse_bb == NULL)
4823 loop->latch = cont_bb;
4824 add_loop (loop, trip_loop);
4825 }
4826 }
4827}
4828
4829/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4830 loop. Given parameters:
4831
4832 for (V = N1; V cond N2; V += STEP) BODY;
4833
4834 where COND is "<" or ">", we generate pseudocode
4835
4836 V = N1;
4837 goto L1;
4838 L0:
4839 BODY;
4840 V += STEP;
4841 L1:
4842 if (V cond N2) goto L0; else goto L2;
4843 L2:
4844
4845 For collapsed loops, given parameters:
4846 collapse(3)
4847 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4848 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4849 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4850 BODY;
4851
4852 we generate pseudocode
4853
4854 if (cond3 is <)
4855 adj = STEP3 - 1;
4856 else
4857 adj = STEP3 + 1;
4858 count3 = (adj + N32 - N31) / STEP3;
4859 if (cond2 is <)
4860 adj = STEP2 - 1;
4861 else
4862 adj = STEP2 + 1;
4863 count2 = (adj + N22 - N21) / STEP2;
4864 if (cond1 is <)
4865 adj = STEP1 - 1;
4866 else
4867 adj = STEP1 + 1;
4868 count1 = (adj + N12 - N11) / STEP1;
4869 count = count1 * count2 * count3;
4870 V = 0;
4871 V1 = N11;
4872 V2 = N21;
4873 V3 = N31;
4874 goto L1;
4875 L0:
4876 BODY;
4877 V += 1;
4878 V3 += STEP3;
4879 V2 += (V3 cond3 N32) ? 0 : STEP2;
4880 V3 = (V3 cond3 N32) ? V3 : N31;
4881 V1 += (V2 cond2 N22) ? 0 : STEP1;
4882 V2 = (V2 cond2 N22) ? V2 : N21;
4883 L1:
4884 if (V < count) goto L0; else goto L2;
4885 L2:
4886
4887 */
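/* As an illustration of the count computation: for a level with N1 = 0,
   N2 = 10, STEP = 3 and cond "<", adj = STEP - 1 = 2 and
   count = (2 + 10 - 0) / 3 = 4, matching the iterations 0, 3, 6, 9.
   For collapse(2) with count1 = 2 and count2 = 3, count = 6; the single
   logical IV V runs from 0 to 5 while (V1, V2) walks the 2x3 iteration
   space through the conditional updates above.  */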
4888
4889static void
4890expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4891{
4892 tree type, t;
4893 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4894 gimple_stmt_iterator gsi;
4895 gimple *stmt;
4896 gcond *cond_stmt;
4897 bool broken_loop = region->cont == NULL;
4898 edge e, ne;
4899 tree *counts = NULL;
4900 int i;
4901 int safelen_int = INT_MAX;
fed2a43c 4902 bool dont_vectorize = false;
4903 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4904 OMP_CLAUSE_SAFELEN);
4905 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4906 OMP_CLAUSE__SIMDUID_);
4907 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4908 OMP_CLAUSE_IF);
4909 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4910 OMP_CLAUSE_SIMDLEN);
4911 tree n1, n2;
4912
4913 if (safelen)
4914 {
9d2f08ab 4915 poly_uint64 val;
629b3d75 4916 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
9d2f08ab 4917 if (!poly_int_tree_p (safelen, &val))
629b3d75 4918 safelen_int = 0;
4919 else
4920 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4921 if (safelen_int == 1)
4922 safelen_int = 0;
4923 }
4924 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
4925 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
4926 {
4927 safelen_int = 0;
4928 dont_vectorize = true;
4929 }
4930 type = TREE_TYPE (fd->loop.v);
4931 entry_bb = region->entry;
4932 cont_bb = region->cont;
4933 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4934 gcc_assert (broken_loop
4935 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4936 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4937 if (!broken_loop)
4938 {
4939 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4940 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4941 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4942 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4943 }
4944 else
4945 {
4946 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4947 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4948 l2_bb = single_succ (l1_bb);
4949 }
4950 exit_bb = region->exit;
4951 l2_dom_bb = NULL;
4952
65f4b875 4953 gsi = gsi_last_nondebug_bb (entry_bb);
4954
4955 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4956 /* Not needed in SSA form right now. */
4957 gcc_assert (!gimple_in_ssa_p (cfun));
4958 if (fd->collapse > 1)
4959 {
4960 int first_zero_iter = -1, dummy = -1;
4961 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4962
4963 counts = XALLOCAVEC (tree, fd->collapse);
4964 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4965 zero_iter_bb, first_zero_iter,
4966 dummy_bb, dummy, l2_dom_bb);
4967 }
4968 if (l2_dom_bb == NULL)
4969 l2_dom_bb = l1_bb;
4970
4971 n1 = fd->loop.n1;
4972 n2 = fd->loop.n2;
4973 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4974 {
4975 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4976 OMP_CLAUSE__LOOPTEMP_);
4977 gcc_assert (innerc);
4978 n1 = OMP_CLAUSE_DECL (innerc);
4979 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4980 OMP_CLAUSE__LOOPTEMP_);
4981 gcc_assert (innerc);
4982 n2 = OMP_CLAUSE_DECL (innerc);
4983 }
4984 tree step = fd->loop.step;
4985
4986 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4987 OMP_CLAUSE__SIMT_);
4988 if (is_simt)
4989 {
4990 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4991 is_simt = safelen_int > 1;
4992 }
4993 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4994 if (is_simt)
4995 {
4996 simt_lane = create_tmp_var (unsigned_type_node);
4997 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4998 gimple_call_set_lhs (g, simt_lane);
4999 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5000 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5001 fold_convert (TREE_TYPE (step), simt_lane));
5002 n1 = fold_convert (type, n1);
5003 if (POINTER_TYPE_P (type))
5004 n1 = fold_build_pointer_plus (n1, offset);
5005 else
5006 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5007
5008 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5009 if (fd->collapse > 1)
5010 simt_maxlane = build_one_cst (unsigned_type_node);
5011 else if (safelen_int < omp_max_simt_vf ())
5012 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5013 tree vf
5014 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5015 unsigned_type_node, 0);
5016 if (simt_maxlane)
5017 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5018 vf = fold_convert (TREE_TYPE (step), vf);
5019 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5020 }
5021
5022 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5023 if (fd->collapse > 1)
5024 {
5025 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5026 {
5027 gsi_prev (&gsi);
5028 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5029 gsi_next (&gsi);
5030 }
5031 else
5032 for (i = 0; i < fd->collapse; i++)
5033 {
5034 tree itype = TREE_TYPE (fd->loops[i].v);
5035 if (POINTER_TYPE_P (itype))
5036 itype = signed_type_for (itype);
5037 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5038 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5039 }
5040 }
5041
5042 /* Remove the GIMPLE_OMP_FOR statement. */
5043 gsi_remove (&gsi, true);
5044
5045 if (!broken_loop)
5046 {
5047 /* Code to control the increment goes in the CONT_BB. */
65f4b875 5048 gsi = gsi_last_nondebug_bb (cont_bb);
5049 stmt = gsi_stmt (gsi);
5050 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5051
5052 if (POINTER_TYPE_P (type))
5053 t = fold_build_pointer_plus (fd->loop.v, step);
5054 else
5055 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5056 expand_omp_build_assign (&gsi, fd->loop.v, t);
5057
5058 if (fd->collapse > 1)
5059 {
5060 i = fd->collapse - 1;
5061 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5062 {
5063 t = fold_convert (sizetype, fd->loops[i].step);
5064 t = fold_build_pointer_plus (fd->loops[i].v, t);
5065 }
5066 else
5067 {
5068 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5069 fd->loops[i].step);
5070 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5071 fd->loops[i].v, t);
5072 }
5073 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5074
5075 for (i = fd->collapse - 1; i > 0; i--)
5076 {
5077 tree itype = TREE_TYPE (fd->loops[i].v);
5078 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5079 if (POINTER_TYPE_P (itype2))
5080 itype2 = signed_type_for (itype2);
5081 t = fold_convert (itype2, fd->loops[i - 1].step);
5082 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5083 GSI_SAME_STMT);
5084 t = build3 (COND_EXPR, itype2,
5085 build2 (fd->loops[i].cond_code, boolean_type_node,
5086 fd->loops[i].v,
5087 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 5088 build_int_cst (itype2, 0), t);
5089 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5090 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5091 else
5092 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5093 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5094
5095 t = fold_convert (itype, fd->loops[i].n1);
5096 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5097 GSI_SAME_STMT);
5098 t = build3 (COND_EXPR, itype,
5099 build2 (fd->loops[i].cond_code, boolean_type_node,
5100 fd->loops[i].v,
5101 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 5102 fd->loops[i].v, t);
5103 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5104 }
5105 }
5106
5107 /* Remove GIMPLE_OMP_CONTINUE. */
5108 gsi_remove (&gsi, true);
5109 }
5110
5111 /* Emit the condition in L1_BB. */
5112 gsi = gsi_start_bb (l1_bb);
5113
5114 t = fold_convert (type, n2);
5115 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5116 false, GSI_CONTINUE_LINKING);
5117 tree v = fd->loop.v;
5118 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5119 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5120 false, GSI_CONTINUE_LINKING);
5121 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5122 cond_stmt = gimple_build_cond_empty (t);
5123 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5124 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5125 NULL, NULL)
5126 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5127 NULL, NULL))
5128 {
5129 gsi = gsi_for_stmt (cond_stmt);
5130 gimple_regimplify_operands (cond_stmt, &gsi);
5131 }
5132
5133 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5134 if (is_simt)
5135 {
5136 gsi = gsi_start_bb (l2_bb);
5137 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5138 if (POINTER_TYPE_P (type))
5139 t = fold_build_pointer_plus (fd->loop.v, step);
5140 else
5141 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5142 expand_omp_build_assign (&gsi, fd->loop.v, t);
5143 }
5144
5145 /* Remove GIMPLE_OMP_RETURN. */
65f4b875 5146 gsi = gsi_last_nondebug_bb (exit_bb);
5147 gsi_remove (&gsi, true);
5148
5149 /* Connect the new blocks. */
5150 remove_edge (FALLTHRU_EDGE (entry_bb));
5151
5152 if (!broken_loop)
5153 {
5154 remove_edge (BRANCH_EDGE (entry_bb));
5155 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5156
5157 e = BRANCH_EDGE (l1_bb);
5158 ne = FALLTHRU_EDGE (l1_bb);
5159 e->flags = EDGE_TRUE_VALUE;
5160 }
5161 else
5162 {
5163 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5164
5165 ne = single_succ_edge (l1_bb);
5166 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5167
5168 }
5169 ne->flags = EDGE_FALSE_VALUE;
5170 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5171 ne->probability = e->probability.invert ();
5172
5173 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5174 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5175
5176 if (simt_maxlane)
5177 {
5178 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5179 NULL_TREE, NULL_TREE);
5180 gsi = gsi_last_bb (entry_bb);
5181 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5182 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5183 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5184 FALLTHRU_EDGE (entry_bb)->probability
5185 = profile_probability::guessed_always ().apply_scale (7, 8);
5186 BRANCH_EDGE (entry_bb)->probability
5187 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5188 l2_dom_bb = entry_bb;
5189 }
5190 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5191
5192 if (!broken_loop)
5193 {
5194 struct loop *loop = alloc_loop ();
5195 loop->header = l1_bb;
5196 loop->latch = cont_bb;
5197 add_loop (loop, l1_bb->loop_father);
5198 loop->safelen = safelen_int;
5199 if (simduid)
5200 {
5201 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5202 cfun->has_simduid_loops = true;
5203 }
5204 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5205 the loop. */
5206 if ((flag_tree_loop_vectorize
26d476cd 5207 || !global_options_set.x_flag_tree_loop_vectorize)
5208 && flag_tree_loop_optimize
5209 && loop->safelen > 1)
5210 {
5211 loop->force_vectorize = true;
5212 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5213 {
5214 unsigned HOST_WIDE_INT v
5215 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5216 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5217 loop->simdlen = v;
5218 }
5219 cfun->has_force_vectorize_loops = true;
5220 }
5221 else if (dont_vectorize)
5222 loop->dont_vectorize = true;
5223 }
5224 else if (simduid)
5225 cfun->has_simduid_loops = true;
5226}
5227
5228/* A taskloop construct is represented after gimplification with
5229 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5230 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5231 which should just compute all the needed loop temporaries
5232 for GIMPLE_OMP_TASK. */
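   For reference, a user-level loop that reaches this routine might look like
   the sketch below (illustrative only; the names are invented and the snippet
   is not part of GCC).  The outer GIMPLE_OMP_FOR expanded here only
   materializes the start/end temporaries consumed by the task construct
   sandwiched in between the two loops.

   void
   halve (int n, double *a)
   {
   #pragma omp taskloop grainsize(64)
     for (int i = 0; i < n; i++)
       a[i] *= 0.5;
   }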
5233
5234static void
5235expand_omp_taskloop_for_outer (struct omp_region *region,
5236 struct omp_for_data *fd,
5237 gimple *inner_stmt)
5238{
5239 tree type, bias = NULL_TREE;
5240 basic_block entry_bb, cont_bb, exit_bb;
5241 gimple_stmt_iterator gsi;
5242 gassign *assign_stmt;
5243 tree *counts = NULL;
5244 int i;
5245
5246 gcc_assert (inner_stmt);
5247 gcc_assert (region->cont);
5248 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5249 && gimple_omp_task_taskloop_p (inner_stmt));
5250 type = TREE_TYPE (fd->loop.v);
5251
5252 /* See if we need to bias by LLONG_MIN. */
5253 if (fd->iter_type == long_long_unsigned_type_node
5254 && TREE_CODE (type) == INTEGER_TYPE
5255 && !TYPE_UNSIGNED (type))
5256 {
5257 tree n1, n2;
5258
5259 if (fd->loop.cond_code == LT_EXPR)
5260 {
5261 n1 = fd->loop.n1;
5262 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5263 }
5264 else
5265 {
5266 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5267 n2 = fd->loop.n1;
5268 }
5269 if (TREE_CODE (n1) != INTEGER_CST
5270 || TREE_CODE (n2) != INTEGER_CST
5271 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5272 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5273 }
5274
5275 entry_bb = region->entry;
5276 cont_bb = region->cont;
5277 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5278 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5279 exit_bb = region->exit;
5280
65f4b875 5281 gsi = gsi_last_nondebug_bb (entry_bb);
5282 gimple *for_stmt = gsi_stmt (gsi);
5283 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5284 if (fd->collapse > 1)
5285 {
5286 int first_zero_iter = -1, dummy = -1;
5287 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5288
5289 counts = XALLOCAVEC (tree, fd->collapse);
5290 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5291 zero_iter_bb, first_zero_iter,
5292 dummy_bb, dummy, l2_dom_bb);
5293
5294 if (zero_iter_bb)
5295 {
5296 /* Some counts[i] vars might be uninitialized if
5297 some loop has zero iterations. But the body shouldn't
5298 be executed in that case, so just avoid uninit warnings. */
5299 for (i = first_zero_iter; i < fd->collapse; i++)
5300 if (SSA_VAR_P (counts[i]))
5301 TREE_NO_WARNING (counts[i]) = 1;
5302 gsi_prev (&gsi);
5303 edge e = split_block (entry_bb, gsi_stmt (gsi));
5304 entry_bb = e->dest;
5305 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5306 gsi = gsi_last_bb (entry_bb);
5307 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5308 get_immediate_dominator (CDI_DOMINATORS,
5309 zero_iter_bb));
5310 }
5311 }
5312
5313 tree t0, t1;
5314 t1 = fd->loop.n2;
5315 t0 = fd->loop.n1;
5316 if (POINTER_TYPE_P (TREE_TYPE (t0))
5317 && TYPE_PRECISION (TREE_TYPE (t0))
5318 != TYPE_PRECISION (fd->iter_type))
5319 {
5320 /* Avoid casting pointers to an integer of a different size. */
5321 tree itype = signed_type_for (type);
5322 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5323 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5324 }
5325 else
5326 {
5327 t1 = fold_convert (fd->iter_type, t1);
5328 t0 = fold_convert (fd->iter_type, t0);
5329 }
5330 if (bias)
5331 {
5332 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5333 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5334 }
5335
5336 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5337 OMP_CLAUSE__LOOPTEMP_);
5338 gcc_assert (innerc);
5339 tree startvar = OMP_CLAUSE_DECL (innerc);
5340 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5341 gcc_assert (innerc);
5342 tree endvar = OMP_CLAUSE_DECL (innerc);
5343 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5344 {
5345 gcc_assert (innerc);
5346 for (i = 1; i < fd->collapse; i++)
5347 {
5348 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5349 OMP_CLAUSE__LOOPTEMP_);
5350 gcc_assert (innerc);
5351 }
5352 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5353 OMP_CLAUSE__LOOPTEMP_);
5354 if (innerc)
5355 {
5356 /* If needed (inner taskloop has lastprivate clause), propagate
5357 down the total number of iterations. */
5358 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5359 NULL_TREE, false,
5360 GSI_CONTINUE_LINKING);
5361 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5362 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5363 }
5364 }
5365
5366 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5367 GSI_CONTINUE_LINKING);
5368 assign_stmt = gimple_build_assign (startvar, t0);
5369 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5370
5371 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5372 GSI_CONTINUE_LINKING);
5373 assign_stmt = gimple_build_assign (endvar, t1);
5374 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5375 if (fd->collapse > 1)
5376 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5377
5378 /* Remove the GIMPLE_OMP_FOR statement. */
5379 gsi = gsi_for_stmt (for_stmt);
5380 gsi_remove (&gsi, true);
5381
65f4b875 5382 gsi = gsi_last_nondebug_bb (cont_bb);
5383 gsi_remove (&gsi, true);
5384
65f4b875 5385 gsi = gsi_last_nondebug_bb (exit_bb);
5386 gsi_remove (&gsi, true);
5387
357067f2 5388 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75 5389 remove_edge (BRANCH_EDGE (entry_bb));
357067f2 5390 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5391 remove_edge (BRANCH_EDGE (cont_bb));
5392 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5393 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5394 recompute_dominator (CDI_DOMINATORS, region->entry));
5395}
5396
5397/* A taskloop construct is represented after gimplification with
5398 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5399 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5400 The GOMP_taskloop{,_ull} function arranges for each task to be given just
5401 a single range of iterations. */
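   Conceptually, after the inner GIMPLE_OMP_FOR is expanded, each generated
   task behaves like the hand-written sketch below (illustrative only, not GCC
   code): it is handed one contiguous [start, end) sub-range and runs the
   sequential loop over just that range.

   static void
   task_body (long start, long end, double *a)
   {
     for (long i = start; i < end; i++)
       a[i] *= 0.5;
   }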
5402
5403static void
5404expand_omp_taskloop_for_inner (struct omp_region *region,
5405 struct omp_for_data *fd,
5406 gimple *inner_stmt)
5407{
5408 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5409 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5410 basic_block fin_bb;
5411 gimple_stmt_iterator gsi;
5412 edge ep;
5413 bool broken_loop = region->cont == NULL;
5414 tree *counts = NULL;
5415 tree n1, n2, step;
5416
5417 itype = type = TREE_TYPE (fd->loop.v);
5418 if (POINTER_TYPE_P (type))
5419 itype = signed_type_for (type);
5420
5421 /* See if we need to bias by LLONG_MIN. */
5422 if (fd->iter_type == long_long_unsigned_type_node
5423 && TREE_CODE (type) == INTEGER_TYPE
5424 && !TYPE_UNSIGNED (type))
5425 {
5426 tree n1, n2;
5427
5428 if (fd->loop.cond_code == LT_EXPR)
5429 {
5430 n1 = fd->loop.n1;
5431 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5432 }
5433 else
5434 {
5435 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5436 n2 = fd->loop.n1;
5437 }
5438 if (TREE_CODE (n1) != INTEGER_CST
5439 || TREE_CODE (n2) != INTEGER_CST
5440 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5441 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5442 }
5443
5444 entry_bb = region->entry;
5445 cont_bb = region->cont;
5446 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5447 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5448 gcc_assert (broken_loop
5449 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5450 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5451 if (!broken_loop)
5452 {
5453 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5454 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5455 }
5456 exit_bb = region->exit;
5457
5458 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 5459 gsi = gsi_last_nondebug_bb (entry_bb);
5460 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5461
5462 if (fd->collapse > 1)
5463 {
5464 int first_zero_iter = -1, dummy = -1;
5465 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5466
5467 counts = XALLOCAVEC (tree, fd->collapse);
5468 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5469 fin_bb, first_zero_iter,
5470 dummy_bb, dummy, l2_dom_bb);
5471 t = NULL_TREE;
5472 }
5473 else
5474 t = integer_one_node;
5475
5476 step = fd->loop.step;
5477 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5478 OMP_CLAUSE__LOOPTEMP_);
5479 gcc_assert (innerc);
5480 n1 = OMP_CLAUSE_DECL (innerc);
5481 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5482 gcc_assert (innerc);
5483 n2 = OMP_CLAUSE_DECL (innerc);
5484 if (bias)
5485 {
5486 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5487 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5488 }
5489 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5490 true, NULL_TREE, true, GSI_SAME_STMT);
5491 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5492 true, NULL_TREE, true, GSI_SAME_STMT);
5493 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5494 true, NULL_TREE, true, GSI_SAME_STMT);
5495
5496 tree startvar = fd->loop.v;
5497 tree endvar = NULL_TREE;
5498
5499 if (gimple_omp_for_combined_p (fd->for_stmt))
5500 {
5501 tree clauses = gimple_omp_for_clauses (inner_stmt);
5502 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5503 gcc_assert (innerc);
5504 startvar = OMP_CLAUSE_DECL (innerc);
5505 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5506 OMP_CLAUSE__LOOPTEMP_);
5507 gcc_assert (innerc);
5508 endvar = OMP_CLAUSE_DECL (innerc);
5509 }
5510 t = fold_convert (TREE_TYPE (startvar), n1);
5511 t = force_gimple_operand_gsi (&gsi, t,
5512 DECL_P (startvar)
5513 && TREE_ADDRESSABLE (startvar),
5514 NULL_TREE, false, GSI_CONTINUE_LINKING);
5515 gimple *assign_stmt = gimple_build_assign (startvar, t);
5516 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5517
5518 t = fold_convert (TREE_TYPE (startvar), n2);
5519 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5520 false, GSI_CONTINUE_LINKING);
5521 if (endvar)
5522 {
5523 assign_stmt = gimple_build_assign (endvar, e);
5524 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5525 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5526 assign_stmt = gimple_build_assign (fd->loop.v, e);
5527 else
5528 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5529 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5530 }
5531 if (fd->collapse > 1)
5532 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5533
5534 if (!broken_loop)
5535 {
5536 /* The code controlling the sequential loop replaces the
5537 GIMPLE_OMP_CONTINUE. */
65f4b875 5538 gsi = gsi_last_nondebug_bb (cont_bb);
5539 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5540 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5541 vmain = gimple_omp_continue_control_use (cont_stmt);
5542 vback = gimple_omp_continue_control_def (cont_stmt);
5543
5544 if (!gimple_omp_for_combined_p (fd->for_stmt))
5545 {
5546 if (POINTER_TYPE_P (type))
5547 t = fold_build_pointer_plus (vmain, step);
5548 else
5549 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5550 t = force_gimple_operand_gsi (&gsi, t,
5551 DECL_P (vback)
5552 && TREE_ADDRESSABLE (vback),
5553 NULL_TREE, true, GSI_SAME_STMT);
5554 assign_stmt = gimple_build_assign (vback, t);
5555 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5556
5557 t = build2 (fd->loop.cond_code, boolean_type_node,
5558 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5559 ? t : vback, e);
5560 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5561 }
5562
5563 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5564 gsi_remove (&gsi, true);
5565
5566 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5567 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5568 }
5569
5570 /* Remove the GIMPLE_OMP_FOR statement. */
5571 gsi = gsi_for_stmt (fd->for_stmt);
5572 gsi_remove (&gsi, true);
5573
5574 /* Remove the GIMPLE_OMP_RETURN statement. */
65f4b875 5575 gsi = gsi_last_nondebug_bb (exit_bb);
5576 gsi_remove (&gsi, true);
5577
357067f2 5578 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5579 if (!broken_loop)
5580 remove_edge (BRANCH_EDGE (entry_bb));
5581 else
5582 {
5583 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5584 region->outer->cont = NULL;
5585 }
5586
5587 /* Connect all the blocks. */
5588 if (!broken_loop)
5589 {
5590 ep = find_edge (cont_bb, body_bb);
5591 if (gimple_omp_for_combined_p (fd->for_stmt))
5592 {
5593 remove_edge (ep);
5594 ep = NULL;
5595 }
5596 else if (fd->collapse > 1)
5597 {
5598 remove_edge (ep);
5599 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5600 }
5601 else
5602 ep->flags = EDGE_TRUE_VALUE;
5603 find_edge (cont_bb, fin_bb)->flags
5604 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5605 }
5606
5607 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5608 recompute_dominator (CDI_DOMINATORS, body_bb));
5609 if (!broken_loop)
5610 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5611 recompute_dominator (CDI_DOMINATORS, fin_bb));
5612
5613 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5614 {
5615 struct loop *loop = alloc_loop ();
5616 loop->header = body_bb;
5617 if (collapse_bb == NULL)
5618 loop->latch = cont_bb;
5619 add_loop (loop, body_bb->loop_father);
5620 }
5621}
5622
5623/* A subroutine of expand_omp_for. Generate code for an OpenACC
5624 partitioned loop. The lowering here is abstracted, in that the
5625 loop parameters are passed through internal functions, which are
5626 further lowered by oacc_device_lower, once we get to the target
5627 compiler. The loop is of the form:
5628
5629 for (V = B; V LTGT E; V += S) {BODY}
5630
5631 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5632 (constant 0 for no chunking) and we will have a GWV partitioning
5633 mask, specifying dimensions over which the loop is to be
5634 partitioned (see note below). We generate code that looks like
5635 (this ignores tiling):
5636
5637 <entry_bb> [incoming FALL->body, BRANCH->exit]
5638 typedef signedintify (typeof (V)) T; // underlying signed integral type
5639 T range = E - B;
5640 T chunk_no = 0;
5641 T DIR = LTGT == '<' ? +1 : -1;
5642 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5643 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5644
5645 <head_bb> [created by splitting end of entry_bb]
5646 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5647 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5648 if (!(offset LTGT bound)) goto bottom_bb;
5649
5650 <body_bb> [incoming]
5651 V = B + offset;
5652 {BODY}
5653
5654 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5655 offset += step;
5656 if (offset LTGT bound) goto body_bb; [*]
5657
5658 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5659 chunk_no++;
5660 if (chunk_no < chunk_max) goto head_bb;
5661
5662 <exit_bb> [incoming]
5663 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5664
02889d23 5665 [*] Needed if V live at end of loop. */
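   For orientation, a source-level OpenACC loop of the partitioned form
   described above could look like the following sketch (illustrative only;
   the names and data clauses are invented, and the snippet is not from GCC or
   libgomp).  The gang/vector clauses correspond to the GWV partitioning mask
   mentioned above; adding a tile clause would exercise the element-loop code
   generated below.

   void
   saxpy (int n, float alpha, const float *x, float *y)
   {
   #pragma acc parallel loop gang vector copyin(x[0:n]) copy(y[0:n])
     for (int i = 0; i < n; i++)
       y[i] = alpha * x[i] + y[i];
   }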
5666
5667static void
5668expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5669{
5670 tree v = fd->loop.v;
5671 enum tree_code cond_code = fd->loop.cond_code;
5672 enum tree_code plus_code = PLUS_EXPR;
5673
5674 tree chunk_size = integer_minus_one_node;
5675 tree gwv = integer_zero_node;
5676 tree iter_type = TREE_TYPE (v);
5677 tree diff_type = iter_type;
5678 tree plus_type = iter_type;
5679 struct oacc_collapse *counts = NULL;
5680
5681 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5682 == GF_OMP_FOR_KIND_OACC_LOOP);
5683 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5684 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5685
5686 if (POINTER_TYPE_P (iter_type))
5687 {
5688 plus_code = POINTER_PLUS_EXPR;
5689 plus_type = sizetype;
5690 }
5691 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5692 diff_type = signed_type_for (diff_type);
5693 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5694 diff_type = integer_type_node;
5695
5696 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5697 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5698 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5699 basic_block bottom_bb = NULL;
5700
5701 /* entry_bb has two successors; the branch edge goes to the exit
5702 block, the fallthrough edge to the body. */
5703 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5704 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5705
5706 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5707 body_bb, or a block whose only successor is body_bb. Its
5708 fallthrough successor is the final block (same as the branch
5709 successor of the entry_bb). */
5710 if (cont_bb)
5711 {
5712 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5713 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5714
5715 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5716 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5717 }
5718 else
5719 gcc_assert (!gimple_in_ssa_p (cfun));
5720
5721 /* The exit block only has entry_bb and cont_bb as predecessors. */
5722 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5723
5724 tree chunk_no;
5725 tree chunk_max = NULL_TREE;
5726 tree bound, offset;
5727 tree step = create_tmp_var (diff_type, ".step");
5728 bool up = cond_code == LT_EXPR;
5729 tree dir = build_int_cst (diff_type, up ? +1 : -1);
02889d23 5730 bool chunking = !gimple_in_ssa_p (cfun);
5731 bool negating;
5732
5733 /* Tiling vars. */
5734 tree tile_size = NULL_TREE;
5735 tree element_s = NULL_TREE;
5736 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5737 basic_block elem_body_bb = NULL;
5738 basic_block elem_cont_bb = NULL;
5739
5740 /* SSA instances. */
5741 tree offset_incr = NULL_TREE;
5742 tree offset_init = NULL_TREE;
5743
5744 gimple_stmt_iterator gsi;
5745 gassign *ass;
5746 gcall *call;
5747 gimple *stmt;
5748 tree expr;
5749 location_t loc;
5750 edge split, be, fte;
5751
5752 /* Split the end of entry_bb to create head_bb. */
5753 split = split_block (entry_bb, last_stmt (entry_bb));
5754 basic_block head_bb = split->dest;
5755 entry_bb = split->src;
5756
5757 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
65f4b875 5758 gsi = gsi_last_nondebug_bb (entry_bb);
5759 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5760 loc = gimple_location (for_stmt);
5761
5762 if (gimple_in_ssa_p (cfun))
5763 {
5764 offset_init = gimple_omp_for_index (for_stmt, 0);
5765 gcc_assert (integer_zerop (fd->loop.n1));
5766 /* The SSA parallelizer does gang parallelism. */
5767 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5768 }
5769
02889d23 5770 if (fd->collapse > 1 || fd->tiling)
629b3d75 5771 {
02889d23 5772 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5773 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5774 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
02889d23 5775 TREE_TYPE (fd->loop.n2), loc);
5776
5777 if (SSA_VAR_P (fd->loop.n2))
5778 {
5779 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5780 true, GSI_SAME_STMT);
5781 ass = gimple_build_assign (fd->loop.n2, total);
5782 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5783 }
5784 }
5785
5786 tree b = fd->loop.n1;
5787 tree e = fd->loop.n2;
5788 tree s = fd->loop.step;
5789
5790 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5791 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5792
01914336 5793 /* Convert the step, avoiding possible unsigned->signed overflow. */
5794 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5795 if (negating)
5796 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5797 s = fold_convert (diff_type, s);
5798 if (negating)
5799 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5800 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5801
5802 if (!chunking)
5803 chunk_size = integer_zero_node;
5804 expr = fold_convert (diff_type, chunk_size);
5805 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5806 NULL_TREE, true, GSI_SAME_STMT);
5807
5808 if (fd->tiling)
5809 {
5810 /* Determine the tile size and element step,
5811 modify the outer loop step size. */
5812 tile_size = create_tmp_var (diff_type, ".tile_size");
5813 expr = build_int_cst (diff_type, 1);
5814 for (int ix = 0; ix < fd->collapse; ix++)
5815 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5816 expr = force_gimple_operand_gsi (&gsi, expr, true,
5817 NULL_TREE, true, GSI_SAME_STMT);
5818 ass = gimple_build_assign (tile_size, expr);
5819 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5820
5821 element_s = create_tmp_var (diff_type, ".element_s");
5822 ass = gimple_build_assign (element_s, s);
5823 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5824
5825 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5826 s = force_gimple_operand_gsi (&gsi, expr, true,
5827 NULL_TREE, true, GSI_SAME_STMT);
5828 }
5829
01914336 5830 /* Determine the range, avoiding possible unsigned->signed overflow. */
5831 negating = !up && TYPE_UNSIGNED (iter_type);
5832 expr = fold_build2 (MINUS_EXPR, plus_type,
5833 fold_convert (plus_type, negating ? b : e),
5834 fold_convert (plus_type, negating ? e : b));
5835 expr = fold_convert (diff_type, expr);
5836 if (negating)
5837 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5838 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5839 NULL_TREE, true, GSI_SAME_STMT);
5840
5841 chunk_no = build_int_cst (diff_type, 0);
5842 if (chunking)
5843 {
5844 gcc_assert (!gimple_in_ssa_p (cfun));
5845
5846 expr = chunk_no;
5847 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5848 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5849
5850 ass = gimple_build_assign (chunk_no, expr);
5851 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5852
5853 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5854 build_int_cst (integer_type_node,
5855 IFN_GOACC_LOOP_CHUNKS),
5856 dir, range, s, chunk_size, gwv);
5857 gimple_call_set_lhs (call, chunk_max);
5858 gimple_set_location (call, loc);
5859 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5860 }
5861 else
5862 chunk_size = chunk_no;
5863
5864 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5865 build_int_cst (integer_type_node,
5866 IFN_GOACC_LOOP_STEP),
5867 dir, range, s, chunk_size, gwv);
5868 gimple_call_set_lhs (call, step);
5869 gimple_set_location (call, loc);
5870 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5871
5872 /* Remove the GIMPLE_OMP_FOR. */
5873 gsi_remove (&gsi, true);
5874
01914336 5875 /* Fixup edges from head_bb. */
5876 be = BRANCH_EDGE (head_bb);
5877 fte = FALLTHRU_EDGE (head_bb);
5878 be->flags |= EDGE_FALSE_VALUE;
5879 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5880
5881 basic_block body_bb = fte->dest;
5882
5883 if (gimple_in_ssa_p (cfun))
5884 {
65f4b875 5885 gsi = gsi_last_nondebug_bb (cont_bb);
5886 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5887
5888 offset = gimple_omp_continue_control_use (cont_stmt);
5889 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5890 }
5891 else
5892 {
5893 offset = create_tmp_var (diff_type, ".offset");
5894 offset_init = offset_incr = offset;
5895 }
5896 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5897
5898 /* Loop offset & bound go into head_bb. */
5899 gsi = gsi_start_bb (head_bb);
5900
5901 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5902 build_int_cst (integer_type_node,
5903 IFN_GOACC_LOOP_OFFSET),
5904 dir, range, s,
5905 chunk_size, gwv, chunk_no);
5906 gimple_call_set_lhs (call, offset_init);
5907 gimple_set_location (call, loc);
5908 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5909
5910 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5911 build_int_cst (integer_type_node,
5912 IFN_GOACC_LOOP_BOUND),
5913 dir, range, s,
5914 chunk_size, gwv, offset_init);
5915 gimple_call_set_lhs (call, bound);
5916 gimple_set_location (call, loc);
5917 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5918
5919 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5920 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5921 GSI_CONTINUE_LINKING);
5922
5923 /* V assignment goes into body_bb. */
5924 if (!gimple_in_ssa_p (cfun))
5925 {
5926 gsi = gsi_start_bb (body_bb);
5927
5928 expr = build2 (plus_code, iter_type, b,
5929 fold_convert (plus_type, offset));
5930 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5931 true, GSI_SAME_STMT);
5932 ass = gimple_build_assign (v, expr);
5933 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5934
5935 if (fd->collapse > 1 || fd->tiling)
5936 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5937
5938 if (fd->tiling)
5939 {
5940 /* Determine the range of the element loop -- usually simply
5941 the tile_size, but could be smaller if the final
5942 iteration of the outer loop is a partial tile. */
5943 tree e_range = create_tmp_var (diff_type, ".e_range");
5944
5945 expr = build2 (MIN_EXPR, diff_type,
5946 build2 (MINUS_EXPR, diff_type, bound, offset),
5947 build2 (MULT_EXPR, diff_type, tile_size,
5948 element_s));
5949 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5950 true, GSI_SAME_STMT);
5951 ass = gimple_build_assign (e_range, expr);
5952 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5953
5954 /* Determine bound, offset & step of inner loop. */
5955 e_bound = create_tmp_var (diff_type, ".e_bound");
5956 e_offset = create_tmp_var (diff_type, ".e_offset");
5957 e_step = create_tmp_var (diff_type, ".e_step");
5958
5959 /* Mark these as element loops. */
5960 tree t, e_gwv = integer_minus_one_node;
5961 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5962
5963 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5964 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5965 element_s, chunk, e_gwv, chunk);
5966 gimple_call_set_lhs (call, e_offset);
5967 gimple_set_location (call, loc);
5968 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5969
5970 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5971 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5972 element_s, chunk, e_gwv, e_offset);
5973 gimple_call_set_lhs (call, e_bound);
5974 gimple_set_location (call, loc);
5975 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5976
5977 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5978 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5979 element_s, chunk, e_gwv);
5980 gimple_call_set_lhs (call, e_step);
5981 gimple_set_location (call, loc);
5982 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5983
5984 /* Add test and split block. */
5985 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5986 stmt = gimple_build_cond_empty (expr);
5987 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5988 split = split_block (body_bb, stmt);
5989 elem_body_bb = split->dest;
5990 if (cont_bb == body_bb)
5991 cont_bb = elem_body_bb;
5992 body_bb = split->src;
5993
5994 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5995
5996 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5997 if (cont_bb == NULL)
5998 {
5999 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6000 e->probability = profile_probability::even ();
6001 split->probability = profile_probability::even ();
6002 }
6003
6004 /* Initialize the user's loop vars. */
6005 gsi = gsi_start_bb (elem_body_bb);
6006 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6007 }
6008 }
6009
6010 /* Loop increment goes into cont_bb. If this is not a loop, we
6011 will have spawned threads as if it was, and each one will
6012 execute one iteration. The specification is not explicit about
6013 whether such constructs are ill-formed or not, and they can
6014 occur, especially when noreturn routines are involved. */
6015 if (cont_bb)
6016 {
65f4b875 6017 gsi = gsi_last_nondebug_bb (cont_bb);
6018 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6019 loc = gimple_location (cont_stmt);
6020
6021 if (fd->tiling)
6022 {
6023 /* Insert element loop increment and test. */
6024 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6025 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6026 true, GSI_SAME_STMT);
6027 ass = gimple_build_assign (e_offset, expr);
6028 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6029 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6030
6031 stmt = gimple_build_cond_empty (expr);
6032 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6033 split = split_block (cont_bb, stmt);
6034 elem_cont_bb = split->src;
6035 cont_bb = split->dest;
6036
6037 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6038 split->probability = profile_probability::unlikely ().guessed ();
6039 edge latch_edge
6040 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6041 latch_edge->probability = profile_probability::likely ().guessed ();
6042
6043 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6044 skip_edge->probability = profile_probability::unlikely ().guessed ();
6045 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6046 loop_entry_edge->probability
6047 = profile_probability::likely ().guessed ();
6048
6049 gsi = gsi_for_stmt (cont_stmt);
6050 }
6051
6052 /* Increment offset. */
6053 if (gimple_in_ssa_p (cfun))
6054 expr = build2 (plus_code, iter_type, offset,
6055 fold_convert (plus_type, step));
6056 else
6057 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6058 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6059 true, GSI_SAME_STMT);
6060 ass = gimple_build_assign (offset_incr, expr);
6061 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6062 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6063 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6064
6065 /* Remove the GIMPLE_OMP_CONTINUE. */
6066 gsi_remove (&gsi, true);
6067
01914336 6068 /* Fixup edges from cont_bb. */
6069 be = BRANCH_EDGE (cont_bb);
6070 fte = FALLTHRU_EDGE (cont_bb);
6071 be->flags |= EDGE_TRUE_VALUE;
6072 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6073
6074 if (chunking)
6075 {
6076 /* Split the beginning of exit_bb to make bottom_bb. We
6077 need to insert a nop at the start, because splitting is
01914336 6078 after a stmt, not before. */
6079 gsi = gsi_start_bb (exit_bb);
6080 stmt = gimple_build_nop ();
6081 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6082 split = split_block (exit_bb, stmt);
6083 bottom_bb = split->src;
6084 exit_bb = split->dest;
6085 gsi = gsi_last_bb (bottom_bb);
6086
6087 /* Chunk increment and test goes into bottom_bb. */
6088 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6089 build_int_cst (diff_type, 1));
6090 ass = gimple_build_assign (chunk_no, expr);
6091 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6092
6093 /* Chunk test at end of bottom_bb. */
6094 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6095 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6096 GSI_CONTINUE_LINKING);
6097
01914336 6098 /* Fixup edges from bottom_bb. */
629b3d75 6099 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6100 split->probability = profile_probability::unlikely ().guessed ();
6101 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6102 latch_edge->probability = profile_probability::likely ().guessed ();
6103 }
6104 }
6105
65f4b875 6106 gsi = gsi_last_nondebug_bb (exit_bb);
6107 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6108 loc = gimple_location (gsi_stmt (gsi));
6109
6110 if (!gimple_in_ssa_p (cfun))
6111 {
6112 /* Insert the final value of V, in case it is live. This is the
6113 value for the only thread that survives past the join. */
6114 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6115 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6116 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6117 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6118 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6119 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6120 true, GSI_SAME_STMT);
6121 ass = gimple_build_assign (v, expr);
6122 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6123 }
6124
01914336 6125 /* Remove the OMP_RETURN. */
6126 gsi_remove (&gsi, true);
6127
6128 if (cont_bb)
6129 {
02889d23 6130 /* We now have one, two or three nested loops. Update the loop
6131 structures. */
6132 struct loop *parent = entry_bb->loop_father;
6133 struct loop *body = body_bb->loop_father;
6134
6135 if (chunking)
6136 {
6137 struct loop *chunk_loop = alloc_loop ();
6138 chunk_loop->header = head_bb;
6139 chunk_loop->latch = bottom_bb;
6140 add_loop (chunk_loop, parent);
6141 parent = chunk_loop;
6142 }
6143 else if (parent != body)
6144 {
6145 gcc_assert (body->header == body_bb);
6146 gcc_assert (body->latch == cont_bb
6147 || single_pred (body->latch) == cont_bb);
6148 parent = NULL;
6149 }
6150
6151 if (parent)
6152 {
6153 struct loop *body_loop = alloc_loop ();
6154 body_loop->header = body_bb;
6155 body_loop->latch = cont_bb;
6156 add_loop (body_loop, parent);
6157
6158 if (fd->tiling)
6159 {
6160 /* Insert tiling's element loop. */
6161 struct loop *inner_loop = alloc_loop ();
6162 inner_loop->header = elem_body_bb;
6163 inner_loop->latch = elem_cont_bb;
6164 add_loop (inner_loop, body_loop);
6165 }
6166 }
6167 }
6168}
6169
6170/* Expand the OMP loop defined by REGION. */
6171
6172static void
6173expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6174{
6175 struct omp_for_data fd;
6176 struct omp_for_data_loop *loops;
6177
6178 loops
6179 = (struct omp_for_data_loop *)
6180 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6181 * sizeof (struct omp_for_data_loop));
6182 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6183 &fd, loops);
6184 region->sched_kind = fd.sched_kind;
6185 region->sched_modifiers = fd.sched_modifiers;
6186
6187 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6188 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6189 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6190 if (region->cont)
6191 {
6192 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6193 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6194 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6195 }
6196 else
6197 /* If there isn't a continue then this is a degenerate case where
6198 the introduction of abnormal edges during lowering will prevent
6199 original loops from being detected. Fix that up. */
6200 loops_state_set (LOOPS_NEED_FIXUP);
6201
6202 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6203 expand_omp_simd (region, &fd);
6204 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6205 {
6206 gcc_assert (!inner_stmt);
6207 expand_oacc_for (region, &fd);
6208 }
6209 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6210 {
6211 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6212 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6213 else
6214 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6215 }
6216 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6217 && !fd.have_ordered)
6218 {
6219 if (fd.chunk_size == NULL)
6220 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6221 else
6222 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6223 }
6224 else
6225 {
6226 int fn_index, start_ix, next_ix;
6227 unsigned HOST_WIDE_INT sched = 0;
6228 tree sched_arg = NULL_TREE;
6229
6230 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6231 == GF_OMP_FOR_KIND_FOR);
6232 if (fd.chunk_size == NULL
6233 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6234 fd.chunk_size = integer_zero_node;
6235 switch (fd.sched_kind)
6236 {
6237 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6238 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
6239 {
6240 gcc_assert (!fd.have_ordered);
6241 fn_index = 6;
6242 sched = 4;
6243 }
6244 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6245 && !fd.have_ordered)
6246 fn_index = 7;
6247 else
6248 {
6249 fn_index = 3;
6250 sched = (HOST_WIDE_INT_1U << 31);
6251 }
6252 break;
6253 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6254 case OMP_CLAUSE_SCHEDULE_GUIDED:
28567c40 6255 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6256 && !fd.have_ordered)
6257 {
6258 fn_index = 3 + fd.sched_kind;
28567c40 6259 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6260 break;
6261 }
629b3d75 6262 fn_index = fd.sched_kind;
6263 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6264 sched += (HOST_WIDE_INT_1U << 31);
629b3d75 6265 break;
6266 case OMP_CLAUSE_SCHEDULE_STATIC:
6267 gcc_assert (fd.have_ordered);
6268 fn_index = 0;
6269 sched = (HOST_WIDE_INT_1U << 31) + 1;
6270 break;
6271 default:
6272 gcc_unreachable ();
6273 }
6274 if (!fd.ordered)
28567c40 6275 fn_index += fd.have_ordered * 8;
6276 if (fd.ordered)
6277 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6278 else
6279 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6280 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8221c30b 6281 if (fd.have_reductemp || fd.have_pointer_condtemp)
6282 {
6283 if (fd.ordered)
6284 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6285 else if (fd.have_ordered)
6286 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6287 else
6288 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6289 sched_arg = build_int_cstu (long_integer_type_node, sched);
6290 if (!fd.chunk_size)
6291 fd.chunk_size = integer_zero_node;
6292 }
6293 if (fd.iter_type == long_long_unsigned_type_node)
6294 {
6295 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6296 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6297 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6298 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6299 }
6300 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6301 (enum built_in_function) next_ix, sched_arg,
6302 inner_stmt);
6303 }
6304
6305 if (gimple_in_ssa_p (cfun))
6306 update_ssa (TODO_update_ssa_only_virtuals);
6307}
6308
6309/* Expand code for an OpenMP sections directive. In pseudocode, we generate
6310
6311 v = GOMP_sections_start (n);
6312 L0:
6313 switch (v)
6314 {
6315 case 0:
6316 goto L2;
6317 case 1:
6318 section 1;
6319 goto L1;
6320 case 2:
6321 ...
6322 case n:
6323 ...
6324 default:
6325 abort ();
6326 }
6327 L1:
6328 v = GOMP_sections_next ();
6329 goto L0;
6330 L2:
6331 reduction;
6332
6333 If this is a combined parallel sections, replace the call to
6334 GOMP_sections_start with a call to GOMP_sections_next. */
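   A minimal user-level construct matching that pseudocode might be the sketch
   below (illustrative only; task_a and task_b are hypothetical helpers, not
   GCC symbols).  Each section body becomes one case of the generated switch,
   and GOMP_sections_start/GOMP_sections_next hand out the case indices until
   0 signals that no work is left.

   extern void task_a (void);
   extern void task_b (void);

   void
   do_work (void)
   {
   #pragma omp parallel sections
     {
   #pragma omp section
       task_a ();
   #pragma omp section
       task_b ();
     }
   }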
6335
6336static void
6337expand_omp_sections (struct omp_region *region)
6338{
6339 tree t, u, vin = NULL, vmain, vnext, l2;
6340 unsigned len;
6341 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6342 gimple_stmt_iterator si, switch_si;
6343 gomp_sections *sections_stmt;
6344 gimple *stmt;
6345 gomp_continue *cont;
6346 edge_iterator ei;
6347 edge e;
6348 struct omp_region *inner;
6349 unsigned i, casei;
6350 bool exit_reachable = region->cont != NULL;
6351
6352 gcc_assert (region->exit != NULL);
6353 entry_bb = region->entry;
6354 l0_bb = single_succ (entry_bb);
6355 l1_bb = region->cont;
6356 l2_bb = region->exit;
6357 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6358 l2 = gimple_block_label (l2_bb);
6359 else
6360 {
6361 /* This can happen if there are reductions. */
6362 len = EDGE_COUNT (l0_bb->succs);
6363 gcc_assert (len > 0);
6364 e = EDGE_SUCC (l0_bb, len - 1);
65f4b875 6365 si = gsi_last_nondebug_bb (e->dest);
6366 l2 = NULL_TREE;
6367 if (gsi_end_p (si)
01914336 6368 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6369 l2 = gimple_block_label (e->dest);
6370 else
6371 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6372 {
65f4b875 6373 si = gsi_last_nondebug_bb (e->dest);
6374 if (gsi_end_p (si)
6375 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6376 {
6377 l2 = gimple_block_label (e->dest);
6378 break;
6379 }
6380 }
6381 }
6382 if (exit_reachable)
6383 default_bb = create_empty_bb (l1_bb->prev_bb);
6384 else
6385 default_bb = create_empty_bb (l0_bb);
6386
6387 /* We will build a switch() with enough cases for all the
6388 GIMPLE_OMP_SECTION regions, a '0' case to handle there being no more work
6389 and a default case to abort if something goes wrong. */
6390 len = EDGE_COUNT (l0_bb->succs);
6391
6392 /* Use vec::quick_push on label_vec throughout, since we know the size
6393 in advance. */
6394 auto_vec<tree> label_vec (len);
6395
6396 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6397 GIMPLE_OMP_SECTIONS statement. */
65f4b875 6398 si = gsi_last_nondebug_bb (entry_bb);
6399 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6400 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6401 vin = gimple_omp_sections_control (sections_stmt);
6402 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6403 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6404 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6405 tree cond_var = NULL_TREE;
6406 if (reductmp || condtmp)
6407 {
6408 tree reductions = null_pointer_node, mem = null_pointer_node;
6409 tree memv = NULL_TREE, condtemp = NULL_TREE;
6410 gimple_stmt_iterator gsi = gsi_none ();
6411 gimple *g = NULL;
6412 if (reductmp)
6413 {
6414 reductions = OMP_CLAUSE_DECL (reductmp);
6415 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6416 g = SSA_NAME_DEF_STMT (reductions);
6417 reductions = gimple_assign_rhs1 (g);
6418 OMP_CLAUSE_DECL (reductmp) = reductions;
6419 gsi = gsi_for_stmt (g);
6420 }
6421 else
6422 gsi = si;
6423 if (condtmp)
6424 {
6425 condtemp = OMP_CLAUSE_DECL (condtmp);
6426 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6427 OMP_CLAUSE__CONDTEMP_);
6428 cond_var = OMP_CLAUSE_DECL (c);
6429 tree type = TREE_TYPE (condtemp);
6430 memv = create_tmp_var (type);
6431 TREE_ADDRESSABLE (memv) = 1;
6432 unsigned cnt = 0;
6433 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6434 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6435 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6436 ++cnt;
6437 unsigned HOST_WIDE_INT sz
6438 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6439 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6440 false);
6441 mem = build_fold_addr_expr (memv);
6442 }
6443 t = build_int_cst (unsigned_type_node, len - 1);
6444 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8e7757ba 6445 stmt = gimple_build_call (u, 3, t, reductions, mem);
6446 gimple_call_set_lhs (stmt, vin);
6447 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6448 if (condtmp)
6449 {
6450 expand_omp_build_assign (&gsi, condtemp, memv, false);
6451 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6452 vin, build_one_cst (TREE_TYPE (cond_var)));
6453 expand_omp_build_assign (&gsi, cond_var, t, false);
6454 }
6455 if (reductmp)
6456 {
6457 gsi_remove (&gsi, true);
6458 release_ssa_name (gimple_assign_lhs (g));
6459 }
6460 }
6461 else if (!is_combined_parallel (region))
6462 {
6463 /* If we are not inside a combined parallel+sections region,
6464 call GOMP_sections_start. */
6465 t = build_int_cst (unsigned_type_node, len - 1);
6466 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6467 stmt = gimple_build_call (u, 1, t);
6468 }
6469 else
6470 {
6471 /* Otherwise, call GOMP_sections_next. */
6472 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6473 stmt = gimple_build_call (u, 0);
6474 }
8e7757ba 6475 if (!reductmp && !condtmp)
6476 {
6477 gimple_call_set_lhs (stmt, vin);
6478 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6479 }
6480 gsi_remove (&si, true);
6481
6482 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6483 L0_BB. */
65f4b875 6484 switch_si = gsi_last_nondebug_bb (l0_bb);
6485 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6486 if (exit_reachable)
6487 {
6488 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6489 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6490 vmain = gimple_omp_continue_control_use (cont);
6491 vnext = gimple_omp_continue_control_def (cont);
6492 }
6493 else
6494 {
6495 vmain = vin;
6496 vnext = NULL_TREE;
6497 }
6498
6499 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6500 label_vec.quick_push (t);
6501 i = 1;
6502
6503 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6504 for (inner = region->inner, casei = 1;
6505 inner;
6506 inner = inner->next, i++, casei++)
6507 {
6508 basic_block s_entry_bb, s_exit_bb;
6509
6510 /* Skip optional reduction region. */
6511 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6512 {
6513 --i;
6514 --casei;
6515 continue;
6516 }
6517
6518 s_entry_bb = inner->entry;
6519 s_exit_bb = inner->exit;
6520
6521 t = gimple_block_label (s_entry_bb);
6522 u = build_int_cst (unsigned_type_node, casei);
6523 u = build_case_label (u, NULL, t);
6524 label_vec.quick_push (u);
6525
65f4b875 6526 si = gsi_last_nondebug_bb (s_entry_bb);
6527 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6528 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6529 gsi_remove (&si, true);
6530 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6531
6532 if (s_exit_bb == NULL)
6533 continue;
6534
65f4b875 6535 si = gsi_last_nondebug_bb (s_exit_bb);
629b3d75
MJ
6536 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6537 gsi_remove (&si, true);
6538
6539 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6540 }
6541
6542 /* Error handling code goes in DEFAULT_BB. */
6543 t = gimple_block_label (default_bb);
6544 u = build_case_label (NULL, NULL, t);
6545 make_edge (l0_bb, default_bb, 0);
6546 add_bb_to_loop (default_bb, current_loops->tree_root);
6547
6548 stmt = gimple_build_switch (vmain, u, label_vec);
6549 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6550 gsi_remove (&switch_si, true);
6551
6552 si = gsi_start_bb (default_bb);
6553 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6554 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6555
6556 if (exit_reachable)
6557 {
6558 tree bfn_decl;
6559
6560 /* Code to get the next section goes in L1_BB. */
65f4b875 6561 si = gsi_last_nondebug_bb (l1_bb);
629b3d75
MJ
6562 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6563
6564 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6565 stmt = gimple_build_call (bfn_decl, 0);
6566 gimple_call_set_lhs (stmt, vnext);
8e7757ba
JJ
6567 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6568 if (cond_var)
6569 {
6570 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6571 vnext, build_one_cst (TREE_TYPE (cond_var)));
6572 expand_omp_build_assign (&si, cond_var, t, false);
6573 }
629b3d75
MJ
6574 gsi_remove (&si, true);
6575
6576 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6577 }
6578
6579 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
65f4b875 6580 si = gsi_last_nondebug_bb (l2_bb);
629b3d75
MJ
6581 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6582 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6583 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6584 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6585 else
6586 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6587 stmt = gimple_build_call (t, 0);
6588 if (gimple_omp_return_lhs (gsi_stmt (si)))
6589 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6590 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6591 gsi_remove (&si, true);
6592
6593 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6594}
6595
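/* A rough source-level sketch of the dispatch built above for a region with
   two sections (illustrative only; the real result is GIMPLE, and the
   conditional-lastprivate/reduction variants instead start with the
   BUILT_IN_GOMP_SECTIONS2_START call seen above):

     for (unsigned i = GOMP_sections_start (2); i != 0;
	  i = GOMP_sections_next ())
       switch (i)
	 {
	 case 1: ... first section body ...; break;
	 case 2: ... second section body ...; break;
	 default: __builtin_trap ();
	 }
     GOMP_sections_end ();   // or the _nowait / _cancel variant

   Case 0, i.e. "no more sections", is what branches to L2 above.  */
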
6596/* Expand code for an OpenMP single directive. We've already expanded
 6597	   much of the code; here we simply place the GOMP_barrier call.  */
6598
6599static void
6600expand_omp_single (struct omp_region *region)
6601{
6602 basic_block entry_bb, exit_bb;
6603 gimple_stmt_iterator si;
6604
6605 entry_bb = region->entry;
6606 exit_bb = region->exit;
6607
65f4b875 6608 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
6609 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6610 gsi_remove (&si, true);
6611 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6612
65f4b875 6613 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6614 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6615 {
6616 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6617 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6618 }
6619 gsi_remove (&si, true);
6620 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6621}
6622
6623/* Generic expansion for OpenMP synchronization directives: master,
6624 ordered and critical. All we need to do here is remove the entry
6625 and exit markers for REGION. */
6626
6627static void
6628expand_omp_synch (struct omp_region *region)
6629{
6630 basic_block entry_bb, exit_bb;
6631 gimple_stmt_iterator si;
6632
6633 entry_bb = region->entry;
6634 exit_bb = region->exit;
6635
65f4b875 6636 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
6637 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6638 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6639 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6640 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6641 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6642 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
28567c40
JJ
6643 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6644 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6645 {
6646 expand_omp_taskreg (region);
6647 return;
6648 }
629b3d75
MJ
6649 gsi_remove (&si, true);
6650 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6651
6652 if (exit_bb)
6653 {
65f4b875 6654 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6655 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6656 gsi_remove (&si, true);
6657 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6658 }
6659}
6660
28567c40
JJ
6661/* Translate enum omp_memory_order to enum memmodel. The two enums
6662 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6663 is 0. */
6664
6665static enum memmodel
6666omp_memory_order_to_memmodel (enum omp_memory_order mo)
6667{
6668 switch (mo)
6669 {
6670 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6671 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6672 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6673 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6674 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6675 default: gcc_unreachable ();
6676 }
6677}
6678
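/* For reference (purely illustrative, not part of the expansion): these
   memmodel values are the same constants the __atomic_* builtins take as
   their last argument, so e.g. an OpenMP "acquire" atomic read of a plain
   int x behaves like

     v = __atomic_load_n (&x, __ATOMIC_ACQUIRE);

   with the other memory-order clauses mapping analogously.  */
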
629b3d75
MJ
6679/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6680 operation as a normal volatile load. */
6681
6682static bool
6683expand_omp_atomic_load (basic_block load_bb, tree addr,
6684 tree loaded_val, int index)
6685{
6686 enum built_in_function tmpbase;
6687 gimple_stmt_iterator gsi;
6688 basic_block store_bb;
6689 location_t loc;
6690 gimple *stmt;
6691 tree decl, call, type, itype;
6692
65f4b875 6693 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6694 stmt = gsi_stmt (gsi);
6695 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6696 loc = gimple_location (stmt);
6697
6698 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6699 is smaller than word size, then expand_atomic_load assumes that the load
6700 is atomic. We could avoid the builtin entirely in this case. */
6701
6702 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6703 decl = builtin_decl_explicit (tmpbase);
6704 if (decl == NULL_TREE)
6705 return false;
6706
6707 type = TREE_TYPE (loaded_val);
6708 itype = TREE_TYPE (TREE_TYPE (decl));
6709
28567c40
JJ
6710 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6711 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6712 call = build_call_expr_loc (loc, decl, 2, addr, mo);
629b3d75
MJ
6713 if (!useless_type_conversion_p (type, itype))
6714 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6715 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6716
6717 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6718 gsi_remove (&gsi, true);
6719
6720 store_bb = single_succ (load_bb);
65f4b875 6721 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6722 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6723 gsi_remove (&gsi, true);
6724
6725 if (gimple_in_ssa_p (cfun))
6726 update_ssa (TODO_update_ssa_no_phi);
6727
6728 return true;
6729}
6730
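/* A minimal sketch of the common case handled above, assuming a 4-byte int
   (so INDEX is 2 and the _4 builtin is picked) and a relaxed memory order:

     v = __atomic_load_4 (&x, __ATOMIC_RELAXED);

   For a same-sized non-integral type such as float, the call is additionally
   wrapped in a VIEW_CONVERT_EXPR back to the user-visible type.  */
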
6731/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6732 operation as a normal volatile store. */
6733
6734static bool
6735expand_omp_atomic_store (basic_block load_bb, tree addr,
6736 tree loaded_val, tree stored_val, int index)
6737{
6738 enum built_in_function tmpbase;
6739 gimple_stmt_iterator gsi;
6740 basic_block store_bb = single_succ (load_bb);
6741 location_t loc;
6742 gimple *stmt;
6743 tree decl, call, type, itype;
6744 machine_mode imode;
6745 bool exchange;
6746
65f4b875 6747 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6748 stmt = gsi_stmt (gsi);
6749 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6750
6751 /* If the load value is needed, then this isn't a store but an exchange. */
6752 exchange = gimple_omp_atomic_need_value_p (stmt);
6753
65f4b875 6754 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6755 stmt = gsi_stmt (gsi);
6756 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6757 loc = gimple_location (stmt);
6758
6759 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6760 is smaller than word size, then expand_atomic_store assumes that the store
6761 is atomic. We could avoid the builtin entirely in this case. */
6762
6763 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6764 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6765 decl = builtin_decl_explicit (tmpbase);
6766 if (decl == NULL_TREE)
6767 return false;
6768
6769 type = TREE_TYPE (stored_val);
6770
6771 /* Dig out the type of the function's second argument. */
6772 itype = TREE_TYPE (decl);
6773 itype = TYPE_ARG_TYPES (itype);
6774 itype = TREE_CHAIN (itype);
6775 itype = TREE_VALUE (itype);
6776 imode = TYPE_MODE (itype);
6777
6778 if (exchange && !can_atomic_exchange_p (imode, true))
6779 return false;
6780
6781 if (!useless_type_conversion_p (itype, type))
6782 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
28567c40
JJ
6783 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6784 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6785 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
629b3d75
MJ
6786 if (exchange)
6787 {
6788 if (!useless_type_conversion_p (type, itype))
6789 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6790 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6791 }
6792
6793 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6794 gsi_remove (&gsi, true);
6795
6796 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
65f4b875 6797 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6798 gsi_remove (&gsi, true);
6799
6800 if (gimple_in_ssa_p (cfun))
6801 update_ssa (TODO_update_ssa_no_phi);
6802
6803 return true;
6804}
6805
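/* Sketches of the two shapes handled above, again assuming a 4-byte int and
   a relaxed memory order (illustrative only).  A plain atomic write becomes

     __atomic_store_4 (&x, newval, __ATOMIC_RELAXED);

   whereas a capturing write, where the previous value is needed, becomes an
   exchange:

     v = __atomic_exchange_4 (&x, newval, __ATOMIC_RELAXED);  */
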
6806/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6807 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6808 size of the data type, and thus usable to find the index of the builtin
6809 decl. Returns false if the expression is not of the proper form. */
6810
6811static bool
6812expand_omp_atomic_fetch_op (basic_block load_bb,
6813 tree addr, tree loaded_val,
6814 tree stored_val, int index)
6815{
6816 enum built_in_function oldbase, newbase, tmpbase;
6817 tree decl, itype, call;
6818 tree lhs, rhs;
6819 basic_block store_bb = single_succ (load_bb);
6820 gimple_stmt_iterator gsi;
6821 gimple *stmt;
6822 location_t loc;
6823 enum tree_code code;
6824 bool need_old, need_new;
6825 machine_mode imode;
629b3d75
MJ
6826
6827 /* We expect to find the following sequences:
6828
6829 load_bb:
6830 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6831
6832 store_bb:
6833 val = tmp OP something; (or: something OP tmp)
6834 GIMPLE_OMP_STORE (val)
6835
6836 ???FIXME: Allow a more flexible sequence.
6837 Perhaps use data flow to pick the statements.
6838
6839 */
6840
6841 gsi = gsi_after_labels (store_bb);
6842 stmt = gsi_stmt (gsi);
65f4b875
AO
6843 if (is_gimple_debug (stmt))
6844 {
6845 gsi_next_nondebug (&gsi);
6846 if (gsi_end_p (gsi))
6847 return false;
6848 stmt = gsi_stmt (gsi);
6849 }
629b3d75
MJ
6850 loc = gimple_location (stmt);
6851 if (!is_gimple_assign (stmt))
6852 return false;
65f4b875 6853 gsi_next_nondebug (&gsi);
629b3d75
MJ
6854 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6855 return false;
6856 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6857 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
28567c40
JJ
6858 enum omp_memory_order omo
6859 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6860 enum memmodel mo = omp_memory_order_to_memmodel (omo);
629b3d75
MJ
6861 gcc_checking_assert (!need_old || !need_new);
6862
6863 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6864 return false;
6865
6866 /* Check for one of the supported fetch-op operations. */
6867 code = gimple_assign_rhs_code (stmt);
6868 switch (code)
6869 {
6870 case PLUS_EXPR:
6871 case POINTER_PLUS_EXPR:
6872 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6873 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6874 break;
6875 case MINUS_EXPR:
6876 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6877 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6878 break;
6879 case BIT_AND_EXPR:
6880 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6881 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6882 break;
6883 case BIT_IOR_EXPR:
6884 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6885 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6886 break;
6887 case BIT_XOR_EXPR:
6888 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6889 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6890 break;
6891 default:
6892 return false;
6893 }
6894
6895 /* Make sure the expression is of the proper form. */
6896 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6897 rhs = gimple_assign_rhs2 (stmt);
6898 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6899 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6900 rhs = gimple_assign_rhs1 (stmt);
6901 else
6902 return false;
6903
6904 tmpbase = ((enum built_in_function)
6905 ((need_new ? newbase : oldbase) + index + 1));
6906 decl = builtin_decl_explicit (tmpbase);
6907 if (decl == NULL_TREE)
6908 return false;
6909 itype = TREE_TYPE (TREE_TYPE (decl));
6910 imode = TYPE_MODE (itype);
6911
6912 /* We could test all of the various optabs involved, but the fact of the
6913 matter is that (with the exception of i486 vs i586 and xadd) all targets
 6914	     matter is that (with the exception of i486 vs i586 and xadd) all targets
 6914	     that support any atomic operation optab also implement compare-and-swap.
6915 Let optabs.c take care of expanding any compare-and-swap loop. */
dc06356a 6916 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
629b3d75
MJ
6917 return false;
6918
65f4b875 6919 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6920 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6921
6922 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6923 It only requires that the operation happen atomically. Thus we can
6924 use the RELAXED memory model. */
6925 call = build_call_expr_loc (loc, decl, 3, addr,
6926 fold_convert_loc (loc, itype, rhs),
28567c40 6927 build_int_cst (NULL, mo));
629b3d75
MJ
6928
6929 if (need_old || need_new)
6930 {
6931 lhs = need_old ? loaded_val : stored_val;
6932 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6933 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6934 }
6935 else
6936 call = fold_convert_loc (loc, void_type_node, call);
6937 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6938 gsi_remove (&gsi, true);
6939
65f4b875 6940 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6941 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6942 gsi_remove (&gsi, true);
65f4b875 6943 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6944 stmt = gsi_stmt (gsi);
6945 gsi_remove (&gsi, true);
6946
6947 if (gimple_in_ssa_p (cfun))
6948 {
6949 release_defs (stmt);
6950 update_ssa (TODO_update_ssa_no_phi);
6951 }
6952
6953 return true;
6954}
6955
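/* An illustrative instance of the pattern matched above, for an atomic
   update adding N to a 4-byte int with no memory-order clause (so INDEX is 2
   and the value of the operation is not needed):

     GIMPLE_OMP_ATOMIC_LOAD  (tmp, &x)
     val = tmp + N;
     GIMPLE_OMP_ATOMIC_STORE (val)

   is replaced by a single call along the lines of

     __atomic_fetch_add_4 (&x, N, __ATOMIC_RELAXED);  */
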
6956/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6957
6958 oldval = *addr;
6959 repeat:
01914336 6960 newval = rhs; // with oldval replacing *addr in rhs
629b3d75
MJ
6961 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6962 if (oldval != newval)
6963 goto repeat;
6964
6965 INDEX is log2 of the size of the data type, and thus usable to find the
6966 index of the builtin decl. */
6967
6968static bool
6969expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6970 tree addr, tree loaded_val, tree stored_val,
6971 int index)
6972{
6973 tree loadedi, storedi, initial, new_storedi, old_vali;
b4e47472 6974 tree type, itype, cmpxchg, iaddr, atype;
629b3d75
MJ
6975 gimple_stmt_iterator si;
6976 basic_block loop_header = single_succ (load_bb);
6977 gimple *phi, *stmt;
6978 edge e;
6979 enum built_in_function fncode;
6980
6981 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6982 order to use the RELAXED memory model effectively. */
6983 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6984 + index + 1);
6985 cmpxchg = builtin_decl_explicit (fncode);
6986 if (cmpxchg == NULL_TREE)
6987 return false;
b4e47472
JJ
6988 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6989 atype = type;
629b3d75
MJ
6990 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6991
dc06356a
JJ
6992 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6993 || !can_atomic_load_p (TYPE_MODE (itype)))
629b3d75
MJ
6994 return false;
6995
6996 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
65f4b875 6997 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6998 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6999
7000 /* For floating-point values, we'll need to view-convert them to integers
7001 so that we can perform the atomic compare and swap. Simplify the
7002 following code by always setting up the "i"ntegral variables. */
7003 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7004 {
7005 tree iaddr_val;
7006
7007 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7008 true));
b4e47472 7009 atype = itype;
629b3d75
MJ
7010 iaddr_val
7011 = force_gimple_operand_gsi (&si,
7012 fold_convert (TREE_TYPE (iaddr), addr),
7013 false, NULL_TREE, true, GSI_SAME_STMT);
7014 stmt = gimple_build_assign (iaddr, iaddr_val);
7015 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7016 loadedi = create_tmp_var (itype);
7017 if (gimple_in_ssa_p (cfun))
7018 loadedi = make_ssa_name (loadedi);
7019 }
7020 else
7021 {
7022 iaddr = addr;
7023 loadedi = loaded_val;
7024 }
7025
7026 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7027 tree loaddecl = builtin_decl_explicit (fncode);
7028 if (loaddecl)
7029 initial
b4e47472 7030 = fold_convert (atype,
629b3d75
MJ
7031 build_call_expr (loaddecl, 2, iaddr,
7032 build_int_cst (NULL_TREE,
7033 MEMMODEL_RELAXED)));
7034 else
b4e47472
JJ
7035 {
7036 tree off
7037 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7038 true), 0);
7039 initial = build2 (MEM_REF, atype, iaddr, off);
7040 }
629b3d75
MJ
7041
7042 initial
7043 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7044 GSI_SAME_STMT);
7045
7046 /* Move the value to the LOADEDI temporary. */
7047 if (gimple_in_ssa_p (cfun))
7048 {
7049 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7050 phi = create_phi_node (loadedi, loop_header);
7051 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7052 initial);
7053 }
7054 else
7055 gsi_insert_before (&si,
7056 gimple_build_assign (loadedi, initial),
7057 GSI_SAME_STMT);
7058 if (loadedi != loaded_val)
7059 {
7060 gimple_stmt_iterator gsi2;
7061 tree x;
7062
7063 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7064 gsi2 = gsi_start_bb (loop_header);
7065 if (gimple_in_ssa_p (cfun))
7066 {
7067 gassign *stmt;
7068 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7069 true, GSI_SAME_STMT);
7070 stmt = gimple_build_assign (loaded_val, x);
7071 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7072 }
7073 else
7074 {
7075 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7076 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7077 true, GSI_SAME_STMT);
7078 }
7079 }
7080 gsi_remove (&si, true);
7081
65f4b875 7082 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
7083 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7084
7085 if (iaddr == addr)
7086 storedi = stored_val;
7087 else
01914336
MJ
7088 storedi
7089 = force_gimple_operand_gsi (&si,
7090 build1 (VIEW_CONVERT_EXPR, itype,
7091 stored_val), true, NULL_TREE, true,
7092 GSI_SAME_STMT);
629b3d75
MJ
7093
7094 /* Build the compare&swap statement. */
7095 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7096 new_storedi = force_gimple_operand_gsi (&si,
7097 fold_convert (TREE_TYPE (loadedi),
7098 new_storedi),
7099 true, NULL_TREE,
7100 true, GSI_SAME_STMT);
7101
7102 if (gimple_in_ssa_p (cfun))
7103 old_vali = loadedi;
7104 else
7105 {
7106 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7107 stmt = gimple_build_assign (old_vali, loadedi);
7108 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7109
7110 stmt = gimple_build_assign (loadedi, new_storedi);
7111 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7112 }
7113
7114 /* Note that we always perform the comparison as an integer, even for
7115 floating point. This allows the atomic operation to properly
7116 succeed even with NaNs and -0.0. */
01914336
MJ
7117 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7118 stmt = gimple_build_cond_empty (ne);
629b3d75
MJ
7119 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7120
7121 /* Update cfg. */
7122 e = single_succ_edge (store_bb);
7123 e->flags &= ~EDGE_FALLTHRU;
7124 e->flags |= EDGE_FALSE_VALUE;
357067f2
JH
7125 /* Expect no looping. */
7126 e->probability = profile_probability::guessed_always ();
629b3d75
MJ
7127
7128 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
357067f2 7129 e->probability = profile_probability::guessed_never ();
629b3d75
MJ
7130
7131 /* Copy the new value to loadedi (we already did that before the condition
7132 if we are not in SSA). */
7133 if (gimple_in_ssa_p (cfun))
7134 {
7135 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7136 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7137 }
7138
7139 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7140 gsi_remove (&si, true);
7141
7142 struct loop *loop = alloc_loop ();
7143 loop->header = loop_header;
7144 loop->latch = store_bb;
7145 add_loop (loop, loop_header->loop_father);
7146
7147 if (gimple_in_ssa_p (cfun))
7148 update_ssa (TODO_update_ssa_no_phi);
7149
7150 return true;
7151}
7152
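/* For a non-integral type such as a 4-byte float, the loop built above works
   on an integer view of the object.  Roughly, in pseudocode (VIEW_CONVERT
   stands for the VIEW_CONVERT_EXPRs emitted above):

     unsigned int *ip = (unsigned int *) &x;
     unsigned int prev, old = *ip;
     do
       {
	 prev = old;
	 float tmp = VIEW_CONVERT (float, prev) OP rhs;
	 old = __sync_val_compare_and_swap (ip, prev,
					    VIEW_CONVERT (unsigned int, tmp));
       }
     while (old != prev);

   The retry test is done on the integer bits, which is what makes NaNs and
   -0.0 work, as noted in the code above.  */
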
7153/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7154
01914336
MJ
7155 GOMP_atomic_start ();
7156 *addr = rhs;
7157 GOMP_atomic_end ();
629b3d75
MJ
7158
7159 The result is not globally atomic, but works so long as all parallel
7160 references are within #pragma omp atomic directives. According to
 7161	   responses received from omp@openmp.org, this appears to be within spec,
 7162	   which makes sense, since that's how several other compilers handle
7163 this situation as well.
7164 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7165 expanding. STORED_VAL is the operand of the matching
7166 GIMPLE_OMP_ATOMIC_STORE.
7167
7168 We replace
7169 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7170 loaded_val = *addr;
7171
7172 and replace
7173 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7174 *addr = stored_val;
7175*/
7176
7177static bool
7178expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7179 tree addr, tree loaded_val, tree stored_val)
7180{
7181 gimple_stmt_iterator si;
7182 gassign *stmt;
7183 tree t;
7184
65f4b875 7185 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
7186 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7187
7188 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7189 t = build_call_expr (t, 0);
7190 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7191
b4e47472
JJ
7192 tree mem = build_simple_mem_ref (addr);
7193 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7194 TREE_OPERAND (mem, 1)
7195 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7196 true),
7197 TREE_OPERAND (mem, 1));
7198 stmt = gimple_build_assign (loaded_val, mem);
629b3d75
MJ
7199 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7200 gsi_remove (&si, true);
7201
65f4b875 7202 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
7203 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7204
b4e47472 7205 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
629b3d75
MJ
7206 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7207
7208 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7209 t = build_call_expr (t, 0);
7210 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7211 gsi_remove (&si, true);
7212
7213 if (gimple_in_ssa_p (cfun))
7214 update_ssa (TODO_update_ssa_no_phi);
7215 return true;
7216}
7217
 7218	/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
01914336 7219	   using expand_omp_atomic_fetch_op.  If that fails, we try to
629b3d75
MJ
7220 call expand_omp_atomic_pipeline, and if it fails too, the
7221 ultimate fallback is wrapping the operation in a mutex
7222 (expand_omp_atomic_mutex). REGION is the atomic region built
7223 by build_omp_regions_1(). */
7224
7225static void
7226expand_omp_atomic (struct omp_region *region)
7227{
7228 basic_block load_bb = region->entry, store_bb = region->exit;
7229 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7230 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7231 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7232 tree addr = gimple_omp_atomic_load_rhs (load);
7233 tree stored_val = gimple_omp_atomic_store_val (store);
b4e47472 7234 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
629b3d75
MJ
7235 HOST_WIDE_INT index;
7236
7237 /* Make sure the type is one of the supported sizes. */
7238 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7239 index = exact_log2 (index);
7240 if (index >= 0 && index <= 4)
7241 {
7242 unsigned int align = TYPE_ALIGN_UNIT (type);
7243
7244 /* __sync builtins require strict data alignment. */
7245 if (exact_log2 (align) >= index)
7246 {
7247 /* Atomic load. */
3bd8f481 7248 scalar_mode smode;
629b3d75 7249 if (loaded_val == stored_val
3bd8f481
RS
7250 && (is_int_mode (TYPE_MODE (type), &smode)
7251 || is_float_mode (TYPE_MODE (type), &smode))
7252 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
7253 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7254 return;
7255
7256 /* Atomic store. */
3bd8f481
RS
7257 if ((is_int_mode (TYPE_MODE (type), &smode)
7258 || is_float_mode (TYPE_MODE (type), &smode))
7259 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
7260 && store_bb == single_succ (load_bb)
7261 && first_stmt (store_bb) == store
7262 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7263 stored_val, index))
7264 return;
7265
7266 /* When possible, use specialized atomic update functions. */
7267 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7268 && store_bb == single_succ (load_bb)
7269 && expand_omp_atomic_fetch_op (load_bb, addr,
7270 loaded_val, stored_val, index))
7271 return;
7272
7273 /* If we don't have specialized __sync builtins, try and implement
7274 as a compare and swap loop. */
7275 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7276 loaded_val, stored_val, index))
7277 return;
7278 }
7279 }
7280
7281 /* The ultimate fallback is wrapping the operation in a mutex. */
7282 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7283}
7284
7285/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7286 at REGION_EXIT. */
7287
7288static void
7289mark_loops_in_oacc_kernels_region (basic_block region_entry,
7290 basic_block region_exit)
7291{
7292 struct loop *outer = region_entry->loop_father;
7293 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7294
7295 /* Don't parallelize the kernels region if it contains more than one outer
7296 loop. */
7297 unsigned int nr_outer_loops = 0;
7298 struct loop *single_outer = NULL;
7299 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7300 {
7301 gcc_assert (loop_outer (loop) == outer);
7302
7303 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7304 continue;
7305
7306 if (region_exit != NULL
7307 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7308 continue;
7309
7310 nr_outer_loops++;
7311 single_outer = loop;
7312 }
7313 if (nr_outer_loops != 1)
7314 return;
7315
01914336
MJ
7316 for (struct loop *loop = single_outer->inner;
7317 loop != NULL;
7318 loop = loop->inner)
629b3d75
MJ
7319 if (loop->next)
7320 return;
7321
7322 /* Mark the loops in the region. */
7323 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7324 loop->in_oacc_kernels_region = true;
7325}
7326
 7327	/* Types used to pass grid and work-group sizes to kernel invocation.  */
7328
7329struct GTY(()) grid_launch_attributes_trees
7330{
7331 tree kernel_dim_array_type;
7332 tree kernel_lattrs_dimnum_decl;
7333 tree kernel_lattrs_grid_decl;
7334 tree kernel_lattrs_group_decl;
7335 tree kernel_launch_attributes_type;
7336};
7337
7338static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7339
7340/* Create types used to pass kernel launch attributes to target. */
7341
7342static void
7343grid_create_kernel_launch_attr_types (void)
7344{
7345 if (grid_attr_trees)
7346 return;
7347 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7348
7349 tree dim_arr_index_type
7350 = build_index_type (build_int_cst (integer_type_node, 2));
7351 grid_attr_trees->kernel_dim_array_type
7352 = build_array_type (uint32_type_node, dim_arr_index_type);
7353
7354 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7355 grid_attr_trees->kernel_lattrs_dimnum_decl
7356 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7357 uint32_type_node);
7358 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7359
7360 grid_attr_trees->kernel_lattrs_grid_decl
7361 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7362 grid_attr_trees->kernel_dim_array_type);
7363 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7364 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7365 grid_attr_trees->kernel_lattrs_group_decl
7366 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7367 grid_attr_trees->kernel_dim_array_type);
7368 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7369 = grid_attr_trees->kernel_lattrs_grid_decl;
7370 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7371 "__gomp_kernel_launch_attributes",
7372 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7373}
7374
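/* Written as a plain C declaration, the record built above corresponds
   roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   (the index type above allows indices 0..2, hence three elements per
   array).  */
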
7375/* Insert before the current statement in GSI a store of VALUE to INDEX of
7376 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7377 of type uint32_type_node. */
7378
7379static void
7380grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7381 tree fld_decl, int index, tree value)
7382{
7383 tree ref = build4 (ARRAY_REF, uint32_type_node,
7384 build3 (COMPONENT_REF,
7385 grid_attr_trees->kernel_dim_array_type,
7386 range_var, fld_decl, NULL_TREE),
7387 build_int_cst (integer_type_node, index),
7388 NULL_TREE, NULL_TREE);
7389 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7390}
7391
7392/* Return a tree representation of a pointer to a structure with grid and
7393 work-group size information. Statements filling that information will be
 7394	   inserted before GSI; TGT_STMT is the target statement which has the
7395 necessary information in it. */
7396
7397static tree
7398grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7399 gomp_target *tgt_stmt)
7400{
7401 grid_create_kernel_launch_attr_types ();
7402 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7403 "__kernel_launch_attrs");
7404
7405 unsigned max_dim = 0;
7406 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7407 clause;
7408 clause = OMP_CLAUSE_CHAIN (clause))
7409 {
7410 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7411 continue;
7412
7413 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7414 max_dim = MAX (dim, max_dim);
7415
7416 grid_insert_store_range_dim (gsi, lattrs,
7417 grid_attr_trees->kernel_lattrs_grid_decl,
7418 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7419 grid_insert_store_range_dim (gsi, lattrs,
7420 grid_attr_trees->kernel_lattrs_group_decl,
7421 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7422 }
7423
7424 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7425 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7426 gcc_checking_assert (max_dim <= 2);
7427 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7428 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7429 GSI_SAME_STMT);
7430 TREE_ADDRESSABLE (lattrs) = 1;
7431 return build_fold_addr_expr (lattrs);
7432}
7433
7434/* Build target argument identifier from the DEVICE identifier, value
7435 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7436
7437static tree
7438get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7439{
7440 tree t = build_int_cst (integer_type_node, device);
7441 if (subseqent_param)
7442 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7443 build_int_cst (integer_type_node,
7444 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7445 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7446 build_int_cst (integer_type_node, id));
7447 return t;
7448}
7449
 7450	/* Like above but return it in a type that can be directly stored as an element
7451 of the argument array. */
7452
7453static tree
7454get_target_argument_identifier (int device, bool subseqent_param, int id)
7455{
7456 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7457 return fold_convert (ptr_type_node, t);
7458}
7459
7460/* Return a target argument consisting of DEVICE identifier, value identifier
7461 ID, and the actual VALUE. */
7462
7463static tree
7464get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7465 tree value)
7466{
7467 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7468 fold_convert (integer_type_node, value),
7469 build_int_cst (unsigned_type_node,
7470 GOMP_TARGET_ARG_VALUE_SHIFT));
7471 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7472 get_target_argument_identifier_1 (device, false, id));
7473 t = fold_convert (ptr_type_node, t);
7474 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7475}
7476
7477/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
 7478	   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7479 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7480 arguments. */
7481
7482static void
7483push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7484 int id, tree value, vec <tree> *args)
7485{
7486 if (tree_fits_shwi_p (value)
7487 && tree_to_shwi (value) > -(1 << 15)
7488 && tree_to_shwi (value) < (1 << 15))
7489 args->quick_push (get_target_argument_value (gsi, device, id, value));
7490 else
7491 {
7492 args->quick_push (get_target_argument_identifier (device, true, id));
7493 value = fold_convert (ptr_type_node, value);
7494 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7495 GSI_SAME_STMT);
7496 args->quick_push (value);
7497 }
7498}
7499
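/* A sketch of the encoding implemented above (illustrative; the exact bit
   positions are given by the GOMP_TARGET_ARG_* macros in gomp-constants.h).
   Each element of the argument array is one pointer-sized word.  An
   identifier word is roughly

     device | (subsequent_param ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0) | id

   and, when the value fits into the signed 16-bit range checked above, it is
   merged into the same word as

     identifier | (value << GOMP_TARGET_ARG_VALUE_SHIFT)

   otherwise the identifier is pushed with SUBSEQUENT_PARAM set and the value
   follows as a separate element.  */
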
01914336 7500/* Create an array of arguments that is then passed to GOMP_target. */
629b3d75
MJ
7501
7502static tree
7503get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7504{
7505 auto_vec <tree, 6> args;
7506 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7507 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7508 if (c)
7509 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7510 else
7511 t = integer_minus_one_node;
7512 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7513 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7514
7515 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7516 if (c)
7517 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7518 else
7519 t = integer_minus_one_node;
7520 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7521 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7522 &args);
7523
7524 /* Add HSA-specific grid sizes, if available. */
7525 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7526 OMP_CLAUSE__GRIDDIM_))
7527 {
01914336
MJ
7528 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7529 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
629b3d75
MJ
7530 args.quick_push (t);
7531 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7532 }
7533
7534 /* Produce more, perhaps device specific, arguments here. */
7535
7536 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7537 args.length () + 1),
7538 ".omp_target_args");
7539 for (unsigned i = 0; i < args.length (); i++)
7540 {
7541 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7542 build_int_cst (integer_type_node, i),
7543 NULL_TREE, NULL_TREE);
7544 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7545 GSI_SAME_STMT);
7546 }
7547 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7548 build_int_cst (integer_type_node, args.length ()),
7549 NULL_TREE, NULL_TREE);
7550 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7551 GSI_SAME_STMT);
7552 TREE_ADDRESSABLE (argarray) = 1;
7553 return build_fold_addr_expr (argarray);
7554}
7555
7556/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7557
7558static void
7559expand_omp_target (struct omp_region *region)
7560{
7561 basic_block entry_bb, exit_bb, new_bb;
7562 struct function *child_cfun;
7563 tree child_fn, block, t;
7564 gimple_stmt_iterator gsi;
7565 gomp_target *entry_stmt;
7566 gimple *stmt;
7567 edge e;
7568 bool offloaded, data_region;
7569
7570 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7571 new_bb = region->entry;
7572
7573 offloaded = is_gimple_omp_offloaded (entry_stmt);
7574 switch (gimple_omp_target_kind (entry_stmt))
7575 {
7576 case GF_OMP_TARGET_KIND_REGION:
7577 case GF_OMP_TARGET_KIND_UPDATE:
7578 case GF_OMP_TARGET_KIND_ENTER_DATA:
7579 case GF_OMP_TARGET_KIND_EXIT_DATA:
7580 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7581 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7582 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7583 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7584 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7585 data_region = false;
7586 break;
7587 case GF_OMP_TARGET_KIND_DATA:
7588 case GF_OMP_TARGET_KIND_OACC_DATA:
7589 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7590 data_region = true;
7591 break;
7592 default:
7593 gcc_unreachable ();
7594 }
7595
7596 child_fn = NULL_TREE;
7597 child_cfun = NULL;
7598 if (offloaded)
7599 {
7600 child_fn = gimple_omp_target_child_fn (entry_stmt);
7601 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7602 }
7603
7604 /* Supported by expand_omp_taskreg, but not here. */
7605 if (child_cfun != NULL)
7606 gcc_checking_assert (!child_cfun->cfg);
7607 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7608
7609 entry_bb = region->entry;
7610 exit_bb = region->exit;
7611
7612 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
25651634
TS
7613 {
7614 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7615
7616 /* Further down, both OpenACC kernels and OpenACC parallel constructs
 7617	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7618 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7619 DECL_ATTRIBUTES (child_fn)
7620 = tree_cons (get_identifier ("oacc kernels"),
7621 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7622 }
629b3d75
MJ
7623
7624 if (offloaded)
7625 {
7626 unsigned srcidx, dstidx, num;
7627
7628 /* If the offloading region needs data sent from the parent
7629 function, then the very first statement (except possible
7630 tree profile counter updates) of the offloading body
7631 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7632 &.OMP_DATA_O is passed as an argument to the child function,
7633 we need to replace it with the argument as seen by the child
7634 function.
7635
7636 In most cases, this will end up being the identity assignment
7637 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7638 a function call that has been inlined, the original PARM_DECL
7639 .OMP_DATA_I may have been converted into a different local
7640 variable. In which case, we need to keep the assignment. */
7641 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7642 if (data_arg)
7643 {
7644 basic_block entry_succ_bb = single_succ (entry_bb);
7645 gimple_stmt_iterator gsi;
7646 tree arg;
7647 gimple *tgtcopy_stmt = NULL;
7648 tree sender = TREE_VEC_ELT (data_arg, 0);
7649
7650 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7651 {
7652 gcc_assert (!gsi_end_p (gsi));
7653 stmt = gsi_stmt (gsi);
7654 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7655 continue;
7656
7657 if (gimple_num_ops (stmt) == 2)
7658 {
7659 tree arg = gimple_assign_rhs1 (stmt);
7660
7661 /* We're ignoring the subcode because we're
7662 effectively doing a STRIP_NOPS. */
7663
7664 if (TREE_CODE (arg) == ADDR_EXPR
7665 && TREE_OPERAND (arg, 0) == sender)
7666 {
7667 tgtcopy_stmt = stmt;
7668 break;
7669 }
7670 }
7671 }
7672
7673 gcc_assert (tgtcopy_stmt != NULL);
7674 arg = DECL_ARGUMENTS (child_fn);
7675
7676 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7677 gsi_remove (&gsi, true);
7678 }
7679
7680 /* Declare local variables needed in CHILD_CFUN. */
7681 block = DECL_INITIAL (child_fn);
7682 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7683 /* The gimplifier could record temporaries in the offloading block
 7684	 rather than in the containing function's local_decls chain,
7685 which would mean cgraph missed finalizing them. Do it now. */
7686 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7687 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7688 varpool_node::finalize_decl (t);
7689 DECL_SAVED_TREE (child_fn) = NULL;
7690 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7691 gimple_set_body (child_fn, NULL);
7692 TREE_USED (block) = 1;
7693
7694 /* Reset DECL_CONTEXT on function arguments. */
7695 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7696 DECL_CONTEXT (t) = child_fn;
7697
7698 /* Split ENTRY_BB at GIMPLE_*,
7699 so that it can be moved to the child function. */
65f4b875 7700 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7701 stmt = gsi_stmt (gsi);
7702 gcc_assert (stmt
7703 && gimple_code (stmt) == gimple_code (entry_stmt));
7704 e = split_block (entry_bb, stmt);
7705 gsi_remove (&gsi, true);
7706 entry_bb = e->dest;
7707 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7708
7709 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7710 if (exit_bb)
7711 {
65f4b875 7712 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7713 gcc_assert (!gsi_end_p (gsi)
7714 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7715 stmt = gimple_build_return (NULL);
7716 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7717 gsi_remove (&gsi, true);
7718 }
7719
7720 /* Move the offloading region into CHILD_CFUN. */
7721
7722 block = gimple_block (entry_stmt);
7723
7724 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7725 if (exit_bb)
7726 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7727 /* When the OMP expansion process cannot guarantee an up-to-date
 7728	 loop tree, arrange for the child function to fix up loops.  */
7729 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7730 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7731
7732 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7733 num = vec_safe_length (child_cfun->local_decls);
7734 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7735 {
7736 t = (*child_cfun->local_decls)[srcidx];
7737 if (DECL_CONTEXT (t) == cfun->decl)
7738 continue;
7739 if (srcidx != dstidx)
7740 (*child_cfun->local_decls)[dstidx] = t;
7741 dstidx++;
7742 }
7743 if (dstidx != num)
7744 vec_safe_truncate (child_cfun->local_decls, dstidx);
7745
7746 /* Inform the callgraph about the new function. */
7747 child_cfun->curr_properties = cfun->curr_properties;
7748 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7749 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7750 cgraph_node *node = cgraph_node::get_create (child_fn);
7751 node->parallelized_function = 1;
7752 cgraph_node::add_new_function (child_fn, true);
7753
7754 /* Add the new function to the offload table. */
7755 if (ENABLE_OFFLOADING)
60bf575c
TV
7756 {
7757 if (in_lto_p)
7758 DECL_PRESERVE_P (child_fn) = 1;
7759 vec_safe_push (offload_funcs, child_fn);
7760 }
629b3d75
MJ
7761
7762 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7763 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7764
7765 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7766 fixed in a following pass. */
7767 push_cfun (child_cfun);
7768 if (need_asm)
9579db35 7769 assign_assembler_name_if_needed (child_fn);
629b3d75
MJ
7770 cgraph_edge::rebuild_edges ();
7771
7772 /* Some EH regions might become dead, see PR34608. If
7773 pass_cleanup_cfg isn't the first pass to happen with the
7774 new child, these dead EH edges might cause problems.
7775 Clean them up now. */
7776 if (flag_exceptions)
7777 {
7778 basic_block bb;
7779 bool changed = false;
7780
7781 FOR_EACH_BB_FN (bb, cfun)
7782 changed |= gimple_purge_dead_eh_edges (bb);
7783 if (changed)
7784 cleanup_tree_cfg ();
7785 }
7786 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7787 verify_loop_structure ();
7788 pop_cfun ();
7789
7790 if (dump_file && !gimple_in_ssa_p (cfun))
7791 {
7792 omp_any_child_fn_dumped = true;
7793 dump_function_header (dump_file, child_fn, dump_flags);
7794 dump_function_to_file (child_fn, dump_file, dump_flags);
7795 }
4ccc4e30
JJ
7796
7797 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
629b3d75
MJ
7798 }
7799
7800 /* Emit a library call to launch the offloading region, or do data
7801 transfers. */
59d5960c 7802 tree t1, t2, t3, t4, depend, c, clauses;
629b3d75 7803 enum built_in_function start_ix;
629b3d75 7804 unsigned int flags_i = 0;
629b3d75
MJ
7805
7806 switch (gimple_omp_target_kind (entry_stmt))
7807 {
7808 case GF_OMP_TARGET_KIND_REGION:
7809 start_ix = BUILT_IN_GOMP_TARGET;
7810 break;
7811 case GF_OMP_TARGET_KIND_DATA:
7812 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7813 break;
7814 case GF_OMP_TARGET_KIND_UPDATE:
7815 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7816 break;
7817 case GF_OMP_TARGET_KIND_ENTER_DATA:
7818 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7819 break;
7820 case GF_OMP_TARGET_KIND_EXIT_DATA:
7821 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7822 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7823 break;
7824 case GF_OMP_TARGET_KIND_OACC_KERNELS:
629b3d75
MJ
7825 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7826 start_ix = BUILT_IN_GOACC_PARALLEL;
7827 break;
7828 case GF_OMP_TARGET_KIND_OACC_DATA:
7829 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7830 start_ix = BUILT_IN_GOACC_DATA_START;
7831 break;
7832 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7833 start_ix = BUILT_IN_GOACC_UPDATE;
7834 break;
7835 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7836 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7837 break;
7838 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7839 start_ix = BUILT_IN_GOACC_DECLARE;
7840 break;
7841 default:
7842 gcc_unreachable ();
7843 }
7844
7845 clauses = gimple_omp_target_clauses (entry_stmt);
7846
59d5960c
TS
7847 tree device = NULL_TREE;
7848 location_t device_loc = UNKNOWN_LOCATION;
7849 tree goacc_flags = NULL_TREE;
7850 if (is_gimple_omp_oacc (entry_stmt))
629b3d75 7851 {
59d5960c
TS
7852 /* By default, no GOACC_FLAGs are set. */
7853 goacc_flags = integer_zero_node;
629b3d75
MJ
7854 }
7855 else
59d5960c
TS
7856 {
7857 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7858 if (c)
7859 {
7860 device = OMP_CLAUSE_DEVICE_ID (c);
7861 device_loc = OMP_CLAUSE_LOCATION (c);
7862 }
7863 else
7864 {
7865 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7866 library choose). */
7867 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7868 device_loc = gimple_location (entry_stmt);
7869 }
629b3d75 7870
59d5960c
TS
7871 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7872 if (c)
7873 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7874 }
629b3d75 7875
59d5960c
TS
7876 /* By default, there is no conditional. */
7877 tree cond = NULL_TREE;
7878 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7879 if (c)
7880 cond = OMP_CLAUSE_IF_EXPR (c);
7881 /* If we found the clause 'if (cond)', build:
7882 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
7883 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
629b3d75
MJ
7884 if (cond)
7885 {
59d5960c
TS
7886 tree *tp;
7887 if (is_gimple_omp_oacc (entry_stmt))
7888 tp = &goacc_flags;
7889 else
7890 {
7891 /* Ensure 'device' is of the correct type. */
7892 device = fold_convert_loc (device_loc, integer_type_node, device);
7893
7894 tp = &device;
7895 }
7896
629b3d75
MJ
7897 cond = gimple_boolify (cond);
7898
7899 basic_block cond_bb, then_bb, else_bb;
7900 edge e;
7901 tree tmp_var;
7902
59d5960c 7903 tmp_var = create_tmp_var (TREE_TYPE (*tp));
629b3d75
MJ
7904 if (offloaded)
7905 e = split_block_after_labels (new_bb);
7906 else
7907 {
65f4b875 7908 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7909 gsi_prev (&gsi);
7910 e = split_block (new_bb, gsi_stmt (gsi));
7911 }
7912 cond_bb = e->src;
7913 new_bb = e->dest;
7914 remove_edge (e);
7915
7916 then_bb = create_empty_bb (cond_bb);
7917 else_bb = create_empty_bb (then_bb);
7918 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7919 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7920
7921 stmt = gimple_build_cond_empty (cond);
7922 gsi = gsi_last_bb (cond_bb);
7923 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7924
7925 gsi = gsi_start_bb (then_bb);
59d5960c 7926 stmt = gimple_build_assign (tmp_var, *tp);
629b3d75
MJ
7927 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7928
7929 gsi = gsi_start_bb (else_bb);
59d5960c
TS
7930 if (is_gimple_omp_oacc (entry_stmt))
7931 stmt = gimple_build_assign (tmp_var,
7932 BIT_IOR_EXPR,
7933 *tp,
7934 build_int_cst (integer_type_node,
7935 GOACC_FLAG_HOST_FALLBACK));
7936 else
7937 stmt = gimple_build_assign (tmp_var,
7938 build_int_cst (integer_type_node,
7939 GOMP_DEVICE_HOST_FALLBACK));
629b3d75
MJ
7940 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7941
7942 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7943 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7944 add_bb_to_loop (then_bb, cond_bb->loop_father);
7945 add_bb_to_loop (else_bb, cond_bb->loop_father);
7946 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7947 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7948
59d5960c
TS
7949 *tp = tmp_var;
7950
65f4b875 7951 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7952 }
7953 else
7954 {
65f4b875 7955 gsi = gsi_last_nondebug_bb (new_bb);
59d5960c
TS
7956
7957 if (device != NULL_TREE)
7958 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7959 true, GSI_SAME_STMT);
629b3d75
MJ
7960 }
7961
7962 t = gimple_omp_target_data_arg (entry_stmt);
7963 if (t == NULL)
7964 {
7965 t1 = size_zero_node;
7966 t2 = build_zero_cst (ptr_type_node);
7967 t3 = t2;
7968 t4 = t2;
7969 }
7970 else
7971 {
7972 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7973 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7974 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7975 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7976 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7977 }
7978
7979 gimple *g;
7980 bool tagging = false;
7981 /* The maximum number used by any start_ix, without varargs. */
7982 auto_vec<tree, 11> args;
59d5960c
TS
7983 if (is_gimple_omp_oacc (entry_stmt))
7984 {
7985 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
7986 TREE_TYPE (goacc_flags), goacc_flags);
7987 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
7988 NULL_TREE, true,
7989 GSI_SAME_STMT);
7990 args.quick_push (goacc_flags_m);
7991 }
7992 else
7993 args.quick_push (device);
629b3d75
MJ
7994 if (offloaded)
7995 args.quick_push (build_fold_addr_expr (child_fn));
7996 args.quick_push (t1);
7997 args.quick_push (t2);
7998 args.quick_push (t3);
7999 args.quick_push (t4);
8000 switch (start_ix)
8001 {
8002 case BUILT_IN_GOACC_DATA_START:
8003 case BUILT_IN_GOACC_DECLARE:
8004 case BUILT_IN_GOMP_TARGET_DATA:
8005 break;
8006 case BUILT_IN_GOMP_TARGET:
8007 case BUILT_IN_GOMP_TARGET_UPDATE:
8008 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8009 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8010 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8011 if (c)
8012 depend = OMP_CLAUSE_DECL (c);
8013 else
8014 depend = build_int_cst (ptr_type_node, 0);
8015 args.quick_push (depend);
8016 if (start_ix == BUILT_IN_GOMP_TARGET)
8017 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8018 break;
8019 case BUILT_IN_GOACC_PARALLEL:
25651634
TS
8020 oacc_set_fn_attrib (child_fn, clauses, &args);
8021 tagging = true;
629b3d75
MJ
8022 /* FALLTHRU */
8023 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8024 case BUILT_IN_GOACC_UPDATE:
8025 {
8026 tree t_async = NULL_TREE;
8027
8028 /* If present, use the value specified by the respective
 8029	    clause, making sure that it is of the correct type.  */
8030 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8031 if (c)
8032 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8033 integer_type_node,
8034 OMP_CLAUSE_ASYNC_EXPR (c));
8035 else if (!tagging)
8036 /* Default values for t_async. */
8037 t_async = fold_convert_loc (gimple_location (entry_stmt),
8038 integer_type_node,
8039 build_int_cst (integer_type_node,
8040 GOMP_ASYNC_SYNC));
8041 if (tagging && t_async)
8042 {
8043 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8044
8045 if (TREE_CODE (t_async) == INTEGER_CST)
8046 {
 8047		      /* See if we can pack the async arg into the tag's
8048 operand. */
8049 i_async = TREE_INT_CST_LOW (t_async);
8050 if (i_async < GOMP_LAUNCH_OP_MAX)
8051 t_async = NULL_TREE;
8052 else
8053 i_async = GOMP_LAUNCH_OP_MAX;
8054 }
8055 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8056 i_async));
8057 }
8058 if (t_async)
8059 args.safe_push (t_async);
8060
8061 /* Save the argument index, and ... */
8062 unsigned t_wait_idx = args.length ();
8063 unsigned num_waits = 0;
8064 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8065 if (!tagging || c)
8066 /* ... push a placeholder. */
8067 args.safe_push (integer_zero_node);
8068
8069 for (; c; c = OMP_CLAUSE_CHAIN (c))
8070 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8071 {
8072 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8073 integer_type_node,
8074 OMP_CLAUSE_WAIT_EXPR (c)));
8075 num_waits++;
8076 }
8077
8078 if (!tagging || num_waits)
8079 {
8080 tree len;
8081
8082 /* Now that we know the number, update the placeholder. */
8083 if (tagging)
8084 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8085 else
8086 len = build_int_cst (integer_type_node, num_waits);
8087 len = fold_convert_loc (gimple_location (entry_stmt),
8088 unsigned_type_node, len);
8089 args[t_wait_idx] = len;
8090 }
8091 }
8092 break;
8093 default:
8094 gcc_unreachable ();
8095 }
8096 if (tagging)
8097 /* Push terminal marker - zero. */
8098 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8099
8100 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8101 gimple_set_location (g, gimple_location (entry_stmt));
8102 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8103 if (!offloaded)
8104 {
8105 g = gsi_stmt (gsi);
8106 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8107 gsi_remove (&gsi, true);
8108 }
8109 if (data_region && region->exit)
8110 {
65f4b875 8111 gsi = gsi_last_nondebug_bb (region->exit);
629b3d75
MJ
8112 g = gsi_stmt (gsi);
8113 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8114 gsi_remove (&gsi, true);
8115 }
8116}
8117
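/* For an offloaded target region, the library call emitted above has roughly
   this shape (illustrative; the argument names follow the locals above, and
   the data-only and OpenACC kinds use the corresponding GOMP_target_data /
   GOACC_* entry points with different argument lists):

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
		      flags, depend, args);

   where mapnum/hostaddrs/sizes/kinds come from T1..T4 computed above.  */
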
 8118	/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
8119 iteration variable derived from the thread number. INTRA_GROUP means this
8120 is an expansion of a loop iterating over work-items within a separate
01914336 8121 iteration over groups. */
629b3d75
MJ
8122
8123static void
8124grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8125{
8126 gimple_stmt_iterator gsi;
8127 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8128 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8129 == GF_OMP_FOR_KIND_GRID_LOOP);
8130 size_t collapse = gimple_omp_for_collapse (for_stmt);
8131 struct omp_for_data_loop *loops
8132 = XALLOCAVEC (struct omp_for_data_loop,
01914336 8133 gimple_omp_for_collapse (for_stmt));
629b3d75
MJ
8134 struct omp_for_data fd;
8135
8136 remove_edge (BRANCH_EDGE (kfor->entry));
8137 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8138
8139 gcc_assert (kfor->cont);
8140 omp_extract_for_data (for_stmt, &fd, loops);
8141
8142 gsi = gsi_start_bb (body_bb);
8143
8144 for (size_t dim = 0; dim < collapse; dim++)
8145 {
8146 tree type, itype;
8147 itype = type = TREE_TYPE (fd.loops[dim].v);
8148 if (POINTER_TYPE_P (type))
8149 itype = signed_type_for (type);
8150
8151 tree n1 = fd.loops[dim].n1;
8152 tree step = fd.loops[dim].step;
8153 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8154 true, NULL_TREE, true, GSI_SAME_STMT);
8155 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8156 true, NULL_TREE, true, GSI_SAME_STMT);
8157 tree threadid;
8158 if (gimple_omp_for_grid_group_iter (for_stmt))
8159 {
8160 gcc_checking_assert (!intra_group);
8161 threadid = build_call_expr (builtin_decl_explicit
8162 (BUILT_IN_HSA_WORKGROUPID), 1,
8163 build_int_cstu (unsigned_type_node, dim));
8164 }
8165 else if (intra_group)
8166 threadid = build_call_expr (builtin_decl_explicit
8167 (BUILT_IN_HSA_WORKITEMID), 1,
8168 build_int_cstu (unsigned_type_node, dim));
8169 else
8170 threadid = build_call_expr (builtin_decl_explicit
8171 (BUILT_IN_HSA_WORKITEMABSID), 1,
8172 build_int_cstu (unsigned_type_node, dim));
8173 threadid = fold_convert (itype, threadid);
8174 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8175 true, GSI_SAME_STMT);
8176
8177 tree startvar = fd.loops[dim].v;
8178 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8179 if (POINTER_TYPE_P (type))
8180 t = fold_build_pointer_plus (n1, t);
8181 else
8182 t = fold_build2 (PLUS_EXPR, type, t, n1);
8183 t = fold_convert (type, t);
8184 t = force_gimple_operand_gsi (&gsi, t,
8185 DECL_P (startvar)
8186 && TREE_ADDRESSABLE (startvar),
8187 NULL_TREE, true, GSI_SAME_STMT);
8188 gassign *assign_stmt = gimple_build_assign (startvar, t);
8189 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8190 }
8191  /* Remove the omp for statement.  */
8192  gsi = gsi_last_nondebug_bb (kfor->entry);
8193 gsi_remove (&gsi, true);
8194
8195 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8196  gsi = gsi_last_nondebug_bb (kfor->cont);
8197 gcc_assert (!gsi_end_p (gsi)
8198 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8199 gsi_remove (&gsi, true);
8200
8201 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8202  gsi = gsi_last_nondebug_bb (kfor->exit);
8203 gcc_assert (!gsi_end_p (gsi)
8204 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8205 if (intra_group)
8206 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8207 gsi_remove (&gsi, true);
8208
8209 /* Fixup the much simpler CFG. */
8210 remove_edge (find_edge (kfor->cont, body_bb));
8211
8212 if (kfor->cont != body_bb)
8213 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8214 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8215}
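/* For illustration, with a single gridified dimension the effect of
   grid_expand_omp_for_loop above is roughly to turn

     for (V = N1; ...; V += STEP)
       BODY;

   into the straight-line sequence

     V = N1 + <HSA work-item or work-group id for dim 0> * STEP;
     BODY;

   i.e. the thread id supplies the iteration number, and the
   GIMPLE_OMP_CONTINUE statement together with the loopback edge is
   removed.  */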
8216
8217/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8218 argument_decls. */
8219
8220struct grid_arg_decl_map
8221{
8222 tree old_arg;
8223 tree new_arg;
8224};
8225
8226/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8227   pertaining to the kernel function.  */
8228
8229static tree
8230grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8231{
8232 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8233 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8234 tree t = *tp;
8235
8236 if (t == adm->old_arg)
8237 *tp = adm->new_arg;
8238 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8239 return NULL_TREE;
8240}
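/* This callback is meant to be driven by walk_gimple_op with a
   walk_stmt_info whose info field points to a grid_arg_decl_map; see the
   FOR_EACH_BB_FN loop at the end of grid_expand_target_grid_body below.  */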
8241
8242/* If TARGET region contains a kernel body for loop, remove its region from the
8243   TARGET and expand it in HSA gridified kernel fashion.  */
8244
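/* For illustration (a hypothetical user construct), a target region such as

     #pragma omp target teams distribute parallel for
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   that earlier gridification marked with an artificial _griddim_ clause is
   handled here: its GIMPLE_OMP_GRID_BODY region is split out into a
   separate function named after the original child function with a
   "kernel" suffix and registered as an HSA kernel.  */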
8245static void
8246grid_expand_target_grid_body (struct omp_region *target)
8247{
8248 if (!hsa_gen_requested_p ())
8249 return;
8250
8251 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8252 struct omp_region **pp;
8253
8254 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8255 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8256 break;
8257
8258 struct omp_region *gpukernel = *pp;
8259
8260 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8261 if (!gpukernel)
8262 {
8263 /* HSA cannot handle OACC stuff. */
8264 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8265 return;
8266 gcc_checking_assert (orig_child_fndecl);
8267 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8268 OMP_CLAUSE__GRIDDIM_));
8269 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8270
8271 hsa_register_kernel (n);
8272 return;
8273 }
8274
8275 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8276 OMP_CLAUSE__GRIDDIM_));
8277  tree inside_block
8278    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8279 *pp = gpukernel->next;
8280 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8281 if ((*pp)->type == GIMPLE_OMP_FOR)
8282 break;
8283
8284 struct omp_region *kfor = *pp;
8285 gcc_assert (kfor);
8286 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8287 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8288 *pp = kfor->next;
8289 if (kfor->inner)
8290 {
8291 if (gimple_omp_for_grid_group_iter (for_stmt))
8292 {
8293 struct omp_region **next_pp;
8294 for (pp = &kfor->inner; *pp; pp = next_pp)
8295 {
8296 next_pp = &(*pp)->next;
8297 if ((*pp)->type != GIMPLE_OMP_FOR)
8298 continue;
8299 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8300 gcc_assert (gimple_omp_for_kind (inner)
8301 == GF_OMP_FOR_KIND_GRID_LOOP);
8302 grid_expand_omp_for_loop (*pp, true);
8303 *pp = (*pp)->next;
8304 next_pp = pp;
8305 }
8306 }
8307 expand_omp (kfor->inner);
8308 }
8309 if (gpukernel->inner)
8310 expand_omp (gpukernel->inner);
8311
8312 tree kern_fndecl = copy_node (orig_child_fndecl);
8313 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8314 "kernel");
8315 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8316 tree tgtblock = gimple_block (tgt_stmt);
8317 tree fniniblock = make_node (BLOCK);
8318  BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8319 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8320 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8321 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8322 DECL_INITIAL (kern_fndecl) = fniniblock;
8323 push_struct_function (kern_fndecl);
8324 cfun->function_end_locus = gimple_location (tgt_stmt);
8325 init_tree_ssa (cfun);
8326 pop_cfun ();
8327
8328 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8329 gcc_assert (!DECL_CHAIN (old_parm_decl));
8330 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8331 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8332 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8333 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8334 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8335 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8336 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8337 kern_cfun->curr_properties = cfun->curr_properties;
8338
8339 grid_expand_omp_for_loop (kfor, false);
8340
8341  /* Remove the GIMPLE_OMP_GRID_BODY statement.  */
8342  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8343 gsi_remove (&gsi, true);
8344 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8345 return. */
8346  gsi = gsi_last_nondebug_bb (gpukernel->exit);
8347 gcc_assert (!gsi_end_p (gsi)
8348 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8349 gimple *ret_stmt = gimple_build_return (NULL);
8350 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8351 gsi_remove (&gsi, true);
8352
8353 /* Statements in the first BB in the target construct have been produced by
8354 target lowering and must be copied inside the GPUKERNEL, with the two
8355 exceptions of the first OMP statement and the OMP_DATA assignment
8356 statement. */
8357 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8358 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8359 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8360 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8361 !gsi_end_p (tsi); gsi_next (&tsi))
8362 {
8363 gimple *stmt = gsi_stmt (tsi);
8364 if (is_gimple_omp (stmt))
8365 break;
8366 if (sender
8367 && is_gimple_assign (stmt)
8368 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8369 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8370 continue;
8371 gimple *copy = gimple_copy (stmt);
8372 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8373 gimple_set_block (copy, fniniblock);
8374 }
8375
8376 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8377 gpukernel->exit, inside_block);
8378
8379 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8380 kcn->mark_force_output ();
8381 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8382
8383 hsa_register_kernel (kcn, orig_child);
8384
8385 cgraph_node::add_new_function (kern_fndecl, true);
8386 push_cfun (kern_cfun);
8387 cgraph_edge::rebuild_edges ();
8388
8389 /* Re-map any mention of the PARM_DECL of the original function to the
8390 PARM_DECL of the new one.
8391
8392 TODO: It would be great if lowering produced references into the GPU
8393 kernel decl straight away and we did not have to do this. */
8394 struct grid_arg_decl_map adm;
8395 adm.old_arg = old_parm_decl;
8396 adm.new_arg = new_parm_decl;
8397 basic_block bb;
8398 FOR_EACH_BB_FN (bb, kern_cfun)
8399 {
8400 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8401 {
8402 gimple *stmt = gsi_stmt (gsi);
8403 struct walk_stmt_info wi;
8404 memset (&wi, 0, sizeof (wi));
8405 wi.info = &adm;
8406 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8407 }
8408 }
8409 pop_cfun ();
8410
8411 return;
8412}
8413
8414/* Expand the parallel region tree rooted at REGION. Expansion
8415 proceeds in depth-first order. Innermost regions are expanded
8416 first. This way, parallel regions that require a new function to
8417 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8418 internal dependencies in their body. */
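/* For example (illustrative), given

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       ...

   the GIMPLE_OMP_FOR region nested inside the GIMPLE_OMP_PARALLEL region
   is expanded first, so by the time the parallel body is outlined into a
   child function it no longer contains any OMP directives.  */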
8419
8420static void
8421expand_omp (struct omp_region *region)
8422{
8423 omp_any_child_fn_dumped = false;
8424 while (region)
8425 {
8426 location_t saved_location;
8427 gimple *inner_stmt = NULL;
8428
8429 /* First, determine whether this is a combined parallel+workshare
8430	 region.  */
8431 if (region->type == GIMPLE_OMP_PARALLEL)
8432 determine_parallel_type (region);
8433 else if (region->type == GIMPLE_OMP_TARGET)
8434 grid_expand_target_grid_body (region);
8435
8436 if (region->type == GIMPLE_OMP_FOR
8437 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8438 inner_stmt = last_stmt (region->inner->entry);
8439
8440 if (region->inner)
8441 expand_omp (region->inner);
8442
8443 saved_location = input_location;
8444 if (gimple_has_location (last_stmt (region->entry)))
8445 input_location = gimple_location (last_stmt (region->entry));
8446
8447 switch (region->type)
8448 {
8449 case GIMPLE_OMP_PARALLEL:
8450 case GIMPLE_OMP_TASK:
8451 expand_omp_taskreg (region);
8452 break;
8453
8454 case GIMPLE_OMP_FOR:
8455 expand_omp_for (region, inner_stmt);
8456 break;
8457
8458 case GIMPLE_OMP_SECTIONS:
8459 expand_omp_sections (region);
8460 break;
8461
8462 case GIMPLE_OMP_SECTION:
8463 /* Individual omp sections are handled together with their
8464 parent GIMPLE_OMP_SECTIONS region. */
8465 break;
8466
8467 case GIMPLE_OMP_SINGLE:
8468 expand_omp_single (region);
8469 break;
8470
8471 case GIMPLE_OMP_ORDERED:
8472 {
8473 gomp_ordered *ord_stmt
8474 = as_a <gomp_ordered *> (last_stmt (region->entry));
8475 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8476 OMP_CLAUSE_DEPEND))
8477 {
8478		/* We'll expand these when expanding the corresponding
8479		   worksharing region with an ordered(n) clause.  */
8480 gcc_assert (region->outer
8481 && region->outer->type == GIMPLE_OMP_FOR);
8482 region->ord_stmt = ord_stmt;
8483 break;
8484 }
8485 }
8486 /* FALLTHRU */
8487 case GIMPLE_OMP_MASTER:
8488 case GIMPLE_OMP_TASKGROUP:
8489 case GIMPLE_OMP_CRITICAL:
8490 case GIMPLE_OMP_TEAMS:
8491 expand_omp_synch (region);
8492 break;
8493
8494 case GIMPLE_OMP_ATOMIC_LOAD:
8495 expand_omp_atomic (region);
8496 break;
8497
8498 case GIMPLE_OMP_TARGET:
8499 expand_omp_target (region);
8500 break;
8501
8502 default:
8503 gcc_unreachable ();
8504 }
8505
8506 input_location = saved_location;
8507 region = region->next;
8508 }
8509 if (omp_any_child_fn_dumped)
8510 {
8511 if (dump_file)
8512 dump_function_header (dump_file, current_function_decl, dump_flags);
8513 omp_any_child_fn_dumped = false;
8514 }
8515}
8516
8517/* Helper for build_omp_regions. Scan the dominator tree starting at
8518 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8519   true, the function ends once a single tree is built (otherwise, the
8520   whole forest of OMP constructs may be built).  */
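/* For instance (illustrative), a parallel construct containing a single
   worksharing loop yields a two-node tree:

     omp_region GIMPLE_OMP_PARALLEL
       omp_region GIMPLE_OMP_FOR

   with each region's entry block ending in the directive, its exit block
   ending in the matching GIMPLE_OMP_RETURN, and cont (for the loop)
   pointing at the GIMPLE_OMP_CONTINUE block.  */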
8521
8522static void
8523build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8524 bool single_tree)
8525{
8526 gimple_stmt_iterator gsi;
8527 gimple *stmt;
8528 basic_block son;
8529
8530  gsi = gsi_last_nondebug_bb (bb);
8531 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8532 {
8533 struct omp_region *region;
8534 enum gimple_code code;
8535
8536 stmt = gsi_stmt (gsi);
8537 code = gimple_code (stmt);
8538 if (code == GIMPLE_OMP_RETURN)
8539 {
8540 /* STMT is the return point out of region PARENT. Mark it
8541 as the exit point and make PARENT the immediately
8542 enclosing region. */
8543 gcc_assert (parent);
8544 region = parent;
8545 region->exit = bb;
8546 parent = parent->outer;
8547 }
8548 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8549 {
8550	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8551 GIMPLE_OMP_RETURN, but matches with
8552 GIMPLE_OMP_ATOMIC_LOAD. */
8553 gcc_assert (parent);
8554 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8555 region = parent;
8556 region->exit = bb;
8557 parent = parent->outer;
8558 }
8559 else if (code == GIMPLE_OMP_CONTINUE)
8560 {
8561 gcc_assert (parent);
8562 parent->cont = bb;
8563 }
8564 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8565 {
8566 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8567 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8568 }
8569 else
8570 {
8571 region = new_omp_region (bb, code, parent);
8572 /* Otherwise... */
8573 if (code == GIMPLE_OMP_TARGET)
8574 {
8575 switch (gimple_omp_target_kind (stmt))
8576 {
8577 case GF_OMP_TARGET_KIND_REGION:
8578 case GF_OMP_TARGET_KIND_DATA:
8579 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8580 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8581 case GF_OMP_TARGET_KIND_OACC_DATA:
8582 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8583 break;
8584 case GF_OMP_TARGET_KIND_UPDATE:
8585 case GF_OMP_TARGET_KIND_ENTER_DATA:
8586 case GF_OMP_TARGET_KIND_EXIT_DATA:
8587 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8588 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8589 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8590 /* ..., other than for those stand-alone directives... */
8591 region = NULL;
8592 break;
8593 default:
8594 gcc_unreachable ();
8595 }
8596 }
8597 else if (code == GIMPLE_OMP_ORDERED
8598 && omp_find_clause (gimple_omp_ordered_clauses
8599 (as_a <gomp_ordered *> (stmt)),
8600 OMP_CLAUSE_DEPEND))
8601 /* #pragma omp ordered depend is also just a stand-alone
8602 directive. */
8603 region = NULL;
8604 else if (code == GIMPLE_OMP_TASK
8605 && gimple_omp_task_taskwait_p (stmt))
8606 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8607 region = NULL;
8608 /* ..., this directive becomes the parent for a new region. */
8609 if (region)
8610 parent = region;
8611 }
8612 }
8613
8614 if (single_tree && !parent)
8615 return;
8616
8617 for (son = first_dom_son (CDI_DOMINATORS, bb);
8618 son;
8619 son = next_dom_son (CDI_DOMINATORS, son))
8620 build_omp_regions_1 (son, parent, single_tree);
8621}
8622
8623/* Builds the tree of OMP regions rooted at ROOT, storing it to
8624 root_omp_region. */
8625
8626static void
8627build_omp_regions_root (basic_block root)
8628{
8629 gcc_assert (root_omp_region == NULL);
8630 build_omp_regions_1 (root, NULL, true);
8631 gcc_assert (root_omp_region != NULL);
8632}
8633
8634/* Expands omp construct (and its subconstructs) starting in HEAD. */
8635
8636void
8637omp_expand_local (basic_block head)
8638{
8639 build_omp_regions_root (head);
8640 if (dump_file && (dump_flags & TDF_DETAILS))
8641 {
8642 fprintf (dump_file, "\nOMP region tree\n\n");
8643 dump_omp_region (dump_file, root_omp_region, 0);
8644 fprintf (dump_file, "\n");
8645 }
8646
8647 remove_exit_barriers (root_omp_region);
8648 expand_omp (root_omp_region);
8649
8650 omp_free_regions ();
8651}
8652
8653/* Scan the CFG and build a tree of OMP regions, storing the root of
8654   the OMP region tree in root_omp_region.  */
8655
8656static void
8657build_omp_regions (void)
8658{
8659 gcc_assert (root_omp_region == NULL);
8660 calculate_dominance_info (CDI_DOMINATORS);
8661 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8662}
8663
8664/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8665
8666static unsigned int
8667execute_expand_omp (void)
8668{
8669 build_omp_regions ();
8670
8671 if (!root_omp_region)
8672 return 0;
8673
8674 if (dump_file)
8675 {
8676 fprintf (dump_file, "\nOMP region tree\n\n");
8677 dump_omp_region (dump_file, root_omp_region, 0);
8678 fprintf (dump_file, "\n");
8679 }
8680
8681 remove_exit_barriers (root_omp_region);
8682
8683 expand_omp (root_omp_region);
8684
8685 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8686 verify_loop_structure ();
8687 cleanup_tree_cfg ();
8688
8689 omp_free_regions ();
8690
8691 return 0;
8692}
8693
8694/* OMP expansion -- the default pass, run before creation of SSA form. */
8695
8696namespace {
8697
8698const pass_data pass_data_expand_omp =
8699{
8700 GIMPLE_PASS, /* type */
8701 "ompexp", /* name */
8702  OPTGROUP_OMP, /* optinfo_flags */
8703 TV_NONE, /* tv_id */
8704 PROP_gimple_any, /* properties_required */
8705 PROP_gimple_eomp, /* properties_provided */
8706 0, /* properties_destroyed */
8707 0, /* todo_flags_start */
8708 0, /* todo_flags_finish */
8709};
8710
8711class pass_expand_omp : public gimple_opt_pass
8712{
8713public:
8714 pass_expand_omp (gcc::context *ctxt)
8715 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8716 {}
8717
8718 /* opt_pass methods: */
8719 virtual unsigned int execute (function *)
8720 {
8721    bool gate = ((flag_openacc != 0 || flag_openmp != 0
8722 || flag_openmp_simd != 0)
8723 && !seen_error ());
8724
8725 /* This pass always runs, to provide PROP_gimple_eomp.
8726 But often, there is nothing to do. */
8727 if (!gate)
8728 return 0;
8729
8730 return execute_expand_omp ();
8731 }
8732
8733}; // class pass_expand_omp
8734
8735} // anon namespace
8736
8737gimple_opt_pass *
8738make_pass_expand_omp (gcc::context *ctxt)
8739{
8740 return new pass_expand_omp (ctxt);
8741}
8742
8743namespace {
8744
8745const pass_data pass_data_expand_omp_ssa =
8746{
8747 GIMPLE_PASS, /* type */
8748 "ompexpssa", /* name */
8749  OPTGROUP_OMP, /* optinfo_flags */
8750 TV_NONE, /* tv_id */
8751 PROP_cfg | PROP_ssa, /* properties_required */
8752 PROP_gimple_eomp, /* properties_provided */
8753 0, /* properties_destroyed */
8754 0, /* todo_flags_start */
8755 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8756};
8757
8758class pass_expand_omp_ssa : public gimple_opt_pass
8759{
8760public:
8761 pass_expand_omp_ssa (gcc::context *ctxt)
8762 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8763 {}
8764
8765 /* opt_pass methods: */
8766 virtual bool gate (function *fun)
8767 {
8768 return !(fun->curr_properties & PROP_gimple_eomp);
8769 }
8770 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8771 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8772
8773}; // class pass_expand_omp_ssa
8774
8775} // anon namespace
8776
8777gimple_opt_pass *
8778make_pass_expand_omp_ssa (gcc::context *ctxt)
8779{
8780 return new pass_expand_omp_ssa (ctxt);
8781}
8782
8783/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8784 GIMPLE_* codes. */
8785
8786bool
8787omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8788 int *region_idx)
8789{
8790 gimple *last = last_stmt (bb);
8791 enum gimple_code code = gimple_code (last);
8792 struct omp_region *cur_region = *region;
8793 bool fallthru = false;
8794
8795 switch (code)
8796 {
8797 case GIMPLE_OMP_PARALLEL:
8798 case GIMPLE_OMP_FOR:
8799 case GIMPLE_OMP_SINGLE:
8800 case GIMPLE_OMP_TEAMS:
8801 case GIMPLE_OMP_MASTER:
8802 case GIMPLE_OMP_TASKGROUP:
8803 case GIMPLE_OMP_CRITICAL:
8804 case GIMPLE_OMP_SECTION:
8805 case GIMPLE_OMP_GRID_BODY:
8806 cur_region = new_omp_region (bb, code, cur_region);
8807 fallthru = true;
8808 break;
8809
8810 case GIMPLE_OMP_TASK:
8811 cur_region = new_omp_region (bb, code, cur_region);
8812 fallthru = true;
8813 if (gimple_omp_task_taskwait_p (last))
8814 cur_region = cur_region->outer;
8815 break;
8816
8817 case GIMPLE_OMP_ORDERED:
8818 cur_region = new_omp_region (bb, code, cur_region);
8819 fallthru = true;
8820 if (omp_find_clause (gimple_omp_ordered_clauses
8821 (as_a <gomp_ordered *> (last)),
8822 OMP_CLAUSE_DEPEND))
8823 cur_region = cur_region->outer;
8824 break;
8825
8826 case GIMPLE_OMP_TARGET:
8827 cur_region = new_omp_region (bb, code, cur_region);
8828 fallthru = true;
8829 switch (gimple_omp_target_kind (last))
8830 {
8831 case GF_OMP_TARGET_KIND_REGION:
8832 case GF_OMP_TARGET_KIND_DATA:
8833 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8834 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8835 case GF_OMP_TARGET_KIND_OACC_DATA:
8836 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8837 break;
8838 case GF_OMP_TARGET_KIND_UPDATE:
8839 case GF_OMP_TARGET_KIND_ENTER_DATA:
8840 case GF_OMP_TARGET_KIND_EXIT_DATA:
8841 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8842 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8843 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8844 cur_region = cur_region->outer;
8845 break;
8846 default:
8847 gcc_unreachable ();
8848 }
8849 break;
8850
8851 case GIMPLE_OMP_SECTIONS:
8852 cur_region = new_omp_region (bb, code, cur_region);
8853 fallthru = true;
8854 break;
8855
8856 case GIMPLE_OMP_SECTIONS_SWITCH:
8857 fallthru = false;
8858 break;
8859
8860 case GIMPLE_OMP_ATOMIC_LOAD:
8861 case GIMPLE_OMP_ATOMIC_STORE:
8862 fallthru = true;
8863 break;
8864
8865 case GIMPLE_OMP_RETURN:
8866 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8867 somewhere other than the next block. This will be
8868 created later. */
8869 cur_region->exit = bb;
8870 if (cur_region->type == GIMPLE_OMP_TASK)
8871 /* Add an edge corresponding to not scheduling the task
8872 immediately. */
8873 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8874 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8875 cur_region = cur_region->outer;
8876 break;
8877
8878 case GIMPLE_OMP_CONTINUE:
8879 cur_region->cont = bb;
8880 switch (cur_region->type)
8881 {
8882 case GIMPLE_OMP_FOR:
8883 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8884 succs edges as abnormal to prevent splitting
8885 them. */
8886 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8887 /* Make the loopback edge. */
8888 make_edge (bb, single_succ (cur_region->entry),
8889 EDGE_ABNORMAL);
8890
8891 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8892 corresponds to the case that the body of the loop
8893 is not executed at all. */
8894 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8895 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
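	  /* To summarize the edges created for a worksharing loop
	     (illustrative; entry is the GIMPLE_OMP_FOR block, cont is this
	     block, exit is the block following it):

	       entry -> body   existing fallthru, now marked EDGE_ABNORMAL
	       cont  -> body   loopback, EDGE_ABNORMAL
	       entry -> exit   zero-iteration path, EDGE_ABNORMAL
	       cont  -> exit   EDGE_FALLTHRU | EDGE_ABNORMAL  */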
8896 fallthru = false;
8897 break;
8898
8899 case GIMPLE_OMP_SECTIONS:
8900 /* Wire up the edges into and out of the nested sections. */
8901 {
8902 basic_block switch_bb = single_succ (cur_region->entry);
8903
8904 struct omp_region *i;
8905 for (i = cur_region->inner; i ; i = i->next)
8906 {
8907 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8908 make_edge (switch_bb, i->entry, 0);
8909 make_edge (i->exit, bb, EDGE_FALLTHRU);
8910 }
8911
8912 /* Make the loopback edge to the block with
8913 GIMPLE_OMP_SECTIONS_SWITCH. */
8914 make_edge (bb, switch_bb, 0);
8915
8916 /* Make the edge from the switch to exit. */
8917 make_edge (switch_bb, bb->next_bb, 0);
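	    /* Illustrative shape of the wiring above for two sections:
	       switch_bb branches to each GIMPLE_OMP_SECTION entry, every
	       section exit falls through to this GIMPLE_OMP_CONTINUE block,
	       which loops back to switch_bb, and switch_bb also branches to
	       the exit block.  */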
8918 fallthru = false;
8919 }
8920 break;
8921
8922 case GIMPLE_OMP_TASK:
8923 fallthru = true;
8924 break;
8925
8926 default:
8927 gcc_unreachable ();
8928 }
8929 break;
8930
8931 default:
8932 gcc_unreachable ();
8933 }
8934
8935 if (*region != cur_region)
8936 {
8937 *region = cur_region;
8938 if (cur_region)
8939 *region_idx = cur_region->entry->index;
8940 else
8941 *region_idx = 0;
8942 }
8943
8944 return fallthru;
8945}
8946
8947#include "gt-omp-expand.h"