/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
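
/* Illustrative note (not part of the original source): assuming the
   vectorization factor VF is a power of two, the folding above rounds
   CHUNK_SIZE up to a multiple of VF.  E.g. for chunk_size = 10, vf = 8:

     (10 + (8 - 1)) & -8  ==  17 & ~7  ==  16

   relying on -vf having the low log2 (vf) bits clear in two's
   complement.  */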

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is
     implementable, adding another big set of APIs or slowing down the
     normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
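
/* Illustrative note (hypothetical output, not part of the original
   source; the bb numbers are invented): for a parallel+for region,
   debug_omp_region prints something like

     bb 2: GIMPLE_OMP_PARALLEL
	 bb 3: GIMPLE_OMP_FOR
	 bb 5: GIMPLE_OMP_CONTINUE
	 bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN

   with each nesting level indented by four columns.  */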

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
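
/* Illustrative note (hypothetical GIMPLE, not part of the original
   source): for "#pragma omp parallel num_threads (4)" the code above
   emits roughly

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   i.e. child function, shared-data pointer, thread count and flags,
   with any combined-workshare arguments spliced in before the flags.  */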

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}
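
/* Illustrative note (not part of the original source): because the
   vector is walked in reverse, a vector of decls [a, b, c] yields the
   chain a -> b -> c; the element order is preserved, only the links
   are built back to front.  */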

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
					  tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
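
/* Illustrative note (not part of the original source): the iteration
   count built above is (range - dir + step) / step using truncating
   division.  For an upward loop with n1 = 0, n2 = 10, step = 3 this is
   (10 - 1 + 3) / 3 = 4, matching the iterations i = 0, 3, 6, 9.  */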

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
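
/* Illustrative note (not part of the original source): for a collapse(2)
   nest whose innermost loop runs 5 iterations, a linear ivar of 13 is
   decomposed as 13 % 5 = 3 (innermost index) and 13 / 5 = 2 (next index
   outwards), each index then scaled by the loop step and added to the
   loop base -- exactly the div/mod cascade above.  */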
1679
1680/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1681 of the combined collapse > 1 loop constructs, generate code like:
1682 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1683 if (cond3 is <)
1684 adj = STEP3 - 1;
1685 else
1686 adj = STEP3 + 1;
1687 count3 = (adj + N32 - N31) / STEP3;
1688 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1689 if (cond2 is <)
1690 adj = STEP2 - 1;
1691 else
1692 adj = STEP2 + 1;
1693 count2 = (adj + N22 - N21) / STEP2;
1694 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1695 if (cond1 is <)
1696 adj = STEP1 - 1;
1697 else
1698 adj = STEP1 + 1;
1699 count1 = (adj + N12 - N11) / STEP1;
1700 count = count1 * count2 * count3;
1701 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1702 count = 0;
1703 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1704 of the combined loop constructs, just initialize COUNTS array
1705 from the _looptemp_ clauses. For loop nests with non-rectangular
1706 loops, do this only for the rectangular loops. Then pick
1707 the loops which reference outer vars in their bound expressions
1708 and the loops which they refer to, and for this sub-nest compute
1709 the number of iterations. For triangular loops use Faulhaber's formula;
1710 otherwise, as a fallback, compute it by iterating the loops.
1711 If e.g. the sub-nest is
1712 for (I = N11; I COND1 N12; I += STEP1)
1713 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1714 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1715 do:
1716 COUNT = 0;
1717 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1718 for (tmpj = M21 * tmpi + N21;
1719 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1720 {
1721 int tmpk1 = M31 * tmpj + N31;
1722 int tmpk2 = M32 * tmpj + N32;
1723 if (tmpk1 COND3 tmpk2)
1724 {
1725 if (COND3 is <)
1726 adj = STEP3 - 1;
1727 else
1728 adj = STEP3 + 1;
1729 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1730 }
1731 }
1732 and finally multiply the counts of the rectangular loops not
1733 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1734 store the number of iterations of the loops from fd->first_nonrect
1735 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1736 by the counts of rectangular loops not referenced in any non-rectangular
1737 loops sandwiched in between those. */
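/* For illustration (an added example): for the triangular sub-nest
     for (I = 0; I < 4; I++)
       for (J = 0; J < I; J++)
   the inner trip counts are 0, 1, 2 and 3, so COUNT = 6. The
   Faulhaber path computes the same value in closed form as
   4 * 0 + 1 * (4 * 3 / 2) = 6, i.e. the number of outer iterations
   times the first inner trip count, plus the per-iteration increment
   times the triangular number, without executing the fallback loops.  */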
1738
1739/* NOTE: It *could* be better to moosh all of the BBs together,
1740 creating one larger BB with all the computation and the unexpected
1741 jump at the end. I.e.
1742
1743 bool zero3, zero2, zero1, zero;
1744
1745 zero3 = N32 c3 N31;
1746 count3 = (N32 - N31) /[cl] STEP3;
1747 zero2 = N22 c2 N21;
1748 count2 = (N22 - N21) /[cl] STEP2;
1749 zero1 = N12 c1 N11;
1750 count1 = (N12 - N11) /[cl] STEP1;
1751 zero = zero3 || zero2 || zero1;
1752 count = count1 * count2 * count3;
1753 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1754
1755 After all, we expect zero to be false, and thus we expect to have to
1756 evaluate all of the comparison expressions, so short-circuiting
1757 oughtn't be a win. Since the condition isn't protecting a
1758 denominator, we're not concerned about divide-by-zero, so we can
1759 fully evaluate count even if a numerator turned out to be wrong.
1760
1761 It seems like putting this all together would create much better
1762 scheduling opportunities, and less pressure on the chip's branch
1763 predictor. */
1764
1765static void
1766expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1767 basic_block &entry_bb, tree *counts,
1768 basic_block &zero_iter1_bb, int &first_zero_iter1,
1769 basic_block &zero_iter2_bb, int &first_zero_iter2,
1770 basic_block &l2_dom_bb)
1771{
1772 tree t, type = TREE_TYPE (fd->loop.v);
1773 edge e, ne;
1774 int i;
1775
1776 /* Collapsed loops need work for expansion into SSA form. */
1777 gcc_assert (!gimple_in_ssa_p (cfun));
1778
1779 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1780 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1781 {
1782 gcc_assert (fd->ordered == 0);
1783 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1784 isn't supposed to be handled, as the inner loop doesn't
1785 use it. */
1786 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1787 OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1790 {
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 counts[i] = OMP_CLAUSE_DECL (innerc);
1796 else
1797 counts[0] = NULL_TREE;
1798 }
1799 if (fd->non_rect
1800 && fd->last_nonrect == fd->first_nonrect + 1
1801 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1802 {
1803 tree c[4];
1804 for (i = 0; i < 4; i++)
1805 {
1806 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1807 OMP_CLAUSE__LOOPTEMP_);
1808 gcc_assert (innerc);
1809 c[i] = OMP_CLAUSE_DECL (innerc);
1810 }
1811 counts[0] = c[0];
1812 fd->first_inner_iterations = c[1];
1813 fd->factor = c[2];
1814 fd->adjn1 = c[3];
1815 }
1816 return;
1817 }
1818
1819 for (i = fd->collapse; i < fd->ordered; i++)
1820 {
1821 tree itype = TREE_TYPE (fd->loops[i].v);
1822 counts[i] = NULL_TREE;
1823 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1824 fold_convert (itype, fd->loops[i].n1),
1825 fold_convert (itype, fd->loops[i].n2));
1826 if (t && integer_zerop (t))
1827 {
1828 for (i = fd->collapse; i < fd->ordered; i++)
1829 counts[i] = build_int_cst (type, 0);
1830 break;
1831 }
1832 }
aed3ab25 1833 bool rect_count_seen = false;
1834 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1835 {
1836 tree itype = TREE_TYPE (fd->loops[i].v);
1837
1838 if (i >= fd->collapse && counts[i])
1839 continue;
1840 if (fd->non_rect)
1841 {
1842 /* Skip loops that use outer iterators in their expressions
1843 during this phase. */
1844 if (fd->loops[i].m1 || fd->loops[i].m2)
1845 {
1846 counts[i] = build_zero_cst (type);
1847 continue;
1848 }
1849 }
1850 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1851 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1852 fold_convert (itype, fd->loops[i].n1),
1853 fold_convert (itype, fd->loops[i].n2)))
1854 == NULL_TREE || !integer_onep (t)))
1855 {
1856 gcond *cond_stmt;
1857 tree n1, n2;
1858 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1859 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1860 true, GSI_SAME_STMT);
1861 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1862 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1863 true, GSI_SAME_STMT);
1864 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1865 NULL_TREE, NULL_TREE);
1866 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1867 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1868 expand_omp_regimplify_p, NULL, NULL)
1869 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1870 expand_omp_regimplify_p, NULL, NULL))
1871 {
1872 *gsi = gsi_for_stmt (cond_stmt);
1873 gimple_regimplify_operands (cond_stmt, gsi);
1874 }
1875 e = split_block (entry_bb, cond_stmt);
1876 basic_block &zero_iter_bb
1877 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1878 int &first_zero_iter
1879 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1880 if (zero_iter_bb == NULL)
1881 {
1882 gassign *assign_stmt;
1883 first_zero_iter = i;
1884 zero_iter_bb = create_empty_bb (entry_bb);
1885 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1886 *gsi = gsi_after_labels (zero_iter_bb);
1887 if (i < fd->collapse)
1888 assign_stmt = gimple_build_assign (fd->loop.n2,
1889 build_zero_cst (type));
1890 else
1891 {
1892 counts[i] = create_tmp_reg (type, ".count");
1893 assign_stmt
1894 = gimple_build_assign (counts[i], build_zero_cst (type));
1895 }
1896 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1897 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1898 entry_bb);
1899 }
1900 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
357067f2 1901 ne->probability = profile_probability::very_unlikely ();
629b3d75 1902 e->flags = EDGE_TRUE_VALUE;
357067f2 1903 e->probability = ne->probability.invert ();
1904 if (l2_dom_bb == NULL)
1905 l2_dom_bb = entry_bb;
1906 entry_bb = e->dest;
65f4b875 1907 *gsi = gsi_last_nondebug_bb (entry_bb);
1908 }
1909
1910 if (POINTER_TYPE_P (itype))
1911 itype = signed_type_for (itype);
1912 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1913 ? -1 : 1));
1914 t = fold_build2 (PLUS_EXPR, itype,
1915 fold_convert (itype, fd->loops[i].step), t);
1916 t = fold_build2 (PLUS_EXPR, itype, t,
1917 fold_convert (itype, fd->loops[i].n2));
1918 t = fold_build2 (MINUS_EXPR, itype, t,
1919 fold_convert (itype, fd->loops[i].n1));
1920 /* ??? We could probably use CEIL_DIV_EXPR instead of
1921 TRUNC_DIV_EXPR and adjust by hand, except that we might not
1922 generate the same code in the end because generically we
1923 don't know that the values involved must be negative for
1924 GT. */
1925 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1926 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1927 fold_build1 (NEGATE_EXPR, itype, t),
1928 fold_build1 (NEGATE_EXPR, itype,
1929 fold_convert (itype,
1930 fd->loops[i].step)));
1931 else
1932 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1933 fold_convert (itype, fd->loops[i].step));
1934 t = fold_convert (type, t);
1935 if (TREE_CODE (t) == INTEGER_CST)
1936 counts[i] = t;
1937 else
1938 {
1939 if (i < fd->collapse || i != first_zero_iter2)
1940 counts[i] = create_tmp_reg (type, ".count");
1941 expand_omp_build_assign (gsi, counts[i], t);
1942 }
1943 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1944 {
1945 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1946 continue;
1947 if (!rect_count_seen)
1948 {
1949 t = counts[i];
1950 rect_count_seen = true;
1951 }
1952 else
1953 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1954 expand_omp_build_assign (gsi, fd->loop.n2, t);
1955 }
1956 }
1957 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1958 {
1959 gcc_assert (fd->last_nonrect != -1);
1960
1961 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1962 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1963 build_zero_cst (type));
1964 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1965 if (fd->loops[i].m1
1966 || fd->loops[i].m2
1967 || fd->loops[i].non_rect_referenced)
1968 break;
1969 if (i == fd->last_nonrect
1970 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1971 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
aed3ab25 1972 {
1973 int o = fd->first_nonrect;
1974 tree itype = TREE_TYPE (fd->loops[o].v);
1975 tree n1o = create_tmp_reg (itype, ".n1o");
1976 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1977 expand_omp_build_assign (gsi, n1o, t);
1978 tree n2o = create_tmp_reg (itype, ".n2o");
1979 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1980 expand_omp_build_assign (gsi, n2o, t);
1981 if (fd->loops[i].m1 && fd->loops[i].m2)
1982 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1983 unshare_expr (fd->loops[i].m1));
1984 else if (fd->loops[i].m1)
1985 t = fold_unary (NEGATE_EXPR, itype,
1986 unshare_expr (fd->loops[i].m1));
1987 else
1988 t = unshare_expr (fd->loops[i].m2);
1989 tree m2minusm1
1990 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1991 true, GSI_SAME_STMT);
aed3ab25 1992
1993 gimple_stmt_iterator gsi2 = *gsi;
1994 gsi_prev (&gsi2);
1995 e = split_block (entry_bb, gsi_stmt (gsi2));
1996 e = split_block (e->dest, (gimple *) NULL);
1997 basic_block bb1 = e->src;
1998 entry_bb = e->dest;
1999 *gsi = gsi_after_labels (entry_bb);
aed3ab25 2000
2001 gsi2 = gsi_after_labels (bb1);
2002 tree ostep = fold_convert (itype, fd->loops[o].step);
2003 t = build_int_cst (itype, (fd->loops[o].cond_code
2004 == LT_EXPR ? -1 : 1));
2005 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2006 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2007 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2008 if (TYPE_UNSIGNED (itype)
2009 && fd->loops[o].cond_code == GT_EXPR)
2010 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2011 fold_build1 (NEGATE_EXPR, itype, t),
2012 fold_build1 (NEGATE_EXPR, itype, ostep));
2013 else
2014 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2015 tree outer_niters
2016 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2019 build_one_cst (itype));
2020 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2021 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2022 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 tree n1, n2, n1e, n2e;
2025 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2026 if (fd->loops[i].m1)
2027 {
2028 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
29e0ad45 2029 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2030 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2031 }
2032 else
2033 n1 = t;
2034 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2035 true, GSI_SAME_STMT);
2036 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2037 if (fd->loops[i].m2)
2038 {
2039 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
29e0ad45 2040 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2041 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2042 }
2043 else
2044 n2 = t;
2045 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2046 true, GSI_SAME_STMT);
2047 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2048 if (fd->loops[i].m1)
aed3ab25 2049 {
2050 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2051 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2052 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2053 }
2054 else
2055 n1e = t;
2056 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2057 true, GSI_SAME_STMT);
2058 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2059 if (fd->loops[i].m2)
2060 {
2061 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2062 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2063 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2064 }
2065 else
2066 n2e = t;
2067 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2068 true, GSI_SAME_STMT);
2069 gcond *cond_stmt
2070 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2071 NULL_TREE, NULL_TREE);
2072 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2073 e = split_block (bb1, cond_stmt);
2074 e->flags = EDGE_TRUE_VALUE;
2075 e->probability = profile_probability::likely ().guessed ();
2076 basic_block bb2 = e->dest;
2077 gsi2 = gsi_after_labels (bb2);
2078
2079 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2080 NULL_TREE, NULL_TREE);
2081 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2082 e = split_block (bb2, cond_stmt);
2083 e->flags = EDGE_TRUE_VALUE;
2084 e->probability = profile_probability::likely ().guessed ();
2085 gsi2 = gsi_after_labels (e->dest);
2086
2087 tree step = fold_convert (itype, fd->loops[i].step);
2088 t = build_int_cst (itype, (fd->loops[i].cond_code
2089 == LT_EXPR ? -1 : 1));
2090 t = fold_build2 (PLUS_EXPR, itype, step, t);
2091 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2092 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2093 if (TYPE_UNSIGNED (itype)
2094 && fd->loops[i].cond_code == GT_EXPR)
2095 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2096 fold_build1 (NEGATE_EXPR, itype, t),
2097 fold_build1 (NEGATE_EXPR, itype, step));
2098 else
2099 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2100 tree first_inner_iterations
2101 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2102 true, GSI_SAME_STMT);
2103 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2104 if (TYPE_UNSIGNED (itype)
2105 && fd->loops[i].cond_code == GT_EXPR)
2106 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2107 fold_build1 (NEGATE_EXPR, itype, t),
2108 fold_build1 (NEGATE_EXPR, itype, step));
2109 else
2110 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2111 tree factor
2112 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2113 true, GSI_SAME_STMT);
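/* Added note on the computation that follows: it is the closed-form
   (Faulhaber) sum of the arithmetic series of inner trip counts,
   COUNT = outer_niters * first_inner_iterations
	   + factor * (outer_niters - 1) * outer_niters / 2.  */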
2114 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2115 build_one_cst (itype));
2116 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2117 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2118 t = fold_build2 (MULT_EXPR, itype, factor, t);
2119 t = fold_build2 (PLUS_EXPR, itype,
2120 fold_build2 (MULT_EXPR, itype, outer_niters,
2121 first_inner_iterations), t);
2122 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2123 fold_convert (type, t));
2124
2125 basic_block bb3 = create_empty_bb (bb1);
2126 add_bb_to_loop (bb3, bb1->loop_father);
2127
2128 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2129 e->probability = profile_probability::unlikely ().guessed ();
2130
2131 gsi2 = gsi_after_labels (bb3);
2132 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2133 NULL_TREE, NULL_TREE);
2134 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2135 e = split_block (bb3, cond_stmt);
2136 e->flags = EDGE_TRUE_VALUE;
2137 e->probability = profile_probability::likely ().guessed ();
2138 basic_block bb4 = e->dest;
2139
2140 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2141 ne->probability = e->probability.invert ();
2142
2143 basic_block bb5 = create_empty_bb (bb2);
2144 add_bb_to_loop (bb5, bb2->loop_father);
2145
2146 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2147 ne->probability = profile_probability::unlikely ().guessed ();
2148
2149 for (int j = 0; j < 2; j++)
2150 {
2151 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2152 t = fold_build2 (MINUS_EXPR, itype,
2153 unshare_expr (fd->loops[i].n1),
2154 unshare_expr (fd->loops[i].n2));
2155 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2156 tree tem
2157 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2158 true, GSI_SAME_STMT);
2159 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2160 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2161 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2162 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2163 true, GSI_SAME_STMT);
2164 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2165 if (fd->loops[i].m1)
2166 {
2167 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2168 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2169 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2170 }
2171 else
2172 n1 = t;
2173 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2174 true, GSI_SAME_STMT);
2175 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2176 if (fd->loops[i].m2)
2177 {
2178 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2179 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2180 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2181 }
2182 else
2183 n2 = t;
2184 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2185 true, GSI_SAME_STMT);
2186 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2187
2188 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2189 NULL_TREE, NULL_TREE);
aed3ab25 2190 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2191 e = split_block (gsi_bb (gsi2), cond_stmt);
2192 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2193 e->probability = profile_probability::unlikely ().guessed ();
2194 ne = make_edge (e->src, bb1,
2195 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2196 ne->probability = e->probability.invert ();
2197 gsi2 = gsi_after_labels (e->dest);
2198
2199 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2200 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
aed3ab25 2201
2202 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2203 }
aed3ab25 2204
2205 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2206 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2207 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2208
2209 if (fd->first_nonrect + 1 == fd->last_nonrect)
2210 {
2211 fd->first_inner_iterations = first_inner_iterations;
2212 fd->factor = factor;
2213 fd->adjn1 = n1o;
2214 }
2215 }
2216 else
2217 {
2218 /* Fallback implementation. Evaluate the loops with m1/m2
2219 non-NULL as well as their outer loops at runtime using temporaries
2220 instead of the original iteration variables, and in the
2221 body just bump the counter. */
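/* A sketch of the emitted fallback in source form (the .it and
   .count temporaries are the ones created by create_tmp_reg below):
     .count = 0;
     for (.it = n1; .it cond n2; .it += step)   // one per loop
       ...
	 .count += trip count of the last non-rectangular loop;
   so the original iteration variables stay untouched here.  */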
2222 gimple_stmt_iterator gsi2 = *gsi;
2223 gsi_prev (&gsi2);
2224 e = split_block (entry_bb, gsi_stmt (gsi2));
2225 e = split_block (e->dest, (gimple *) NULL);
2226 basic_block cur_bb = e->src;
2227 basic_block next_bb = e->dest;
2228 entry_bb = e->dest;
2229 *gsi = gsi_after_labels (entry_bb);
aed3ab25 2230
2231 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2232 memset (vs, 0, fd->last_nonrect * sizeof (tree));
aed3ab25 2233
29e0ad45 2234 for (i = 0; i <= fd->last_nonrect; i++)
aed3ab25 2235 {
2236 if (fd->loops[i].m1 == NULL_TREE
2237 && fd->loops[i].m2 == NULL_TREE
2238 && !fd->loops[i].non_rect_referenced)
2239 continue;
2240
2241 tree itype = TREE_TYPE (fd->loops[i].v);
2242
2243 gsi2 = gsi_after_labels (cur_bb);
2244 tree n1, n2;
2245 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2246 if (fd->loops[i].m1)
2247 {
2248 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2249 n1 = fold_build2 (MULT_EXPR, itype,
2250 vs[i - fd->loops[i].outer], n1);
2251 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2252 }
2253 else
2254 n1 = t;
2255 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2256 true, GSI_SAME_STMT);
2257 if (i < fd->last_nonrect)
2258 {
2259 vs[i] = create_tmp_reg (itype, ".it");
2260 expand_omp_build_assign (&gsi2, vs[i], n1);
2261 }
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2263 if (fd->loops[i].m2)
2264 {
2265 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2266 n2 = fold_build2 (MULT_EXPR, itype,
2267 vs[i - fd->loops[i].outer], n2);
2268 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2269 }
2270 else
2271 n2 = t;
2272 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2273 true, GSI_SAME_STMT);
2274 if (i == fd->last_nonrect)
2275 {
2276 gcond *cond_stmt
2277 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2278 NULL_TREE, NULL_TREE);
2279 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2280 e = split_block (cur_bb, cond_stmt);
2281 e->flags = EDGE_TRUE_VALUE;
2282 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2283 e->probability = profile_probability::likely ().guessed ();
2284 ne->probability = e->probability.invert ();
2285 gsi2 = gsi_after_labels (e->dest);
2286
2287 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2288 ? -1 : 1));
2289 t = fold_build2 (PLUS_EXPR, itype,
2290 fold_convert (itype, fd->loops[i].step), t);
2291 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2292 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2293 tree step = fold_convert (itype, fd->loops[i].step);
2294 if (TYPE_UNSIGNED (itype)
2295 && fd->loops[i].cond_code == GT_EXPR)
2296 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2297 fold_build1 (NEGATE_EXPR, itype, t),
2298 fold_build1 (NEGATE_EXPR, itype, step));
2299 else
2300 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2301 t = fold_convert (type, t);
2302 t = fold_build2 (PLUS_EXPR, type,
2303 counts[fd->last_nonrect], t);
2304 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2305 true, GSI_SAME_STMT);
2306 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2307 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2308 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2309 break;
2310 }
2311 e = split_block (cur_bb, last_stmt (cur_bb));
2312
2313 basic_block new_cur_bb = create_empty_bb (cur_bb);
2314 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2315
2316 gsi2 = gsi_after_labels (e->dest);
2317 tree step = fold_convert (itype,
2318 unshare_expr (fd->loops[i].step));
2319 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2320 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2321 true, GSI_SAME_STMT);
2322 expand_omp_build_assign (&gsi2, vs[i], t);
2323
2324 ne = split_block (e->dest, last_stmt (e->dest));
2325 gsi2 = gsi_after_labels (ne->dest);
2326
2327 gcond *cond_stmt
2328 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2329 NULL_TREE, NULL_TREE);
2330 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2331 edge e3, e4;
2332 if (next_bb == entry_bb)
2333 {
2334 e3 = find_edge (ne->dest, next_bb);
2335 e3->flags = EDGE_FALSE_VALUE;
2336 }
2337 else
2338 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2339 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2340 e4->probability = profile_probability::likely ().guessed ();
2341 e3->probability = e4->probability.invert ();
2342 basic_block esrc = e->src;
2343 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2344 cur_bb = new_cur_bb;
2345 basic_block latch_bb = next_bb;
2346 next_bb = e->dest;
2347 remove_edge (e);
2348 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2349 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2350 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
aed3ab25 2351 }
2352 }
2353 t = NULL_TREE;
2354 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2355 if (!fd->loops[i].non_rect_referenced
2356 && fd->loops[i].m1 == NULL_TREE
2357 && fd->loops[i].m2 == NULL_TREE)
2358 {
2359 if (t == NULL_TREE)
2360 t = counts[i];
2361 else
2362 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2363 }
2364 if (t)
2365 {
2366 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2367 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2368 }
2369 if (!rect_count_seen)
2370 t = counts[fd->last_nonrect];
2371 else
2372 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2373 counts[fd->last_nonrect]);
2374 expand_omp_build_assign (gsi, fd->loop.n2, t);
2375 }
2376 else if (fd->non_rect)
2377 {
2378 tree t = fd->loop.n2;
2379 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2380 int non_rect_referenced = 0, non_rect = 0;
2381 for (i = 0; i < fd->collapse; i++)
2382 {
5acef69f 2383 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2384 && !integer_zerop (counts[i]))
2385 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2386 if (fd->loops[i].non_rect_referenced)
2387 non_rect_referenced++;
2388 if (fd->loops[i].m1 || fd->loops[i].m2)
2389 non_rect++;
2390 }
2391 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2392 counts[fd->last_nonrect] = t;
2393 }
2394}
2395
2396/* Helper function for expand_omp_{for_*,simd}. Generate code like:
2397 T = V;
2398 V3 = N31 + (T % count3) * STEP3;
2399 T = T / count3;
2400 V2 = N21 + (T % count2) * STEP2;
2401 T = T / count2;
2402 V1 = N11 + T * STEP1;
2403 if this loop doesn't have an inner loop construct combined with it.
2404 If it does have an inner loop construct combined with it and the
2405 iteration count isn't known constant, store values from counts array
2406 into its _looptemp_ temporaries instead.
2407 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2408 inclusive), use the count of all those loops together, and either
c2ebf4f1 2409 find quadratic etc. equation roots, or as a fallback, do:
2410 COUNT = 0;
2411 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2412 for (tmpj = M21 * tmpi + N21;
2413 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2414 {
2415 int tmpk1 = M31 * tmpj + N31;
2416 int tmpk2 = M32 * tmpj + N32;
2417 if (tmpk1 COND3 tmpk2)
2418 {
2419 if (COND3 is <)
2420 adj = STEP3 - 1;
2421 else
2422 adj = STEP3 + 1;
2423 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2424 if (COUNT + temp > T)
2425 {
2426 V1 = tmpi;
2427 V2 = tmpj;
2428 V3 = tmpk1 + (T - COUNT) * STEP3;
2429 goto done;
2430 }
2431 else
2432 COUNT += temp;
2433 }
2434 }
2435 done:;
2436 but for optional innermost or outermost rectangular loops that aren't
2437 referenced by other loop expressions, keep doing the division/modulo. */
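/* For illustration (an added example): with count3 = 6 and count2 = 5
   in a rectangular collapse(3) nest, T = 22 gives V3 from
   22 % 6 = 4, T becomes 3, V2 from 3 % 5 = 3, T becomes 0, and V1
   from T = 0; i.e. logical iteration 22 maps to indices (0, 3, 4)
   before scaling by the steps and adding N11/N21/N31.  */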
2438
2439static void
2440expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2441 tree *counts, tree *nonrect_bounds,
2442 gimple *inner_stmt, tree startvar)
2443{
2444 int i;
2445 if (gimple_omp_for_combined_p (fd->for_stmt))
2446 {
2447 /* If fd->loop.n2 is constant, then no propagation of the counts
2448 is needed, they are constant. */
2449 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2450 return;
2451
2452 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2453 ? gimple_omp_taskreg_clauses (inner_stmt)
2454 : gimple_omp_for_clauses (inner_stmt);
2455 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2456 isn't supposed to be handled, as the inner loop doesn't
2457 use it. */
2458 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2459 gcc_assert (innerc);
2460 int count = 0;
2461 if (fd->non_rect
2462 && fd->last_nonrect == fd->first_nonrect + 1
2463 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2464 count = 4;
2465 for (i = 0; i < fd->collapse + count; i++)
2466 {
2467 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2468 OMP_CLAUSE__LOOPTEMP_);
2469 gcc_assert (innerc);
2470 if (i)
2471 {
2472 tree tem = OMP_CLAUSE_DECL (innerc);
2473 tree t;
2474 if (i < fd->collapse)
2475 t = counts[i];
2476 else
2477 switch (i - fd->collapse)
2478 {
2479 case 0: t = counts[0]; break;
2480 case 1: t = fd->first_inner_iterations; break;
2481 case 2: t = fd->factor; break;
2482 case 3: t = fd->adjn1; break;
2483 default: gcc_unreachable ();
2484 }
2485 t = fold_convert (TREE_TYPE (tem), t);
2486 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2487 false, GSI_CONTINUE_LINKING);
2488 gassign *stmt = gimple_build_assign (tem, t);
2489 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2490 }
2491 }
2492 return;
2493 }
2494
2495 tree type = TREE_TYPE (fd->loop.v);
2496 tree tem = create_tmp_reg (type, ".tem");
2497 gassign *stmt = gimple_build_assign (tem, startvar);
2498 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2499
2500 for (i = fd->collapse - 1; i >= 0; i--)
2501 {
2502 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2503 itype = vtype;
2504 if (POINTER_TYPE_P (vtype))
2505 itype = signed_type_for (vtype);
aed3ab25 2506 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2507 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2508 else
2509 t = tem;
2510 if (i == fd->last_nonrect)
2511 {
2512 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2513 false, GSI_CONTINUE_LINKING);
2514 tree stopval = t;
2515 tree idx = create_tmp_reg (type, ".count");
2516 expand_omp_build_assign (gsi, idx,
2517 build_zero_cst (type), true);
29e0ad45 2518 basic_block bb_triang = NULL, bb_triang_dom = NULL;
5acef69f 2519 if (fd->first_nonrect + 1 == fd->last_nonrect
325714b4 2520 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
14707c89 2521 || fd->first_inner_iterations)
5acef69f 2522 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2523 != CODE_FOR_nothing)
2524 && !integer_zerop (fd->loop.n2))
5acef69f 2525 {
f418bd4b 2526 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
5acef69f 2527 tree itype = TREE_TYPE (fd->loops[i].v);
79c12969 2528 tree first_inner_iterations = fd->first_inner_iterations;
2529 tree factor = fd->factor;
2530 gcond *cond_stmt
2531 = gimple_build_cond (NE_EXPR, factor,
2532 build_zero_cst (TREE_TYPE (factor)),
2533 NULL_TREE, NULL_TREE);
2534 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2535 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2536 basic_block bb0 = e->src;
2537 e->flags = EDGE_TRUE_VALUE;
2538 e->probability = profile_probability::likely ();
29e0ad45 2539 bb_triang_dom = bb0;
2540 *gsi = gsi_after_labels (e->dest);
2541 tree slltype = long_long_integer_type_node;
2542 tree ulltype = long_long_unsigned_type_node;
2543 tree stopvalull = fold_convert (ulltype, stopval);
2544 stopvalull
2545 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2546 false, GSI_CONTINUE_LINKING);
2547 first_inner_iterations
2548 = fold_convert (slltype, first_inner_iterations);
2549 first_inner_iterations
2550 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2551 NULL_TREE, false,
2552 GSI_CONTINUE_LINKING);
2553 factor = fold_convert (slltype, factor);
2554 factor
2555 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2556 false, GSI_CONTINUE_LINKING);
79c12969 2557 tree first_inner_iterationsd
5acef69f 2558 = fold_build1 (FLOAT_EXPR, double_type_node,
2559 first_inner_iterations);
2560 first_inner_iterationsd
2561 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2562 NULL_TREE, false,
2563 GSI_CONTINUE_LINKING);
2564 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2565 factor);
2566 factord = force_gimple_operand_gsi (gsi, factord, true,
2567 NULL_TREE, false,
2568 GSI_CONTINUE_LINKING);
2569 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2570 stopvalull);
2571 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2572 NULL_TREE, false,
2573 GSI_CONTINUE_LINKING);
2574 /* Temporarily disable flag_rounding_math; the values will be
2575 decimal numbers divided by 2, and worst-case imprecisions
2576 due to too large values ought to be caught later by the
2577 checks for the fallback. */
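/* Added derivation of the computation below: it inverts the
   closed-form count by solving
   c * first_inner_iterations + factor * c * (c - 1) / 2 == stopval
   for the outer iteration count c, whose positive root is
   c = (sqrt (t3 * t3 + 2 * factor * stopval) - t3) / factor
   with t3 = first_inner_iterations - factor / 2, evaluated in
   double precision via IFN_SQRT.  */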
2578 int save_flag_rounding_math = flag_rounding_math;
2579 flag_rounding_math = 0;
2580 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2581 build_real (double_type_node, dconst2));
2582 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
79c12969 2583 first_inner_iterationsd, t);
2584 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2585 GSI_CONTINUE_LINKING);
2586 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2587 build_real (double_type_node, dconst2));
2588 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2589 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2590 fold_build2 (MULT_EXPR, double_type_node,
2591 t3, t3));
2592 flag_rounding_math = save_flag_rounding_math;
2593 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2594 GSI_CONTINUE_LINKING);
2595 if (flag_exceptions
2596 && cfun->can_throw_non_call_exceptions
2597 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2598 {
2599 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2600 build_zero_cst (double_type_node));
2601 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2602 false, GSI_CONTINUE_LINKING);
2603 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2604 boolean_false_node,
2605 NULL_TREE, NULL_TREE);
2606 }
2607 else
2608 cond_stmt
2609 = gimple_build_cond (LT_EXPR, t,
2610 build_zero_cst (double_type_node),
2611 NULL_TREE, NULL_TREE);
2612 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2613 e = split_block (gsi_bb (*gsi), cond_stmt);
2614 basic_block bb1 = e->src;
2615 e->flags = EDGE_FALSE_VALUE;
2616 e->probability = profile_probability::very_likely ();
2617 *gsi = gsi_after_labels (e->dest);
2618 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2619 tree sqrtr = create_tmp_var (double_type_node);
2620 gimple_call_set_lhs (call, sqrtr);
2621 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2622 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2623 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2624 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2625 tree c = create_tmp_var (ulltype);
2626 tree d = create_tmp_var (ulltype);
2627 expand_omp_build_assign (gsi, c, t, true);
2628 t = fold_build2 (MINUS_EXPR, ulltype, c,
2629 build_one_cst (ulltype));
2630 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2631 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2632 t = fold_build2 (MULT_EXPR, ulltype,
2633 fold_convert (ulltype, fd->factor), t);
2634 tree t2
2635 = fold_build2 (MULT_EXPR, ulltype, c,
2636 fold_convert (ulltype,
2637 fd->first_inner_iterations));
2638 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2639 expand_omp_build_assign (gsi, d, t, true);
2640 t = fold_build2 (MULT_EXPR, ulltype,
2641 fold_convert (ulltype, fd->factor), c);
5acef69f 2642 t = fold_build2 (PLUS_EXPR, ulltype,
2643 t, fold_convert (ulltype,
2644 fd->first_inner_iterations));
2645 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2646 GSI_CONTINUE_LINKING);
2647 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2648 NULL_TREE, NULL_TREE);
2649 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2650 e = split_block (gsi_bb (*gsi), cond_stmt);
2651 basic_block bb2 = e->src;
2652 e->flags = EDGE_TRUE_VALUE;
2653 e->probability = profile_probability::very_likely ();
2654 *gsi = gsi_after_labels (e->dest);
2655 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2656 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2657 GSI_CONTINUE_LINKING);
2658 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2659 NULL_TREE, NULL_TREE);
2660 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2661 e = split_block (gsi_bb (*gsi), cond_stmt);
2662 basic_block bb3 = e->src;
2663 e->flags = EDGE_FALSE_VALUE;
2664 e->probability = profile_probability::very_likely ();
2665 *gsi = gsi_after_labels (e->dest);
2666 t = fold_convert (itype, c);
2667 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
f418bd4b 2668 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2669 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2670 GSI_CONTINUE_LINKING);
2671 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2672 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2673 t2 = fold_convert (itype, t2);
2674 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2675 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2676 if (fd->loops[i].m1)
2677 {
2678 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2679 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2680 }
2681 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2682 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2683 bb_triang = e->src;
2684 *gsi = gsi_after_labels (e->dest);
2685 remove_edge (e);
2686 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2687 e->probability = profile_probability::very_unlikely ();
2688 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2689 e->probability = profile_probability::very_unlikely ();
2690 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2691 e->probability = profile_probability::very_unlikely ();
2692
2693 basic_block bb4 = create_empty_bb (bb0);
2694 add_bb_to_loop (bb4, bb0->loop_father);
2695 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2696 e->probability = profile_probability::unlikely ();
2697 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2698 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2699 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2700 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2701 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2702 counts[i], counts[i - 1]);
2703 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2704 GSI_CONTINUE_LINKING);
2705 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2706 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2707 t = fold_convert (itype, t);
2708 t2 = fold_convert (itype, t2);
2709 t = fold_build2 (MULT_EXPR, itype, t,
2710 fold_convert (itype, fd->loops[i].step));
2711 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2712 t2 = fold_build2 (MULT_EXPR, itype, t2,
2713 fold_convert (itype, fd->loops[i - 1].step));
2714 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2715 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2716 false, GSI_CONTINUE_LINKING);
2717 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2718 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2719 if (fd->loops[i].m1)
2720 {
2721 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2722 fd->loops[i - 1].v);
2723 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2724 }
2725 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2726 false, GSI_CONTINUE_LINKING);
2727 stmt = gimple_build_assign (fd->loops[i].v, t);
2728 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2729 }
2730 /* Fallback implementation. Evaluate the loops in between
2731 (inclusive) fd->first_nonrect and fd->last_nonrect at
2732 runtime using temporaries instead of the original iteration
2733 variables; in the body just bump the counter and compare
2734 with the desired value. */
2735 gimple_stmt_iterator gsi2 = *gsi;
2736 basic_block entry_bb = gsi_bb (gsi2);
2737 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2738 e = split_block (e->dest, (gimple *) NULL);
2739 basic_block dom_bb = NULL;
2740 basic_block cur_bb = e->src;
2741 basic_block next_bb = e->dest;
2742 entry_bb = e->dest;
2743 *gsi = gsi_after_labels (entry_bb);
2744
2745 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2746 tree n1 = NULL_TREE, n2 = NULL_TREE;
2747 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2748
2749 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2750 {
2751 tree itype = TREE_TYPE (fd->loops[j].v);
2752 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2753 && fd->loops[j].m2 == NULL_TREE
2754 && !fd->loops[j].non_rect_referenced);
2755 gsi2 = gsi_after_labels (cur_bb);
2756 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2757 if (fd->loops[j].m1)
2758 {
2759 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2760 n1 = fold_build2 (MULT_EXPR, itype,
2761 vs[j - fd->loops[j].outer], n1);
2762 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2763 }
2764 else if (rect_p)
2765 n1 = build_zero_cst (type);
2766 else
2767 n1 = t;
2768 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2769 true, GSI_SAME_STMT);
2770 if (j < fd->last_nonrect)
2771 {
2772 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2773 expand_omp_build_assign (&gsi2, vs[j], n1);
2774 }
2775 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2776 if (fd->loops[j].m2)
2777 {
2778 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2779 n2 = fold_build2 (MULT_EXPR, itype,
2780 vs[j - fd->loops[j].outer], n2);
2781 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2782 }
2783 else if (rect_p)
2784 n2 = counts[j];
2785 else
2786 n2 = t;
2787 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2788 true, GSI_SAME_STMT);
2789 if (j == fd->last_nonrect)
2790 {
2791 gcond *cond_stmt
2792 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2793 NULL_TREE, NULL_TREE);
2794 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2795 e = split_block (cur_bb, cond_stmt);
2796 e->flags = EDGE_TRUE_VALUE;
2797 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2798 e->probability = profile_probability::likely ().guessed ();
2799 ne->probability = e->probability.invert ();
2800 gsi2 = gsi_after_labels (e->dest);
2801
2802 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2803 ? -1 : 1));
2804 t = fold_build2 (PLUS_EXPR, itype,
2805 fold_convert (itype, fd->loops[j].step), t);
2806 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2807 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2808 tree step = fold_convert (itype, fd->loops[j].step);
2809 if (TYPE_UNSIGNED (itype)
2810 && fd->loops[j].cond_code == GT_EXPR)
2811 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2812 fold_build1 (NEGATE_EXPR, itype, t),
2813 fold_build1 (NEGATE_EXPR, itype, step));
2814 else
2815 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2816 t = fold_convert (type, t);
2817 t = fold_build2 (PLUS_EXPR, type, idx, t);
2818 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2819 true, GSI_SAME_STMT);
2820 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2821 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2822 cond_stmt
2823 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2824 NULL_TREE);
2825 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2826 e = split_block (gsi_bb (gsi2), cond_stmt);
2827 e->flags = EDGE_TRUE_VALUE;
2828 e->probability = profile_probability::likely ().guessed ();
2829 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2830 ne->probability = e->probability.invert ();
2831 gsi2 = gsi_after_labels (e->dest);
2832 expand_omp_build_assign (&gsi2, idx, t);
2833 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2834 break;
2835 }
2836 e = split_block (cur_bb, last_stmt (cur_bb));
2837
2838 basic_block new_cur_bb = create_empty_bb (cur_bb);
2839 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2840
2841 gsi2 = gsi_after_labels (e->dest);
2842 if (rect_p)
2843 t = fold_build2 (PLUS_EXPR, type, vs[j],
2844 build_one_cst (type));
2845 else
2846 {
2847 tree step
2848 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2849 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2850 }
2851 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2852 true, GSI_SAME_STMT);
2853 expand_omp_build_assign (&gsi2, vs[j], t);
2854
2855 edge ne = split_block (e->dest, last_stmt (e->dest));
2856 gsi2 = gsi_after_labels (ne->dest);
2857
2858 gcond *cond_stmt;
2859 if (next_bb == entry_bb)
2860 /* No need to actually check the outermost condition. */
2861 cond_stmt
2862 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2863 boolean_true_node,
2864 NULL_TREE, NULL_TREE);
2865 else
2866 cond_stmt
2867 = gimple_build_cond (rect_p ? LT_EXPR
2868 : fd->loops[j].cond_code,
2869 vs[j], n2, NULL_TREE, NULL_TREE);
2870 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2871 edge e3, e4;
2872 if (next_bb == entry_bb)
2873 {
2874 e3 = find_edge (ne->dest, next_bb);
2875 e3->flags = EDGE_FALSE_VALUE;
2876 dom_bb = ne->dest;
2877 }
2878 else
2879 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2880 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2881 e4->probability = profile_probability::likely ().guessed ();
2882 e3->probability = e4->probability.invert ();
2883 basic_block esrc = e->src;
2884 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2885 cur_bb = new_cur_bb;
2886 basic_block latch_bb = next_bb;
2887 next_bb = e->dest;
2888 remove_edge (e);
2889 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2890 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2891 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2892 }
2893 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2894 {
2895 tree itype = TREE_TYPE (fd->loops[j].v);
2896 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2897 && fd->loops[j].m2 == NULL_TREE
2898 && !fd->loops[j].non_rect_referenced);
2899 if (j == fd->last_nonrect)
2900 {
2901 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2902 t = fold_convert (itype, t);
2903 tree t2
2904 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2905 t = fold_build2 (MULT_EXPR, itype, t, t2);
2906 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2907 }
2908 else if (rect_p)
2909 {
2910 t = fold_convert (itype, vs[j]);
2911 t = fold_build2 (MULT_EXPR, itype, t,
2912 fold_convert (itype, fd->loops[j].step));
2913 if (POINTER_TYPE_P (vtype))
2914 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2915 else
2916 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2917 }
2918 else
2919 t = vs[j];
2920 t = force_gimple_operand_gsi (gsi, t, false,
2921 NULL_TREE, true,
2922 GSI_SAME_STMT);
2923 stmt = gimple_build_assign (fd->loops[j].v, t);
2924 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2925 }
2926 if (gsi_end_p (*gsi))
2927 *gsi = gsi_last_bb (gsi_bb (*gsi));
2928 else
2929 gsi_prev (gsi);
2930 if (bb_triang)
2931 {
2932 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2933 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2934 *gsi = gsi_after_labels (e->dest);
2935 if (!gsi_end_p (*gsi))
2936 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
29e0ad45 2937 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
5acef69f 2938 }
aed3ab25 2939 }
629b3d75 2940 else
2941 {
2942 t = fold_convert (itype, t);
2943 t = fold_build2 (MULT_EXPR, itype, t,
2944 fold_convert (itype, fd->loops[i].step));
2945 if (POINTER_TYPE_P (vtype))
2946 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2947 else
2948 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2949 t = force_gimple_operand_gsi (gsi, t,
2950 DECL_P (fd->loops[i].v)
2951 && TREE_ADDRESSABLE (fd->loops[i].v),
2952 NULL_TREE, false,
2953 GSI_CONTINUE_LINKING);
2954 stmt = gimple_build_assign (fd->loops[i].v, t);
2955 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2956 }
2957 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2958 {
2959 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2960 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2961 false, GSI_CONTINUE_LINKING);
2962 stmt = gimple_build_assign (tem, t);
2963 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2964 }
2965 if (i == fd->last_nonrect)
2966 i = fd->first_nonrect;
629b3d75 2967 }
2968 if (fd->non_rect)
2969 for (i = 0; i <= fd->last_nonrect; i++)
2970 if (fd->loops[i].m2)
2971 {
2972 tree itype = TREE_TYPE (fd->loops[i].v);
2973
2974 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2975 t = fold_build2 (MULT_EXPR, itype,
2976 fd->loops[i - fd->loops[i].outer].v, t);
2977 t = fold_build2 (PLUS_EXPR, itype, t,
2978 fold_convert (itype,
2979 unshare_expr (fd->loops[i].n2)));
2980 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2981 t = force_gimple_operand_gsi (gsi, t, false,
2982 NULL_TREE, false,
2983 GSI_CONTINUE_LINKING);
2984 stmt = gimple_build_assign (nonrect_bounds[i], t);
2985 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2986 }
2987}
2988
2989/* Helper function for expand_omp_for_*. Generate code like:
2990 L10:
2991 V3 += STEP3;
2992 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2993 L11:
2994 V3 = N31;
2995 V2 += STEP2;
2996 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2997 L12:
2998 V2 = N21;
2999 V1 += STEP1;
3000 goto BODY_BB;
3001 For non-rectangular loops, use temporaries stored in nonrect_bounds
3002 for the upper bounds if M?2 multiplier is present. Given e.g.
3003 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3004 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3005 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3006 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3007 do:
3008 L10:
3009 V4 += STEP4;
3010 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3011 L11:
3012 V4 = N41 + M41 * V2; // This can be left out if the loop
3013 // refers to the immediate parent loop
3014 V3 += STEP3;
3015 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3016 L12:
3017 V3 = N31;
3018 V2 += STEP2;
3019 if (V2 cond2 N22) goto L120; else goto L13;
3020 L120:
3021 V4 = N41 + M41 * V2;
3022 NONRECT_BOUND4 = N42 + M42 * V2;
3023 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3024 L13:
3025 V2 = N21;
3026 V1 += STEP1;
3027 goto L120; */
3028
3029static basic_block
3030extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3031 basic_block cont_bb, basic_block body_bb)
3032{
3033 basic_block last_bb, bb, collapse_bb = NULL;
3034 int i;
3035 gimple_stmt_iterator gsi;
3036 edge e;
3037 tree t;
3038 gimple *stmt;
3039
3040 last_bb = cont_bb;
3041 for (i = fd->collapse - 1; i >= 0; i--)
3042 {
3043 tree vtype = TREE_TYPE (fd->loops[i].v);
3044
3045 bb = create_empty_bb (last_bb);
3046 add_bb_to_loop (bb, last_bb->loop_father);
3047 gsi = gsi_start_bb (bb);
3048
3049 if (i < fd->collapse - 1)
3050 {
3051 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3052 e->probability
3053 = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75 3054
3055 struct omp_for_data_loop *l = &fd->loops[i + 1];
3056 if (l->m1 == NULL_TREE || l->outer != 1)
3057 {
3058 t = l->n1;
3059 if (l->m1)
3060 {
3061 tree t2
3062 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3063 fd->loops[i + 1 - l->outer].v, l->m1);
3064 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3065 }
3066 t = force_gimple_operand_gsi (&gsi, t,
3067 DECL_P (l->v)
3068 && TREE_ADDRESSABLE (l->v),
3069 NULL_TREE, false,
3070 GSI_CONTINUE_LINKING);
3071 stmt = gimple_build_assign (l->v, t);
3072 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3073 }
3074 }
3075 else
3076 collapse_bb = bb;
3077
3078 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3079
3080 if (POINTER_TYPE_P (vtype))
3081 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3082 else
3083 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3084 t = force_gimple_operand_gsi (&gsi, t,
3085 DECL_P (fd->loops[i].v)
3086 && TREE_ADDRESSABLE (fd->loops[i].v),
3087 NULL_TREE, false, GSI_CONTINUE_LINKING);
3088 stmt = gimple_build_assign (fd->loops[i].v, t);
3089 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3090
3091 if (fd->loops[i].non_rect_referenced)
3092 {
3093 basic_block update_bb = NULL, prev_bb = NULL;
3094 for (int j = i + 1; j <= fd->last_nonrect; j++)
3095 if (j - fd->loops[j].outer == i)
3096 {
3097 tree n1, n2;
3098 struct omp_for_data_loop *l = &fd->loops[j];
3099 basic_block this_bb = create_empty_bb (last_bb);
3100 add_bb_to_loop (this_bb, last_bb->loop_father);
3101 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3102 if (prev_bb)
3103 {
3104 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3105 e->probability
3106 = profile_probability::guessed_always ().apply_scale (7,
3107 8);
3108 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3109 }
3110 if (l->m1)
3111 {
3112 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3113 fd->loops[i].v);
3114 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3115 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3116 false,
3117 GSI_CONTINUE_LINKING);
3118 stmt = gimple_build_assign (l->v, n1);
3119 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3120 n1 = l->v;
3121 }
3122 else
3123 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3124 NULL_TREE, false,
3125 GSI_CONTINUE_LINKING);
3126 if (l->m2)
3127 {
3128 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3129 fd->loops[i].v);
3130 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3131 t, unshare_expr (l->n2));
3132 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3133 false,
3134 GSI_CONTINUE_LINKING);
3135 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3136 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3137 n2 = nonrect_bounds[j];
3138 }
3139 else
3140 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3141 true, NULL_TREE, false,
3142 GSI_CONTINUE_LINKING);
3143 gcond *cond_stmt
3144 = gimple_build_cond (l->cond_code, n1, n2,
3145 NULL_TREE, NULL_TREE);
3146 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3147 if (update_bb == NULL)
3148 update_bb = this_bb;
3149 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3150 e->probability
3151 = profile_probability::guessed_always ().apply_scale (1, 8);
3152 if (prev_bb == NULL)
29e0ad45 3153 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3154 prev_bb = this_bb;
3155 }
3156 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3157 e->probability
3158 = profile_probability::guessed_always ().apply_scale (7, 8);
3159 body_bb = update_bb;
3160 }
3161
3162 if (i > 0)
3163 {
3164 if (fd->loops[i].m2)
3165 t = nonrect_bounds[i];
3166 else
3167 t = unshare_expr (fd->loops[i].n2);
3168 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3169 false, GSI_CONTINUE_LINKING);
3170 tree v = fd->loops[i].v;
3171 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3172 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3173 false, GSI_CONTINUE_LINKING);
3174 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3175 stmt = gimple_build_cond_empty (t);
3176 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3177 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3178 expand_omp_regimplify_p, NULL, NULL)
3179 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3180 expand_omp_regimplify_p, NULL, NULL))
3181 gimple_regimplify_operands (stmt, &gsi);
629b3d75 3182 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
357067f2 3183 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3184 }
3185 else
3186 make_edge (bb, body_bb, EDGE_FALLTHRU);
aed3ab25 3187 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3188 last_bb = bb;
3189 }
3190
3191 return collapse_bb;
3192}
3193
3194/* Expand #pragma omp ordered depend(source). */
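/* In effect this emits a call to
     GOMP_doacross_post (&counts[fd->ordered])
   for the long iteration type, and to GOMP_doacross_ull_post
   otherwise.  */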
3195
3196static void
3197expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3198 tree *counts, location_t loc)
3199{
3200 enum built_in_function source_ix
3201 = fd->iter_type == long_integer_type_node
3202 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3203 gimple *g
3204 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3205 build_fold_addr_expr (counts[fd->ordered]));
3206 gimple_set_location (g, loc);
3207 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3208}
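
/* Illustrative sketch, not part of the original source: for a doacross
   loop such as

     #pragma omp for ordered(1)
     for (i = 0; i < n; i++)
       {
     #pragma omp ordered depend(source)
       }

   the depend(source) directive is replaced by a call roughly equivalent to

     GOMP_doacross_post (&.orditera[0]);

   where .orditera is the counts[fd->ordered] array holding the current
   iteration vector; GOMP_doacross_ull_post is used instead when the
   iteration type is unsigned long long.  */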
3209
3210/* Expand a single depend from #pragma omp ordered depend(sink:...). */
3211
3212static void
3213expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3214 tree *counts, tree c, location_t loc)
3215{
3216 auto_vec<tree, 10> args;
3217 enum built_in_function sink_ix
3218 = fd->iter_type == long_integer_type_node
3219 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3220 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3221 int i;
3222 gimple_stmt_iterator gsi2 = *gsi;
3223 bool warned_step = false;
3224
3225 for (i = 0; i < fd->ordered; i++)
3226 {
3227 tree step = NULL_TREE;
3228 off = TREE_PURPOSE (deps);
3229 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3230 {
3231 step = TREE_OPERAND (off, 1);
3232 off = TREE_OPERAND (off, 0);
3233 }
3234 if (!integer_zerop (off))
3235 {
3236 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3237 || fd->loops[i].cond_code == GT_EXPR);
3238 bool forward = fd->loops[i].cond_code == LT_EXPR;
3239 if (step)
3240 {
3241 /* Non-simple Fortran DO loops. If step is variable,
3242 we don't know even the direction at compile time, so
3243 we can't warn. */
3244 if (TREE_CODE (step) != INTEGER_CST)
3245 break;
3246 forward = tree_int_cst_sgn (step) != -1;
3247 }
3248 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
90a0bf4e
JJ
3249 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3250 "waiting for lexically later iteration");
629b3d75
MJ
3251 break;
3252 }
3253 deps = TREE_CHAIN (deps);
3254 }
3255 /* If all offsets corresponding to the collapsed loops are zero,
3256 this depend clause can be ignored. FIXME: but there is still a
3257 flush needed. We need to emit one __sync_synchronize () for it
3258 though (perhaps conditionally)? Solve this together with the
3259 conservative dependence folding optimization.
3260 if (i >= fd->collapse)
3261 return; */
3262
3263 deps = OMP_CLAUSE_DECL (c);
3264 gsi_prev (&gsi2);
3265 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3266 edge e2 = split_block_after_labels (e1->dest);
3267
3268 gsi2 = gsi_after_labels (e1->dest);
3269 *gsi = gsi_last_bb (e1->src);
3270 for (i = 0; i < fd->ordered; i++)
3271 {
3272 tree itype = TREE_TYPE (fd->loops[i].v);
3273 tree step = NULL_TREE;
3274 tree orig_off = NULL_TREE;
3275 if (POINTER_TYPE_P (itype))
3276 itype = sizetype;
3277 if (i)
3278 deps = TREE_CHAIN (deps);
3279 off = TREE_PURPOSE (deps);
3280 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3281 {
3282 step = TREE_OPERAND (off, 1);
3283 off = TREE_OPERAND (off, 0);
3284 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3285 && integer_onep (fd->loops[i].step)
3286 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3287 }
3288 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3289 if (step)
3290 {
3291 off = fold_convert_loc (loc, itype, off);
3292 orig_off = off;
3293 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3294 }
3295
3296 if (integer_zerop (off))
3297 t = boolean_true_node;
3298 else
3299 {
3300 tree a;
3301 tree co = fold_convert_loc (loc, itype, off);
3302 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3303 {
3304 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3305 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3306 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3307 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3308 co);
3309 }
3310 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3311 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3312 fd->loops[i].v, co);
3313 else
3314 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3315 fd->loops[i].v, co);
3316 if (step)
3317 {
3318 tree t1, t2;
3319 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3320 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3321 fd->loops[i].n1);
3322 else
3323 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3324 fd->loops[i].n2);
3325 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3326 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3327 fd->loops[i].n2);
3328 else
3329 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3330 fd->loops[i].n1);
3331 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3332 step, build_int_cst (TREE_TYPE (step), 0));
3333 if (TREE_CODE (step) != INTEGER_CST)
3334 {
3335 t1 = unshare_expr (t1);
3336 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3337 false, GSI_CONTINUE_LINKING);
3338 t2 = unshare_expr (t2);
3339 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3340 false, GSI_CONTINUE_LINKING);
3341 }
3342 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3343 t, t2, t1);
3344 }
3345 else if (fd->loops[i].cond_code == LT_EXPR)
3346 {
3347 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3348 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3349 fd->loops[i].n1);
3350 else
3351 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3352 fd->loops[i].n2);
3353 }
3354 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3355 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3356 fd->loops[i].n2);
3357 else
3358 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3359 fd->loops[i].n1);
3360 }
3361 if (cond)
3362 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3363 else
3364 cond = t;
3365
3366 off = fold_convert_loc (loc, itype, off);
3367
3368 if (step
3369 || (fd->loops[i].cond_code == LT_EXPR
3370 ? !integer_onep (fd->loops[i].step)
3371 : !integer_minus_onep (fd->loops[i].step)))
3372 {
3373 if (step == NULL_TREE
3374 && TYPE_UNSIGNED (itype)
3375 && fd->loops[i].cond_code == GT_EXPR)
3376 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3377 fold_build1_loc (loc, NEGATE_EXPR, itype,
3378 s));
3379 else
3380 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3381 orig_off ? orig_off : off, s);
3382 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3383 build_int_cst (itype, 0));
3384 if (integer_zerop (t) && !warned_step)
3385 {
90a0bf4e
JJ
3386 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3387 "refers to iteration never in the iteration "
3388 "space");
629b3d75
MJ
3389 warned_step = true;
3390 }
3391 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3392 cond, t);
3393 }
3394
3395 if (i <= fd->collapse - 1 && fd->collapse > 1)
3396 t = fd->loop.v;
3397 else if (counts[i])
3398 t = counts[i];
3399 else
3400 {
3401 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3402 fd->loops[i].v, fd->loops[i].n1);
3403 t = fold_convert_loc (loc, fd->iter_type, t);
3404 }
3405 if (step)
3406 /* OFF has already been divided by STEP above. */;
3407 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3408 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3409 fold_build1_loc (loc, NEGATE_EXPR, itype,
3410 s));
3411 else
3412 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3413 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3414 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3415 off = fold_convert_loc (loc, fd->iter_type, off);
3416 if (i <= fd->collapse - 1 && fd->collapse > 1)
3417 {
3418 if (i)
3419 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3420 off);
3421 if (i < fd->collapse - 1)
3422 {
3423 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3424 counts[i]);
3425 continue;
3426 }
3427 }
3428 off = unshare_expr (off);
3429 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3430 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3431 true, GSI_SAME_STMT);
3432 args.safe_push (t);
3433 }
3434 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3435 gimple_set_location (g, loc);
3436 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3437
3438 cond = unshare_expr (cond);
3439 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3440 GSI_CONTINUE_LINKING);
3441 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3442 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
357067f2
JH
3443 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3444 e1->probability = e3->probability.invert ();
629b3d75
MJ
3445 e1->flags = EDGE_TRUE_VALUE;
3446 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3447
3448 *gsi = gsi_after_labels (e2->dest);
3449}
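
/* Illustrative sketch, not part of the original source: with the loop
   above, a directive such as

     #pragma omp ordered depend(sink: i - 1)

   becomes roughly

     if (i - 1 >= n1)                 (COND: the sink iteration exists)
       GOMP_doacross_wait (<iteration number of i - 1>);

   i.e. a call to GOMP_doacross_wait (GOMP_doacross_ull_wait for unsigned
   long long) with one argument per ordered dimension, executed only when
   COND proves the referenced iteration lies inside the iteration space.  */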
3450
3451/* Expand all #pragma omp ordered depend(source) and
3452 #pragma omp ordered depend(sink:...) constructs in the current
3453 #pragma omp for ordered(n) region. */
3454
3455static void
3456expand_omp_ordered_source_sink (struct omp_region *region,
3457 struct omp_for_data *fd, tree *counts,
3458 basic_block cont_bb)
3459{
3460 struct omp_region *inner;
3461 int i;
3462 for (i = fd->collapse - 1; i < fd->ordered; i++)
3463 if (i == fd->collapse - 1 && fd->collapse > 1)
3464 counts[i] = NULL_TREE;
3465 else if (i >= fd->collapse && !cont_bb)
3466 counts[i] = build_zero_cst (fd->iter_type);
3467 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3468 && integer_onep (fd->loops[i].step))
3469 counts[i] = NULL_TREE;
3470 else
3471 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3472 tree atype
3473 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3474 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3475 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3476
3477 for (inner = region->inner; inner; inner = inner->next)
3478 if (inner->type == GIMPLE_OMP_ORDERED)
3479 {
3480 gomp_ordered *ord_stmt = inner->ord_stmt;
3481 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3482 location_t loc = gimple_location (ord_stmt);
3483 tree c;
3484 for (c = gimple_omp_ordered_clauses (ord_stmt);
3485 c; c = OMP_CLAUSE_CHAIN (c))
3486 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3487 break;
3488 if (c)
3489 expand_omp_ordered_source (&gsi, fd, counts, loc);
3490 for (c = gimple_omp_ordered_clauses (ord_stmt);
3491 c; c = OMP_CLAUSE_CHAIN (c))
3492 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3493 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3494 gsi_remove (&gsi, true);
3495 }
3496}
3497
3498/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3499 collapsed. */
3500
3501static basic_block
3502expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3503 basic_block cont_bb, basic_block body_bb,
3504 bool ordered_lastprivate)
3505{
3506 if (fd->ordered == fd->collapse)
3507 return cont_bb;
3508
3509 if (!cont_bb)
3510 {
3511 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3512 for (int i = fd->collapse; i < fd->ordered; i++)
3513 {
3514 tree type = TREE_TYPE (fd->loops[i].v);
3515 tree n1 = fold_convert (type, fd->loops[i].n1);
3516 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3517 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3518 size_int (i - fd->collapse + 1),
3519 NULL_TREE, NULL_TREE);
3520 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3521 }
3522 return NULL;
3523 }
3524
3525 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3526 {
3527 tree t, type = TREE_TYPE (fd->loops[i].v);
3528 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3529 expand_omp_build_assign (&gsi, fd->loops[i].v,
3530 fold_convert (type, fd->loops[i].n1));
3531 if (counts[i])
3532 expand_omp_build_assign (&gsi, counts[i],
3533 build_zero_cst (fd->iter_type));
3534 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3535 size_int (i - fd->collapse + 1),
3536 NULL_TREE, NULL_TREE);
3537 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3538 if (!gsi_end_p (gsi))
3539 gsi_prev (&gsi);
3540 else
3541 gsi = gsi_last_bb (body_bb);
3542 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3543 basic_block new_body = e1->dest;
3544 if (body_bb == cont_bb)
3545 cont_bb = new_body;
3546 edge e2 = NULL;
3547 basic_block new_header;
3548 if (EDGE_COUNT (cont_bb->preds) > 0)
3549 {
3550 gsi = gsi_last_bb (cont_bb);
3551 if (POINTER_TYPE_P (type))
3552 t = fold_build_pointer_plus (fd->loops[i].v,
3553 fold_convert (sizetype,
3554 fd->loops[i].step));
3555 else
3556 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3557 fold_convert (type, fd->loops[i].step));
3558 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3559 if (counts[i])
3560 {
3561 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3562 build_int_cst (fd->iter_type, 1));
3563 expand_omp_build_assign (&gsi, counts[i], t);
3564 t = counts[i];
3565 }
3566 else
3567 {
3568 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3569 fd->loops[i].v, fd->loops[i].n1);
3570 t = fold_convert (fd->iter_type, t);
3571 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3572 true, GSI_SAME_STMT);
3573 }
3574 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3575 size_int (i - fd->collapse + 1),
3576 NULL_TREE, NULL_TREE);
3577 expand_omp_build_assign (&gsi, aref, t);
3578 gsi_prev (&gsi);
3579 e2 = split_block (cont_bb, gsi_stmt (gsi));
3580 new_header = e2->dest;
3581 }
3582 else
3583 new_header = cont_bb;
3584 gsi = gsi_after_labels (new_header);
3585 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3586 true, GSI_SAME_STMT);
3587 tree n2
3588 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3589 true, NULL_TREE, true, GSI_SAME_STMT);
3590 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3591 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3592 edge e3 = split_block (new_header, gsi_stmt (gsi));
3593 cont_bb = e3->dest;
3594 remove_edge (e1);
3595 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3596 e3->flags = EDGE_FALSE_VALUE;
357067f2 3597 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75 3598 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
357067f2 3599 e1->probability = e3->probability.invert ();
629b3d75
MJ
3600
3601 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3602 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3603
3604 if (e2)
3605 {
99b1c316 3606 class loop *loop = alloc_loop ();
629b3d75
MJ
3607 loop->header = new_header;
3608 loop->latch = e2->src;
3609 add_loop (loop, body_bb->loop_father);
3610 }
3611 }
3612
3613 /* If there are any lastprivate clauses and it is possible some loops
3614 might have zero iterations, ensure all the decls are initialized,
3615 otherwise we could crash evaluating C++ class iterators with lastprivate
3616 clauses. */
3617 bool need_inits = false;
3618 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3619 if (need_inits)
3620 {
3621 tree type = TREE_TYPE (fd->loops[i].v);
3622 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3623 expand_omp_build_assign (&gsi, fd->loops[i].v,
3624 fold_convert (type, fd->loops[i].n1));
3625 }
3626 else
3627 {
3628 tree type = TREE_TYPE (fd->loops[i].v);
3629 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3630 boolean_type_node,
3631 fold_convert (type, fd->loops[i].n1),
3632 fold_convert (type, fd->loops[i].n2));
3633 if (!integer_onep (this_cond))
3634 need_inits = true;
3635 }
3636
3637 return cont_bb;
3638}
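
/* Illustrative sketch, not part of the original source: for ordered(2)
   collapse(1), only the first loop is workshared, so the body above is
   wrapped as

     for (V1 = ...; ...)                         (workshared dimension)
       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
         {
           .orditera[1] = <iteration count of V2>;
           BODY;
         }

   and a fresh loop struct is allocated for each such wrapping loop.  */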
3639
3640/* A subroutine of expand_omp_for. Generate code for a parallel
3641 loop with any schedule. Given parameters:
3642
3643 for (V = N1; V cond N2; V += STEP) BODY;
3644
3645 where COND is "<" or ">", we generate pseudocode
3646
3647 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3648 if (more) goto L0; else goto L3;
3649 L0:
3650 V = istart0;
3651 iend = iend0;
3652 L1:
3653 BODY;
3654 V += STEP;
3655 if (V cond iend) goto L1; else goto L2;
3656 L2:
3657 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3658 L3:
3659
3660 If this is a combined omp parallel loop, instead of the call to
3661 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3662 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3663 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3664 inner GIMPLE_OMP_FOR, and the statements V += STEP; and
3665 if (V cond iend) goto L1; else goto L2; are removed.
3666
3667 For collapsed loops, given parameters:
3668 collapse(3)
3669 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3670 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3671 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3672 BODY;
3673
3674 we generate pseudocode
3675
3676 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3677 if (cond3 is <)
3678 adj = STEP3 - 1;
3679 else
3680 adj = STEP3 + 1;
3681 count3 = (adj + N32 - N31) / STEP3;
3682 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3683 if (cond2 is <)
3684 adj = STEP2 - 1;
3685 else
3686 adj = STEP2 + 1;
3687 count2 = (adj + N22 - N21) / STEP2;
3688 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3689 if (cond1 is <)
3690 adj = STEP1 - 1;
3691 else
3692 adj = STEP1 + 1;
3693 count1 = (adj + N12 - N11) / STEP1;
3694 count = count1 * count2 * count3;
3695 goto Z1;
3696 Z0:
3697 count = 0;
3698 Z1:
3699 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3700 if (more) goto L0; else goto L3;
3701 L0:
3702 V = istart0;
3703 T = V;
3704 V3 = N31 + (T % count3) * STEP3;
3705 T = T / count3;
3706 V2 = N21 + (T % count2) * STEP2;
3707 T = T / count2;
3708 V1 = N11 + T * STEP1;
3709 iend = iend0;
3710 L1:
3711 BODY;
3712 V += 1;
3713 if (V < iend) goto L10; else goto L2;
3714 L10:
3715 V3 += STEP3;
3716 if (V3 cond3 N32) goto L1; else goto L11;
3717 L11:
3718 V3 = N31;
3719 V2 += STEP2;
3720 if (V2 cond2 N22) goto L1; else goto L12;
3721 L12:
3722 V2 = N21;
3723 V1 += STEP1;
3724 goto L1;
3725 L2:
3726 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3727 L3:
3728
3729 */
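
/* Illustrative sketch, not part of the original source: for

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   START_FN/NEXT_FN are GOMP_loop_dynamic_start/GOMP_loop_dynamic_next and
   the generated control flow corresponds to

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
         for (i = istart0; i < iend0; i++)
           body (i);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();            (or GOMP_loop_end_nowait on nowait)
  */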
3730
3731static void
3732expand_omp_for_generic (struct omp_region *region,
3733 struct omp_for_data *fd,
3734 enum built_in_function start_fn,
3735 enum built_in_function next_fn,
28567c40 3736 tree sched_arg,
629b3d75
MJ
3737 gimple *inner_stmt)
3738{
3739 tree type, istart0, iend0, iend;
3740 tree t, vmain, vback, bias = NULL_TREE;
3741 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3742 basic_block l2_bb = NULL, l3_bb = NULL;
3743 gimple_stmt_iterator gsi;
3744 gassign *assign_stmt;
3745 bool in_combined_parallel = is_combined_parallel (region);
3746 bool broken_loop = region->cont == NULL;
3747 edge e, ne;
3748 tree *counts = NULL;
3749 int i;
3750 bool ordered_lastprivate = false;
3751
3752 gcc_assert (!broken_loop || !in_combined_parallel);
3753 gcc_assert (fd->iter_type == long_integer_type_node
3754 || !in_combined_parallel);
3755
3756 entry_bb = region->entry;
3757 cont_bb = region->cont;
3758 collapse_bb = NULL;
3759 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3760 gcc_assert (broken_loop
3761 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3762 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3763 l1_bb = single_succ (l0_bb);
3764 if (!broken_loop)
3765 {
3766 l2_bb = create_empty_bb (cont_bb);
3767 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3768 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3769 == l1_bb));
3770 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3771 }
3772 else
3773 l2_bb = NULL;
3774 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3775 exit_bb = region->exit;
3776
65f4b875 3777 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
3778
3779 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3780 if (fd->ordered
6c7ae8c5 3781 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
629b3d75
MJ
3782 OMP_CLAUSE_LASTPRIVATE))
3783 ordered_lastprivate = true;
28567c40 3784 tree reductions = NULL_TREE;
6c7ae8c5
JJ
3785 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3786 tree memv = NULL_TREE;
8221c30b
JJ
3787 if (fd->lastprivate_conditional)
3788 {
3789 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3790 OMP_CLAUSE__CONDTEMP_);
3791 if (fd->have_pointer_condtemp)
3792 condtemp = OMP_CLAUSE_DECL (c);
3793 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3794 cond_var = OMP_CLAUSE_DECL (c);
3795 }
28567c40
JJ
3796 if (sched_arg)
3797 {
3798 if (fd->have_reductemp)
3799 {
6c7ae8c5 3800 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
28567c40
JJ
3801 OMP_CLAUSE__REDUCTEMP_);
3802 reductions = OMP_CLAUSE_DECL (c);
3803 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3804 gimple *g = SSA_NAME_DEF_STMT (reductions);
3805 reductions = gimple_assign_rhs1 (g);
3806 OMP_CLAUSE_DECL (c) = reductions;
3807 entry_bb = gimple_bb (g);
3808 edge e = split_block (entry_bb, g);
3809 if (region->entry == entry_bb)
3810 region->entry = e->dest;
3811 gsi = gsi_last_bb (entry_bb);
3812 }
3813 else
3814 reductions = null_pointer_node;
8221c30b 3815 if (fd->have_pointer_condtemp)
6c7ae8c5 3816 {
6c7ae8c5
JJ
3817 tree type = TREE_TYPE (condtemp);
3818 memv = create_tmp_var (type);
3819 TREE_ADDRESSABLE (memv) = 1;
3820 unsigned HOST_WIDE_INT sz
3821 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3822 sz *= fd->lastprivate_conditional;
3823 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3824 false);
3825 mem = build_fold_addr_expr (memv);
3826 }
3827 else
3828 mem = null_pointer_node;
28567c40 3829 }
629b3d75
MJ
3830 if (fd->collapse > 1 || fd->ordered)
3831 {
3832 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3833 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3834
3835 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3836 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3837 zero_iter1_bb, first_zero_iter1,
3838 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3839
3840 if (zero_iter1_bb)
3841 {
3842 /* Some counts[i] vars might be uninitialized if
3843 some loop has zero iterations. But the body shouldn't
3844 be executed in that case, so just avoid uninit warnings. */
3845 for (i = first_zero_iter1;
3846 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3847 if (SSA_VAR_P (counts[i]))
3848 TREE_NO_WARNING (counts[i]) = 1;
3849 gsi_prev (&gsi);
3850 e = split_block (entry_bb, gsi_stmt (gsi));
3851 entry_bb = e->dest;
3852 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 3853 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
3854 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3855 get_immediate_dominator (CDI_DOMINATORS,
3856 zero_iter1_bb));
3857 }
3858 if (zero_iter2_bb)
3859 {
3860 /* Some counts[i] vars might be uninitialized if
3861 some loop has zero iterations. But the body shouldn't
3862 be executed in that case, so just avoid uninit warnings. */
3863 for (i = first_zero_iter2; i < fd->ordered; i++)
3864 if (SSA_VAR_P (counts[i]))
3865 TREE_NO_WARNING (counts[i]) = 1;
3866 if (zero_iter1_bb)
3867 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3868 else
3869 {
3870 gsi_prev (&gsi);
3871 e = split_block (entry_bb, gsi_stmt (gsi));
3872 entry_bb = e->dest;
3873 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 3874 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
3875 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3876 get_immediate_dominator
3877 (CDI_DOMINATORS, zero_iter2_bb));
3878 }
3879 }
3880 if (fd->collapse == 1)
3881 {
3882 counts[0] = fd->loop.n2;
3883 fd->loop = fd->loops[0];
3884 }
3885 }
3886
3887 type = TREE_TYPE (fd->loop.v);
3888 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3889 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3890 TREE_ADDRESSABLE (istart0) = 1;
3891 TREE_ADDRESSABLE (iend0) = 1;
3892
3893 /* See if we need to bias by LLONG_MIN. */
3894 if (fd->iter_type == long_long_unsigned_type_node
3895 && TREE_CODE (type) == INTEGER_TYPE
3896 && !TYPE_UNSIGNED (type)
3897 && fd->ordered == 0)
3898 {
3899 tree n1, n2;
3900
3901 if (fd->loop.cond_code == LT_EXPR)
3902 {
3903 n1 = fd->loop.n1;
3904 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3905 }
3906 else
3907 {
3908 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3909 n2 = fd->loop.n1;
3910 }
3911 if (TREE_CODE (n1) != INTEGER_CST
3912 || TREE_CODE (n2) != INTEGER_CST
3913 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3914 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3915 }
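/* Illustrative note, not from the original source: e.g. for a signed
   long long iterator running from -4 to 4 with iter_type unsigned long
   long, N1 and N2 have different signs, so BIAS = LLONG_MIN is added to
   both bounds here; the runtime then works on the shifted, consistently
   ordered range, and the bias is subtracted again below when computing
   the value of V from istart0/iend0.  */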
3916
3917 gimple_stmt_iterator gsif = gsi;
3918 gsi_prev (&gsif);
3919
3920 tree arr = NULL_TREE;
3921 if (in_combined_parallel)
3922 {
3923 gcc_assert (fd->ordered == 0);
3924 /* In a combined parallel loop, emit a call to
3925 GOMP_loop_foo_next. */
3926 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3927 build_fold_addr_expr (istart0),
3928 build_fold_addr_expr (iend0));
3929 }
3930 else
3931 {
3932 tree t0, t1, t2, t3, t4;
3933 /* If this is not a combined parallel loop, emit a call to
3934 GOMP_loop_foo_start in ENTRY_BB. */
3935 t4 = build_fold_addr_expr (iend0);
3936 t3 = build_fold_addr_expr (istart0);
3937 if (fd->ordered)
3938 {
3939 t0 = build_int_cst (unsigned_type_node,
3940 fd->ordered - fd->collapse + 1);
3941 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3942 fd->ordered
3943 - fd->collapse + 1),
3944 ".omp_counts");
3945 DECL_NAMELESS (arr) = 1;
3946 TREE_ADDRESSABLE (arr) = 1;
3947 TREE_STATIC (arr) = 1;
3948 vec<constructor_elt, va_gc> *v;
3949 vec_alloc (v, fd->ordered - fd->collapse + 1);
3950 int idx;
3951
3952 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3953 {
3954 tree c;
3955 if (idx == 0 && fd->collapse > 1)
3956 c = fd->loop.n2;
3957 else
3958 c = counts[idx + fd->collapse - 1];
3959 tree purpose = size_int (idx);
3960 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3961 if (TREE_CODE (c) != INTEGER_CST)
3962 TREE_STATIC (arr) = 0;
3963 }
3964
3965 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3966 if (!TREE_STATIC (arr))
3967 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3968 void_type_node, arr),
3969 true, NULL_TREE, true, GSI_SAME_STMT);
3970 t1 = build_fold_addr_expr (arr);
3971 t2 = NULL_TREE;
3972 }
3973 else
3974 {
3975 t2 = fold_convert (fd->iter_type, fd->loop.step);
3976 t1 = fd->loop.n2;
3977 t0 = fd->loop.n1;
3978 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3979 {
3980 tree innerc
3981 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3982 OMP_CLAUSE__LOOPTEMP_);
3983 gcc_assert (innerc);
3984 t0 = OMP_CLAUSE_DECL (innerc);
3985 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3986 OMP_CLAUSE__LOOPTEMP_);
3987 gcc_assert (innerc);
3988 t1 = OMP_CLAUSE_DECL (innerc);
3989 }
3990 if (POINTER_TYPE_P (TREE_TYPE (t0))
3991 && TYPE_PRECISION (TREE_TYPE (t0))
3992 != TYPE_PRECISION (fd->iter_type))
3993 {
3994 /* Avoid casting pointers to integer of a different size. */
3995 tree itype = signed_type_for (type);
3996 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3997 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3998 }
3999 else
4000 {
4001 t1 = fold_convert (fd->iter_type, t1);
4002 t0 = fold_convert (fd->iter_type, t0);
4003 }
4004 if (bias)
4005 {
4006 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4007 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4008 }
4009 }
4010 if (fd->iter_type == long_integer_type_node || fd->ordered)
4011 {
4012 if (fd->chunk_size)
4013 {
4014 t = fold_convert (fd->iter_type, fd->chunk_size);
4015 t = omp_adjust_chunk_size (t, fd->simd_schedule);
28567c40
JJ
4016 if (sched_arg)
4017 {
4018 if (fd->ordered)
4019 t = build_call_expr (builtin_decl_explicit (start_fn),
4020 8, t0, t1, sched_arg, t, t3, t4,
4021 reductions, mem);
4022 else
4023 t = build_call_expr (builtin_decl_explicit (start_fn),
4024 9, t0, t1, t2, sched_arg, t, t3, t4,
4025 reductions, mem);
4026 }
4027 else if (fd->ordered)
629b3d75
MJ
4028 t = build_call_expr (builtin_decl_explicit (start_fn),
4029 5, t0, t1, t, t3, t4);
4030 else
4031 t = build_call_expr (builtin_decl_explicit (start_fn),
4032 6, t0, t1, t2, t, t3, t4);
4033 }
4034 else if (fd->ordered)
4035 t = build_call_expr (builtin_decl_explicit (start_fn),
4036 4, t0, t1, t3, t4);
4037 else
4038 t = build_call_expr (builtin_decl_explicit (start_fn),
4039 5, t0, t1, t2, t3, t4);
4040 }
4041 else
4042 {
4043 tree t5;
4044 tree c_bool_type;
4045 tree bfn_decl;
4046
4047 /* The GOMP_loop_ull_*start functions have an additional boolean
4048 argument, true for < loops and false for > loops.
4049 In Fortran, the C bool type can be different from
4050 boolean_type_node. */
4051 bfn_decl = builtin_decl_explicit (start_fn);
4052 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4053 t5 = build_int_cst (c_bool_type,
4054 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4055 if (fd->chunk_size)
4056 {
4057 tree bfn_decl = builtin_decl_explicit (start_fn);
4058 t = fold_convert (fd->iter_type, fd->chunk_size);
4059 t = omp_adjust_chunk_size (t, fd->simd_schedule);
28567c40
JJ
4060 if (sched_arg)
4061 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4062 t, t3, t4, reductions, mem);
4063 else
4064 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
629b3d75
MJ
4065 }
4066 else
4067 t = build_call_expr (builtin_decl_explicit (start_fn),
4068 6, t5, t0, t1, t2, t3, t4);
4069 }
4070 }
4071 if (TREE_TYPE (t) != boolean_type_node)
4072 t = fold_build2 (NE_EXPR, boolean_type_node,
4073 t, build_int_cst (TREE_TYPE (t), 0));
4074 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
01914336 4075 true, GSI_SAME_STMT);
629b3d75
MJ
4076 if (arr && !TREE_STATIC (arr))
4077 {
25b45c7c 4078 tree clobber = build_clobber (TREE_TYPE (arr));
629b3d75
MJ
4079 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4080 GSI_SAME_STMT);
4081 }
8221c30b 4082 if (fd->have_pointer_condtemp)
6c7ae8c5 4083 expand_omp_build_assign (&gsi, condtemp, memv, false);
28567c40
JJ
4084 if (fd->have_reductemp)
4085 {
4086 gimple *g = gsi_stmt (gsi);
4087 gsi_remove (&gsi, true);
4088 release_ssa_name (gimple_assign_lhs (g));
4089
4090 entry_bb = region->entry;
4091 gsi = gsi_last_nondebug_bb (entry_bb);
4092
4093 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4094 }
629b3d75
MJ
4095 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4096
4097 /* Remove the GIMPLE_OMP_FOR statement. */
4098 gsi_remove (&gsi, true);
4099
4100 if (gsi_end_p (gsif))
4101 gsif = gsi_after_labels (gsi_bb (gsif));
4102 gsi_next (&gsif);
4103
4104 /* Iteration setup for sequential loop goes in L0_BB. */
4105 tree startvar = fd->loop.v;
4106 tree endvar = NULL_TREE;
4107
4108 if (gimple_omp_for_combined_p (fd->for_stmt))
4109 {
4110 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4111 && gimple_omp_for_kind (inner_stmt)
4112 == GF_OMP_FOR_KIND_SIMD);
4113 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4114 OMP_CLAUSE__LOOPTEMP_);
4115 gcc_assert (innerc);
4116 startvar = OMP_CLAUSE_DECL (innerc);
4117 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4118 OMP_CLAUSE__LOOPTEMP_);
4119 gcc_assert (innerc);
4120 endvar = OMP_CLAUSE_DECL (innerc);
4121 }
4122
4123 gsi = gsi_start_bb (l0_bb);
4124 t = istart0;
4125 if (fd->ordered && fd->collapse == 1)
4126 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4127 fold_convert (fd->iter_type, fd->loop.step));
4128 else if (bias)
4129 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4130 if (fd->ordered && fd->collapse == 1)
4131 {
4132 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4133 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4134 fd->loop.n1, fold_convert (sizetype, t));
4135 else
4136 {
4137 t = fold_convert (TREE_TYPE (startvar), t);
4138 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4139 fd->loop.n1, t);
4140 }
4141 }
4142 else
4143 {
4144 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4145 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4146 t = fold_convert (TREE_TYPE (startvar), t);
4147 }
4148 t = force_gimple_operand_gsi (&gsi, t,
4149 DECL_P (startvar)
4150 && TREE_ADDRESSABLE (startvar),
4151 NULL_TREE, false, GSI_CONTINUE_LINKING);
4152 assign_stmt = gimple_build_assign (startvar, t);
4153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6c7ae8c5
JJ
4154 if (cond_var)
4155 {
4156 tree itype = TREE_TYPE (cond_var);
4157 /* For the lastprivate(conditional:) itervar, we need an iteration
4158 counter that starts at a non-zero unsigned value and increases.
4159 Prefer as few IVs as possible, so if we can use startvar
4160 itself, use that, or startvar + constant (those would be
4161 incremented with step), and as a last resort use s0 + 1,
4162 incremented by 1. */
4163 if ((fd->ordered && fd->collapse == 1)
4164 || bias
4165 || POINTER_TYPE_P (type)
4166 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4167 || fd->loop.cond_code != LT_EXPR)
4168 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4169 build_int_cst (itype, 1));
4170 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4171 t = fold_convert (itype, t);
4172 else
4173 {
4174 tree c = fold_convert (itype, fd->loop.n1);
4175 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4176 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4177 }
4178 t = force_gimple_operand_gsi (&gsi, t, false,
4179 NULL_TREE, false, GSI_CONTINUE_LINKING);
4180 assign_stmt = gimple_build_assign (cond_var, t);
4181 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4182 }
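/* Illustrative note, not from the original source: e.g. for
   for (i = 0; i < n; i++) with lastprivate(conditional:), N1 is the
   constant 0, so C = 1 - 0 above and COND_VAR starts as startvar + 1:
   a counter that begins at the non-zero value 1 and is later bumped by
   STEP together with the iterator, which is all the conditional
   lastprivate bookkeeping needs.  */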
629b3d75
MJ
4183
4184 t = iend0;
4185 if (fd->ordered && fd->collapse == 1)
4186 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4187 fold_convert (fd->iter_type, fd->loop.step));
4188 else if (bias)
4189 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4190 if (fd->ordered && fd->collapse == 1)
4191 {
4192 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4193 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4194 fd->loop.n1, fold_convert (sizetype, t));
4195 else
4196 {
4197 t = fold_convert (TREE_TYPE (startvar), t);
4198 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4199 fd->loop.n1, t);
4200 }
4201 }
4202 else
4203 {
4204 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4205 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4206 t = fold_convert (TREE_TYPE (startvar), t);
4207 }
4208 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4209 false, GSI_CONTINUE_LINKING);
4210 if (endvar)
4211 {
4212 assign_stmt = gimple_build_assign (endvar, iend);
4213 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4214 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4215 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4216 else
4217 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4218 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4219 }
4220 /* Handle linear clause adjustments. */
4221 tree itercnt = NULL_TREE;
4222 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4223 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4224 c; c = OMP_CLAUSE_CHAIN (c))
4225 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4226 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4227 {
4228 tree d = OMP_CLAUSE_DECL (c);
4229 bool is_ref = omp_is_reference (d);
4230 tree t = d, a, dest;
4231 if (is_ref)
4232 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4233 tree type = TREE_TYPE (t);
4234 if (POINTER_TYPE_P (type))
4235 type = sizetype;
4236 dest = unshare_expr (t);
4237 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4238 expand_omp_build_assign (&gsif, v, t);
4239 if (itercnt == NULL_TREE)
4240 {
4241 itercnt = startvar;
4242 tree n1 = fd->loop.n1;
4243 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4244 {
4245 itercnt
4246 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4247 itercnt);
4248 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4249 }
4250 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4251 itercnt, n1);
4252 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4253 itercnt, fd->loop.step);
4254 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4255 NULL_TREE, false,
4256 GSI_CONTINUE_LINKING);
4257 }
4258 a = fold_build2 (MULT_EXPR, type,
4259 fold_convert (type, itercnt),
4260 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4261 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4262 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4263 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4264 false, GSI_CONTINUE_LINKING);
6fcc3cac 4265 expand_omp_build_assign (&gsi, dest, t, true);
629b3d75
MJ
4266 }
4267 if (fd->collapse > 1)
aed3ab25 4268 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
629b3d75
MJ
4269
4270 if (fd->ordered)
4271 {
4272 /* Until now, counts array contained number of iterations or
4273 variable containing it for ith loop. From now on, we need
4274 those counts only for collapsed loops, and only for the 2nd
4275 till the last collapsed one. Move those one element earlier,
4276 we'll use counts[fd->collapse - 1] for the first source/sink
4277 iteration counter and so on and counts[fd->ordered]
4278 as the array holding the current counter values for
4279 depend(source). */
4280 if (fd->collapse > 1)
4281 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4282 if (broken_loop)
4283 {
4284 int i;
4285 for (i = fd->collapse; i < fd->ordered; i++)
4286 {
4287 tree type = TREE_TYPE (fd->loops[i].v);
4288 tree this_cond
4289 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4290 fold_convert (type, fd->loops[i].n1),
4291 fold_convert (type, fd->loops[i].n2));
4292 if (!integer_onep (this_cond))
4293 break;
4294 }
4295 if (i < fd->ordered)
4296 {
4297 cont_bb
4298 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4299 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4300 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4301 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4302 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4303 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4304 make_edge (cont_bb, l1_bb, 0);
4305 l2_bb = create_empty_bb (cont_bb);
4306 broken_loop = false;
4307 }
4308 }
4309 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4310 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4311 ordered_lastprivate);
4312 if (counts[fd->collapse - 1])
4313 {
4314 gcc_assert (fd->collapse == 1);
4315 gsi = gsi_last_bb (l0_bb);
4316 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4317 istart0, true);
a2a17ae7
JJ
4318 if (cont_bb)
4319 {
4320 gsi = gsi_last_bb (cont_bb);
4321 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4322 counts[fd->collapse - 1],
4323 build_int_cst (fd->iter_type, 1));
4324 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4325 tree aref = build4 (ARRAY_REF, fd->iter_type,
4326 counts[fd->ordered], size_zero_node,
4327 NULL_TREE, NULL_TREE);
4328 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4329 }
629b3d75
MJ
4330 t = counts[fd->collapse - 1];
4331 }
4332 else if (fd->collapse > 1)
4333 t = fd->loop.v;
4334 else
4335 {
4336 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4337 fd->loops[0].v, fd->loops[0].n1);
4338 t = fold_convert (fd->iter_type, t);
4339 }
4340 gsi = gsi_last_bb (l0_bb);
4341 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4342 size_zero_node, NULL_TREE, NULL_TREE);
4343 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4344 false, GSI_CONTINUE_LINKING);
4345 expand_omp_build_assign (&gsi, aref, t, true);
4346 }
4347
4348 if (!broken_loop)
4349 {
4350 /* Code to control the increment and predicate for the sequential
4351 loop goes in the CONT_BB. */
65f4b875 4352 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
4353 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4354 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4355 vmain = gimple_omp_continue_control_use (cont_stmt);
4356 vback = gimple_omp_continue_control_def (cont_stmt);
4357
7855700e
JJ
4358 if (cond_var)
4359 {
4360 tree itype = TREE_TYPE (cond_var);
4361 tree t2;
4362 if ((fd->ordered && fd->collapse == 1)
4363 || bias
4364 || POINTER_TYPE_P (type)
4365 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4366 || fd->loop.cond_code != LT_EXPR)
4367 t2 = build_int_cst (itype, 1);
4368 else
4369 t2 = fold_convert (itype, fd->loop.step);
4370 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4371 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4372 NULL_TREE, true, GSI_SAME_STMT);
4373 assign_stmt = gimple_build_assign (cond_var, t2);
4374 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4375 }
4376
629b3d75
MJ
4377 if (!gimple_omp_for_combined_p (fd->for_stmt))
4378 {
4379 if (POINTER_TYPE_P (type))
4380 t = fold_build_pointer_plus (vmain, fd->loop.step);
4381 else
4382 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4383 t = force_gimple_operand_gsi (&gsi, t,
4384 DECL_P (vback)
4385 && TREE_ADDRESSABLE (vback),
4386 NULL_TREE, true, GSI_SAME_STMT);
4387 assign_stmt = gimple_build_assign (vback, t);
4388 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4389
4390 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4391 {
d1ffbd43 4392 tree tem;
629b3d75 4393 if (fd->collapse > 1)
d1ffbd43 4394 tem = fd->loop.v;
629b3d75
MJ
4395 else
4396 {
d1ffbd43
JJ
4397 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4398 fd->loops[0].v, fd->loops[0].n1);
4399 tem = fold_convert (fd->iter_type, tem);
629b3d75
MJ
4400 }
4401 tree aref = build4 (ARRAY_REF, fd->iter_type,
4402 counts[fd->ordered], size_zero_node,
4403 NULL_TREE, NULL_TREE);
d1ffbd43
JJ
4404 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4405 true, GSI_SAME_STMT);
4406 expand_omp_build_assign (&gsi, aref, tem);
629b3d75
MJ
4407 }
4408
4409 t = build2 (fd->loop.cond_code, boolean_type_node,
4410 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4411 iend);
4412 gcond *cond_stmt = gimple_build_cond_empty (t);
4413 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4414 }
4415
4416 /* Remove GIMPLE_OMP_CONTINUE. */
4417 gsi_remove (&gsi, true);
4418
4419 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
aed3ab25 4420 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
629b3d75
MJ
4421
4422 /* Emit code to get the next parallel iteration in L2_BB. */
4423 gsi = gsi_start_bb (l2_bb);
4424
4425 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4426 build_fold_addr_expr (istart0),
4427 build_fold_addr_expr (iend0));
4428 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4429 false, GSI_CONTINUE_LINKING);
4430 if (TREE_TYPE (t) != boolean_type_node)
4431 t = fold_build2 (NE_EXPR, boolean_type_node,
4432 t, build_int_cst (TREE_TYPE (t), 0));
4433 gcond *cond_stmt = gimple_build_cond_empty (t);
4434 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4435 }
4436
4437 /* Add the loop cleanup function. */
65f4b875 4438 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
4439 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4440 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4441 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4442 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4443 else
4444 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4445 gcall *call_stmt = gimple_build_call (t, 0);
629b3d75
MJ
4446 if (fd->ordered)
4447 {
4448 tree arr = counts[fd->ordered];
25b45c7c 4449 tree clobber = build_clobber (TREE_TYPE (arr));
629b3d75
MJ
4450 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4451 GSI_SAME_STMT);
4452 }
28567c40
JJ
4453 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4454 {
4455 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4456 if (fd->have_reductemp)
4457 {
4458 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4459 gimple_call_lhs (call_stmt));
4460 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4461 }
4462 }
4463 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
629b3d75
MJ
4464 gsi_remove (&gsi, true);
4465
4466 /* Connect the new blocks. */
4467 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4468 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4469
4470 if (!broken_loop)
4471 {
4472 gimple_seq phis;
4473
4474 e = find_edge (cont_bb, l3_bb);
4475 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4476
4477 phis = phi_nodes (l3_bb);
4478 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4479 {
4480 gimple *phi = gsi_stmt (gsi);
4481 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4482 PHI_ARG_DEF_FROM_EDGE (phi, e));
4483 }
4484 remove_edge (e);
4485
4486 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4487 e = find_edge (cont_bb, l1_bb);
4488 if (e == NULL)
4489 {
4490 e = BRANCH_EDGE (cont_bb);
4491 gcc_assert (single_succ (e->dest) == l1_bb);
4492 }
4493 if (gimple_omp_for_combined_p (fd->for_stmt))
4494 {
4495 remove_edge (e);
4496 e = NULL;
4497 }
4498 else if (fd->collapse > 1)
4499 {
4500 remove_edge (e);
4501 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4502 }
4503 else
4504 e->flags = EDGE_TRUE_VALUE;
4505 if (e)
4506 {
357067f2
JH
4507 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4508 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
629b3d75
MJ
4509 }
4510 else
4511 {
4512 e = find_edge (cont_bb, l2_bb);
4513 e->flags = EDGE_FALLTHRU;
4514 }
4515 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4516
4517 if (gimple_in_ssa_p (cfun))
4518 {
4519 /* Add phis to the outer loop that connect to the phis in the inner,
4520 original loop, and move the loop entry value of the inner phi to
4521 the loop entry value of the outer phi. */
4522 gphi_iterator psi;
4523 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4524 {
620e594b 4525 location_t locus;
629b3d75
MJ
4526 gphi *nphi;
4527 gphi *exit_phi = psi.phi ();
4528
164485b5
JJ
4529 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4530 continue;
4531
629b3d75
MJ
4532 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4533 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4534
4535 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4536 edge latch_to_l1 = find_edge (latch, l1_bb);
4537 gphi *inner_phi
4538 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4539
4540 tree t = gimple_phi_result (exit_phi);
4541 tree new_res = copy_ssa_name (t, NULL);
4542 nphi = create_phi_node (new_res, l0_bb);
4543
4544 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4545 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4546 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4547 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4548 add_phi_arg (nphi, t, entry_to_l0, locus);
4549
4550 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4551 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4552
4553 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
164485b5 4554 }
629b3d75
MJ
4555 }
4556
4557 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4558 recompute_dominator (CDI_DOMINATORS, l2_bb));
4559 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4560 recompute_dominator (CDI_DOMINATORS, l3_bb));
4561 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4562 recompute_dominator (CDI_DOMINATORS, l0_bb));
4563 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4564 recompute_dominator (CDI_DOMINATORS, l1_bb));
4565
4566 /* We enter expand_omp_for_generic with a loop. This original loop may
4567 have its own loop struct, or it may be part of an outer loop struct
4568 (which may be the fake loop). */
99b1c316 4569 class loop *outer_loop = entry_bb->loop_father;
629b3d75
MJ
4570 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4571
4572 add_bb_to_loop (l2_bb, outer_loop);
4573
4574 /* We've added a new loop around the original loop. Allocate the
4575 corresponding loop struct. */
99b1c316 4576 class loop *new_loop = alloc_loop ();
629b3d75
MJ
4577 new_loop->header = l0_bb;
4578 new_loop->latch = l2_bb;
4579 add_loop (new_loop, outer_loop);
4580
4581 /* Allocate a loop structure for the original loop unless we already
4582 had one. */
4583 if (!orig_loop_has_loop_struct
4584 && !gimple_omp_for_combined_p (fd->for_stmt))
4585 {
99b1c316 4586 class loop *orig_loop = alloc_loop ();
629b3d75
MJ
4587 orig_loop->header = l1_bb;
4588 /* The loop may have multiple latches. */
4589 add_loop (orig_loop, new_loop);
4590 }
4591 }
4592}
4593
2f6bb511
JJ
4594/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4595 compute the needed allocation size: for !ALLOC the size of the team
4596 allocations, for ALLOC the size of the per-thread allocation. SZ is
4597 the initial size needed for other purposes, ALLOC_ALIGN the guaranteed
4598 alignment of the allocation in bytes, CNT the number of elements of
4599 each array (omp_get_num_threads () for !ALLOC, the number of iterations
4600 handled by the current thread for ALLOC). If PTR is non-NULL, it is
4601 the start of the allocation, and this routine stores into the
4602 OMP_CLAUSE_DECL of each _scantemp_ clause a pointer to its array. */
4603
4604static tree
4605expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4606 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4607 gimple_stmt_iterator *gsi, bool alloc)
4608{
4609 tree eltsz = NULL_TREE;
4610 unsigned HOST_WIDE_INT preval = 0;
4611 if (ptr && sz)
4612 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4613 ptr, size_int (sz));
4614 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4615 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4616 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4617 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4618 {
4619 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4620 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4621 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4622 {
4623 unsigned HOST_WIDE_INT szl
4624 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4625 szl = least_bit_hwi (szl);
4626 if (szl)
4627 al = MIN (al, szl);
4628 }
4629 if (ptr == NULL_TREE)
4630 {
4631 if (eltsz == NULL_TREE)
4632 eltsz = TYPE_SIZE_UNIT (pointee_type);
4633 else
4634 eltsz = size_binop (PLUS_EXPR, eltsz,
4635 TYPE_SIZE_UNIT (pointee_type));
4636 }
4637 if (preval == 0 && al <= alloc_align)
4638 {
4639 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4640 sz += diff;
4641 if (diff && ptr)
4642 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4643 ptr, size_int (diff));
4644 }
4645 else if (al > preval)
4646 {
4647 if (ptr)
4648 {
4649 ptr = fold_convert (pointer_sized_int_node, ptr);
4650 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4651 build_int_cst (pointer_sized_int_node,
4652 al - 1));
4653 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4654 build_int_cst (pointer_sized_int_node,
4655 -(HOST_WIDE_INT) al));
4656 ptr = fold_convert (ptr_type_node, ptr);
4657 }
4658 else
4659 sz += al - 1;
4660 }
4661 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4662 preval = al;
4663 else
4664 preval = 1;
4665 if (ptr)
4666 {
4667 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4668 ptr = OMP_CLAUSE_DECL (c);
4669 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4670 size_binop (MULT_EXPR, cnt,
4671 TYPE_SIZE_UNIT (pointee_type)));
4672 }
4673 }
4674
4675 if (ptr == NULL_TREE)
4676 {
4677 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4678 if (sz)
4679 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4680 return eltsz;
4681 }
4682 else
4683 return ptr;
4684}
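
/* Illustrative note, not from the original source: with two _scantemp_
   arrays whose pointee sizes are 8 and 4, SZ == 0 and CNT == nthreads,
   the sizing pass (PTR == NULL) returns 12 * nthreads plus any padding
   needed for alignment, while the second pass (PTR != NULL) walks the
   same clauses and stores PTR and then PTR + 8 * nthreads into the
   respective _scantemp_ decls.  */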
4685
14707c89
JJ
4686/* Return the last _looptemp_ clause if one has been created for
4687 lastprivate on distribute parallel for{, simd} or taskloop.
4688 FD is the loop data and INNERC should be the second _looptemp_
4689 clause (the one holding the end of the range).
4690 This is followed by collapse - 1 _looptemp_ clauses for
4691 counts[1] and up, and for triangular loops by 4 further
4692 _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4693 one for factor and one for adjn1). After this there is optionally one
4694 _looptemp_ clause that this function returns. */
4695
4696static tree
4697find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4698{
4699 gcc_assert (innerc);
4700 int count = fd->collapse - 1;
4701 if (fd->non_rect
4702 && fd->last_nonrect == fd->first_nonrect + 1
4703 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4704 count += 4;
4705 for (int i = 0; i < count; i++)
4706 {
4707 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4708 OMP_CLAUSE__LOOPTEMP_);
4709 gcc_assert (innerc);
4710 }
4711 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4712 OMP_CLAUSE__LOOPTEMP_);
4713}
4714
629b3d75
MJ
4715/* A subroutine of expand_omp_for. Generate code for a parallel
4716 loop with static schedule and no specified chunk size. Given
4717 parameters:
4718
4719 for (V = N1; V cond N2; V += STEP) BODY;
4720
4721 where COND is "<" or ">", we generate pseudocode
4722
4723 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4724 if (cond is <)
4725 adj = STEP - 1;
4726 else
4727 adj = STEP + 1;
4728 if ((__typeof (V)) -1 > 0 && cond is >)
4729 n = -(adj + N2 - N1) / -STEP;
4730 else
4731 n = (adj + N2 - N1) / STEP;
4732 q = n / nthreads;
4733 tt = n % nthreads;
4734 if (threadid < tt) goto L3; else goto L4;
4735 L3:
4736 tt = 0;
4737 q = q + 1;
4738 L4:
4739 s0 = q * threadid + tt;
4740 e0 = s0 + q;
4741 V = s0 * STEP + N1;
4742 if (s0 >= e0) goto L2; else goto L0;
4743 L0:
4744 e = e0 * STEP + N1;
4745 L1:
4746 BODY;
4747 V += STEP;
4748 if (V cond e) goto L1;
4749 L2:
4750*/
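
/* Illustrative note, not from the original source: with n = 10 and
   nthreads = 4, q = 2 and tt = 2, so threads 0 and 1 run q + 1 = 3
   iterations each ([0,3) and [3,6)) while threads 2 and 3 run 2 each
   ([6,8) and [8,10)); every thread derives its [s0, e0) slice from its
   threadid alone, without any further runtime calls.  */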
4751
4752static void
4753expand_omp_for_static_nochunk (struct omp_region *region,
4754 struct omp_for_data *fd,
4755 gimple *inner_stmt)
4756{
2f6bb511 4757 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
629b3d75
MJ
4758 tree type, itype, vmain, vback;
4759 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4760 basic_block body_bb, cont_bb, collapse_bb = NULL;
2f6bb511
JJ
4761 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4762 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
6c7ae8c5 4763 gimple_stmt_iterator gsi, gsip;
629b3d75
MJ
4764 edge ep;
4765 bool broken_loop = region->cont == NULL;
4766 tree *counts = NULL;
4767 tree n1, n2, step;
28567c40 4768 tree reductions = NULL_TREE;
8221c30b 4769 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
629b3d75
MJ
4770
4771 itype = type = TREE_TYPE (fd->loop.v);
4772 if (POINTER_TYPE_P (type))
4773 itype = signed_type_for (type);
4774
4775 entry_bb = region->entry;
4776 cont_bb = region->cont;
4777 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4778 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4779 gcc_assert (broken_loop
4780 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4781 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4782 body_bb = single_succ (seq_start_bb);
4783 if (!broken_loop)
4784 {
4785 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4786 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4787 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4788 }
4789 exit_bb = region->exit;
4790
4791 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 4792 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 4793 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6c7ae8c5
JJ
4794 gsip = gsi;
4795 gsi_prev (&gsip);
629b3d75
MJ
4796
4797 if (fd->collapse > 1)
4798 {
4799 int first_zero_iter = -1, dummy = -1;
4800 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4801
4802 counts = XALLOCAVEC (tree, fd->collapse);
4803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4804 fin_bb, first_zero_iter,
4805 dummy_bb, dummy, l2_dom_bb);
4806 t = NULL_TREE;
4807 }
4808 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4809 t = integer_one_node;
4810 else
4811 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4812 fold_convert (type, fd->loop.n1),
4813 fold_convert (type, fd->loop.n2));
4814 if (fd->collapse == 1
4815 && TYPE_UNSIGNED (type)
4816 && (t == NULL_TREE || !integer_onep (t)))
4817 {
4818 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4820 true, GSI_SAME_STMT);
4821 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4823 true, GSI_SAME_STMT);
4824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
6c7ae8c5 4825 NULL_TREE, NULL_TREE);
629b3d75
MJ
4826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4828 expand_omp_regimplify_p, NULL, NULL)
4829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4830 expand_omp_regimplify_p, NULL, NULL))
4831 {
4832 gsi = gsi_for_stmt (cond_stmt);
4833 gimple_regimplify_operands (cond_stmt, &gsi);
4834 }
4835 ep = split_block (entry_bb, cond_stmt);
4836 ep->flags = EDGE_TRUE_VALUE;
4837 entry_bb = ep->dest;
357067f2 4838 ep->probability = profile_probability::very_likely ();
629b3d75 4839 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 4840 ep->probability = profile_probability::very_unlikely ();
4841 if (gimple_in_ssa_p (cfun))
4842 {
4843 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4844 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4845 !gsi_end_p (gpi); gsi_next (&gpi))
4846 {
4847 gphi *phi = gpi.phi ();
4848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4849 ep, UNKNOWN_LOCATION);
4850 }
4851 }
4852 gsi = gsi_last_bb (entry_bb);
4853 }
4854
4855 if (fd->lastprivate_conditional)
4856 {
4857 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4858 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4859 if (fd->have_pointer_condtemp)
4860 condtemp = OMP_CLAUSE_DECL (c);
4861 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4862 cond_var = OMP_CLAUSE_DECL (c);
4863 }
2f6bb511 4864 if (fd->have_reductemp
4865 /* For scan, we don't want to reinitialize condtemp before the
4866 second loop. */
4867 || (fd->have_pointer_condtemp && !fd->have_scantemp)
2f6bb511 4868 || fd->have_nonctrl_scantemp)
4869 {
4870 tree t1 = build_int_cst (long_integer_type_node, 0);
4871 tree t2 = build_int_cst (long_integer_type_node, 1);
4872 tree t3 = build_int_cstu (long_integer_type_node,
4873 (HOST_WIDE_INT_1U << 31) + 1);
4874 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4875 gimple_stmt_iterator gsi2 = gsi_none ();
4876 gimple *g = NULL;
4877 tree mem = null_pointer_node, memv = NULL_TREE;
4878 unsigned HOST_WIDE_INT condtemp_sz = 0;
4879 unsigned HOST_WIDE_INT alloc_align = 0;
4880 if (fd->have_reductemp)
4881 {
2f6bb511 4882 gcc_assert (!fd->have_nonctrl_scantemp);
4883 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4884 reductions = OMP_CLAUSE_DECL (c);
4885 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4886 g = SSA_NAME_DEF_STMT (reductions);
4887 reductions = gimple_assign_rhs1 (g);
4888 OMP_CLAUSE_DECL (c) = reductions;
4889 gsi2 = gsi_for_stmt (g);
4890 }
4891 else
4892 {
4893 if (gsi_end_p (gsip))
4894 gsi2 = gsi_after_labels (region->entry);
4895 else
4896 gsi2 = gsip;
4897 reductions = null_pointer_node;
4898 }
2f6bb511 4899 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
6c7ae8c5 4900 {
4901 tree type;
4902 if (fd->have_pointer_condtemp)
4903 type = TREE_TYPE (condtemp);
4904 else
4905 type = ptr_type_node;
4906 memv = create_tmp_var (type);
4907 TREE_ADDRESSABLE (memv) = 1;
4908 unsigned HOST_WIDE_INT sz = 0;
4909 tree size = NULL_TREE;
4910 if (fd->have_pointer_condtemp)
4911 {
4912 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4913 sz *= fd->lastprivate_conditional;
4914 condtemp_sz = sz;
4915 }
4916 if (fd->have_nonctrl_scantemp)
4917 {
4918 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4919 gimple *g = gimple_build_call (nthreads, 0);
4920 nthreads = create_tmp_var (integer_type_node);
4921 gimple_call_set_lhs (g, nthreads);
4922 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4923 nthreads = fold_convert (sizetype, nthreads);
4924 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4925 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4926 alloc_align, nthreads, NULL,
4927 false);
4928 size = fold_convert (type, size);
4929 }
4930 else
4931 size = build_int_cst (type, sz);
4932 expand_omp_build_assign (&gsi2, memv, size, false);
4933 mem = build_fold_addr_expr (memv);
4934 }
4935 tree t
4936 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4937 9, t1, t2, t2, t3, t1, null_pointer_node,
6c7ae8c5 4938 null_pointer_node, reductions, mem);
4939 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4940 true, GSI_SAME_STMT);
8221c30b 4941 if (fd->have_pointer_condtemp)
6c7ae8c5 4942 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4943 if (fd->have_nonctrl_scantemp)
4944 {
4945 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4946 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4947 alloc_align, nthreads, &gsi2, false);
4948 }
4949 if (fd->have_reductemp)
4950 {
4951 gsi_remove (&gsi2, true);
4952 release_ssa_name (gimple_assign_lhs (g));
4953 }
28567c40 4954 }
4955 switch (gimple_omp_for_kind (fd->for_stmt))
4956 {
4957 case GF_OMP_FOR_KIND_FOR:
4958 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4959 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4960 break;
4961 case GF_OMP_FOR_KIND_DISTRIBUTE:
4962 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4963 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4964 break;
4965 default:
4966 gcc_unreachable ();
4967 }
4968 nthreads = build_call_expr (nthreads, 0);
4969 nthreads = fold_convert (itype, nthreads);
4970 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4971 true, GSI_SAME_STMT);
4972 threadid = build_call_expr (threadid, 0);
4973 threadid = fold_convert (itype, threadid);
4974 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4975 true, GSI_SAME_STMT);
4976
4977 n1 = fd->loop.n1;
4978 n2 = fd->loop.n2;
4979 step = fd->loop.step;
4980 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4981 {
4982 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4983 OMP_CLAUSE__LOOPTEMP_);
4984 gcc_assert (innerc);
4985 n1 = OMP_CLAUSE_DECL (innerc);
4986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4987 OMP_CLAUSE__LOOPTEMP_);
4988 gcc_assert (innerc);
4989 n2 = OMP_CLAUSE_DECL (innerc);
4990 }
4991 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4992 true, NULL_TREE, true, GSI_SAME_STMT);
4993 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4994 true, NULL_TREE, true, GSI_SAME_STMT);
4995 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4996 true, NULL_TREE, true, GSI_SAME_STMT);
4997
4998 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4999 t = fold_build2 (PLUS_EXPR, itype, step, t);
5000 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5001 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5002 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5003 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5004 fold_build1 (NEGATE_EXPR, itype, t),
5005 fold_build1 (NEGATE_EXPR, itype, step));
5006 else
5007 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5008 t = fold_convert (itype, t);
5009 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5010
5011 q = create_tmp_reg (itype, "q");
5012 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5013 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5014 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5015
5016 tt = create_tmp_reg (itype, "tt");
5017 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5018 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5019 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5020
5021 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5022 gcond *cond_stmt = gimple_build_cond_empty (t);
5023 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5024
5025 second_bb = split_block (entry_bb, cond_stmt)->dest;
65f4b875 5026 gsi = gsi_last_nondebug_bb (second_bb);
5027 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5028
5029 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5030 GSI_SAME_STMT);
5031 gassign *assign_stmt
5032 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5033 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5034
5035 third_bb = split_block (second_bb, assign_stmt)->dest;
65f4b875 5036 gsi = gsi_last_nondebug_bb (third_bb);
5037 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5038
5039 if (fd->have_nonctrl_scantemp)
5040 {
5041 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5042 tree controlp = NULL_TREE, controlb = NULL_TREE;
5043 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5044 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5045 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5046 {
5047 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5048 controlb = OMP_CLAUSE_DECL (c);
5049 else
5050 controlp = OMP_CLAUSE_DECL (c);
5051 if (controlb && controlp)
5052 break;
5053 }
5054 gcc_assert (controlp && controlb);
5055 tree cnt = create_tmp_var (sizetype);
5056 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5057 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5058 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5059 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5060 alloc_align, cnt, NULL, true);
5061 tree size = create_tmp_var (sizetype);
5062 expand_omp_build_assign (&gsi, size, sz, false);
5063 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5064 size, size_int (16384));
5065 expand_omp_build_assign (&gsi, controlb, cmp);
5066 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5067 NULL_TREE, NULL_TREE);
5068 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5069 fourth_bb = split_block (third_bb, g)->dest;
5070 gsi = gsi_last_nondebug_bb (fourth_bb);
 5071	      /* FIXME: Once we have allocators, this should use an allocator.  */
5072 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5073 gimple_call_set_lhs (g, controlp);
5074 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5075 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5076 &gsi, true);
5077 gsi_prev (&gsi);
5078 g = gsi_stmt (gsi);
5079 fifth_bb = split_block (fourth_bb, g)->dest;
5080 gsi = gsi_last_nondebug_bb (fifth_bb);
5081
5082 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5083 gimple_call_set_lhs (g, controlp);
5084 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5085 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5086 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5087 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5088 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5089 {
5090 tree tmp = create_tmp_var (sizetype);
5091 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5092 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5093 TYPE_SIZE_UNIT (pointee_type));
5094 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5095 g = gimple_build_call (alloca_decl, 2, tmp,
5096 size_int (TYPE_ALIGN (pointee_type)));
5097 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5098 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5099 }
5100
5101 sixth_bb = split_block (fifth_bb, g)->dest;
5102 gsi = gsi_last_nondebug_bb (sixth_bb);
5103 }
5104
5105 t = build2 (MULT_EXPR, itype, q, threadid);
5106 t = build2 (PLUS_EXPR, itype, t, tt);
5107 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5108
5109 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5110 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5111
5112 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5113 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5114
5115 /* Remove the GIMPLE_OMP_FOR statement. */
5116 gsi_remove (&gsi, true);
5117
5118 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5119 gsi = gsi_start_bb (seq_start_bb);
5120
5121 tree startvar = fd->loop.v;
5122 tree endvar = NULL_TREE;
5123
5124 if (gimple_omp_for_combined_p (fd->for_stmt))
5125 {
5126 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5127 ? gimple_omp_parallel_clauses (inner_stmt)
5128 : gimple_omp_for_clauses (inner_stmt);
5129 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5130 gcc_assert (innerc);
5131 startvar = OMP_CLAUSE_DECL (innerc);
5132 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5133 OMP_CLAUSE__LOOPTEMP_);
5134 gcc_assert (innerc);
5135 endvar = OMP_CLAUSE_DECL (innerc);
5136 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5137 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5138 {
14707c89 5139 innerc = find_lastprivate_looptemp (fd, innerc);
5140 if (innerc)
5141 {
5142 /* If needed (distribute parallel for with lastprivate),
5143 propagate down the total number of iterations. */
5144 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5145 fd->loop.n2);
5146 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5147 GSI_CONTINUE_LINKING);
5148 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5149 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5150 }
5151 }
5152 }
5153 t = fold_convert (itype, s0);
5154 t = fold_build2 (MULT_EXPR, itype, t, step);
5155 if (POINTER_TYPE_P (type))
5156 {
5157 t = fold_build_pointer_plus (n1, t);
5158 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5159 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5160 t = fold_convert (signed_type_for (type), t);
5161 }
5162 else
5163 t = fold_build2 (PLUS_EXPR, type, t, n1);
5164 t = fold_convert (TREE_TYPE (startvar), t);
5165 t = force_gimple_operand_gsi (&gsi, t,
5166 DECL_P (startvar)
5167 && TREE_ADDRESSABLE (startvar),
5168 NULL_TREE, false, GSI_CONTINUE_LINKING);
5169 assign_stmt = gimple_build_assign (startvar, t);
5170 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5171 if (cond_var)
5172 {
5173 tree itype = TREE_TYPE (cond_var);
 5174	      /* For lastprivate(conditional:) itervar, we need an iteration
 5175	 counter that starts at a non-zero (unsigned) value and increases.
 5176	 Prefer as few IVs as possible, so if we can use startvar
 5177	 itself, use that, or startvar + constant (those would be
 5178	 incremented with step), and as a last resort fall back to
 5179	 s0 + 1, incremented by 1 on each iteration.  */
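/* An illustrative instance of the above (example values only):
   for (i = 5; i < n; i++) can use startvar itself, since it starts
   at 5 > 0 and increases; for n1 = -3 the last branch below adds
   c = 1 - (-3) = 4 so the counter starts at 1; pointer iterators
   and non-LT loops fall back to s0 + 1.  */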
5180 if (POINTER_TYPE_P (type)
5181 || TREE_CODE (n1) != INTEGER_CST
5182 || fd->loop.cond_code != LT_EXPR)
5183 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5184 build_int_cst (itype, 1));
5185 else if (tree_int_cst_sgn (n1) == 1)
5186 t = fold_convert (itype, t);
5187 else
5188 {
5189 tree c = fold_convert (itype, n1);
5190 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5191 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5192 }
5193 t = force_gimple_operand_gsi (&gsi, t, false,
5194 NULL_TREE, false, GSI_CONTINUE_LINKING);
5195 assign_stmt = gimple_build_assign (cond_var, t);
5196 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5197 }
5198
5199 t = fold_convert (itype, e0);
5200 t = fold_build2 (MULT_EXPR, itype, t, step);
5201 if (POINTER_TYPE_P (type))
5202 {
5203 t = fold_build_pointer_plus (n1, t);
5204 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5205 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5206 t = fold_convert (signed_type_for (type), t);
5207 }
5208 else
5209 t = fold_build2 (PLUS_EXPR, type, t, n1);
5210 t = fold_convert (TREE_TYPE (startvar), t);
5211 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5212 false, GSI_CONTINUE_LINKING);
5213 if (endvar)
5214 {
5215 assign_stmt = gimple_build_assign (endvar, e);
5216 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5217 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5218 assign_stmt = gimple_build_assign (fd->loop.v, e);
5219 else
5220 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5221 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5222 }
5223 /* Handle linear clause adjustments. */
5224 tree itercnt = NULL_TREE;
aed3ab25 5225 tree *nonrect_bounds = NULL;
5226 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5227 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5228 c; c = OMP_CLAUSE_CHAIN (c))
5229 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5230 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5231 {
5232 tree d = OMP_CLAUSE_DECL (c);
5233 bool is_ref = omp_is_reference (d);
5234 tree t = d, a, dest;
5235 if (is_ref)
5236 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5237 if (itercnt == NULL_TREE)
5238 {
5239 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5240 {
5241 itercnt = fold_build2 (MINUS_EXPR, itype,
5242 fold_convert (itype, n1),
5243 fold_convert (itype, fd->loop.n1));
5244 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5245 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5246 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5247 NULL_TREE, false,
5248 GSI_CONTINUE_LINKING);
5249 }
5250 else
5251 itercnt = s0;
5252 }
5253 tree type = TREE_TYPE (t);
5254 if (POINTER_TYPE_P (type))
5255 type = sizetype;
5256 a = fold_build2 (MULT_EXPR, type,
5257 fold_convert (type, itercnt),
5258 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5259 dest = unshare_expr (t);
5260 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5261 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5262 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5263 false, GSI_CONTINUE_LINKING);
6fcc3cac 5264 expand_omp_build_assign (&gsi, dest, t, true);
5265 }
5266 if (fd->collapse > 1)
5267 {
5268 if (fd->non_rect)
5269 {
5270 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5271 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5272 }
5273 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5274 startvar);
5275 }
5276
5277 if (!broken_loop)
5278 {
5279 /* The code controlling the sequential loop replaces the
5280 GIMPLE_OMP_CONTINUE. */
65f4b875 5281 gsi = gsi_last_nondebug_bb (cont_bb);
5282 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5283 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5284 vmain = gimple_omp_continue_control_use (cont_stmt);
5285 vback = gimple_omp_continue_control_def (cont_stmt);
5286
5287 if (cond_var)
5288 {
5289 tree itype = TREE_TYPE (cond_var);
5290 tree t2;
5291 if (POINTER_TYPE_P (type)
5292 || TREE_CODE (n1) != INTEGER_CST
5293 || fd->loop.cond_code != LT_EXPR)
5294 t2 = build_int_cst (itype, 1);
5295 else
5296 t2 = fold_convert (itype, step);
5297 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5298 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5299 NULL_TREE, true, GSI_SAME_STMT);
5300 assign_stmt = gimple_build_assign (cond_var, t2);
5301 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5302 }
5303
5304 if (!gimple_omp_for_combined_p (fd->for_stmt))
5305 {
5306 if (POINTER_TYPE_P (type))
5307 t = fold_build_pointer_plus (vmain, step);
5308 else
5309 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5310 t = force_gimple_operand_gsi (&gsi, t,
5311 DECL_P (vback)
5312 && TREE_ADDRESSABLE (vback),
5313 NULL_TREE, true, GSI_SAME_STMT);
5314 assign_stmt = gimple_build_assign (vback, t);
5315 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5316
5317 t = build2 (fd->loop.cond_code, boolean_type_node,
5318 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5319 ? t : vback, e);
5320 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5321 }
5322
5323 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5324 gsi_remove (&gsi, true);
5325
5326 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5327 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5328 cont_bb, body_bb);
5329 }
5330
5331 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 5332 gsi = gsi_last_nondebug_bb (exit_bb);
5333 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5334 {
5335 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5336 if (fd->have_reductemp
5337 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5338 && !fd->have_nonctrl_scantemp))
5339 {
5340 tree fn;
5341 if (t)
5342 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5343 else
5344 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5345 gcall *g = gimple_build_call (fn, 0);
5346 if (t)
5347 {
5348 gimple_call_set_lhs (g, t);
5349 if (fd->have_reductemp)
5350 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5351 NOP_EXPR, t),
5352 GSI_SAME_STMT);
5353 }
5354 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5355 }
5356 else
5357 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
629b3d75 5358 }
5359 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5360 && !fd->have_nonctrl_scantemp)
5361 {
5362 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5363 gcall *g = gimple_build_call (fn, 0);
5364 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5365 }
5366 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5367 {
5368 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5369 tree controlp = NULL_TREE, controlb = NULL_TREE;
5370 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5371 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5372 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5373 {
5374 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5375 controlb = OMP_CLAUSE_DECL (c);
5376 else
5377 controlp = OMP_CLAUSE_DECL (c);
5378 if (controlb && controlp)
5379 break;
5380 }
5381 gcc_assert (controlp && controlb);
5382 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5383 NULL_TREE, NULL_TREE);
5384 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5385 exit1_bb = split_block (exit_bb, g)->dest;
5386 gsi = gsi_after_labels (exit1_bb);
5387 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5388 controlp);
5389 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5390 exit2_bb = split_block (exit1_bb, g)->dest;
5391 gsi = gsi_after_labels (exit2_bb);
5392 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5393 controlp);
5394 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5395 exit3_bb = split_block (exit2_bb, g)->dest;
5396 gsi = gsi_after_labels (exit3_bb);
5397 }
5398 gsi_remove (&gsi, true);
5399
5400 /* Connect all the blocks. */
5401 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
357067f2 5402 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5403 ep = find_edge (entry_bb, second_bb);
5404 ep->flags = EDGE_TRUE_VALUE;
357067f2 5405 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5406 if (fourth_bb)
5407 {
5408 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5409 ep->probability
5410 = profile_probability::guessed_always ().apply_scale (1, 2);
5411 ep = find_edge (third_bb, fourth_bb);
5412 ep->flags = EDGE_TRUE_VALUE;
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (fourth_bb, fifth_bb);
5416 redirect_edge_and_branch (ep, sixth_bb);
5417 }
5418 else
5419 sixth_bb = third_bb;
5420 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5421 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5422 if (exit1_bb)
5423 {
5424 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5425 ep->probability
5426 = profile_probability::guessed_always ().apply_scale (1, 2);
5427 ep = find_edge (exit_bb, exit1_bb);
5428 ep->flags = EDGE_TRUE_VALUE;
5429 ep->probability
5430 = profile_probability::guessed_always ().apply_scale (1, 2);
5431 ep = find_edge (exit1_bb, exit2_bb);
5432 redirect_edge_and_branch (ep, exit3_bb);
5433 }
5434
5435 if (!broken_loop)
5436 {
5437 ep = find_edge (cont_bb, body_bb);
5438 if (ep == NULL)
5439 {
5440 ep = BRANCH_EDGE (cont_bb);
5441 gcc_assert (single_succ (ep->dest) == body_bb);
5442 }
5443 if (gimple_omp_for_combined_p (fd->for_stmt))
5444 {
5445 remove_edge (ep);
5446 ep = NULL;
5447 }
5448 else if (fd->collapse > 1)
5449 {
5450 remove_edge (ep);
5451 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5452 }
5453 else
5454 ep->flags = EDGE_TRUE_VALUE;
5455 find_edge (cont_bb, fin_bb)->flags
5456 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5457 }
5458
5459 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5460 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5461 if (fourth_bb)
5462 {
5463 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5464 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5465 }
5466 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5467
5468 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5469 recompute_dominator (CDI_DOMINATORS, body_bb));
5470 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5471 recompute_dominator (CDI_DOMINATORS, fin_bb));
5472 if (exit1_bb)
5473 {
5474 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5475 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5476 }
629b3d75 5477
99b1c316 5478 class loop *loop = body_bb->loop_father;
5479 if (loop != entry_bb->loop_father)
5480 {
5481 gcc_assert (broken_loop || loop->header == body_bb);
5482 gcc_assert (broken_loop
5483 || loop->latch == region->cont
5484 || single_pred (loop->latch) == region->cont);
5485 return;
5486 }
5487
5488 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5489 {
5490 loop = alloc_loop ();
5491 loop->header = body_bb;
5492 if (collapse_bb == NULL)
5493 loop->latch = cont_bb;
5494 add_loop (loop, body_bb->loop_father);
5495 }
5496}
5497
 5498/* Return the phi in E->DEST whose argument on edge E is ARG, or NULL if there is none.  */
5499
5500static gphi *
5501find_phi_with_arg_on_edge (tree arg, edge e)
5502{
5503 basic_block bb = e->dest;
5504
5505 for (gphi_iterator gpi = gsi_start_phis (bb);
5506 !gsi_end_p (gpi);
5507 gsi_next (&gpi))
5508 {
5509 gphi *phi = gpi.phi ();
5510 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5511 return phi;
5512 }
5513
5514 return NULL;
5515}
5516
5517/* A subroutine of expand_omp_for. Generate code for a parallel
5518 loop with static schedule and a specified chunk size. Given
5519 parameters:
5520
5521 for (V = N1; V cond N2; V += STEP) BODY;
5522
5523 where COND is "<" or ">", we generate pseudocode
5524
5525 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5526 if (cond is <)
5527 adj = STEP - 1;
5528 else
5529 adj = STEP + 1;
5530 if ((__typeof (V)) -1 > 0 && cond is >)
5531 n = -(adj + N2 - N1) / -STEP;
5532 else
5533 n = (adj + N2 - N1) / STEP;
5534 trip = 0;
5535 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5536 here so that V is defined
5537 if the loop is not entered
5538 L0:
5539 s0 = (trip * nthreads + threadid) * CHUNK;
01914336 5540 e0 = min (s0 + CHUNK, n);
5541 if (s0 < n) goto L1; else goto L4;
5542 L1:
5543 V = s0 * STEP + N1;
5544 e = e0 * STEP + N1;
5545 L2:
5546 BODY;
5547 V += STEP;
5548 if (V cond e) goto L2; else goto L3;
5549 L3:
5550 trip += 1;
5551 goto L0;
5552 L4:
5553*/
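/* A worked example of the chunked schedule above, with illustrative
   values nthreads = 2, CHUNK = 3 and n = 10:
     trip 0: thread 0 runs s0..e0 = 0..3, thread 1 runs 3..6;
     trip 1: thread 0 runs 6..9, thread 1 runs 9..10 (clamped by n);
     trip 2: s0 >= n for both threads, so both leave via L4.
   Chunks are handed out round-robin: thread T executes chunks
   T, T + nthreads, T + 2*nthreads, ...  */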
5554
5555static void
5556expand_omp_for_static_chunk (struct omp_region *region,
5557 struct omp_for_data *fd, gimple *inner_stmt)
5558{
5559 tree n, s0, e0, e, t;
5560 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5561 tree type, itype, vmain, vback, vextra;
5562 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5563 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
6c7ae8c5 5564 gimple_stmt_iterator gsi, gsip;
5565 edge se;
5566 bool broken_loop = region->cont == NULL;
5567 tree *counts = NULL;
5568 tree n1, n2, step;
28567c40 5569 tree reductions = NULL_TREE;
8221c30b 5570 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5571
5572 itype = type = TREE_TYPE (fd->loop.v);
5573 if (POINTER_TYPE_P (type))
5574 itype = signed_type_for (type);
5575
5576 entry_bb = region->entry;
5577 se = split_block (entry_bb, last_stmt (entry_bb));
5578 entry_bb = se->src;
5579 iter_part_bb = se->dest;
5580 cont_bb = region->cont;
5581 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5582 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5583 gcc_assert (broken_loop
5584 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5585 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5586 body_bb = single_succ (seq_start_bb);
5587 if (!broken_loop)
5588 {
5589 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5590 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5591 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5592 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5593 }
5594 exit_bb = region->exit;
5595
5596 /* Trip and adjustment setup goes in ENTRY_BB. */
65f4b875 5597 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 5598 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5599 gsip = gsi;
5600 gsi_prev (&gsip);
5601
5602 if (fd->collapse > 1)
5603 {
5604 int first_zero_iter = -1, dummy = -1;
5605 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5606
5607 counts = XALLOCAVEC (tree, fd->collapse);
5608 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5609 fin_bb, first_zero_iter,
5610 dummy_bb, dummy, l2_dom_bb);
5611 t = NULL_TREE;
5612 }
5613 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5614 t = integer_one_node;
5615 else
5616 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5617 fold_convert (type, fd->loop.n1),
5618 fold_convert (type, fd->loop.n2));
5619 if (fd->collapse == 1
5620 && TYPE_UNSIGNED (type)
5621 && (t == NULL_TREE || !integer_onep (t)))
5622 {
5623 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5624 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5625 true, GSI_SAME_STMT);
5626 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5627 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5628 true, GSI_SAME_STMT);
5629 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5630 NULL_TREE, NULL_TREE);
5631 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5632 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5633 expand_omp_regimplify_p, NULL, NULL)
5634 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5635 expand_omp_regimplify_p, NULL, NULL))
5636 {
5637 gsi = gsi_for_stmt (cond_stmt);
5638 gimple_regimplify_operands (cond_stmt, &gsi);
5639 }
5640 se = split_block (entry_bb, cond_stmt);
5641 se->flags = EDGE_TRUE_VALUE;
5642 entry_bb = se->dest;
357067f2 5643 se->probability = profile_probability::very_likely ();
629b3d75 5644 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 5645 se->probability = profile_probability::very_unlikely ();
5646 if (gimple_in_ssa_p (cfun))
5647 {
5648 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5649 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5650 !gsi_end_p (gpi); gsi_next (&gpi))
5651 {
5652 gphi *phi = gpi.phi ();
5653 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5654 se, UNKNOWN_LOCATION);
5655 }
5656 }
5657 gsi = gsi_last_bb (entry_bb);
5658 }
5659
5660 if (fd->lastprivate_conditional)
5661 {
5662 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5663 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5664 if (fd->have_pointer_condtemp)
5665 condtemp = OMP_CLAUSE_DECL (c);
5666 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5667 cond_var = OMP_CLAUSE_DECL (c);
5668 }
5669 if (fd->have_reductemp || fd->have_pointer_condtemp)
5670 {
5671 tree t1 = build_int_cst (long_integer_type_node, 0);
5672 tree t2 = build_int_cst (long_integer_type_node, 1);
5673 tree t3 = build_int_cstu (long_integer_type_node,
5674 (HOST_WIDE_INT_1U << 31) + 1);
5675 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5676 gimple_stmt_iterator gsi2 = gsi_none ();
5677 gimple *g = NULL;
5678 tree mem = null_pointer_node, memv = NULL_TREE;
5679 if (fd->have_reductemp)
5680 {
5681 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5682 reductions = OMP_CLAUSE_DECL (c);
5683 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5684 g = SSA_NAME_DEF_STMT (reductions);
5685 reductions = gimple_assign_rhs1 (g);
5686 OMP_CLAUSE_DECL (c) = reductions;
5687 gsi2 = gsi_for_stmt (g);
5688 }
5689 else
5690 {
5691 if (gsi_end_p (gsip))
5692 gsi2 = gsi_after_labels (region->entry);
5693 else
5694 gsi2 = gsip;
5695 reductions = null_pointer_node;
5696 }
8221c30b 5697 if (fd->have_pointer_condtemp)
6c7ae8c5 5698 {
5699 tree type = TREE_TYPE (condtemp);
5700 memv = create_tmp_var (type);
5701 TREE_ADDRESSABLE (memv) = 1;
5702 unsigned HOST_WIDE_INT sz
5703 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5704 sz *= fd->lastprivate_conditional;
5705 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5706 false);
5707 mem = build_fold_addr_expr (memv);
5708 }
5709 tree t
5710 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5711 9, t1, t2, t2, t3, t1, null_pointer_node,
6c7ae8c5 5712 null_pointer_node, reductions, mem);
5713 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5714 true, GSI_SAME_STMT);
8221c30b 5715 if (fd->have_pointer_condtemp)
5716 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5717 if (fd->have_reductemp)
5718 {
5719 gsi_remove (&gsi2, true);
5720 release_ssa_name (gimple_assign_lhs (g));
5721 }
28567c40 5722 }
5723 switch (gimple_omp_for_kind (fd->for_stmt))
5724 {
5725 case GF_OMP_FOR_KIND_FOR:
5726 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5727 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5728 break;
5729 case GF_OMP_FOR_KIND_DISTRIBUTE:
5730 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5731 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5732 break;
5733 default:
5734 gcc_unreachable ();
5735 }
5736 nthreads = build_call_expr (nthreads, 0);
5737 nthreads = fold_convert (itype, nthreads);
5738 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5739 true, GSI_SAME_STMT);
5740 threadid = build_call_expr (threadid, 0);
5741 threadid = fold_convert (itype, threadid);
5742 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5743 true, GSI_SAME_STMT);
5744
5745 n1 = fd->loop.n1;
5746 n2 = fd->loop.n2;
5747 step = fd->loop.step;
5748 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5749 {
5750 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5751 OMP_CLAUSE__LOOPTEMP_);
5752 gcc_assert (innerc);
5753 n1 = OMP_CLAUSE_DECL (innerc);
5754 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5755 OMP_CLAUSE__LOOPTEMP_);
5756 gcc_assert (innerc);
5757 n2 = OMP_CLAUSE_DECL (innerc);
5758 }
5759 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5760 true, NULL_TREE, true, GSI_SAME_STMT);
5761 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5762 true, NULL_TREE, true, GSI_SAME_STMT);
5763 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5764 true, NULL_TREE, true, GSI_SAME_STMT);
5765 tree chunk_size = fold_convert (itype, fd->chunk_size);
5766 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5767 chunk_size
5768 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5769 GSI_SAME_STMT);
5770
5771 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5772 t = fold_build2 (PLUS_EXPR, itype, step, t);
5773 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5774 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5775 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5776 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5777 fold_build1 (NEGATE_EXPR, itype, t),
5778 fold_build1 (NEGATE_EXPR, itype, step));
5779 else
5780 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5781 t = fold_convert (itype, t);
5782 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5783 true, GSI_SAME_STMT);
5784
5785 trip_var = create_tmp_reg (itype, ".trip");
5786 if (gimple_in_ssa_p (cfun))
5787 {
5788 trip_init = make_ssa_name (trip_var);
5789 trip_main = make_ssa_name (trip_var);
5790 trip_back = make_ssa_name (trip_var);
5791 }
5792 else
5793 {
5794 trip_init = trip_var;
5795 trip_main = trip_var;
5796 trip_back = trip_var;
5797 }
5798
5799 gassign *assign_stmt
5800 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5801 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5802
5803 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5804 t = fold_build2 (MULT_EXPR, itype, t, step);
5805 if (POINTER_TYPE_P (type))
5806 t = fold_build_pointer_plus (n1, t);
5807 else
5808 t = fold_build2 (PLUS_EXPR, type, t, n1);
5809 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5810 true, GSI_SAME_STMT);
5811
5812 /* Remove the GIMPLE_OMP_FOR. */
5813 gsi_remove (&gsi, true);
5814
5815 gimple_stmt_iterator gsif = gsi;
5816
5817 /* Iteration space partitioning goes in ITER_PART_BB. */
5818 gsi = gsi_last_bb (iter_part_bb);
5819
5820 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5821 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5822 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5823 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5824 false, GSI_CONTINUE_LINKING);
5825
5826 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5827 t = fold_build2 (MIN_EXPR, itype, t, n);
5828 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5829 false, GSI_CONTINUE_LINKING);
5830
5831 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5832 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5833
5834 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5835 gsi = gsi_start_bb (seq_start_bb);
5836
5837 tree startvar = fd->loop.v;
5838 tree endvar = NULL_TREE;
5839
5840 if (gimple_omp_for_combined_p (fd->for_stmt))
5841 {
5842 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5843 ? gimple_omp_parallel_clauses (inner_stmt)
5844 : gimple_omp_for_clauses (inner_stmt);
5845 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5846 gcc_assert (innerc);
5847 startvar = OMP_CLAUSE_DECL (innerc);
5848 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5849 OMP_CLAUSE__LOOPTEMP_);
5850 gcc_assert (innerc);
5851 endvar = OMP_CLAUSE_DECL (innerc);
5852 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5853 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5854 {
14707c89 5855 innerc = find_lastprivate_looptemp (fd, innerc);
5856 if (innerc)
5857 {
5858 /* If needed (distribute parallel for with lastprivate),
5859 propagate down the total number of iterations. */
5860 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5861 fd->loop.n2);
5862 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5863 GSI_CONTINUE_LINKING);
5864 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5865 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5866 }
5867 }
5868 }
5869
5870 t = fold_convert (itype, s0);
5871 t = fold_build2 (MULT_EXPR, itype, t, step);
5872 if (POINTER_TYPE_P (type))
5873 {
5874 t = fold_build_pointer_plus (n1, t);
5875 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5876 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5877 t = fold_convert (signed_type_for (type), t);
5878 }
5879 else
5880 t = fold_build2 (PLUS_EXPR, type, t, n1);
5881 t = fold_convert (TREE_TYPE (startvar), t);
5882 t = force_gimple_operand_gsi (&gsi, t,
5883 DECL_P (startvar)
5884 && TREE_ADDRESSABLE (startvar),
5885 NULL_TREE, false, GSI_CONTINUE_LINKING);
5886 assign_stmt = gimple_build_assign (startvar, t);
5887 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5888 if (cond_var)
5889 {
5890 tree itype = TREE_TYPE (cond_var);
 5891	      /* For lastprivate(conditional:) itervar, we need an iteration
 5892	 counter that starts at a non-zero (unsigned) value and increases.
 5893	 Prefer as few IVs as possible, so if we can use startvar
 5894	 itself, use that, or startvar + constant (those would be
 5895	 incremented with step), and as a last resort fall back to
 5896	 s0 + 1, incremented by 1 on each iteration.  */
5897 if (POINTER_TYPE_P (type)
5898 || TREE_CODE (n1) != INTEGER_CST
5899 || fd->loop.cond_code != LT_EXPR)
5900 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5901 build_int_cst (itype, 1));
5902 else if (tree_int_cst_sgn (n1) == 1)
5903 t = fold_convert (itype, t);
5904 else
5905 {
5906 tree c = fold_convert (itype, n1);
5907 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5908 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5909 }
5910 t = force_gimple_operand_gsi (&gsi, t, false,
5911 NULL_TREE, false, GSI_CONTINUE_LINKING);
5912 assign_stmt = gimple_build_assign (cond_var, t);
5913 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5914 }
5915
5916 t = fold_convert (itype, e0);
5917 t = fold_build2 (MULT_EXPR, itype, t, step);
5918 if (POINTER_TYPE_P (type))
5919 {
5920 t = fold_build_pointer_plus (n1, t);
5921 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5922 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5923 t = fold_convert (signed_type_for (type), t);
5924 }
5925 else
5926 t = fold_build2 (PLUS_EXPR, type, t, n1);
5927 t = fold_convert (TREE_TYPE (startvar), t);
5928 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5929 false, GSI_CONTINUE_LINKING);
5930 if (endvar)
5931 {
5932 assign_stmt = gimple_build_assign (endvar, e);
5933 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5934 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5935 assign_stmt = gimple_build_assign (fd->loop.v, e);
5936 else
5937 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5938 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5939 }
5940 /* Handle linear clause adjustments. */
5941 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5942 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5943 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5944 c; c = OMP_CLAUSE_CHAIN (c))
5945 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5946 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5947 {
5948 tree d = OMP_CLAUSE_DECL (c);
5949 bool is_ref = omp_is_reference (d);
5950 tree t = d, a, dest;
5951 if (is_ref)
5952 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5953 tree type = TREE_TYPE (t);
5954 if (POINTER_TYPE_P (type))
5955 type = sizetype;
5956 dest = unshare_expr (t);
5957 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5958 expand_omp_build_assign (&gsif, v, t);
5959 if (itercnt == NULL_TREE)
5960 {
5961 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5962 {
5963 itercntbias
5964 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5965 fold_convert (itype, fd->loop.n1));
5966 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5967 itercntbias, step);
5968 itercntbias
5969 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5970 NULL_TREE, true,
5971 GSI_SAME_STMT);
5972 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5973 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5974 NULL_TREE, false,
5975 GSI_CONTINUE_LINKING);
5976 }
5977 else
5978 itercnt = s0;
5979 }
5980 a = fold_build2 (MULT_EXPR, type,
5981 fold_convert (type, itercnt),
5982 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5983 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5984 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5985 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5986 false, GSI_CONTINUE_LINKING);
6fcc3cac 5987 expand_omp_build_assign (&gsi, dest, t, true);
5988 }
5989 if (fd->collapse > 1)
aed3ab25 5990 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5991
5992 if (!broken_loop)
5993 {
5994 /* The code controlling the sequential loop goes in CONT_BB,
5995 replacing the GIMPLE_OMP_CONTINUE. */
65f4b875 5996 gsi = gsi_last_nondebug_bb (cont_bb);
5997 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5998 vmain = gimple_omp_continue_control_use (cont_stmt);
5999 vback = gimple_omp_continue_control_def (cont_stmt);
6000
6001 if (cond_var)
6002 {
6003 tree itype = TREE_TYPE (cond_var);
6004 tree t2;
6005 if (POINTER_TYPE_P (type)
6006 || TREE_CODE (n1) != INTEGER_CST
6007 || fd->loop.cond_code != LT_EXPR)
6008 t2 = build_int_cst (itype, 1);
6009 else
6010 t2 = fold_convert (itype, step);
6011 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6012 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6013 NULL_TREE, true, GSI_SAME_STMT);
6014 assign_stmt = gimple_build_assign (cond_var, t2);
6015 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6016 }
6017
6018 if (!gimple_omp_for_combined_p (fd->for_stmt))
6019 {
6020 if (POINTER_TYPE_P (type))
6021 t = fold_build_pointer_plus (vmain, step);
6022 else
6023 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6024 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6025 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6026 true, GSI_SAME_STMT);
6027 assign_stmt = gimple_build_assign (vback, t);
6028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6029
6030 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6031 t = build2 (EQ_EXPR, boolean_type_node,
6032 build_int_cst (itype, 0),
6033 build_int_cst (itype, 1));
6034 else
6035 t = build2 (fd->loop.cond_code, boolean_type_node,
6036 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6037 ? t : vback, e);
6038 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6039 }
6040
6041 /* Remove GIMPLE_OMP_CONTINUE. */
6042 gsi_remove (&gsi, true);
6043
6044 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
aed3ab25 6045 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6046
6047 /* Trip update code goes into TRIP_UPDATE_BB. */
6048 gsi = gsi_start_bb (trip_update_bb);
6049
6050 t = build_int_cst (itype, 1);
6051 t = build2 (PLUS_EXPR, itype, trip_main, t);
6052 assign_stmt = gimple_build_assign (trip_back, t);
6053 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6054 }
6055
6056 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 6057 gsi = gsi_last_nondebug_bb (exit_bb);
6058 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6059 {
6060 t = gimple_omp_return_lhs (gsi_stmt (gsi));
8221c30b 6061 if (fd->have_reductemp || fd->have_pointer_condtemp)
6062 {
6063 tree fn;
6064 if (t)
6065 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6066 else
6067 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6068 gcall *g = gimple_build_call (fn, 0);
6069 if (t)
6070 {
6071 gimple_call_set_lhs (g, t);
6072 if (fd->have_reductemp)
6073 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6074 NOP_EXPR, t),
6075 GSI_SAME_STMT);
6076 }
6077 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6078 }
6079 else
6080 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
629b3d75 6081 }
6082 else if (fd->have_pointer_condtemp)
6083 {
6084 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6085 gcall *g = gimple_build_call (fn, 0);
6086 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6087 }
6088 gsi_remove (&gsi, true);
6089
6090 /* Connect the new blocks. */
6091 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6092 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6093
6094 if (!broken_loop)
6095 {
6096 se = find_edge (cont_bb, body_bb);
6097 if (se == NULL)
6098 {
6099 se = BRANCH_EDGE (cont_bb);
6100 gcc_assert (single_succ (se->dest) == body_bb);
6101 }
6102 if (gimple_omp_for_combined_p (fd->for_stmt))
6103 {
6104 remove_edge (se);
6105 se = NULL;
6106 }
6107 else if (fd->collapse > 1)
6108 {
6109 remove_edge (se);
6110 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6111 }
6112 else
6113 se->flags = EDGE_TRUE_VALUE;
6114 find_edge (cont_bb, trip_update_bb)->flags
6115 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6116
6117 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6118 iter_part_bb);
6119 }
6120
6121 if (gimple_in_ssa_p (cfun))
6122 {
6123 gphi_iterator psi;
6124 gphi *phi;
6125 edge re, ene;
6126 edge_var_map *vm;
6127 size_t i;
6128
6129 gcc_assert (fd->collapse == 1 && !broken_loop);
6130
6131 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6132 remove arguments of the phi nodes in fin_bb. We need to create
6133 appropriate phi nodes in iter_part_bb instead. */
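/* Schematically (a sketch using this function's block names):

     entry_bb --> iter_part_bb <-- trip_update_bb  (redirected edge)
                       |
                       +--TRUE--> seq_start_bb --> body_bb ... cont_bb
                       +--FALSE-> fin_bb

   Values that previously flowed into fin_bb's phis along the removed
   edge must now merge in new phis in iter_part_bb.  */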
6134 se = find_edge (iter_part_bb, fin_bb);
6135 re = single_succ_edge (trip_update_bb);
6136 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6137 ene = single_succ_edge (entry_bb);
6138
6139 psi = gsi_start_phis (fin_bb);
6140 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6141 gsi_next (&psi), ++i)
6142 {
6143 gphi *nphi;
620e594b 6144 location_t locus;
6145
6146 phi = psi.phi ();
6147 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6148 redirect_edge_var_map_def (vm), 0))
6149 continue;
6150
6151 t = gimple_phi_result (phi);
6152 gcc_assert (t == redirect_edge_var_map_result (vm));
6153
6154 if (!single_pred_p (fin_bb))
6155 t = copy_ssa_name (t, phi);
6156
6157 nphi = create_phi_node (t, iter_part_bb);
6158
6159 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6160 locus = gimple_phi_arg_location_from_edge (phi, se);
6161
6162 /* A special case -- fd->loop.v is not yet computed in
 6163	     iter_part_bb; we need to use vextra instead.  */
6164 if (t == fd->loop.v)
6165 t = vextra;
6166 add_phi_arg (nphi, t, ene, locus);
6167 locus = redirect_edge_var_map_location (vm);
6168 tree back_arg = redirect_edge_var_map_def (vm);
6169 add_phi_arg (nphi, back_arg, re, locus);
6170 edge ce = find_edge (cont_bb, body_bb);
6171 if (ce == NULL)
6172 {
6173 ce = BRANCH_EDGE (cont_bb);
6174 gcc_assert (single_succ (ce->dest) == body_bb);
6175 ce = single_succ_edge (ce->dest);
6176 }
6177 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6178 gcc_assert (inner_loop_phi != NULL);
6179 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6180 find_edge (seq_start_bb, body_bb), locus);
6181
6182 if (!single_pred_p (fin_bb))
6183 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6184 }
6185 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6186 redirect_edge_var_map_clear (re);
6187 if (single_pred_p (fin_bb))
6188 while (1)
6189 {
6190 psi = gsi_start_phis (fin_bb);
6191 if (gsi_end_p (psi))
6192 break;
6193 remove_phi_node (&psi, false);
6194 }
6195
6196 /* Make phi node for trip. */
6197 phi = create_phi_node (trip_main, iter_part_bb);
6198 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6199 UNKNOWN_LOCATION);
6200 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6201 UNKNOWN_LOCATION);
6202 }
6203
6204 if (!broken_loop)
6205 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6206 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6207 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6208 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6209 recompute_dominator (CDI_DOMINATORS, fin_bb));
6210 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6211 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6212 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6213 recompute_dominator (CDI_DOMINATORS, body_bb));
6214
6215 if (!broken_loop)
6216 {
6217 class loop *loop = body_bb->loop_father;
6218 class loop *trip_loop = alloc_loop ();
6219 trip_loop->header = iter_part_bb;
6220 trip_loop->latch = trip_update_bb;
6221 add_loop (trip_loop, iter_part_bb->loop_father);
6222
6223 if (loop != entry_bb->loop_father)
6224 {
6225 gcc_assert (loop->header == body_bb);
6226 gcc_assert (loop->latch == region->cont
6227 || single_pred (loop->latch) == region->cont);
6228 trip_loop->inner = loop;
6229 return;
6230 }
6231
6232 if (!gimple_omp_for_combined_p (fd->for_stmt))
6233 {
6234 loop = alloc_loop ();
6235 loop->header = body_bb;
6236 if (collapse_bb == NULL)
6237 loop->latch = cont_bb;
6238 add_loop (loop, trip_loop);
6239 }
6240 }
6241}
6242
6243/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6244 loop. Given parameters:
6245
6246 for (V = N1; V cond N2; V += STEP) BODY;
6247
6248 where COND is "<" or ">", we generate pseudocode
6249
6250 V = N1;
6251 goto L1;
6252 L0:
6253 BODY;
6254 V += STEP;
6255 L1:
6256 if (V cond N2) goto L0; else goto L2;
6257 L2:
6258
6259 For collapsed loops, emit the outer loops as scalar
6260 and only try to vectorize the innermost loop. */
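/* For instance, a source-level construct such as

     #pragma omp simd safelen(8)
     for (V = N1; V < N2; V += STEP) BODY;

   (pragma and bound illustrative) is expanded to the scalar loop
   shown above; the loop is only annotated (safelen, _simduid_) so
   that the vectorizer may later vectorize it with a vectorization
   factor of at most 8.  */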
6261
6262static void
6263expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6264{
6265 tree type, t;
6266 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6267 gimple_stmt_iterator gsi;
6268 gimple *stmt;
6269 gcond *cond_stmt;
6270 bool broken_loop = region->cont == NULL;
6271 edge e, ne;
6272 tree *counts = NULL;
6273 int i;
6274 int safelen_int = INT_MAX;
fed2a43c 6275 bool dont_vectorize = false;
6276 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6277 OMP_CLAUSE_SAFELEN);
6278 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6279 OMP_CLAUSE__SIMDUID_);
6280 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6281 OMP_CLAUSE_IF);
6282 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6283 OMP_CLAUSE_SIMDLEN);
6284 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6285 OMP_CLAUSE__CONDTEMP_);
629b3d75 6286 tree n1, n2;
e7393c89 6287 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6288
6289 if (safelen)
6290 {
9d2f08ab 6291 poly_uint64 val;
629b3d75 6292 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
9d2f08ab 6293 if (!poly_int_tree_p (safelen, &val))
629b3d75 6294 safelen_int = 0;
6295 else
6296 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6297 if (safelen_int == 1)
6298 safelen_int = 0;
6299 }
6300 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6301 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6302 {
6303 safelen_int = 0;
6304 dont_vectorize = true;
6305 }
6306 type = TREE_TYPE (fd->loop.v);
6307 entry_bb = region->entry;
6308 cont_bb = region->cont;
6309 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6310 gcc_assert (broken_loop
6311 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6312 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6313 if (!broken_loop)
6314 {
6315 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6316 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6317 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6318 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6319 }
6320 else
6321 {
6322 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6323 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6324 l2_bb = single_succ (l1_bb);
6325 }
6326 exit_bb = region->exit;
6327 l2_dom_bb = NULL;
6328
65f4b875 6329 gsi = gsi_last_nondebug_bb (entry_bb);
6330
6331 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6332 /* Not needed in SSA form right now. */
6333 gcc_assert (!gimple_in_ssa_p (cfun));
6334 if (fd->collapse > 1
6335 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6336 || broken_loop))
6337 {
6338 int first_zero_iter = -1, dummy = -1;
6339 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6340
6341 counts = XALLOCAVEC (tree, fd->collapse);
6342 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6343 zero_iter_bb, first_zero_iter,
6344 dummy_bb, dummy, l2_dom_bb);
6345 }
6346 if (l2_dom_bb == NULL)
6347 l2_dom_bb = l1_bb;
6348
6349 n1 = fd->loop.n1;
6350 n2 = fd->loop.n2;
6351 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6352 {
6353 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354 OMP_CLAUSE__LOOPTEMP_);
6355 gcc_assert (innerc);
6356 n1 = OMP_CLAUSE_DECL (innerc);
6357 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6358 OMP_CLAUSE__LOOPTEMP_);
6359 gcc_assert (innerc);
6360 n2 = OMP_CLAUSE_DECL (innerc);
6361 }
6362 tree step = fd->loop.step;
fc14ff61 6363 tree orig_step = step; /* May be different from step if is_simt. */
629b3d75 6364
6365 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6366 OMP_CLAUSE__SIMT_);
6367 if (is_simt)
6368 {
6369 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6370 is_simt = safelen_int > 1;
6371 }
6372 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6373 if (is_simt)
6374 {
6375 simt_lane = create_tmp_var (unsigned_type_node);
6376 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6377 gimple_call_set_lhs (g, simt_lane);
6378 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6379 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6380 fold_convert (TREE_TYPE (step), simt_lane));
6381 n1 = fold_convert (type, n1);
6382 if (POINTER_TYPE_P (type))
6383 n1 = fold_build_pointer_plus (n1, offset);
6384 else
6385 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6386
6387 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6388 if (fd->collapse > 1)
6389 simt_maxlane = build_one_cst (unsigned_type_node);
6390 else if (safelen_int < omp_max_simt_vf ())
6391 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6392 tree vf
6393 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6394 unsigned_type_node, 0);
6395 if (simt_maxlane)
6396 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6397 vf = fold_convert (TREE_TYPE (step), vf);
6398 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6399 }
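  /* (Added illustration, hedged.)  With the two adjustments above, SIMT
     lane L starts at n1 + L * orig_step and then advances by
     orig_step * vf, i.e. iterations are dealt out cyclically across the
     lanes; e.g. with vf == 32, lane 0 runs iterations 0, 32, 64, ...  */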
6400
c2ebf4f1
JJ
6401 tree n2var = NULL_TREE;
6402 tree n2v = NULL_TREE;
6403 tree *nonrect_bounds = NULL;
83f565ed 6404 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
629b3d75
MJ
6405 if (fd->collapse > 1)
6406 {
c2ebf4f1 6407 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
629b3d75 6408 {
c2ebf4f1
JJ
6409 if (fd->non_rect)
6410 {
6411 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6412 memset (nonrect_bounds, 0,
6413 sizeof (tree) * (fd->last_nonrect + 1));
6414 }
6415 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6416 gcc_assert (entry_bb == gsi_bb (gsi));
6417 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
629b3d75 6418 gsi_prev (&gsi);
c2ebf4f1
JJ
6419 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6420 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6421 NULL, n1);
6422 gsi = gsi_for_stmt (fd->for_stmt);
6423 }
6424 if (broken_loop)
6425 ;
6426 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6427 {
6428 /* Compute in n2var the limit for the first innermost loop,
6429 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6430			 where cnt is how many iterations the loop would have if
6431 all further iterations were assigned to the current task. */
6432 n2var = create_tmp_var (type);
6433 i = fd->collapse - 1;
6434 tree itype = TREE_TYPE (fd->loops[i].v);
6435 if (POINTER_TYPE_P (itype))
6436 itype = signed_type_for (itype);
6437 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6438 ? -1 : 1));
6439 t = fold_build2 (PLUS_EXPR, itype,
6440 fold_convert (itype, fd->loops[i].step), t);
6441 t = fold_build2 (PLUS_EXPR, itype, t,
6442 fold_convert (itype, fd->loops[i].n2));
6443 if (fd->loops[i].m2)
6444 {
6445 tree t2 = fold_convert (itype,
6446 fd->loops[i - fd->loops[i].outer].v);
6447 tree t3 = fold_convert (itype, fd->loops[i].m2);
6448 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6449 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6450 }
6451 t = fold_build2 (MINUS_EXPR, itype, t,
6452 fold_convert (itype, fd->loops[i].v));
6453 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6454 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6455 fold_build1 (NEGATE_EXPR, itype, t),
6456 fold_build1 (NEGATE_EXPR, itype,
6457 fold_convert (itype,
6458 fd->loops[i].step)));
6459 else
6460 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6461 fold_convert (itype, fd->loops[i].step));
6462 t = fold_convert (type, t);
6463 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
83f565ed
JJ
6464 min_arg1 = create_tmp_var (type);
6465 expand_omp_build_assign (&gsi, min_arg1, t2);
6466 min_arg2 = create_tmp_var (type);
6467 expand_omp_build_assign (&gsi, min_arg2, t);
629b3d75
MJ
6468 }
6469 else
c2ebf4f1
JJ
6470 {
6471 if (TREE_CODE (n2) == INTEGER_CST)
6472 {
6473 /* Indicate for lastprivate handling that at least one iteration
6474 has been performed, without wasting runtime. */
6475 if (integer_nonzerop (n2))
6476 expand_omp_build_assign (&gsi, fd->loop.v,
6477 fold_convert (type, n2));
6478 else
6479 /* Indicate that no iteration has been performed. */
6480 expand_omp_build_assign (&gsi, fd->loop.v,
6481 build_one_cst (type));
6482 }
6483 else
6484 {
6485 expand_omp_build_assign (&gsi, fd->loop.v,
6486 build_zero_cst (type));
6487 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6488 }
6489 for (i = 0; i < fd->collapse; i++)
6490 {
6491 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6492 if (fd->loops[i].m1)
6493 {
6494 tree t2
6495 = fold_convert (TREE_TYPE (t),
6496 fd->loops[i - fd->loops[i].outer].v);
6497 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6498 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6499 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6500 }
6501 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6502 /* For normal non-combined collapsed loops just initialize
6503 the outermost iterator in the entry_bb. */
6504 if (!broken_loop)
6505 break;
6506 }
6507 }
629b3d75 6508 }
c2ebf4f1
JJ
6509 else
6510 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
a29bd4f5
JJ
6511 tree altv = NULL_TREE, altn2 = NULL_TREE;
6512 if (fd->collapse == 1
6513 && !broken_loop
fc14ff61 6514 && TREE_CODE (orig_step) != INTEGER_CST)
a29bd4f5
JJ
6515 {
6516 /* The vectorizer currently punts on loops with non-constant steps
6517 for the main IV (can't compute number of iterations and gives up
6518	 because of that).  Since for OpenMP loops it is always possible to
6519 compute the number of iterations upfront, use an alternate IV
6520 as the loop iterator:
6521 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6522 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
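      /* (Added worked example, hypothetical numbers.)  For
	     for (i = 3; i < 11; i += s)	// s == 4 only at run time
	 this gives altn2 = (11 - 3 + 4 - 1) / 4 == 2, so altv runs
	 0, 1 while i takes the values 3 and 7, as in the original.  */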
6523 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6524 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6525 tree itype = TREE_TYPE (fd->loop.v);
6526 if (POINTER_TYPE_P (itype))
6527 itype = signed_type_for (itype);
6528 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6529 t = fold_build2 (PLUS_EXPR, itype,
fc14ff61 6530 fold_convert (itype, step), t);
a29bd4f5
JJ
6531 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6532 t = fold_build2 (MINUS_EXPR, itype, t,
6533 fold_convert (itype, fd->loop.v));
6534 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6535 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6536 fold_build1 (NEGATE_EXPR, itype, t),
6537 fold_build1 (NEGATE_EXPR, itype,
fc14ff61 6538 fold_convert (itype, step)));
a29bd4f5
JJ
6539 else
6540 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
fc14ff61 6541 fold_convert (itype, step));
a29bd4f5
JJ
6542 t = fold_convert (TREE_TYPE (altv), t);
6543 altn2 = create_tmp_var (TREE_TYPE (altv));
6544 expand_omp_build_assign (&gsi, altn2, t);
6545 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6546 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6547 true, GSI_SAME_STMT);
6548 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6549 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6550 build_zero_cst (TREE_TYPE (altv)));
6551 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6552 }
6553 else if (fd->collapse > 1
6554 && !broken_loop
6555 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6556 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6557 {
6558 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6559 altn2 = create_tmp_var (TREE_TYPE (altv));
6560 }
e7393c89
JJ
6561 if (cond_var)
6562 {
6563 if (POINTER_TYPE_P (type)
6564 || TREE_CODE (n1) != INTEGER_CST
6565 || fd->loop.cond_code != LT_EXPR
6566 || tree_int_cst_sgn (n1) != 1)
6567 expand_omp_build_assign (&gsi, cond_var,
6568 build_one_cst (TREE_TYPE (cond_var)));
6569 else
6570 expand_omp_build_assign (&gsi, cond_var,
6571 fold_convert (TREE_TYPE (cond_var), n1));
6572 }
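  /* (Added note, uncertain reading.)  cond_var appears to give each
     iteration a nonzero, monotonically increasing ordinal for
     lastprivate(conditional) bookkeeping; when V itself is already such
     an ordinal (positive integer, counted upwards with <), the cheaper
     initialization from N1 reuses it directly.  */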
629b3d75
MJ
6573
6574 /* Remove the GIMPLE_OMP_FOR statement. */
6575 gsi_remove (&gsi, true);
6576
6577 if (!broken_loop)
6578 {
6579 /* Code to control the increment goes in the CONT_BB. */
65f4b875 6580 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
6581 stmt = gsi_stmt (gsi);
6582 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6583
c2ebf4f1
JJ
6584 if (fd->collapse == 1
6585 || gimple_omp_for_combined_into_p (fd->for_stmt))
6586 {
6587 if (POINTER_TYPE_P (type))
6588 t = fold_build_pointer_plus (fd->loop.v, step);
6589 else
6590 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6591 expand_omp_build_assign (&gsi, fd->loop.v, t);
6592 }
6593 else if (TREE_CODE (n2) != INTEGER_CST)
6594 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
a29bd4f5
JJ
6595 if (altv)
6596 {
6597 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6598 build_one_cst (TREE_TYPE (altv)));
6599 expand_omp_build_assign (&gsi, altv, t);
6600 }
629b3d75
MJ
6601
6602 if (fd->collapse > 1)
6603 {
6604 i = fd->collapse - 1;
6605 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6606 {
6607 t = fold_convert (sizetype, fd->loops[i].step);
6608 t = fold_build_pointer_plus (fd->loops[i].v, t);
6609 }
6610 else
6611 {
6612 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6613 fd->loops[i].step);
6614 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6615 fd->loops[i].v, t);
6616 }
6617 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
629b3d75 6618 }
e7393c89
JJ
6619 if (cond_var)
6620 {
6621 if (POINTER_TYPE_P (type)
6622 || TREE_CODE (n1) != INTEGER_CST
6623 || fd->loop.cond_code != LT_EXPR
6624 || tree_int_cst_sgn (n1) != 1)
6625 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6626 build_one_cst (TREE_TYPE (cond_var)));
6627 else
6628 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6629 fold_convert (TREE_TYPE (cond_var), step));
6630 expand_omp_build_assign (&gsi, cond_var, t);
6631 }
629b3d75
MJ
6632
6633 /* Remove GIMPLE_OMP_CONTINUE. */
6634 gsi_remove (&gsi, true);
6635 }
6636
6637 /* Emit the condition in L1_BB. */
6638 gsi = gsi_start_bb (l1_bb);
6639
a29bd4f5
JJ
6640 if (altv)
6641 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6642 else if (fd->collapse > 1
6643 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6644 && !broken_loop)
c2ebf4f1
JJ
6645 {
6646 i = fd->collapse - 1;
6647 tree itype = TREE_TYPE (fd->loops[i].v);
6648 if (fd->loops[i].m2)
6649 t = n2v = create_tmp_var (itype);
6650 else
6651 t = fold_convert (itype, fd->loops[i].n2);
6652 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6653 false, GSI_CONTINUE_LINKING);
6654 tree v = fd->loops[i].v;
6655 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6656 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6657 false, GSI_CONTINUE_LINKING);
6658 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6659 }
6660 else
6661 {
6662 if (fd->collapse > 1 && !broken_loop)
6663 t = n2var;
6664 else
6665 t = fold_convert (type, n2);
6666 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6667 false, GSI_CONTINUE_LINKING);
6668 tree v = fd->loop.v;
6669 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6670 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6671 false, GSI_CONTINUE_LINKING);
6672 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6673 }
629b3d75
MJ
6674 cond_stmt = gimple_build_cond_empty (t);
6675 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6676 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6677 NULL, NULL)
6678 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6679 NULL, NULL))
6680 {
6681 gsi = gsi_for_stmt (cond_stmt);
6682 gimple_regimplify_operands (cond_stmt, &gsi);
6683 }
6684
6685 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6686 if (is_simt)
6687 {
6688 gsi = gsi_start_bb (l2_bb);
fc14ff61 6689 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
629b3d75
MJ
6690 if (POINTER_TYPE_P (type))
6691 t = fold_build_pointer_plus (fd->loop.v, step);
6692 else
6693 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6694 expand_omp_build_assign (&gsi, fd->loop.v, t);
6695 }
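      /* (Added worked detail, hedged.)  The loop ran with step
	 orig_step * vf, so the lane that executed the last logical
	 iteration N-1 exits with V == n1 + (N - 1 + vf) * orig_step;
	 adding orig_step - orig_step * vf leaves V == n1 + N * orig_step,
	 the value a sequential loop would end with.  */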
6696
6697 /* Remove GIMPLE_OMP_RETURN. */
65f4b875 6698 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6699 gsi_remove (&gsi, true);
6700
6701 /* Connect the new blocks. */
6702 remove_edge (FALLTHRU_EDGE (entry_bb));
6703
6704 if (!broken_loop)
6705 {
6706 remove_edge (BRANCH_EDGE (entry_bb));
6707 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6708
6709 e = BRANCH_EDGE (l1_bb);
6710 ne = FALLTHRU_EDGE (l1_bb);
6711 e->flags = EDGE_TRUE_VALUE;
6712 }
6713 else
6714 {
6715 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6716
6717 ne = single_succ_edge (l1_bb);
6718 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6719
6720 }
6721 ne->flags = EDGE_FALSE_VALUE;
357067f2
JH
6722 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6723 ne->probability = e->probability.invert ();
629b3d75
MJ
6724
6725 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6726 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6727
6728 if (simt_maxlane)
6729 {
6730 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6731 NULL_TREE, NULL_TREE);
6732 gsi = gsi_last_bb (entry_bb);
6733 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6734 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6735 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
357067f2
JH
6736 FALLTHRU_EDGE (entry_bb)->probability
6737 = profile_probability::guessed_always ().apply_scale (7, 8);
c2ebf4f1 6738 BRANCH_EDGE (entry_bb)->probability
357067f2 6739 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
629b3d75
MJ
6740 l2_dom_bb = entry_bb;
6741 }
6742 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6743
c2ebf4f1
JJ
6744 if (!broken_loop && fd->collapse > 1)
6745 {
6746 basic_block last_bb = l1_bb;
6747 basic_block init_bb = NULL;
6748 for (i = fd->collapse - 2; i >= 0; i--)
6749 {
6750 tree nextn2v = NULL_TREE;
6751 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6752 e = EDGE_SUCC (last_bb, 0);
6753 else
6754 e = EDGE_SUCC (last_bb, 1);
6755 basic_block bb = split_edge (e);
6756 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6757 {
6758 t = fold_convert (sizetype, fd->loops[i].step);
6759 t = fold_build_pointer_plus (fd->loops[i].v, t);
6760 }
6761 else
6762 {
6763 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6764 fd->loops[i].step);
6765 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6766 fd->loops[i].v, t);
6767 }
6768 gsi = gsi_after_labels (bb);
6769 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6770
6771 bb = split_block (bb, last_stmt (bb))->dest;
6772 gsi = gsi_start_bb (bb);
6773 tree itype = TREE_TYPE (fd->loops[i].v);
6774 if (fd->loops[i].m2)
6775 t = nextn2v = create_tmp_var (itype);
6776 else
6777 t = fold_convert (itype, fd->loops[i].n2);
6778 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6779 false, GSI_CONTINUE_LINKING);
6780 tree v = fd->loops[i].v;
6781 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6782 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6783 false, GSI_CONTINUE_LINKING);
6784 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6785 cond_stmt = gimple_build_cond_empty (t);
6786 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6787 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6788 expand_omp_regimplify_p, NULL, NULL)
6789 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6790 expand_omp_regimplify_p, NULL, NULL))
6791 {
6792 gsi = gsi_for_stmt (cond_stmt);
6793 gimple_regimplify_operands (cond_stmt, &gsi);
6794 }
6795 ne = single_succ_edge (bb);
6796 ne->flags = EDGE_FALSE_VALUE;
6797
6798 init_bb = create_empty_bb (bb);
6799 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6800 add_bb_to_loop (init_bb, bb->loop_father);
6801 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6802 e->probability
6803 = profile_probability::guessed_always ().apply_scale (7, 8);
6804 ne->probability = e->probability.invert ();
6805
6806 gsi = gsi_after_labels (init_bb);
6807 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6808 fd->loops[i + 1].n1);
6809 if (fd->loops[i + 1].m1)
6810 {
6811 tree t2 = fold_convert (TREE_TYPE (t),
6812 fd->loops[i + 1
6813 - fd->loops[i + 1].outer].v);
6814 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6815 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6816 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6817 }
6818 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6819 if (fd->loops[i + 1].m2)
6820 {
a29bd4f5 6821 if (i + 2 == fd->collapse && (n2var || altv))
c2ebf4f1
JJ
6822 {
6823 gcc_assert (n2v == NULL_TREE);
6824 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6825 }
6826 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6827 fd->loops[i + 1].n2);
6828 tree t2 = fold_convert (TREE_TYPE (t),
6829 fd->loops[i + 1
6830 - fd->loops[i + 1].outer].v);
6831 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6832 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6833 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6834 expand_omp_build_assign (&gsi, n2v, t);
6835 }
6836 if (i + 2 == fd->collapse && n2var)
6837 {
6838 /* For composite simd, n2 is the first iteration the current
6839 task shouldn't already handle, so we effectively want to use
6840 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6841 as the vectorized loop. Except the vectorizer will not
6842 vectorize that, so instead compute N2VAR as
6843 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6844 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6845 as the loop to vectorize. */
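	      /* (Added restatement, hedged.)  E.g. with 6 flattened
		 iterations left for this task and COUNTS3 == 4, the
		 vectorized inner loop runs min (6, 4) iterations before
		 the outer logic regains control, so the task never
		 touches iterations owned by the next task.  */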
6846 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6847 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6848 {
6849 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6850 == LT_EXPR ? -1 : 1));
6851 t = fold_build2 (PLUS_EXPR, itype,
6852 fold_convert (itype,
6853 fd->loops[i + 1].step), t);
6854 if (fd->loops[i + 1].m2)
6855 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6856 else
6857 t = fold_build2 (PLUS_EXPR, itype, t,
6858 fold_convert (itype,
6859 fd->loops[i + 1].n2));
6860 t = fold_build2 (MINUS_EXPR, itype, t,
6861 fold_convert (itype, fd->loops[i + 1].v));
6862 tree step = fold_convert (itype, fd->loops[i + 1].step);
6863 if (TYPE_UNSIGNED (itype)
6864 && fd->loops[i + 1].cond_code == GT_EXPR)
6865 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6866 fold_build1 (NEGATE_EXPR, itype, t),
6867 fold_build1 (NEGATE_EXPR, itype, step));
6868 else
6869 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6870 t = fold_convert (type, t);
6871 }
6872 else
6873 t = counts[i + 1];
83f565ed
JJ
6874 expand_omp_build_assign (&gsi, min_arg1, t2);
6875 expand_omp_build_assign (&gsi, min_arg2, t);
6876 e = split_block (init_bb, last_stmt (init_bb));
6877 gsi = gsi_after_labels (e->dest);
6878 init_bb = e->dest;
6879 remove_edge (FALLTHRU_EDGE (entry_bb));
6880 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6881 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6882 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6883 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
c2ebf4f1
JJ
6884 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6885 expand_omp_build_assign (&gsi, n2var, t);
6886 }
a29bd4f5
JJ
6887 if (i + 2 == fd->collapse && altv)
6888 {
6889 /* The vectorizer currently punts on loops with non-constant
6890 steps for the main IV (can't compute number of iterations
6891			 and gives up because of that).  Since for OpenMP loops it is
6892 always possible to compute the number of iterations upfront,
6893 use an alternate IV as the loop iterator. */
6894 expand_omp_build_assign (&gsi, altv,
6895 build_zero_cst (TREE_TYPE (altv)));
6896 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6897 if (POINTER_TYPE_P (itype))
6898 itype = signed_type_for (itype);
6899 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6900 ? -1 : 1));
6901 t = fold_build2 (PLUS_EXPR, itype,
6902 fold_convert (itype, fd->loops[i + 1].step), t);
6903 t = fold_build2 (PLUS_EXPR, itype, t,
6904 fold_convert (itype,
6905 fd->loops[i + 1].m2
6906 ? n2v : fd->loops[i + 1].n2));
6907 t = fold_build2 (MINUS_EXPR, itype, t,
6908 fold_convert (itype, fd->loops[i + 1].v));
6909 tree step = fold_convert (itype, fd->loops[i + 1].step);
6910 if (TYPE_UNSIGNED (itype)
6911 && fd->loops[i + 1].cond_code == GT_EXPR)
6912 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6913 fold_build1 (NEGATE_EXPR, itype, t),
6914 fold_build1 (NEGATE_EXPR, itype, step));
6915 else
6916 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6917 t = fold_convert (TREE_TYPE (altv), t);
6918 expand_omp_build_assign (&gsi, altn2, t);
6919 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6920 fd->loops[i + 1].m2
6921 ? n2v : fd->loops[i + 1].n2);
6922 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6923 true, GSI_SAME_STMT);
6924 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6925 fd->loops[i + 1].v, t2);
6926 gassign *g
6927 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6928 build_zero_cst (TREE_TYPE (altv)));
6929 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6930 }
c2ebf4f1
JJ
6931 n2v = nextn2v;
6932
6933 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6934 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6935 {
6936 e = find_edge (entry_bb, last_bb);
6937 redirect_edge_succ (e, bb);
6938 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6939 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6940 }
6941
6942 last_bb = bb;
6943 }
6944 }
629b3d75
MJ
6945 if (!broken_loop)
6946 {
99b1c316 6947 class loop *loop = alloc_loop ();
629b3d75
MJ
6948 loop->header = l1_bb;
6949 loop->latch = cont_bb;
6950 add_loop (loop, l1_bb->loop_father);
6951 loop->safelen = safelen_int;
6952 if (simduid)
6953 {
6954 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6955 cfun->has_simduid_loops = true;
6956 }
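      /* (Added note on the fields above, hedged.)  loop->safelen
	 promises the vectorizer that iterations closer than safelen
	 carry no dependences -- e.g. safelen(16) permits a
	 vectorization factor up to 16 even if aliasing cannot be
	 disproved -- while loop->simduid ties the loop to its
	 GOMP_SIMD_* lane artifacts.  */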
6957 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6958 the loop. */
6959 if ((flag_tree_loop_vectorize
26d476cd 6960 || !global_options_set.x_flag_tree_loop_vectorize)
629b3d75
MJ
6961 && flag_tree_loop_optimize
6962 && loop->safelen > 1)
6963 {
6964 loop->force_vectorize = true;
f63445e5
JJ
6965 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6966 {
6967 unsigned HOST_WIDE_INT v
6968 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6969 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6970 loop->simdlen = v;
6971 }
629b3d75
MJ
6972 cfun->has_force_vectorize_loops = true;
6973 }
fed2a43c
JJ
6974 else if (dont_vectorize)
6975 loop->dont_vectorize = true;
629b3d75
MJ
6976 }
6977 else if (simduid)
6978 cfun->has_simduid_loops = true;
6979}
6980
6981/* The taskloop construct is represented after gimplification with
6982 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6983 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6984 which should just compute all the needed loop temporaries
6985 for GIMPLE_OMP_TASK. */
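/* (Added sketch of that shape; a hypothetical input.)

       #pragma omp taskloop
       for (V = N1; V < N2; V += STEP) BODY;

   arrives here roughly as

       GIMPLE_OMP_FOR      <- expanded by this function; it only fills
	 GIMPLE_OMP_TASK      the _looptemp_ start/end that the
	   GIMPLE_OMP_FOR     GOMP_taskloop call and the inner loop use.
	     BODY  */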
6986
6987static void
6988expand_omp_taskloop_for_outer (struct omp_region *region,
6989 struct omp_for_data *fd,
6990 gimple *inner_stmt)
6991{
6992 tree type, bias = NULL_TREE;
6993 basic_block entry_bb, cont_bb, exit_bb;
6994 gimple_stmt_iterator gsi;
6995 gassign *assign_stmt;
6996 tree *counts = NULL;
6997 int i;
6998
6999 gcc_assert (inner_stmt);
7000 gcc_assert (region->cont);
7001 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7002 && gimple_omp_task_taskloop_p (inner_stmt));
7003 type = TREE_TYPE (fd->loop.v);
7004
7005 /* See if we need to bias by LLONG_MIN. */
7006 if (fd->iter_type == long_long_unsigned_type_node
7007 && TREE_CODE (type) == INTEGER_TYPE
7008 && !TYPE_UNSIGNED (type))
7009 {
7010 tree n1, n2;
7011
7012 if (fd->loop.cond_code == LT_EXPR)
7013 {
7014 n1 = fd->loop.n1;
7015 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7016 }
7017 else
7018 {
7019 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7020 n2 = fd->loop.n1;
7021 }
7022 if (TREE_CODE (n1) != INTEGER_CST
7023 || TREE_CODE (n2) != INTEGER_CST
7024 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7025 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7026 }
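  /* (Added example of the bias, hedged.)  Adding the wrapping constant
     TYPE_MIN_VALUE maps the signed range monotonically onto the
     unsigned iterator type: in an 8-bit analogue, -3..5 biased by -128
     becomes 125..133, so unsigned comparisons still order the
     iterations correctly.  */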
7027
7028 entry_bb = region->entry;
7029 cont_bb = region->cont;
7030 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7031 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7032 exit_bb = region->exit;
7033
65f4b875 7034 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7035 gimple *for_stmt = gsi_stmt (gsi);
7036 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7037 if (fd->collapse > 1)
7038 {
7039 int first_zero_iter = -1, dummy = -1;
7040 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7041
7042 counts = XALLOCAVEC (tree, fd->collapse);
7043 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7044 zero_iter_bb, first_zero_iter,
7045 dummy_bb, dummy, l2_dom_bb);
7046
7047 if (zero_iter_bb)
7048 {
7049 /* Some counts[i] vars might be uninitialized if
7050 some loop has zero iterations. But the body shouldn't
7051 be executed in that case, so just avoid uninit warnings. */
7052 for (i = first_zero_iter; i < fd->collapse; i++)
7053 if (SSA_VAR_P (counts[i]))
7054 TREE_NO_WARNING (counts[i]) = 1;
7055 gsi_prev (&gsi);
7056 edge e = split_block (entry_bb, gsi_stmt (gsi));
7057 entry_bb = e->dest;
7058 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7059 gsi = gsi_last_bb (entry_bb);
7060 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7061 get_immediate_dominator (CDI_DOMINATORS,
7062 zero_iter_bb));
7063 }
7064 }
7065
7066 tree t0, t1;
7067 t1 = fd->loop.n2;
7068 t0 = fd->loop.n1;
7069 if (POINTER_TYPE_P (TREE_TYPE (t0))
7070 && TYPE_PRECISION (TREE_TYPE (t0))
7071 != TYPE_PRECISION (fd->iter_type))
7072 {
7073 /* Avoid casting pointers to integer of a different size. */
7074 tree itype = signed_type_for (type);
7075 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7076 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7077 }
7078 else
7079 {
7080 t1 = fold_convert (fd->iter_type, t1);
7081 t0 = fold_convert (fd->iter_type, t0);
7082 }
7083 if (bias)
7084 {
7085 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7086 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7087 }
7088
7089 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7090 OMP_CLAUSE__LOOPTEMP_);
7091 gcc_assert (innerc);
7092 tree startvar = OMP_CLAUSE_DECL (innerc);
7093 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7094 gcc_assert (innerc);
7095 tree endvar = OMP_CLAUSE_DECL (innerc);
7096 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7097 {
14707c89 7098 innerc = find_lastprivate_looptemp (fd, innerc);
629b3d75
MJ
7099 if (innerc)
7100 {
7101 /* If needed (inner taskloop has lastprivate clause), propagate
7102 down the total number of iterations. */
7103 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7104 NULL_TREE, false,
7105 GSI_CONTINUE_LINKING);
7106 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7108 }
7109 }
7110
7111 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7112 GSI_CONTINUE_LINKING);
7113 assign_stmt = gimple_build_assign (startvar, t0);
7114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7115
7116 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7117 GSI_CONTINUE_LINKING);
7118 assign_stmt = gimple_build_assign (endvar, t1);
7119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7120 if (fd->collapse > 1)
aed3ab25 7121 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
629b3d75
MJ
7122
7123 /* Remove the GIMPLE_OMP_FOR statement. */
7124 gsi = gsi_for_stmt (for_stmt);
7125 gsi_remove (&gsi, true);
7126
65f4b875 7127 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7128 gsi_remove (&gsi, true);
7129
65f4b875 7130 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7131 gsi_remove (&gsi, true);
7132
357067f2 7133 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75 7134 remove_edge (BRANCH_EDGE (entry_bb));
357067f2 7135 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
629b3d75
MJ
7136 remove_edge (BRANCH_EDGE (cont_bb));
7137 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7138 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7139 recompute_dominator (CDI_DOMINATORS, region->entry));
7140}
7141
7142/* The taskloop construct is represented after gimplification with
7143 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7144 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7145   The GOMP_taskloop{,_ull} function arranges for each task to be given just
7146 a single range of iterations. */
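/* (Added sketch, hedged.)  Each task receives its own [start, end) via
   the _looptemp_ variables, so the expansion below degenerates to a
   plain sequential loop per task:

       for (V = start; V cond end; V += STEP) BODY;  */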
7147
7148static void
7149expand_omp_taskloop_for_inner (struct omp_region *region,
7150 struct omp_for_data *fd,
7151 gimple *inner_stmt)
7152{
7153 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7154 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7155 basic_block fin_bb;
7156 gimple_stmt_iterator gsi;
7157 edge ep;
7158 bool broken_loop = region->cont == NULL;
7159 tree *counts = NULL;
7160 tree n1, n2, step;
7161
7162 itype = type = TREE_TYPE (fd->loop.v);
7163 if (POINTER_TYPE_P (type))
7164 itype = signed_type_for (type);
7165
7166 /* See if we need to bias by LLONG_MIN. */
7167 if (fd->iter_type == long_long_unsigned_type_node
7168 && TREE_CODE (type) == INTEGER_TYPE
7169 && !TYPE_UNSIGNED (type))
7170 {
7171 tree n1, n2;
7172
7173 if (fd->loop.cond_code == LT_EXPR)
7174 {
7175 n1 = fd->loop.n1;
7176 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7177 }
7178 else
7179 {
7180 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7181 n2 = fd->loop.n1;
7182 }
7183 if (TREE_CODE (n1) != INTEGER_CST
7184 || TREE_CODE (n2) != INTEGER_CST
7185 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7186 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7187 }
7188
7189 entry_bb = region->entry;
7190 cont_bb = region->cont;
7191 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7192 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7193 gcc_assert (broken_loop
7194 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7195 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7196 if (!broken_loop)
7197 {
7198 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7199 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7200 }
7201 exit_bb = region->exit;
7202
7203 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 7204 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7205 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7206
7207 if (fd->collapse > 1)
7208 {
7209 int first_zero_iter = -1, dummy = -1;
7210 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7211
7212 counts = XALLOCAVEC (tree, fd->collapse);
7213 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7214 fin_bb, first_zero_iter,
7215 dummy_bb, dummy, l2_dom_bb);
7216 t = NULL_TREE;
7217 }
7218 else
7219 t = integer_one_node;
7220
7221 step = fd->loop.step;
7222 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7223 OMP_CLAUSE__LOOPTEMP_);
7224 gcc_assert (innerc);
7225 n1 = OMP_CLAUSE_DECL (innerc);
7226 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7227 gcc_assert (innerc);
7228 n2 = OMP_CLAUSE_DECL (innerc);
7229 if (bias)
7230 {
7231 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7232 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7233 }
7234 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7235 true, NULL_TREE, true, GSI_SAME_STMT);
7236 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7237 true, NULL_TREE, true, GSI_SAME_STMT);
7238 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7239 true, NULL_TREE, true, GSI_SAME_STMT);
7240
7241 tree startvar = fd->loop.v;
7242 tree endvar = NULL_TREE;
7243
7244 if (gimple_omp_for_combined_p (fd->for_stmt))
7245 {
7246 tree clauses = gimple_omp_for_clauses (inner_stmt);
7247 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7248 gcc_assert (innerc);
7249 startvar = OMP_CLAUSE_DECL (innerc);
7250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7251 OMP_CLAUSE__LOOPTEMP_);
7252 gcc_assert (innerc);
7253 endvar = OMP_CLAUSE_DECL (innerc);
7254 }
7255 t = fold_convert (TREE_TYPE (startvar), n1);
7256 t = force_gimple_operand_gsi (&gsi, t,
7257 DECL_P (startvar)
7258 && TREE_ADDRESSABLE (startvar),
7259 NULL_TREE, false, GSI_CONTINUE_LINKING);
7260 gimple *assign_stmt = gimple_build_assign (startvar, t);
7261 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7262
7263 t = fold_convert (TREE_TYPE (startvar), n2);
7264 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7265 false, GSI_CONTINUE_LINKING);
7266 if (endvar)
7267 {
7268 assign_stmt = gimple_build_assign (endvar, e);
7269 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7270 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7271 assign_stmt = gimple_build_assign (fd->loop.v, e);
7272 else
7273 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7274 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7275 }
2e47c8c6
JJ
7276
7277 tree *nonrect_bounds = NULL;
629b3d75 7278 if (fd->collapse > 1)
2e47c8c6
JJ
7279 {
7280 if (fd->non_rect)
7281 {
7282 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7283 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7284 }
7285 gcc_assert (gsi_bb (gsi) == entry_bb);
7286 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7287 startvar);
7288 entry_bb = gsi_bb (gsi);
7289 }
629b3d75
MJ
7290
7291 if (!broken_loop)
7292 {
7293 /* The code controlling the sequential loop replaces the
7294 GIMPLE_OMP_CONTINUE. */
65f4b875 7295 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7296 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7297 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7298 vmain = gimple_omp_continue_control_use (cont_stmt);
7299 vback = gimple_omp_continue_control_def (cont_stmt);
7300
7301 if (!gimple_omp_for_combined_p (fd->for_stmt))
7302 {
7303 if (POINTER_TYPE_P (type))
7304 t = fold_build_pointer_plus (vmain, step);
7305 else
7306 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7307 t = force_gimple_operand_gsi (&gsi, t,
7308 DECL_P (vback)
7309 && TREE_ADDRESSABLE (vback),
7310 NULL_TREE, true, GSI_SAME_STMT);
7311 assign_stmt = gimple_build_assign (vback, t);
7312 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7313
7314 t = build2 (fd->loop.cond_code, boolean_type_node,
7315 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7316 ? t : vback, e);
7317 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7318 }
7319
7320 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7321 gsi_remove (&gsi, true);
7322
7323 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
2e47c8c6
JJ
7324 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7325 cont_bb, body_bb);
629b3d75
MJ
7326 }
7327
7328 /* Remove the GIMPLE_OMP_FOR statement. */
7329 gsi = gsi_for_stmt (fd->for_stmt);
7330 gsi_remove (&gsi, true);
7331
7332 /* Remove the GIMPLE_OMP_RETURN statement. */
65f4b875 7333 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7334 gsi_remove (&gsi, true);
7335
357067f2 7336 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75
MJ
7337 if (!broken_loop)
7338 remove_edge (BRANCH_EDGE (entry_bb));
7339 else
7340 {
7341 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7342 region->outer->cont = NULL;
7343 }
7344
7345 /* Connect all the blocks. */
7346 if (!broken_loop)
7347 {
7348 ep = find_edge (cont_bb, body_bb);
7349 if (gimple_omp_for_combined_p (fd->for_stmt))
7350 {
7351 remove_edge (ep);
7352 ep = NULL;
7353 }
7354 else if (fd->collapse > 1)
7355 {
7356 remove_edge (ep);
7357 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7358 }
7359 else
7360 ep->flags = EDGE_TRUE_VALUE;
7361 find_edge (cont_bb, fin_bb)->flags
7362 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7363 }
7364
7365 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7366 recompute_dominator (CDI_DOMINATORS, body_bb));
7367 if (!broken_loop)
7368 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7369 recompute_dominator (CDI_DOMINATORS, fin_bb));
7370
7371 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7372 {
99b1c316 7373 class loop *loop = alloc_loop ();
629b3d75
MJ
7374 loop->header = body_bb;
7375 if (collapse_bb == NULL)
7376 loop->latch = cont_bb;
7377 add_loop (loop, body_bb->loop_father);
7378 }
7379}
7380
7381/* A subroutine of expand_omp_for. Generate code for an OpenACC
7382 partitioned loop. The lowering here is abstracted, in that the
7383 loop parameters are passed through internal functions, which are
7384 further lowered by oacc_device_lower, once we get to the target
7385 compiler. The loop is of the form:
7386
7387 for (V = B; V LTGT E; V += S) {BODY}
7388
7389 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7390 (constant 0 for no chunking) and we will have a GWV partitioning
7391 mask, specifying dimensions over which the loop is to be
02889d23
CLT
7392 partitioned (see note below). We generate code that looks like
7393 (this ignores tiling):
629b3d75
MJ
7394
7395 <entry_bb> [incoming FALL->body, BRANCH->exit]
7396 typedef signedintify (typeof (V)) T; // underlying signed integral type
7397 T range = E - B;
7398 T chunk_no = 0;
7399 T DIR = LTGT == '<' ? +1 : -1;
7400 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7401 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7402
7403 <head_bb> [created by splitting end of entry_bb]
7404 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7405 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7406 if (!(offset LTGT bound)) goto bottom_bb;
7407
7408 <body_bb> [incoming]
7409 V = B + offset;
7410 {BODY}
7411
7412 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7413 offset += step;
7414 if (offset LTGT bound) goto body_bb; [*]
7415
7416 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7417 chunk_no++;
7418 if (chunk < chunk_max) goto head_bb;
7419
7420 <exit_bb> [incoming]
7421 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7422
02889d23 7423 [*] Needed if V live at end of loop. */
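/* (Added source-level illustration; the directive is a hypothetical
   example.)  A loop such as

       #pragma acc parallel loop gang vector tile(8,8)
       for (...) for (...) BODY;

   reaches this function as one OpenACC loop region; the GOACC_LOOP
   internal calls built below stay abstract until oacc_device_lower
   resolves them for the concrete offload target.  */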
629b3d75
MJ
7424
7425static void
7426expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7427{
8c3aa359
TS
7428 bool is_oacc_kernels_parallelized
7429 = (lookup_attribute ("oacc kernels parallelized",
7430 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7431 {
7432 bool is_oacc_kernels
7433 = (lookup_attribute ("oacc kernels",
7434 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7435 if (is_oacc_kernels_parallelized)
7436 gcc_checking_assert (is_oacc_kernels);
7437 }
7438 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7439 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7440 for SSA specifics, and some are for 'parloops' OpenACC
7441 'kernels'-parallelized specifics. */
7442
629b3d75
MJ
7443 tree v = fd->loop.v;
7444 enum tree_code cond_code = fd->loop.cond_code;
7445 enum tree_code plus_code = PLUS_EXPR;
7446
7447 tree chunk_size = integer_minus_one_node;
7448 tree gwv = integer_zero_node;
7449 tree iter_type = TREE_TYPE (v);
7450 tree diff_type = iter_type;
7451 tree plus_type = iter_type;
7452 struct oacc_collapse *counts = NULL;
7453
7454 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7455 == GF_OMP_FOR_KIND_OACC_LOOP);
7456 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7457 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7458
7459 if (POINTER_TYPE_P (iter_type))
7460 {
7461 plus_code = POINTER_PLUS_EXPR;
7462 plus_type = sizetype;
7463 }
f324479c
TB
7464 for (int ix = fd->collapse; ix--;)
7465 {
7466 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7467 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7468 diff_type = diff_type2;
7469 }
629b3d75
MJ
7470 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7471 diff_type = signed_type_for (diff_type);
f4c222c0
TV
7472 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7473 diff_type = integer_type_node;
629b3d75
MJ
7474
7475 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7476 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7477 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7478 basic_block bottom_bb = NULL;
7479
9c3da8cc
JJ
7480 /* entry_bb has two successors; the branch edge is to the exit
7481 block, fallthrough edge to body. */
629b3d75
MJ
7482 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7483 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7484
7485 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7486 body_bb, or to a block whose only successor is the body_bb. Its
7487 fallthrough successor is the final block (same as the branch
7488 successor of the entry_bb). */
7489 if (cont_bb)
7490 {
7491 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7492 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7493
7494 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7495 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7496 }
7497 else
7498 gcc_assert (!gimple_in_ssa_p (cfun));
7499
7500 /* The exit block only has entry_bb and cont_bb as predecessors. */
7501 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7502
7503 tree chunk_no;
7504 tree chunk_max = NULL_TREE;
7505 tree bound, offset;
7506 tree step = create_tmp_var (diff_type, ".step");
7507 bool up = cond_code == LT_EXPR;
7508 tree dir = build_int_cst (diff_type, up ? +1 : -1);
02889d23 7509 bool chunking = !gimple_in_ssa_p (cfun);
629b3d75
MJ
7510 bool negating;
7511
02889d23
CLT
7512 /* Tiling vars. */
7513 tree tile_size = NULL_TREE;
7514 tree element_s = NULL_TREE;
7515 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7516 basic_block elem_body_bb = NULL;
7517 basic_block elem_cont_bb = NULL;
7518
629b3d75
MJ
7519 /* SSA instances. */
7520 tree offset_incr = NULL_TREE;
7521 tree offset_init = NULL_TREE;
7522
7523 gimple_stmt_iterator gsi;
7524 gassign *ass;
7525 gcall *call;
7526 gimple *stmt;
7527 tree expr;
7528 location_t loc;
7529 edge split, be, fte;
7530
7531 /* Split the end of entry_bb to create head_bb. */
7532 split = split_block (entry_bb, last_stmt (entry_bb));
7533 basic_block head_bb = split->dest;
7534 entry_bb = split->src;
7535
7536 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
65f4b875 7537 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7538 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7539 loc = gimple_location (for_stmt);
7540
7541 if (gimple_in_ssa_p (cfun))
7542 {
7543 offset_init = gimple_omp_for_index (for_stmt, 0);
7544 gcc_assert (integer_zerop (fd->loop.n1));
7545 /* The SSA parallelizer does gang parallelism. */
7546 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7547 }
7548
02889d23 7549 if (fd->collapse > 1 || fd->tiling)
629b3d75 7550 {
02889d23 7551 gcc_assert (!gimple_in_ssa_p (cfun) && up);
629b3d75 7552 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
f324479c 7553 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
02889d23 7554 TREE_TYPE (fd->loop.n2), loc);
629b3d75
MJ
7555
7556 if (SSA_VAR_P (fd->loop.n2))
7557 {
7558 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7559 true, GSI_SAME_STMT);
7560 ass = gimple_build_assign (fd->loop.n2, total);
7561 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7562 }
629b3d75
MJ
7563 }
7564
7565 tree b = fd->loop.n1;
7566 tree e = fd->loop.n2;
7567 tree s = fd->loop.step;
7568
7569 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7570 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7571
01914336 7572 /* Convert the step, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
7573 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7574 if (negating)
7575 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7576 s = fold_convert (diff_type, s);
7577 if (negating)
7578 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7579 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7580
7581 if (!chunking)
7582 chunk_size = integer_zero_node;
7583 expr = fold_convert (diff_type, chunk_size);
7584 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7585 NULL_TREE, true, GSI_SAME_STMT);
02889d23
CLT
7586
7587 if (fd->tiling)
7588 {
7589 /* Determine the tile size and element step,
7590 modify the outer loop step size. */
7591 tile_size = create_tmp_var (diff_type, ".tile_size");
7592 expr = build_int_cst (diff_type, 1);
7593 for (int ix = 0; ix < fd->collapse; ix++)
7594 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7595 expr = force_gimple_operand_gsi (&gsi, expr, true,
7596 NULL_TREE, true, GSI_SAME_STMT);
7597 ass = gimple_build_assign (tile_size, expr);
7598 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7599
7600 element_s = create_tmp_var (diff_type, ".element_s");
7601 ass = gimple_build_assign (element_s, s);
7602 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7603
7604 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7605 s = force_gimple_operand_gsi (&gsi, expr, true,
7606 NULL_TREE, true, GSI_SAME_STMT);
7607 }
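      /* (Added note, hedged.)  E.g. tile(4,2) yields tile_size == 8:
	 the outer tile loop now advances s * 8 per iteration, while
	 element_s preserves the original s for the element loop that is
	 inserted into the body below.  */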
7608
01914336 7609 /* Determine the range, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
7610 negating = !up && TYPE_UNSIGNED (iter_type);
7611 expr = fold_build2 (MINUS_EXPR, plus_type,
7612 fold_convert (plus_type, negating ? b : e),
7613 fold_convert (plus_type, negating ? e : b));
7614 expr = fold_convert (diff_type, expr);
7615 if (negating)
7616 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7617 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7618 NULL_TREE, true, GSI_SAME_STMT);
7619
7620 chunk_no = build_int_cst (diff_type, 0);
7621 if (chunking)
7622 {
7623 gcc_assert (!gimple_in_ssa_p (cfun));
7624
7625 expr = chunk_no;
7626 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7627 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7628
7629 ass = gimple_build_assign (chunk_no, expr);
7630 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7631
7632 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7633 build_int_cst (integer_type_node,
7634 IFN_GOACC_LOOP_CHUNKS),
7635 dir, range, s, chunk_size, gwv);
7636 gimple_call_set_lhs (call, chunk_max);
7637 gimple_set_location (call, loc);
7638 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7639 }
7640 else
7641 chunk_size = chunk_no;
7642
7643 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7644 build_int_cst (integer_type_node,
7645 IFN_GOACC_LOOP_STEP),
7646 dir, range, s, chunk_size, gwv);
7647 gimple_call_set_lhs (call, step);
7648 gimple_set_location (call, loc);
7649 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
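  /* (Added reading aid, hedged.)  Like the CHUNKS call above, this
     keeps the step abstract:

	 step = GOACC_LOOP (STEP, dir, range, s, chunk_size, gwv);

     the gang/worker/vector mask GWV decides the real stride only once
     oacc_device_lower runs in the target compiler.  */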
7650
7651 /* Remove the GIMPLE_OMP_FOR. */
7652 gsi_remove (&gsi, true);
7653
01914336 7654 /* Fixup edges from head_bb. */
629b3d75
MJ
7655 be = BRANCH_EDGE (head_bb);
7656 fte = FALLTHRU_EDGE (head_bb);
7657 be->flags |= EDGE_FALSE_VALUE;
7658 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7659
7660 basic_block body_bb = fte->dest;
7661
7662 if (gimple_in_ssa_p (cfun))
7663 {
65f4b875 7664 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7665 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7666
7667 offset = gimple_omp_continue_control_use (cont_stmt);
7668 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7669 }
7670 else
7671 {
7672 offset = create_tmp_var (diff_type, ".offset");
7673 offset_init = offset_incr = offset;
7674 }
7675 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7676
7677 /* Loop offset & bound go into head_bb. */
7678 gsi = gsi_start_bb (head_bb);
7679
7680 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7681 build_int_cst (integer_type_node,
7682 IFN_GOACC_LOOP_OFFSET),
7683 dir, range, s,
7684 chunk_size, gwv, chunk_no);
7685 gimple_call_set_lhs (call, offset_init);
7686 gimple_set_location (call, loc);
7687 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7688
7689 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7690 build_int_cst (integer_type_node,
7691 IFN_GOACC_LOOP_BOUND),
7692 dir, range, s,
7693 chunk_size, gwv, offset_init);
7694 gimple_call_set_lhs (call, bound);
7695 gimple_set_location (call, loc);
7696 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7697
7698 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7699 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7700 GSI_CONTINUE_LINKING);
7701
7702 /* V assignment goes into body_bb. */
7703 if (!gimple_in_ssa_p (cfun))
7704 {
7705 gsi = gsi_start_bb (body_bb);
7706
7707 expr = build2 (plus_code, iter_type, b,
7708 fold_convert (plus_type, offset));
7709 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7710 true, GSI_SAME_STMT);
7711 ass = gimple_build_assign (v, expr);
7712 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
02889d23
CLT
7713
7714 if (fd->collapse > 1 || fd->tiling)
f324479c 7715 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
02889d23
CLT
7716
7717 if (fd->tiling)
7718 {
7719 /* Determine the range of the element loop -- usually simply
7720 the tile_size, but could be smaller if the final
7721 iteration of the outer loop is a partial tile. */
7722 tree e_range = create_tmp_var (diff_type, ".e_range");
7723
7724 expr = build2 (MIN_EXPR, diff_type,
7725 build2 (MINUS_EXPR, diff_type, bound, offset),
7726 build2 (MULT_EXPR, diff_type, tile_size,
7727 element_s));
7728 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7729 true, GSI_SAME_STMT);
7730 ass = gimple_build_assign (e_range, expr);
7731 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7732
7733 /* Determine bound, offset & step of inner loop. */
7734 e_bound = create_tmp_var (diff_type, ".e_bound");
7735 e_offset = create_tmp_var (diff_type, ".e_offset");
7736 e_step = create_tmp_var (diff_type, ".e_step");
7737
7738 /* Mark these as element loops. */
7739 tree t, e_gwv = integer_minus_one_node;
7740 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7741
7742 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7743 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7744 element_s, chunk, e_gwv, chunk);
7745 gimple_call_set_lhs (call, e_offset);
7746 gimple_set_location (call, loc);
7747 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7748
7749 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7750 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7751 element_s, chunk, e_gwv, e_offset);
7752 gimple_call_set_lhs (call, e_bound);
7753 gimple_set_location (call, loc);
7754 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7755
7756 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7757 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7758 element_s, chunk, e_gwv);
7759 gimple_call_set_lhs (call, e_step);
7760 gimple_set_location (call, loc);
7761 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7762
7763 /* Add test and split block. */
7764 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7765 stmt = gimple_build_cond_empty (expr);
7766 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7767 split = split_block (body_bb, stmt);
7768 elem_body_bb = split->dest;
7769 if (cont_bb == body_bb)
7770 cont_bb = elem_body_bb;
7771 body_bb = split->src;
7772
7773 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7774
05e0af43
CP
7775 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7776 if (cont_bb == NULL)
7777 {
7778 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7779 e->probability = profile_probability::even ();
7780 split->probability = profile_probability::even ();
7781 }
7782
02889d23
CLT
7783 /* Initialize the user's loop vars. */
7784 gsi = gsi_start_bb (elem_body_bb);
f324479c
TB
7785 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7786 diff_type);
02889d23 7787 }
629b3d75
MJ
7788 }
7789
7790 /* Loop increment goes into cont_bb. If this is not a loop, we
7791 will have spawned threads as if it was, and each one will
7792 execute one iteration. The specification is not explicit about
7793 whether such constructs are ill-formed or not, and they can
7794 occur, especially when noreturn routines are involved. */
7795 if (cont_bb)
7796 {
65f4b875 7797 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7798 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7799 loc = gimple_location (cont_stmt);
7800
02889d23
CLT
7801 if (fd->tiling)
7802 {
7803 /* Insert element loop increment and test. */
7804 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7805 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7806 true, GSI_SAME_STMT);
7807 ass = gimple_build_assign (e_offset, expr);
7808 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7809 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7810
7811 stmt = gimple_build_cond_empty (expr);
7812 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7813 split = split_block (cont_bb, stmt);
7814 elem_cont_bb = split->src;
7815 cont_bb = split->dest;
7816
7817 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
9fba2943
TV
7818 split->probability = profile_probability::unlikely ().guessed ();
7819 edge latch_edge
7820 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7821 latch_edge->probability = profile_probability::likely ().guessed ();
7822
7823 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7824 skip_edge->probability = profile_probability::unlikely ().guessed ();
7825 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7826 loop_entry_edge->probability
7827 = profile_probability::likely ().guessed ();
02889d23
CLT
7828
7829 gsi = gsi_for_stmt (cont_stmt);
7830 }
7831
629b3d75
MJ
7832 /* Increment offset. */
7833 if (gimple_in_ssa_p (cfun))
02889d23
CLT
7834 expr = build2 (plus_code, iter_type, offset,
7835 fold_convert (plus_type, step));
629b3d75
MJ
7836 else
7837 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7838 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7839 true, GSI_SAME_STMT);
7840 ass = gimple_build_assign (offset_incr, expr);
7841 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7842 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7843 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7844
7845 /* Remove the GIMPLE_OMP_CONTINUE. */
7846 gsi_remove (&gsi, true);
7847
01914336 7848 /* Fixup edges from cont_bb. */
629b3d75
MJ
7849 be = BRANCH_EDGE (cont_bb);
7850 fte = FALLTHRU_EDGE (cont_bb);
7851 be->flags |= EDGE_TRUE_VALUE;
7852 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7853
7854 if (chunking)
7855 {
7856 /* Split the beginning of exit_bb to make bottom_bb. We
7857 need to insert a nop at the start, because splitting is
01914336 7858 after a stmt, not before. */
629b3d75
MJ
7859 gsi = gsi_start_bb (exit_bb);
7860 stmt = gimple_build_nop ();
7861 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7862 split = split_block (exit_bb, stmt);
7863 bottom_bb = split->src;
7864 exit_bb = split->dest;
7865 gsi = gsi_last_bb (bottom_bb);
7866
7867 /* Chunk increment and test goes into bottom_bb. */
7868 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7869 build_int_cst (diff_type, 1));
7870 ass = gimple_build_assign (chunk_no, expr);
7871 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7872
7873 /* Chunk test at end of bottom_bb. */
7874 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7875 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7876 GSI_CONTINUE_LINKING);
7877
01914336 7878 /* Fixup edges from bottom_bb. */
629b3d75 7879 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
f38057cc
TV
7880 split->probability = profile_probability::unlikely ().guessed ();
7881 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7882 latch_edge->probability = profile_probability::likely ().guessed ();
629b3d75
MJ
7883 }
7884 }
7885
65f4b875 7886 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7887 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7888 loc = gimple_location (gsi_stmt (gsi));
7889
7890 if (!gimple_in_ssa_p (cfun))
7891 {
7892 /* Insert the final value of V, in case it is live. This is the
7893 value for the only thread that survives past the join. */
7894 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7895 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7896 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7897 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7898 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7899 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7900 true, GSI_SAME_STMT);
7901 ass = gimple_build_assign (v, expr);
7902 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7903 }
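/* Editorial note (a hedged reading of the computation above, not part of
   the original source): the final value is B plus the total range rounded
   up to a whole number of steps.  For example, with range = 10, step
   s = 3 and dir = 1, (10 - 1 + 3) / 3 * 3 == 12, so V ends at B + 12,
   the first multiple of the step at or beyond the range.  */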
7904
7905 /* Remove the OMP_RETURN. */
7906 gsi_remove (&gsi, true);
7907
7908 if (cont_bb)
7909 {
7910 /* We now have one, two or three nested loops. Update the loop
7911 structures. */
7912 class loop *parent = entry_bb->loop_father;
7913 class loop *body = body_bb->loop_father;
7914
7915 if (chunking)
7916 {
7917 class loop *chunk_loop = alloc_loop ();
7918 chunk_loop->header = head_bb;
7919 chunk_loop->latch = bottom_bb;
7920 add_loop (chunk_loop, parent);
7921 parent = chunk_loop;
7922 }
7923 else if (parent != body)
7924 {
7925 gcc_assert (body->header == body_bb);
7926 gcc_assert (body->latch == cont_bb
7927 || single_pred (body->latch) == cont_bb);
7928 parent = NULL;
7929 }
7930
7931 if (parent)
7932 {
7933 class loop *body_loop = alloc_loop ();
7934 body_loop->header = body_bb;
7935 body_loop->latch = cont_bb;
7936 add_loop (body_loop, parent);
7937
7938 if (fd->tiling)
7939 {
7940 /* Insert tiling's element loop. */
7941 class loop *inner_loop = alloc_loop ();
7942 inner_loop->header = elem_body_bb;
7943 inner_loop->latch = elem_cont_bb;
7944 add_loop (inner_loop, body_loop);
7945 }
7946 }
7947 }
7948}
7949
7950/* Expand the OMP loop defined by REGION. */
7951
7952static void
7953expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7954{
7955 struct omp_for_data fd;
7956 struct omp_for_data_loop *loops;
7957
7958 loops = XALLOCAVEC (struct omp_for_data_loop,
7959 gimple_omp_for_collapse (last_stmt (region->entry)));
7960 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7961 &fd, loops);
7962 region->sched_kind = fd.sched_kind;
7963 region->sched_modifiers = fd.sched_modifiers;
7964 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7965 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7966 {
7967 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7968 if ((loops[i].m1 || loops[i].m2)
7969 && (loops[i].m1 == NULL_TREE
7970 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7971 && (loops[i].m2 == NULL_TREE
7972 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7973 && TREE_CODE (loops[i].step) == INTEGER_CST
7974 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7975 {
7976 tree t;
7977 tree itype = TREE_TYPE (loops[i].v);
7978 if (loops[i].m1 && loops[i].m2)
7979 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7980 else if (loops[i].m1)
7981 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7982 else
7983 t = loops[i].m2;
7984 t = fold_build2 (MULT_EXPR, itype, t,
7985 fold_convert (itype,
7986 loops[i - loops[i].outer].step));
7987 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7988 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7989 fold_build1 (NEGATE_EXPR, itype, t),
7990 fold_build1 (NEGATE_EXPR, itype,
7991 fold_convert (itype,
7992 loops[i].step)));
7993 else
7994 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7995 fold_convert (itype, loops[i].step));
7996 if (integer_nonzerop (t))
7997 error_at (gimple_location (fd.for_stmt),
7998 "invalid OpenMP non-rectangular loop step; "
7999 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8000 "step %qE",
8001 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8002 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8003 loops[i - loops[i].outer].step, i + 1,
8004 loops[i].step);
8005 }
8006 }
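/* Editorial illustration (hypothetical user code, not from the source):
   the check above would reject

     #pragma omp for collapse(2)
     for (int i = 0; i < 16; i++)
       for (int j = i; j < 16; j += 4)

   because the inner lower bound advances by the outer step (1) per outer
   iteration, and (0 - 1) * 1 is not a multiple of the inner step 4.  */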
8007
8008 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8009 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8010 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8011 if (region->cont)
8012 {
8013 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8014 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8015 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8016 }
8017 else
8018 /* If there isn't a continue then this is a degenerate case where
8019 the introduction of abnormal edges during lowering will prevent
8020 original loops from being detected. Fix that up. */
8021 loops_state_set (LOOPS_NEED_FIXUP);
8022
8023 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8024 expand_omp_simd (region, &fd);
8025 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8026 {
8027 gcc_assert (!inner_stmt && !fd.non_rect);
8028 expand_oacc_for (region, &fd);
8029 }
8030 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8031 {
8032 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8033 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8034 else
8035 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8036 }
8037 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8038 && !fd.have_ordered)
8039 {
8040 if (fd.chunk_size == NULL)
8041 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8042 else
8043 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8044 }
8045 else
8046 {
8047 int fn_index, start_ix, next_ix;
8048 unsigned HOST_WIDE_INT sched = 0;
8049 tree sched_arg = NULL_TREE;
8050
8051 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8052 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8053 if (fd.chunk_size == NULL
8054 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8055 fd.chunk_size = integer_zero_node;
8056 switch (fd.sched_kind)
8057 {
8058 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8059 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8060 && fd.lastprivate_conditional == 0)
8061 {
8062 gcc_assert (!fd.have_ordered);
8063 fn_index = 6;
8064 sched = 4;
8065 }
8066 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8067 && !fd.have_ordered
8068 && fd.lastprivate_conditional == 0)
8069 fn_index = 7;
8070 else
8071 {
8072 fn_index = 3;
8073 sched = (HOST_WIDE_INT_1U << 31);
8074 }
8075 break;
8076 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8077 case OMP_CLAUSE_SCHEDULE_GUIDED:
8078 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8079 && !fd.have_ordered
8080 && fd.lastprivate_conditional == 0)
8081 {
8082 fn_index = 3 + fd.sched_kind;
8083 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8084 break;
8085 }
8086 fn_index = fd.sched_kind;
8087 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8088 sched += (HOST_WIDE_INT_1U << 31);
8089 break;
8090 case OMP_CLAUSE_SCHEDULE_STATIC:
8091 gcc_assert (fd.have_ordered);
8092 fn_index = 0;
8093 sched = (HOST_WIDE_INT_1U << 31) + 1;
8094 break;
8095 default:
8096 gcc_unreachable ();
8097 }
8098 if (!fd.ordered)
8099 fn_index += fd.have_ordered * 8;
8100 if (fd.ordered)
8101 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8102 else
8103 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8104 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
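/* Editorial note (a hedged reading of the table arithmetic above): the
   GOMP_loop_*_start/_next builtins are declared in a fixed order, so
   fn_index simply offsets from the static variant, e.g. fn_index == 1
   picks GOMP_loop_dynamic_start, and 'fd.have_ordered * 8' skips ahead
   to the corresponding *_ordered_* entries of the same table.  */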
8105 if (fd.have_reductemp || fd.have_pointer_condtemp)
8106 {
8107 if (fd.ordered)
8108 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8109 else if (fd.have_ordered)
8110 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8111 else
8112 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8113 sched_arg = build_int_cstu (long_integer_type_node, sched);
8114 if (!fd.chunk_size)
8115 fd.chunk_size = integer_zero_node;
8116 }
8117 if (fd.iter_type == long_long_unsigned_type_node)
8118 {
8119 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8120 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8121 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8122 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8123 }
8124 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8125 (enum built_in_function) next_ix, sched_arg,
8126 inner_stmt);
8127 }
8128
8129 if (gimple_in_ssa_p (cfun))
8130 update_ssa (TODO_update_ssa_only_virtuals);
8131}
8132
8133/* Expand code for an OpenMP sections directive. In pseudo code, we generate
8134
8135 v = GOMP_sections_start (n);
8136 L0:
8137 switch (v)
8138 {
8139 case 0:
8140 goto L2;
8141 case 1:
8142 section 1;
8143 goto L1;
8144 case 2:
8145 ...
8146 case n:
8147 ...
8148 default:
8149 abort ();
8150 }
8151 L1:
8152 v = GOMP_sections_next ();
8153 goto L0;
8154 L2:
8155 reduction;
8156
8157 If this is a combined parallel sections, replace the call to
8158 GOMP_sections_start with a call to GOMP_sections_next. */
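/* Editorial illustration (hypothetical user code, not from the source):

     #pragma omp sections
     {
     #pragma omp section
       foo ();
     #pragma omp section
       bar ();
     }

   would be expanded along the scheme above with n == 2: each thread
   calls GOMP_sections_start/next and runs the case it is handed until
   the value 0 ends the dispatch loop.  */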
8159
8160static void
8161expand_omp_sections (struct omp_region *region)
8162{
8163 tree t, u, vin = NULL, vmain, vnext, l2;
8164 unsigned len;
8165 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8166 gimple_stmt_iterator si, switch_si;
8167 gomp_sections *sections_stmt;
8168 gimple *stmt;
8169 gomp_continue *cont;
8170 edge_iterator ei;
8171 edge e;
8172 struct omp_region *inner;
8173 unsigned i, casei;
8174 bool exit_reachable = region->cont != NULL;
8175
8176 gcc_assert (region->exit != NULL);
8177 entry_bb = region->entry;
8178 l0_bb = single_succ (entry_bb);
8179 l1_bb = region->cont;
8180 l2_bb = region->exit;
8181 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8182 l2 = gimple_block_label (l2_bb);
8183 else
8184 {
8185 /* This can happen if there are reductions. */
8186 len = EDGE_COUNT (l0_bb->succs);
8187 gcc_assert (len > 0);
8188 e = EDGE_SUCC (l0_bb, len - 1);
8189 si = gsi_last_nondebug_bb (e->dest);
8190 l2 = NULL_TREE;
8191 if (gsi_end_p (si)
8192 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8193 l2 = gimple_block_label (e->dest);
8194 else
8195 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8196 {
8197 si = gsi_last_nondebug_bb (e->dest);
8198 if (gsi_end_p (si)
8199 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8200 {
8201 l2 = gimple_block_label (e->dest);
8202 break;
8203 }
8204 }
8205 }
8206 if (exit_reachable)
8207 default_bb = create_empty_bb (l1_bb->prev_bb);
8208 else
8209 default_bb = create_empty_bb (l0_bb);
8210
8211 /* We will build a switch() with enough cases for all the
8212 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8213 and a default case to abort if something goes wrong. */
8214 len = EDGE_COUNT (l0_bb->succs);
8215
8216 /* Use vec::quick_push on label_vec throughout, since we know the size
8217 in advance. */
8218 auto_vec<tree> label_vec (len);
8219
8220 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8221 GIMPLE_OMP_SECTIONS statement. */
8222 si = gsi_last_nondebug_bb (entry_bb);
8223 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8224 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8225 vin = gimple_omp_sections_control (sections_stmt);
8226 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8227 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8228 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8229 tree cond_var = NULL_TREE;
8230 if (reductmp || condtmp)
8231 {
8232 tree reductions = null_pointer_node, mem = null_pointer_node;
8233 tree memv = NULL_TREE, condtemp = NULL_TREE;
8234 gimple_stmt_iterator gsi = gsi_none ();
8235 gimple *g = NULL;
8236 if (reductmp)
8237 {
8238 reductions = OMP_CLAUSE_DECL (reductmp);
8239 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8240 g = SSA_NAME_DEF_STMT (reductions);
8241 reductions = gimple_assign_rhs1 (g);
8242 OMP_CLAUSE_DECL (reductmp) = reductions;
8243 gsi = gsi_for_stmt (g);
8244 }
8245 else
8246 gsi = si;
8247 if (condtmp)
8248 {
8249 condtemp = OMP_CLAUSE_DECL (condtmp);
8250 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8251 OMP_CLAUSE__CONDTEMP_);
8252 cond_var = OMP_CLAUSE_DECL (c);
8253 tree type = TREE_TYPE (condtemp);
8254 memv = create_tmp_var (type);
8255 TREE_ADDRESSABLE (memv) = 1;
8256 unsigned cnt = 0;
8257 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8258 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8259 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8260 ++cnt;
8261 unsigned HOST_WIDE_INT sz
8262 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8263 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8264 false);
8265 mem = build_fold_addr_expr (memv);
8266 }
8267 t = build_int_cst (unsigned_type_node, len - 1);
8268 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8269 stmt = gimple_build_call (u, 3, t, reductions, mem);
8270 gimple_call_set_lhs (stmt, vin);
8271 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8272 if (condtmp)
8273 {
8274 expand_omp_build_assign (&gsi, condtemp, memv, false);
8275 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8276 vin, build_one_cst (TREE_TYPE (cond_var)));
8277 expand_omp_build_assign (&gsi, cond_var, t, false);
8278 }
8279 if (reductmp)
8280 {
8281 gsi_remove (&gsi, true);
8282 release_ssa_name (gimple_assign_lhs (g));
8283 }
8284 }
8285 else if (!is_combined_parallel (region))
8286 {
8287 /* If we are not inside a combined parallel+sections region,
8288 call GOMP_sections_start. */
8289 t = build_int_cst (unsigned_type_node, len - 1);
8290 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8291 stmt = gimple_build_call (u, 1, t);
8292 }
8293 else
8294 {
8295 /* Otherwise, call GOMP_sections_next. */
8296 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8297 stmt = gimple_build_call (u, 0);
8298 }
8299 if (!reductmp && !condtmp)
8300 {
8301 gimple_call_set_lhs (stmt, vin);
8302 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8303 }
8304 gsi_remove (&si, true);
8305
8306 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8307 L0_BB. */
8308 switch_si = gsi_last_nondebug_bb (l0_bb);
8309 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8310 if (exit_reachable)
8311 {
8312 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8313 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8314 vmain = gimple_omp_continue_control_use (cont);
8315 vnext = gimple_omp_continue_control_def (cont);
8316 }
8317 else
8318 {
8319 vmain = vin;
8320 vnext = NULL_TREE;
8321 }
8322
8323 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8324 label_vec.quick_push (t);
8325 i = 1;
8326
8327 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8328 for (inner = region->inner, casei = 1;
8329 inner;
8330 inner = inner->next, i++, casei++)
8331 {
8332 basic_block s_entry_bb, s_exit_bb;
8333
8334 /* Skip optional reduction region. */
8335 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8336 {
8337 --i;
8338 --casei;
8339 continue;
8340 }
8341
8342 s_entry_bb = inner->entry;
8343 s_exit_bb = inner->exit;
8344
8345 t = gimple_block_label (s_entry_bb);
8346 u = build_int_cst (unsigned_type_node, casei);
8347 u = build_case_label (u, NULL, t);
8348 label_vec.quick_push (u);
8349
8350 si = gsi_last_nondebug_bb (s_entry_bb);
8351 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8352 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8353 gsi_remove (&si, true);
8354 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8355
8356 if (s_exit_bb == NULL)
8357 continue;
8358
8359 si = gsi_last_nondebug_bb (s_exit_bb);
8360 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8361 gsi_remove (&si, true);
8362
8363 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8364 }
8365
8366 /* Error handling code goes in DEFAULT_BB. */
8367 t = gimple_block_label (default_bb);
8368 u = build_case_label (NULL, NULL, t);
8369 make_edge (l0_bb, default_bb, 0);
8370 add_bb_to_loop (default_bb, current_loops->tree_root);
8371
8372 stmt = gimple_build_switch (vmain, u, label_vec);
8373 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8374 gsi_remove (&switch_si, true);
8375
8376 si = gsi_start_bb (default_bb);
8377 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8378 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8379
8380 if (exit_reachable)
8381 {
8382 tree bfn_decl;
8383
8384 /* Code to get the next section goes in L1_BB. */
8385 si = gsi_last_nondebug_bb (l1_bb);
8386 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8387
8388 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8389 stmt = gimple_build_call (bfn_decl, 0);
8390 gimple_call_set_lhs (stmt, vnext);
8391 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8392 if (cond_var)
8393 {
8394 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8395 vnext, build_one_cst (TREE_TYPE (cond_var)));
8396 expand_omp_build_assign (&si, cond_var, t, false);
8397 }
8398 gsi_remove (&si, true);
8399
8400 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8401 }
8402
8403 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8404 si = gsi_last_nondebug_bb (l2_bb);
8405 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8406 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8407 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8408 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8409 else
8410 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8411 stmt = gimple_build_call (t, 0);
8412 if (gimple_omp_return_lhs (gsi_stmt (si)))
8413 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8414 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8415 gsi_remove (&si, true);
8416
8417 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8418}
8419
8420/* Expand code for an OpenMP single directive. We've already expanded
8421 much of the code, here we simply place the GOMP_barrier call. */
8422
8423static void
8424expand_omp_single (struct omp_region *region)
8425{
8426 basic_block entry_bb, exit_bb;
8427 gimple_stmt_iterator si;
8428
8429 entry_bb = region->entry;
8430 exit_bb = region->exit;
8431
8432 si = gsi_last_nondebug_bb (entry_bb);
8433 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8434 gsi_remove (&si, true);
8435 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8436
8437 si = gsi_last_nondebug_bb (exit_bb);
8438 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8439 {
8440 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8441 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8442 }
8443 gsi_remove (&si, true);
8444 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8445}
8446
8447/* Generic expansion for OpenMP synchronization directives: master,
8448 ordered and critical. All we need to do here is remove the entry
8449 and exit markers for REGION. */
8450
8451static void
8452expand_omp_synch (struct omp_region *region)
8453{
8454 basic_block entry_bb, exit_bb;
8455 gimple_stmt_iterator si;
8456
8457 entry_bb = region->entry;
8458 exit_bb = region->exit;
8459
8460 si = gsi_last_nondebug_bb (entry_bb);
8461 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8462 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8463 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8464 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8465 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8466 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8467 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8468 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8469 {
8470 expand_omp_taskreg (region);
8471 return;
8472 }
8473 gsi_remove (&si, true);
8474 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8475
8476 if (exit_bb)
8477 {
8478 si = gsi_last_nondebug_bb (exit_bb);
8479 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8480 gsi_remove (&si, true);
8481 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8482 }
8483}
8484
8485/* Translate enum omp_memory_order to enum memmodel. The two enums
8486 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8487 is 0. */
8488
8489static enum memmodel
8490omp_memory_order_to_memmodel (enum omp_memory_order mo)
8491{
8492 switch (mo)
8493 {
8494 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8495 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8496 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8497 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8498 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8499 default: gcc_unreachable ();
8500 }
8501}
8502
8503/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8504 operation as a normal volatile load. */
8505
8506static bool
8507expand_omp_atomic_load (basic_block load_bb, tree addr,
8508 tree loaded_val, int index)
8509{
8510 enum built_in_function tmpbase;
8511 gimple_stmt_iterator gsi;
8512 basic_block store_bb;
8513 location_t loc;
8514 gimple *stmt;
8515 tree decl, call, type, itype;
8516
8517 gsi = gsi_last_nondebug_bb (load_bb);
8518 stmt = gsi_stmt (gsi);
8519 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8520 loc = gimple_location (stmt);
8521
8522 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8523 is smaller than word size, then expand_atomic_load assumes that the load
8524 is atomic. We could avoid the builtin entirely in this case. */
8525
8526 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8527 decl = builtin_decl_explicit (tmpbase);
8528 if (decl == NULL_TREE)
8529 return false;
8530
8531 type = TREE_TYPE (loaded_val);
8532 itype = TREE_TYPE (TREE_TYPE (decl));
8533
8534 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8535 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8536 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8537 if (!useless_type_conversion_p (type, itype))
8538 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8539 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8540
8541 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8542 gsi_remove (&gsi, true);
8543
8544 store_bb = single_succ (load_bb);
8545 gsi = gsi_last_nondebug_bb (store_bb);
8546 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8547 gsi_remove (&gsi, true);
8548
8549 if (gimple_in_ssa_p (cfun))
8550 update_ssa (TODO_update_ssa_no_phi);
8551
8552 return true;
8553}
8554
8555/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8556 operation as a normal volatile store. */
8557
8558static bool
8559expand_omp_atomic_store (basic_block load_bb, tree addr,
8560 tree loaded_val, tree stored_val, int index)
8561{
8562 enum built_in_function tmpbase;
8563 gimple_stmt_iterator gsi;
8564 basic_block store_bb = single_succ (load_bb);
8565 location_t loc;
8566 gimple *stmt;
8567 tree decl, call, type, itype;
8568 machine_mode imode;
8569 bool exchange;
8570
8571 gsi = gsi_last_nondebug_bb (load_bb);
8572 stmt = gsi_stmt (gsi);
8573 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8574
8575 /* If the load value is needed, then this isn't a store but an exchange. */
8576 exchange = gimple_omp_atomic_need_value_p (stmt);
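/* Editorial illustration (hypothetical user code): a capture such as
   '#pragma omp atomic capture { v = x; x = expr; }' needs the old
   value, so EXCHANGE is set and the expansion below uses
   __atomic_exchange_N instead of __atomic_store_N.  */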
8577
8578 gsi = gsi_last_nondebug_bb (store_bb);
8579 stmt = gsi_stmt (gsi);
8580 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8581 loc = gimple_location (stmt);
8582
8583 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8584 is smaller than word size, then expand_atomic_store assumes that the store
8585 is atomic. We could avoid the builtin entirely in this case. */
8586
8587 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8588 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8589 decl = builtin_decl_explicit (tmpbase);
8590 if (decl == NULL_TREE)
8591 return false;
8592
8593 type = TREE_TYPE (stored_val);
8594
8595 /* Dig out the type of the function's second argument. */
8596 itype = TREE_TYPE (decl);
8597 itype = TYPE_ARG_TYPES (itype);
8598 itype = TREE_CHAIN (itype);
8599 itype = TREE_VALUE (itype);
8600 imode = TYPE_MODE (itype);
8601
8602 if (exchange && !can_atomic_exchange_p (imode, true))
8603 return false;
8604
8605 if (!useless_type_conversion_p (itype, type))
8606 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8607 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8608 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8609 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8610 if (exchange)
8611 {
8612 if (!useless_type_conversion_p (type, itype))
8613 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8614 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8615 }
8616
8617 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8618 gsi_remove (&gsi, true);
8619
8620 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8621 gsi = gsi_last_nondebug_bb (load_bb);
8622 gsi_remove (&gsi, true);
8623
8624 if (gimple_in_ssa_p (cfun))
8625 update_ssa (TODO_update_ssa_no_phi);
8626
8627 return true;
8628}
8629
8630/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8631 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8632 size of the data type, and thus usable to find the index of the builtin
8633 decl. Returns false if the expression is not of the proper form. */
8634
8635static bool
8636expand_omp_atomic_fetch_op (basic_block load_bb,
8637 tree addr, tree loaded_val,
8638 tree stored_val, int index)
8639{
8640 enum built_in_function oldbase, newbase, tmpbase;
8641 tree decl, itype, call;
8642 tree lhs, rhs;
8643 basic_block store_bb = single_succ (load_bb);
8644 gimple_stmt_iterator gsi;
8645 gimple *stmt;
8646 location_t loc;
8647 enum tree_code code;
8648 bool need_old, need_new;
8649 machine_mode imode;
8650
8651 /* We expect to find the following sequences:
8652
8653 load_bb:
8654 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8655
8656 store_bb:
8657 val = tmp OP something; (or: something OP tmp)
8658 GIMPLE_OMP_STORE (val)
8659
8660 ???FIXME: Allow a more flexible sequence.
8661 Perhaps use data flow to pick the statements.
8662
8663 */
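/* Editorial illustration (hypothetical user code): for a 4-byte int x,
   '#pragma omp atomic update' on 'x = x + 1' matches this shape and is
   expanded below into __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED),
   assuming a relaxed memory order on the directive.  */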
8664
8665 gsi = gsi_after_labels (store_bb);
8666 stmt = gsi_stmt (gsi);
8667 if (is_gimple_debug (stmt))
8668 {
8669 gsi_next_nondebug (&gsi);
8670 if (gsi_end_p (gsi))
8671 return false;
8672 stmt = gsi_stmt (gsi);
8673 }
8674 loc = gimple_location (stmt);
8675 if (!is_gimple_assign (stmt))
8676 return false;
8677 gsi_next_nondebug (&gsi);
8678 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8679 return false;
8680 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8681 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8682 enum omp_memory_order omo
8683 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8684 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8685 gcc_checking_assert (!need_old || !need_new);
8686
8687 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8688 return false;
8689
8690 /* Check for one of the supported fetch-op operations. */
8691 code = gimple_assign_rhs_code (stmt);
8692 switch (code)
8693 {
8694 case PLUS_EXPR:
8695 case POINTER_PLUS_EXPR:
8696 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8697 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8698 break;
8699 case MINUS_EXPR:
8700 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8701 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8702 break;
8703 case BIT_AND_EXPR:
8704 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8705 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8706 break;
8707 case BIT_IOR_EXPR:
8708 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8709 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8710 break;
8711 case BIT_XOR_EXPR:
8712 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8713 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8714 break;
8715 default:
8716 return false;
8717 }
8718
8719 /* Make sure the expression is of the proper form. */
8720 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8721 rhs = gimple_assign_rhs2 (stmt);
8722 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8723 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8724 rhs = gimple_assign_rhs1 (stmt);
8725 else
8726 return false;
8727
8728 tmpbase = ((enum built_in_function)
8729 ((need_new ? newbase : oldbase) + index + 1));
8730 decl = builtin_decl_explicit (tmpbase);
8731 if (decl == NULL_TREE)
8732 return false;
8733 itype = TREE_TYPE (TREE_TYPE (decl));
8734 imode = TYPE_MODE (itype);
8735
8736 /* We could test all of the various optabs involved, but the fact of the
8737 matter is that (with the exception of i486 vs i586 and xadd) all targets
8738 that support any atomic operation optab also implement compare-and-swap.
8739 Let optabs.c take care of expanding any compare-and-swap loop. */
8740 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8741 return false;
8742
8743 gsi = gsi_last_nondebug_bb (load_bb);
8744 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8745
8746 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8747 It only requires that the operation happen atomically. Thus we can
8748 use the RELAXED memory model. */
8749 call = build_call_expr_loc (loc, decl, 3, addr,
8750 fold_convert_loc (loc, itype, rhs),
8751 build_int_cst (NULL, mo));
8752
8753 if (need_old || need_new)
8754 {
8755 lhs = need_old ? loaded_val : stored_val;
8756 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8757 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8758 }
8759 else
8760 call = fold_convert_loc (loc, void_type_node, call);
8761 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8762 gsi_remove (&gsi, true);
8763
8764 gsi = gsi_last_nondebug_bb (store_bb);
8765 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8766 gsi_remove (&gsi, true);
8767 gsi = gsi_last_nondebug_bb (store_bb);
8768 stmt = gsi_stmt (gsi);
8769 gsi_remove (&gsi, true);
8770
8771 if (gimple_in_ssa_p (cfun))
8772 {
8773 release_defs (stmt);
8774 update_ssa (TODO_update_ssa_no_phi);
8775 }
8776
8777 return true;
8778}
8779
8780/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8781
8782 oldval = *addr;
8783 repeat:
8784 newval = rhs; // with oldval replacing *addr in rhs
8785 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8786 if (oldval != newval)
8787 goto repeat;
8788
8789 INDEX is log2 of the size of the data type, and thus usable to find the
8790 index of the builtin decl. */
8791
8792static bool
8793expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8794 tree addr, tree loaded_val, tree stored_val,
8795 int index)
8796{
8797 tree loadedi, storedi, initial, new_storedi, old_vali;
8798 tree type, itype, cmpxchg, iaddr, atype;
8799 gimple_stmt_iterator si;
8800 basic_block loop_header = single_succ (load_bb);
8801 gimple *phi, *stmt;
8802 edge e;
8803 enum built_in_function fncode;
8804
8805 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8806 order to use the RELAXED memory model effectively. */
8807 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8808 + index + 1);
8809 cmpxchg = builtin_decl_explicit (fncode);
8810 if (cmpxchg == NULL_TREE)
8811 return false;
8812 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8813 atype = type;
8814 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8815
8816 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8817 || !can_atomic_load_p (TYPE_MODE (itype)))
8818 return false;
8819
8820 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8821 si = gsi_last_nondebug_bb (load_bb);
8822 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8823
8824 /* For floating-point values, we'll need to view-convert them to integers
8825 so that we can perform the atomic compare and swap. Simplify the
8826 following code by always setting up the "i"ntegral variables. */
8827 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8828 {
8829 tree iaddr_val;
8830
8831 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8832 true));
8833 atype = itype;
8834 iaddr_val
8835 = force_gimple_operand_gsi (&si,
8836 fold_convert (TREE_TYPE (iaddr), addr),
8837 false, NULL_TREE, true, GSI_SAME_STMT);
8838 stmt = gimple_build_assign (iaddr, iaddr_val);
8839 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8840 loadedi = create_tmp_var (itype);
8841 if (gimple_in_ssa_p (cfun))
8842 loadedi = make_ssa_name (loadedi);
8843 }
8844 else
8845 {
8846 iaddr = addr;
8847 loadedi = loaded_val;
8848 }
8849
8850 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8851 tree loaddecl = builtin_decl_explicit (fncode);
8852 if (loaddecl)
8853 initial
8854 = fold_convert (atype,
8855 build_call_expr (loaddecl, 2, iaddr,
8856 build_int_cst (NULL_TREE,
8857 MEMMODEL_RELAXED)));
8858 else
8859 {
8860 tree off
8861 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8862 true), 0);
8863 initial = build2 (MEM_REF, atype, iaddr, off);
8864 }
8865
8866 initial
8867 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8868 GSI_SAME_STMT);
8869
8870 /* Move the value to the LOADEDI temporary. */
8871 if (gimple_in_ssa_p (cfun))
8872 {
8873 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8874 phi = create_phi_node (loadedi, loop_header);
8875 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8876 initial);
8877 }
8878 else
8879 gsi_insert_before (&si,
8880 gimple_build_assign (loadedi, initial),
8881 GSI_SAME_STMT);
8882 if (loadedi != loaded_val)
8883 {
8884 gimple_stmt_iterator gsi2;
8885 tree x;
8886
8887 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8888 gsi2 = gsi_start_bb (loop_header);
8889 if (gimple_in_ssa_p (cfun))
8890 {
8891 gassign *stmt;
8892 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8893 true, GSI_SAME_STMT);
8894 stmt = gimple_build_assign (loaded_val, x);
8895 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8896 }
8897 else
8898 {
8899 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8900 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8901 true, GSI_SAME_STMT);
8902 }
8903 }
8904 gsi_remove (&si, true);
8905
8906 si = gsi_last_nondebug_bb (store_bb);
8907 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8908
8909 if (iaddr == addr)
8910 storedi = stored_val;
8911 else
8912 storedi
8913 = force_gimple_operand_gsi (&si,
8914 build1 (VIEW_CONVERT_EXPR, itype,
8915 stored_val), true, NULL_TREE, true,
8916 GSI_SAME_STMT);
8917
8918 /* Build the compare&swap statement. */
8919 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8920 new_storedi = force_gimple_operand_gsi (&si,
8921 fold_convert (TREE_TYPE (loadedi),
8922 new_storedi),
8923 true, NULL_TREE,
8924 true, GSI_SAME_STMT);
8925
8926 if (gimple_in_ssa_p (cfun))
8927 old_vali = loadedi;
8928 else
8929 {
8930 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8931 stmt = gimple_build_assign (old_vali, loadedi);
8932 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8933
8934 stmt = gimple_build_assign (loadedi, new_storedi);
8935 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8936 }
8937
8938 /* Note that we always perform the comparison as an integer, even for
8939 floating point. This allows the atomic operation to properly
8940 succeed even with NaNs and -0.0. */
8941 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8942 stmt = gimple_build_cond_empty (ne);
8943 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8944
8945 /* Update cfg. */
8946 e = single_succ_edge (store_bb);
8947 e->flags &= ~EDGE_FALLTHRU;
8948 e->flags |= EDGE_FALSE_VALUE;
8949 /* Expect no looping. */
8950 e->probability = profile_probability::guessed_always ();
8951
8952 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8953 e->probability = profile_probability::guessed_never ();
8954
8955 /* Copy the new value to loadedi (we already did that before the condition
8956 if we are not in SSA). */
8957 if (gimple_in_ssa_p (cfun))
8958 {
8959 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8960 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8961 }
8962
8963 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8964 gsi_remove (&si, true);
8965
8966 class loop *loop = alloc_loop ();
8967 loop->header = loop_header;
8968 loop->latch = store_bb;
8969 add_loop (loop, loop_header->loop_father);
8970
8971 if (gimple_in_ssa_p (cfun))
8972 update_ssa (TODO_update_ssa_no_phi);
8973
8974 return true;
8975}
8976
8977/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8978
8979 GOMP_atomic_start ();
8980 *addr = rhs;
8981 GOMP_atomic_end ();
8982
8983 The result is not globally atomic, but works so long as all parallel
8984 references are within #pragma omp atomic directives. According to
8985 responses received from omp@openmp.org, this appears to be within spec,
8986 which makes sense, since that's how several other compilers handle
8987 this situation as well.
8988 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8989 expanding. STORED_VAL is the operand of the matching
8990 GIMPLE_OMP_ATOMIC_STORE.
8991
8992 We replace
8993 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8994 loaded_val = *addr;
8995
8996 and replace
8997 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8998 *addr = stored_val;
8999*/
9000
9001static bool
9002expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9003 tree addr, tree loaded_val, tree stored_val)
9004{
9005 gimple_stmt_iterator si;
9006 gassign *stmt;
9007 tree t;
9008
9009 si = gsi_last_nondebug_bb (load_bb);
9010 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9011
9012 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9013 t = build_call_expr (t, 0);
9014 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9015
9016 tree mem = build_simple_mem_ref (addr);
9017 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9018 TREE_OPERAND (mem, 1)
9019 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9020 true),
9021 TREE_OPERAND (mem, 1));
9022 stmt = gimple_build_assign (loaded_val, mem);
9023 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9024 gsi_remove (&si, true);
9025
9026 si = gsi_last_nondebug_bb (store_bb);
9027 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9028
9029 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9030 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9031
9032 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9033 t = build_call_expr (t, 0);
9034 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9035 gsi_remove (&si, true);
9036
9037 if (gimple_in_ssa_p (cfun))
9038 update_ssa (TODO_update_ssa_no_phi);
9039 return true;
9040}
9041
9042 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
9043 using expand_omp_atomic_fetch_op. If that fails, we try to
9044 call expand_omp_atomic_pipeline, and if it fails too, the
9045 ultimate fallback is wrapping the operation in a mutex
9046 (expand_omp_atomic_mutex). REGION is the atomic region built
9047 by build_omp_regions_1(). */
9048
9049static void
9050expand_omp_atomic (struct omp_region *region)
9051{
9052 basic_block load_bb = region->entry, store_bb = region->exit;
9053 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9054 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9055 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9056 tree addr = gimple_omp_atomic_load_rhs (load);
9057 tree stored_val = gimple_omp_atomic_store_val (store);
9058 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9059 HOST_WIDE_INT index;
9060
9061 /* Make sure the type is one of the supported sizes. */
9062 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9063 index = exact_log2 (index);
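/* Editorial worked example (a hedged reading): a 4-byte type gives
   index == exact_log2 (4) == 2, and the builtin tables are laid out so
   that BUILT_IN_ATOMIC_LOAD_N + index + 1 selects the _4 variant.  */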
9064 if (index >= 0 && index <= 4)
9065 {
9066 unsigned int align = TYPE_ALIGN_UNIT (type);
9067
9068 /* __sync builtins require strict data alignment. */
9069 if (exact_log2 (align) >= index)
9070 {
9071 /* Atomic load. */
9072 scalar_mode smode;
9073 if (loaded_val == stored_val
9074 && (is_int_mode (TYPE_MODE (type), &smode)
9075 || is_float_mode (TYPE_MODE (type), &smode))
9076 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9077 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9078 return;
9079
9080 /* Atomic store. */
9081 if ((is_int_mode (TYPE_MODE (type), &smode)
9082 || is_float_mode (TYPE_MODE (type), &smode))
9083 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9084 && store_bb == single_succ (load_bb)
9085 && first_stmt (store_bb) == store
9086 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9087 stored_val, index))
9088 return;
9089
9090 /* When possible, use specialized atomic update functions. */
9091 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9092 && store_bb == single_succ (load_bb)
9093 && expand_omp_atomic_fetch_op (load_bb, addr,
9094 loaded_val, stored_val, index))
9095 return;
9096
9097 /* If we don't have specialized __sync builtins, try and implement
9098 as a compare and swap loop. */
9099 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9100 loaded_val, stored_val, index))
9101 return;
9102 }
9103 }
9104
9105 /* The ultimate fallback is wrapping the operation in a mutex. */
9106 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9107}
9108
9109/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9110 at REGION_EXIT. */
9111
9112static void
9113mark_loops_in_oacc_kernels_region (basic_block region_entry,
9114 basic_block region_exit)
9115{
9116 class loop *outer = region_entry->loop_father;
9117 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9118
9119 /* Don't parallelize the kernels region if it contains more than one outer
9120 loop. */
9121 unsigned int nr_outer_loops = 0;
9122 class loop *single_outer = NULL;
9123 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9124 {
9125 gcc_assert (loop_outer (loop) == outer);
9126
9127 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9128 continue;
9129
9130 if (region_exit != NULL
9131 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9132 continue;
9133
9134 nr_outer_loops++;
9135 single_outer = loop;
9136 }
9137 if (nr_outer_loops != 1)
9138 return;
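/* Editorial note (hedged): e.g. a kernels region containing two sibling
   top-level loops is left unmarked here; only a single, non-branching
   loop nest is marked as being inside the region below.  */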
9139
9140 for (class loop *loop = single_outer->inner;
9141 loop != NULL;
9142 loop = loop->inner)
9143 if (loop->next)
9144 return;
9145
9146 /* Mark the loops in the region. */
9147 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9148 loop->in_oacc_kernels_region = true;
9149}
9150
9151/* Build target argument identifier from the DEVICE identifier, value
9152 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9153
9154static tree
9155get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9156{
9157 tree t = build_int_cst (integer_type_node, device);
9158 if (subseqent_param)
9159 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9160 build_int_cst (integer_type_node,
9161 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9162 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9163 build_int_cst (integer_type_node, id));
9164 return t;
9165}
9166
9167/* Like above but return it in type that can be directly stored as an element
9168 of the argument array. */
9169
9170static tree
9171get_target_argument_identifier (int device, bool subseqent_param, int id)
9172{
9173 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9174 return fold_convert (ptr_type_node, t);
9175}
9176
9177/* Return a target argument consisting of DEVICE identifier, value identifier
9178 ID, and the actual VALUE. */
9179
9180static tree
9181get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9182 tree value)
9183{
9184 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9185 fold_convert (integer_type_node, value),
9186 build_int_cst (unsigned_type_node,
9187 GOMP_TARGET_ARG_VALUE_SHIFT));
9188 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9189 get_target_argument_identifier_1 (device, false, id));
9190 t = fold_convert (ptr_type_node, t);
9191 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9192}
9193
9194/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9195 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9196 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9197 arguments. */
9198
9199static void
9200push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9201 int id, tree value, vec <tree> *args)
9202{
9203 if (tree_fits_shwi_p (value)
9204 && tree_to_shwi (value) > -(1 << 15)
9205 && tree_to_shwi (value) < (1 << 15))
9206 args->quick_push (get_target_argument_value (gsi, device, id, value));
9207 else
9208 {
9209 args->quick_push (get_target_argument_identifier (device, true, id));
9210 value = fold_convert (ptr_type_node, value);
9211 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9212 GSI_SAME_STMT);
9213 args->quick_push (value);
9214 }
9215}
9216
9217 /* Create an array of arguments that is then passed to GOMP_target. */
9218
9219static tree
9220get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9221{
9222 auto_vec <tree, 6> args;
9223 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9224 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9225 if (c)
9226 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9227 else
9228 t = integer_minus_one_node;
9229 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9230 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9231
9232 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9233 if (c)
9234 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9235 else
9236 t = integer_minus_one_node;
9237 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9238 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9239 &args);
9240
9241 /* Produce more, perhaps device specific, arguments here. */
9242
9243 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9244 args.length () + 1),
9245 ".omp_target_args");
9246 for (unsigned i = 0; i < args.length (); i++)
9247 {
9248 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9249 build_int_cst (integer_type_node, i),
9250 NULL_TREE, NULL_TREE);
9251 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9252 GSI_SAME_STMT);
9253 }
9254 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9255 build_int_cst (integer_type_node, args.length ()),
9256 NULL_TREE, NULL_TREE);
9257 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9258 GSI_SAME_STMT);
9259 TREE_ADDRESSABLE (argarray) = 1;
9260 return build_fold_addr_expr (argarray);
9261}
9262
9263/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9264
9265static void
9266expand_omp_target (struct omp_region *region)
9267{
9268 basic_block entry_bb, exit_bb, new_bb;
9269 struct function *child_cfun;
9270 tree child_fn, block, t;
9271 gimple_stmt_iterator gsi;
9272 gomp_target *entry_stmt;
9273 gimple *stmt;
9274 edge e;
9275 bool offloaded;
9276 int target_kind;
9277
9278 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9279 target_kind = gimple_omp_target_kind (entry_stmt);
9280 new_bb = region->entry;
9281
9282 offloaded = is_gimple_omp_offloaded (entry_stmt);
9283 switch (target_kind)
9284 {
9285 case GF_OMP_TARGET_KIND_REGION:
9286 case GF_OMP_TARGET_KIND_UPDATE:
9287 case GF_OMP_TARGET_KIND_ENTER_DATA:
9288 case GF_OMP_TARGET_KIND_EXIT_DATA:
9289 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9290 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9291 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9292 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9293 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9294 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9295 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9296 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9297 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9298 case GF_OMP_TARGET_KIND_DATA:
9299 case GF_OMP_TARGET_KIND_OACC_DATA:
9300 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9301 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9302 break;
9303 default:
9304 gcc_unreachable ();
9305 }
9306
9307 child_fn = NULL_TREE;
9308 child_cfun = NULL;
9309 if (offloaded)
9310 {
9311 child_fn = gimple_omp_target_child_fn (entry_stmt);
9312 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9313 }
9314
9315 /* Supported by expand_omp_taskreg, but not here. */
9316 if (child_cfun != NULL)
9317 gcc_checking_assert (!child_cfun->cfg);
9318 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9319
9320 entry_bb = region->entry;
9321 exit_bb = region->exit;
9322
9323 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9324 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9325
9326 /* Going on, all OpenACC compute constructs are mapped to
9327 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9328 To distinguish between them, we attach attributes. */
9329 switch (target_kind)
9330 {
9331 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9332 DECL_ATTRIBUTES (child_fn)
9333 = tree_cons (get_identifier ("oacc parallel"),
9334 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9335 break;
9336 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9337 DECL_ATTRIBUTES (child_fn)
9338 = tree_cons (get_identifier ("oacc kernels"),
9339 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9340 break;
9341 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9342 DECL_ATTRIBUTES (child_fn)
9343 = tree_cons (get_identifier ("oacc serial"),
9344 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9345 break;
9346 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9347 DECL_ATTRIBUTES (child_fn)
9348 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9349 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9350 break;
9351 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9352 DECL_ATTRIBUTES (child_fn)
9353 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9354 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9355 break;
9356 default:
9357 /* Make sure we don't miss any. */
9358 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9359 && is_gimple_omp_offloaded (entry_stmt)));
62aee289 9360 break;
9361 }
9362
9363 if (offloaded)
9364 {
9365 unsigned srcidx, dstidx, num;
9366
9367 /* If the offloading region needs data sent from the parent
9368 function, then the very first statement (except possible
9369 tree profile counter updates) of the offloading body
9370 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9371 &.OMP_DATA_O is passed as an argument to the child function,
9372 we need to replace it with the argument as seen by the child
9373 function.
9374
9375 In most cases, this will end up being the identity assignment
9376 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9377 a function call that has been inlined, the original PARM_DECL
9378 .OMP_DATA_I may have been converted into a different local
9379 variable. In which case, we need to keep the assignment. */
9380 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9381 if (data_arg)
9382 {
9383 basic_block entry_succ_bb = single_succ (entry_bb);
9384 gimple_stmt_iterator gsi;
9385 tree arg;
9386 gimple *tgtcopy_stmt = NULL;
9387 tree sender = TREE_VEC_ELT (data_arg, 0);
9388
9389 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9390 {
9391 gcc_assert (!gsi_end_p (gsi));
9392 stmt = gsi_stmt (gsi);
9393 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9394 continue;
9395
9396 if (gimple_num_ops (stmt) == 2)
9397 {
9398 tree arg = gimple_assign_rhs1 (stmt);
9399
9400 /* We're ignoring the subcode because we're
9401 effectively doing a STRIP_NOPS. */
9402
9403 if (TREE_CODE (arg) == ADDR_EXPR
9404 && TREE_OPERAND (arg, 0) == sender)
9405 {
9406 tgtcopy_stmt = stmt;
9407 break;
9408 }
9409 }
9410 }
9411
9412 gcc_assert (tgtcopy_stmt != NULL);
9413 arg = DECL_ARGUMENTS (child_fn);
9414
9415 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9416 gsi_remove (&gsi, true);
9417 }
9418
9419 /* Declare local variables needed in CHILD_CFUN. */
9420 block = DECL_INITIAL (child_fn);
9421 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9422 /* The gimplifier could record temporaries in the offloading block
9423 rather than in containing function's local_decls chain,
9424 which would mean cgraph missed finalizing them. Do it now. */
9425 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9426 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9427 varpool_node::finalize_decl (t);
9428 DECL_SAVED_TREE (child_fn) = NULL;
9429 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9430 gimple_set_body (child_fn, NULL);
9431 TREE_USED (block) = 1;
9432
9433 /* Reset DECL_CONTEXT on function arguments. */
9434 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9435 DECL_CONTEXT (t) = child_fn;
9436
9437 /* Split ENTRY_BB at GIMPLE_*,
9438 so that it can be moved to the child function. */
9439 gsi = gsi_last_nondebug_bb (entry_bb);
9440 stmt = gsi_stmt (gsi);
9441 gcc_assert (stmt
9442 && gimple_code (stmt) == gimple_code (entry_stmt));
9443 e = split_block (entry_bb, stmt);
9444 gsi_remove (&gsi, true);
9445 entry_bb = e->dest;
9446 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9447
9448 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9449 if (exit_bb)
9450 {
9451 gsi = gsi_last_nondebug_bb (exit_bb);
9452 gcc_assert (!gsi_end_p (gsi)
9453 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9454 stmt = gimple_build_return (NULL);
9455 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9456 gsi_remove (&gsi, true);
9457 }
9458
9459 /* Move the offloading region into CHILD_CFUN. */
9460
9461 block = gimple_block (entry_stmt);
9462
9463 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9464 if (exit_bb)
9465 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9466 /* When the OMP expansion process cannot guarantee an up-to-date
9467 loop tree arrange for the child function to fixup loops. */
9468 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9469 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
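  /* For illustration, a sketch (not the exact GIMPLE built below, and
     assuming the GOMP_target_ext entry point; see libgomp/target.c for
     the authoritative signatures): for a plain 'omp target' region the
     call emitted here boils down to roughly

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes,
			kinds, flags, depend, args);

     The other start_ix builtins selected below take a prefix or a
     variant of this argument list.  */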
  tree t1, t2, t3, t4, depend, c, clauses;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  device_loc = OMP_CLAUSE_LOCATION (c);
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
	     runtime library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }

  /* By default, there is no conditional.  */
  tree cond = NULL_TREE;
  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);
  /* If we found the clause 'if (cond)', build:
     OpenACC: goacc_flags = (cond ? goacc_flags
			     : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK)  */
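  /* A sketch of the control flow built below for that conditional,
     shown for the OpenMP variant (the OpenACC variant computes
     GOACC_FLAGS the same way):

       cond_bb: if (cond) goto then_bb; else goto else_bb;
       then_bb: tmp_var = device;
       else_bb: tmp_var = GOMP_DEVICE_HOST_FALLBACK;
       new_bb:  <the launch call below uses tmp_var as DEVICE>  */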
  if (cond)
    {
      tree *tp;
      if (is_gimple_omp_oacc (entry_stmt))
	tp = &goacc_flags;
      else
	{
	  /* Ensure 'device' is of the correct type.  */
	  device = fold_convert_loc (device_loc, integer_type_node, device);

	  tp = &device;
	}

      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      tmp_var = create_tmp_var (TREE_TYPE (*tp));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, *tp);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      if (is_gimple_omp_oacc (entry_stmt))
	stmt = gimple_build_assign (tmp_var,
				    BIT_IOR_EXPR,
				    *tp,
				    build_int_cst (integer_type_node,
						   GOACC_FLAG_HOST_FALLBACK));
      else
	stmt = gimple_build_assign (tmp_var,
				    build_int_cst (integer_type_node,
						   GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      *tp = tmp_var;

      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					   true, GSI_SAME_STMT);
    }

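  /* The data argument, when present, is a TREE_VEC of the three arrays
     that lowering created for the map clauses; below, T1 becomes the
     number of map entries and T2..T4 the addresses of the host-address,
     size and map-kind arrays, in that order.  */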
  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number of arguments used by any start_ix,
     not counting varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
					TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
	{
	  tree dims = NULL_TREE;
	  unsigned int ix;

	  /* For serial constructs we set all dimensions to 1.  */
	  for (ix = GOMP_DIM_MAX; ix--;)
	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
	  oacc_replace_fn_attrib (child_fn, dims);
	}
      else
	oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_DATA:
    case BUILT_IN_GOACC_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure it is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg into the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
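	/* In other words: each launch tag word pairs a GOMP_LAUNCH_*
	   code with a small operand, so an async id that is a
	   compile-time constant below GOMP_LAUNCH_OP_MAX travels inside
	   the tag itself; only larger or non-constant ids need the
	   separate trailing argument pushed below.  */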
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ...  */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */
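/* For instance (a sketch), given

     #pragma omp parallel
       #pragma omp for
	 ...

   the GIMPLE_OMP_FOR region nested inside the GIMPLE_OMP_PARALLEL
   region is expanded first, so outlining the parallel region never has
   to look through an unexpanded OMP construct.  */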

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding the corresponding
		   worksharing region with an ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a
   whole forest of OMP constructs may be built).  */
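/* For instance (a sketch), for

     #pragma omp parallel
       #pragma omp sections
	 ...

   the GIMPLE_OMP_PARALLEL region is recorded as a child of PARENT, the
   GIMPLE_OMP_SECTIONS region hangs off it via its `inner' field, and
   any sibling constructs are chained through `next'.  */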

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it in
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands the omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions.  The root of the tree
   is stored in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
  {
    bool gate = ((flag_openacc != 0 || flag_openmp != 0
		  || flag_openmp_simd != 0)
		 && !seen_error ());

    /* This pass always runs, to provide PROP_gimple_eomp.
       But often, there is nothing to do.  */
    if (!gate)
      return 0;

    return execute_expand_omp ();
  }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
  {
    return !(fun->curr_properties & PROP_gimple_eomp);
  }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     successor edges as abnormal to prevent them from
	     being split.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
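	  /* The resulting edge set, sketched (BB holds the
	     GIMPLE_OMP_CONTINUE, ENTRY the GIMPLE_OMP_FOR, BODY is
	     ENTRY's single successor):

	       ENTRY -> BODY	     (existing edge, now abnormal)
	       BB    -> BODY	     (abnormal loopback)
	       ENTRY -> BB->next_bb  (abnormal; body never executed)
	       BB    -> BB->next_bb  (fallthru + abnormal)  */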
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}