1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
 3 runtime library (libgomp) and so forth.
4
 5Copyright (C) 2005-2017 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "cilk.h"
57#include "gomp-constants.h"
58#include "gimple-pretty-print.h"
 59#include "hsa-common.h"
 60#include "debug.h"
61#include "stringpool.h"
62#include "attribs.h"
63
64/* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
67
68struct omp_region
69{
70 /* The enclosing region. */
71 struct omp_region *outer;
72
73 /* First child region. */
74 struct omp_region *inner;
75
76 /* Next peer region. */
77 struct omp_region *next;
78
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
81
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
84
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
87
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
92
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
95
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
98
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
101
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
104
105 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
106 a depend clause. */
107 gomp_ordered *ord_stmt;
108};
109
110static struct omp_region *root_omp_region;
111static bool omp_any_child_fn_dumped;
112
113static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
114 bool = false);
115static gphi *find_phi_with_arg_on_edge (tree, edge);
116static void expand_omp (struct omp_region *region);
117
118/* Return true if REGION is a combined parallel+workshare region. */
119
120static inline bool
121is_combined_parallel (struct omp_region *region)
122{
123 return region->is_combined_parallel;
124}
125
126/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
127 is the immediate dominator of PAR_ENTRY_BB, return true if there
128 are no data dependencies that would prevent expanding the parallel
129 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130
131 When expanding a combined parallel+workshare region, the call to
132 the child function may need additional arguments in the case of
133 GIMPLE_OMP_FOR regions. In some cases, these arguments are
134 computed out of variables passed in from the parent to the child
135 via 'struct .omp_data_s'. For instance:
136
137 #pragma omp parallel for schedule (guided, i * 4)
138 for (j ...)
139
140 Is lowered into:
141
 142 # BLOCK 2 (PAR_ENTRY_BB)
143 .omp_data_o.i = i;
144 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145
146 # BLOCK 3 (WS_ENTRY_BB)
147 .omp_data_i = &.omp_data_o;
148 D.1667 = .omp_data_i->i;
149 D.1598 = D.1667 * 4;
150 #pragma omp for schedule (guided, D.1598)
151
152 When we outline the parallel region, the call to the child function
153 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
154 that value is computed *after* the call site. So, in principle we
155 cannot do the transformation.
156
157 To see whether the code in WS_ENTRY_BB blocks the combined
158 parallel+workshare call, we collect all the variables used in the
 159 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
160 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
161 call.
162
163 FIXME. If we had the SSA form built at this point, we could merely
164 hoist the code in block 3 into block 2 and be done with it. But at
165 this point we don't have dataflow information and though we could
166 hack something up here, it is really not worth the aggravation. */
167
168static bool
169workshare_safe_to_combine_p (basic_block ws_entry_bb)
170{
171 struct omp_for_data fd;
172 gimple *ws_stmt = last_stmt (ws_entry_bb);
173
174 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
175 return true;
176
177 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
178
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
185
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
196
197 return true;
198}
199
200/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
202
203static tree
204omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205{
206 if (!simd_schedule)
207 return chunk_size;
208
209 int vf = omp_max_vf ();
210 if (vf == 1)
211 return chunk_size;
212
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
218}
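/* Worked example (editor's illustration, not from the original sources):
   with an assumed vectorization factor vf == 8, a schedule chunk size of
   10 would be adjusted as

     (10 + (8 - 1)) & -8  ==  17 & ~7  ==  16

   i.e. rounded up to the next multiple of vf so that simd chunks always
   cover whole vector iterations.  */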
219
 220/* Collect additional arguments needed to emit a combined
 221 parallel+workshare call. PAR_STMT is the enclosing parallel
 222 directive and WS_STMT is the workshare directive being expanded. */
223
224static vec<tree, va_gc> *
225get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226{
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
230
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 {
233 struct omp_for_data fd;
234 tree n1, n2;
235
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
239
240 if (gimple_omp_for_combined_into_p (for_stmt))
241 {
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
251 }
252
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
257
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
263
264 if (fd.chunk_size)
265 {
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
269 }
270
271 return ws_args;
272 }
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 {
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
283 }
284
285 gcc_unreachable ();
286}
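/* Illustration (hypothetical source, not from the original file): for

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   the vector built above would hold the loop bounds, step and chunk size
   converted to long, roughly { (long) 0, (long) n, (long) 1, (long) 4 },
   ready to be spliced into the combined GOMP_parallel_loop_dynamic call
   emitted by expand_parallel_call.  */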
287
288/* Discover whether REGION is a combined parallel+workshare region. */
289
290static void
291determine_parallel_type (struct omp_region *region)
292{
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
295
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
300
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
306
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
313
314 if (single_succ (par_entry_bb) == ws_entry_bb
315 && single_succ (ws_exit_bb) == par_exit_bb
316 && workshare_safe_to_combine_p (ws_entry_bb)
317 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
318 || (last_and_only_stmt (ws_entry_bb)
319 && last_and_only_stmt (par_exit_bb))))
320 {
321 gimple *par_stmt = last_stmt (par_entry_bb);
322 gimple *ws_stmt = last_stmt (ws_entry_bb);
323
324 if (region->inner->type == GIMPLE_OMP_FOR)
325 {
326 /* If this is a combined parallel loop, we need to determine
327 whether or not to use the combined library calls. There
328 are two cases where we do not apply the transformation:
329 static loops and any kind of ordered loop. In the first
330 case, we already open code the loop so there is no need
331 to do anything else. In the latter case, the combined
332 parallel loop call would still need extra synchronization
333 to implement ordered semantics, so there would not be any
334 gain in using the combined call. */
335 tree clauses = gimple_omp_for_clauses (ws_stmt);
336 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
337 if (c == NULL
338 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
339 == OMP_CLAUSE_SCHEDULE_STATIC)
340 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
341 {
342 region->is_combined_parallel = false;
343 region->inner->is_combined_parallel = false;
344 return;
345 }
346 }
347
348 region->is_combined_parallel = true;
349 region->inner->is_combined_parallel = true;
350 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
351 }
352}
353
354/* Debugging dumps for parallel regions. */
355void dump_omp_region (FILE *, struct omp_region *, int);
356void debug_omp_region (struct omp_region *);
357void debug_all_omp_regions (void);
358
359/* Dump the parallel region tree rooted at REGION. */
360
361void
362dump_omp_region (FILE *file, struct omp_region *region, int indent)
363{
364 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
365 gimple_code_name[region->type]);
366
367 if (region->inner)
368 dump_omp_region (file, region->inner, indent + 4);
369
370 if (region->cont)
371 {
372 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
373 region->cont->index);
374 }
375
376 if (region->exit)
377 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
378 region->exit->index);
379 else
380 fprintf (file, "%*s[no exit marker]\n", indent, "");
381
382 if (region->next)
383 dump_omp_region (file, region->next, indent);
384}
385
386DEBUG_FUNCTION void
387debug_omp_region (struct omp_region *region)
388{
389 dump_omp_region (stderr, region, 0);
390}
391
392DEBUG_FUNCTION void
393debug_all_omp_regions (void)
394{
395 dump_omp_region (stderr, root_omp_region, 0);
396}
397
 398/* Create a new parallel region of kind TYPE starting at block BB inside region PARENT. */
399
400static struct omp_region *
401new_omp_region (basic_block bb, enum gimple_code type,
402 struct omp_region *parent)
403{
404 struct omp_region *region = XCNEW (struct omp_region);
405
406 region->outer = parent;
407 region->entry = bb;
408 region->type = type;
409
410 if (parent)
411 {
412 /* This is a nested region. Add it to the list of inner
413 regions in PARENT. */
414 region->next = parent->inner;
415 parent->inner = region;
416 }
417 else
418 {
419 /* This is a toplevel region. Add it to the list of toplevel
420 regions in ROOT_OMP_REGION. */
421 region->next = root_omp_region;
422 root_omp_region = region;
423 }
424
425 return region;
426}
427
428/* Release the memory associated with the region tree rooted at REGION. */
429
430static void
431free_omp_region_1 (struct omp_region *region)
432{
433 struct omp_region *i, *n;
434
435 for (i = region->inner; i ; i = n)
436 {
437 n = i->next;
438 free_omp_region_1 (i);
439 }
440
441 free (region);
442}
443
444/* Release the memory for the entire omp region tree. */
445
446void
447omp_free_regions (void)
448{
449 struct omp_region *r, *n;
450 for (r = root_omp_region; r ; r = n)
451 {
452 n = r->next;
453 free_omp_region_1 (r);
454 }
455 root_omp_region = NULL;
456}
457
458/* A convenience function to build an empty GIMPLE_COND with just the
459 condition. */
460
461static gcond *
462gimple_build_cond_empty (tree cond)
463{
464 enum tree_code pred_code;
465 tree lhs, rhs;
466
467 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
468 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
469}
470
471/* Return true if a parallel REGION is within a declare target function or
472 within a target region and is not a part of a gridified target. */
473
474static bool
475parallel_needs_hsa_kernel_p (struct omp_region *region)
476{
477 bool indirect = false;
478 for (region = region->outer; region; region = region->outer)
479 {
480 if (region->type == GIMPLE_OMP_PARALLEL)
481 indirect = true;
482 else if (region->type == GIMPLE_OMP_TARGET)
483 {
484 gomp_target *tgt_stmt
485 = as_a <gomp_target *> (last_stmt (region->entry));
486
487 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
488 OMP_CLAUSE__GRIDDIM_))
489 return indirect;
490 else
491 return true;
492 }
493 }
494
495 if (lookup_attribute ("omp declare target",
496 DECL_ATTRIBUTES (current_function_decl)))
497 return true;
498
499 return false;
500}
501
502/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
503 Add CHILD_FNDECL to decl chain of the supercontext of the block
504 ENTRY_BLOCK - this is the block which originally contained the
505 code from which CHILD_FNDECL was created.
506
507 Together, these actions ensure that the debug info for the outlined
508 function will be emitted with the correct lexical scope. */
509
510static void
511adjust_context_and_scope (tree entry_block, tree child_fndecl)
512{
513 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
514 {
515 tree b = BLOCK_SUPERCONTEXT (entry_block);
516
517 if (TREE_CODE (b) == BLOCK)
518 {
519 tree parent_fndecl;
520
521 /* Follow supercontext chain until the parent fndecl
522 is found. */
523 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
524 TREE_CODE (parent_fndecl) == BLOCK;
525 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
526 ;
527
528 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
529
530 DECL_CONTEXT (child_fndecl) = parent_fndecl;
531
532 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
533 BLOCK_VARS (b) = child_fndecl;
534 }
535 }
536}
537
538/* Build the function calls to GOMP_parallel_start etc to actually
539 generate the parallel operation. REGION is the parallel region
540 being expanded. BB is the block where to insert the code. WS_ARGS
541 will be set if this is a call to a combined parallel+workshare
542 construct, it contains the list of additional arguments needed by
543 the workshare construct. */
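/* For a plain (non-combined) parallel region the code below boils down to
   a call along the lines of

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);

   i.e. child function, address of the shared data block, NUM_THREADS
   (0 = let the runtime decide) and FLAGS; for combined parallel+workshare
   regions the WS_ARGS collected by get_ws_args_for are spliced in before
   FLAGS and one of the GOMP_parallel_loop_* / GOMP_parallel_sections
   entry points is used instead.  (Editor's sketch; the child function
   name is made up.)  */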
544
545static void
546expand_parallel_call (struct omp_region *region, basic_block bb,
547 gomp_parallel *entry_stmt,
548 vec<tree, va_gc> *ws_args)
549{
550 tree t, t1, t2, val, cond, c, clauses, flags;
551 gimple_stmt_iterator gsi;
552 gimple *stmt;
553 enum built_in_function start_ix;
554 int start_ix2;
555 location_t clause_loc;
556 vec<tree, va_gc> *args;
557
558 clauses = gimple_omp_parallel_clauses (entry_stmt);
559
560 /* Determine what flavor of GOMP_parallel we will be
561 emitting. */
562 start_ix = BUILT_IN_GOMP_PARALLEL;
563 if (is_combined_parallel (region))
564 {
565 switch (region->inner->type)
566 {
567 case GIMPLE_OMP_FOR:
568 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
569 switch (region->inner->sched_kind)
570 {
571 case OMP_CLAUSE_SCHEDULE_RUNTIME:
572 start_ix2 = 3;
573 break;
574 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
575 case OMP_CLAUSE_SCHEDULE_GUIDED:
576 if (region->inner->sched_modifiers
577 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
578 {
579 start_ix2 = 3 + region->inner->sched_kind;
580 break;
581 }
582 /* FALLTHRU */
583 default:
584 start_ix2 = region->inner->sched_kind;
585 break;
586 }
587 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
588 start_ix = (enum built_in_function) start_ix2;
589 break;
590 case GIMPLE_OMP_SECTIONS:
591 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
592 break;
593 default:
594 gcc_unreachable ();
595 }
596 }
597
598 /* By default, the value of NUM_THREADS is zero (selected at run time)
599 and there is no conditional. */
600 cond = NULL_TREE;
601 val = build_int_cst (unsigned_type_node, 0);
602 flags = build_int_cst (unsigned_type_node, 0);
603
604 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
605 if (c)
606 cond = OMP_CLAUSE_IF_EXPR (c);
607
608 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
609 if (c)
610 {
611 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
612 clause_loc = OMP_CLAUSE_LOCATION (c);
613 }
614 else
615 clause_loc = gimple_location (entry_stmt);
616
617 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
618 if (c)
619 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
620
621 /* Ensure 'val' is of the correct type. */
622 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
623
624 /* If we found the clause 'if (cond)', build either
625 (cond != 0) or (cond ? val : 1u). */
626 if (cond)
627 {
628 cond = gimple_boolify (cond);
629
630 if (integer_zerop (val))
631 val = fold_build2_loc (clause_loc,
632 EQ_EXPR, unsigned_type_node, cond,
633 build_int_cst (TREE_TYPE (cond), 0));
634 else
635 {
636 basic_block cond_bb, then_bb, else_bb;
637 edge e, e_then, e_else;
638 tree tmp_then, tmp_else, tmp_join, tmp_var;
639
640 tmp_var = create_tmp_var (TREE_TYPE (val));
641 if (gimple_in_ssa_p (cfun))
642 {
643 tmp_then = make_ssa_name (tmp_var);
644 tmp_else = make_ssa_name (tmp_var);
645 tmp_join = make_ssa_name (tmp_var);
646 }
647 else
648 {
649 tmp_then = tmp_var;
650 tmp_else = tmp_var;
651 tmp_join = tmp_var;
652 }
653
654 e = split_block_after_labels (bb);
655 cond_bb = e->src;
656 bb = e->dest;
657 remove_edge (e);
658
659 then_bb = create_empty_bb (cond_bb);
660 else_bb = create_empty_bb (then_bb);
661 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
662 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
663
664 stmt = gimple_build_cond_empty (cond);
665 gsi = gsi_start_bb (cond_bb);
666 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
667
668 gsi = gsi_start_bb (then_bb);
669 expand_omp_build_assign (&gsi, tmp_then, val, true);
670
671 gsi = gsi_start_bb (else_bb);
672 expand_omp_build_assign (&gsi, tmp_else,
673 build_int_cst (unsigned_type_node, 1),
674 true);
675
676 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
677 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
678 add_bb_to_loop (then_bb, cond_bb->loop_father);
679 add_bb_to_loop (else_bb, cond_bb->loop_father);
680 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
681 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
682
683 if (gimple_in_ssa_p (cfun))
684 {
685 gphi *phi = create_phi_node (tmp_join, bb);
686 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
687 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
688 }
689
690 val = tmp_join;
691 }
692
693 gsi = gsi_start_bb (bb);
694 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
695 false, GSI_CONTINUE_LINKING);
696 }
697
698 gsi = gsi_last_bb (bb);
699 t = gimple_omp_parallel_data_arg (entry_stmt);
700 if (t == NULL)
701 t1 = null_pointer_node;
702 else
703 t1 = build_fold_addr_expr (t);
704 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
705 t2 = build_fold_addr_expr (child_fndecl);
706
707 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
708
709 vec_alloc (args, 4 + vec_safe_length (ws_args));
710 args->quick_push (t2);
711 args->quick_push (t1);
712 args->quick_push (val);
713 if (ws_args)
714 args->splice (*ws_args);
715 args->quick_push (flags);
716
717 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
718 builtin_decl_explicit (start_ix), args);
719
720 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
721 false, GSI_CONTINUE_LINKING);
722
723 if (hsa_gen_requested_p ()
724 && parallel_needs_hsa_kernel_p (region))
725 {
726 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
727 hsa_register_kernel (child_cnode);
728 }
729}
730
731/* Insert a function call whose name is FUNC_NAME with the information from
732 ENTRY_STMT into the basic_block BB. */
733
734static void
735expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
736 vec <tree, va_gc> *ws_args)
737{
738 tree t, t1, t2;
739 gimple_stmt_iterator gsi;
740 vec <tree, va_gc> *args;
741
742 gcc_assert (vec_safe_length (ws_args) == 2);
743 tree func_name = (*ws_args)[0];
744 tree grain = (*ws_args)[1];
745
746 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
747 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
748 gcc_assert (count != NULL_TREE);
749 count = OMP_CLAUSE_OPERAND (count, 0);
750
751 gsi = gsi_last_bb (bb);
752 t = gimple_omp_parallel_data_arg (entry_stmt);
753 if (t == NULL)
754 t1 = null_pointer_node;
755 else
756 t1 = build_fold_addr_expr (t);
757 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
758
759 vec_alloc (args, 4);
760 args->quick_push (t2);
761 args->quick_push (t1);
762 args->quick_push (count);
763 args->quick_push (grain);
764 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
765
766 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
767 GSI_CONTINUE_LINKING);
768}
769
770/* Build the function call to GOMP_task to actually
771 generate the task operation. BB is the block where to insert the code. */
772
773static void
774expand_task_call (struct omp_region *region, basic_block bb,
775 gomp_task *entry_stmt)
776{
777 tree t1, t2, t3;
778 gimple_stmt_iterator gsi;
779 location_t loc = gimple_location (entry_stmt);
780
781 tree clauses = gimple_omp_task_clauses (entry_stmt);
782
783 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
784 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
785 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
786 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
787 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
788 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
789
790 unsigned int iflags
791 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
792 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
793 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
794
795 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
796 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
797 tree num_tasks = NULL_TREE;
798 bool ull = false;
799 if (taskloop_p)
800 {
801 gimple *g = last_stmt (region->outer->entry);
802 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
803 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
804 struct omp_for_data fd;
805 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
806 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
807 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
808 OMP_CLAUSE__LOOPTEMP_);
809 startvar = OMP_CLAUSE_DECL (startvar);
810 endvar = OMP_CLAUSE_DECL (endvar);
811 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
812 if (fd.loop.cond_code == LT_EXPR)
813 iflags |= GOMP_TASK_FLAG_UP;
814 tree tclauses = gimple_omp_for_clauses (g);
815 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
816 if (num_tasks)
817 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
818 else
819 {
820 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
821 if (num_tasks)
822 {
823 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
824 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
825 }
826 else
827 num_tasks = integer_zero_node;
828 }
829 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
830 if (ifc == NULL_TREE)
831 iflags |= GOMP_TASK_FLAG_IF;
832 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
833 iflags |= GOMP_TASK_FLAG_NOGROUP;
834 ull = fd.iter_type == long_long_unsigned_type_node;
835 }
836 else if (priority)
837 iflags |= GOMP_TASK_FLAG_PRIORITY;
838
839 tree flags = build_int_cst (unsigned_type_node, iflags);
840
841 tree cond = boolean_true_node;
842 if (ifc)
843 {
844 if (taskloop_p)
845 {
846 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
848 build_int_cst (unsigned_type_node,
849 GOMP_TASK_FLAG_IF),
850 build_int_cst (unsigned_type_node, 0));
851 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
852 flags, t);
853 }
854 else
855 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
856 }
857
858 if (finalc)
859 {
860 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
861 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
862 build_int_cst (unsigned_type_node,
863 GOMP_TASK_FLAG_FINAL),
864 build_int_cst (unsigned_type_node, 0));
865 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
866 }
867 if (depend)
868 depend = OMP_CLAUSE_DECL (depend);
869 else
870 depend = build_int_cst (ptr_type_node, 0);
871 if (priority)
872 priority = fold_convert (integer_type_node,
873 OMP_CLAUSE_PRIORITY_EXPR (priority));
874 else
875 priority = integer_zero_node;
876
877 gsi = gsi_last_bb (bb);
878 tree t = gimple_omp_task_data_arg (entry_stmt);
879 if (t == NULL)
880 t2 = null_pointer_node;
881 else
882 t2 = build_fold_addr_expr_loc (loc, t);
883 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
884 t = gimple_omp_task_copy_fn (entry_stmt);
885 if (t == NULL)
886 t3 = null_pointer_node;
887 else
888 t3 = build_fold_addr_expr_loc (loc, t);
889
890 if (taskloop_p)
891 t = build_call_expr (ull
892 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
893 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
894 11, t1, t2, t3,
895 gimple_omp_task_arg_size (entry_stmt),
896 gimple_omp_task_arg_align (entry_stmt), flags,
897 num_tasks, priority, startvar, endvar, step);
898 else
899 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
900 9, t1, t2, t3,
901 gimple_omp_task_arg_size (entry_stmt),
902 gimple_omp_task_arg_align (entry_stmt), cond, flags,
903 depend, priority);
904
905 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
906 false, GSI_CONTINUE_LINKING);
907}
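/* Illustration (editor's example, not part of the original sources): for

     #pragma omp task untied mergeable depend(in: x)

   the IFLAGS computed above amount to GOMP_TASK_FLAG_UNTIED
   | GOMP_TASK_FLAG_MERGEABLE | GOMP_TASK_FLAG_DEPEND, and DEPEND is the
   address array from the depend clause rather than the null constant.  */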
908
909/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
910
911static tree
912vec2chain (vec<tree, va_gc> *v)
913{
914 tree chain = NULL_TREE, t;
915 unsigned ix;
916
917 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
918 {
919 DECL_CHAIN (t) = chain;
920 chain = t;
921 }
922
923 return chain;
924}
925
926/* Remove barriers in REGION->EXIT's block. Note that this is only
927 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
928 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
929 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
930 removed. */
931
932static void
933remove_exit_barrier (struct omp_region *region)
934{
935 gimple_stmt_iterator gsi;
936 basic_block exit_bb;
937 edge_iterator ei;
938 edge e;
939 gimple *stmt;
940 int any_addressable_vars = -1;
941
942 exit_bb = region->exit;
943
944 /* If the parallel region doesn't return, we don't have REGION->EXIT
945 block at all. */
946 if (! exit_bb)
947 return;
948
949 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
950 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
951 statements that can appear in between are extremely limited -- no
952 memory operations at all. Here, we allow nothing at all, so the
953 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
954 gsi = gsi_last_bb (exit_bb);
955 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
956 gsi_prev (&gsi);
957 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
958 return;
959
960 FOR_EACH_EDGE (e, ei, exit_bb->preds)
961 {
962 gsi = gsi_last_bb (e->src);
963 if (gsi_end_p (gsi))
964 continue;
965 stmt = gsi_stmt (gsi);
966 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
967 && !gimple_omp_return_nowait_p (stmt))
968 {
969 /* OpenMP 3.0 tasks unfortunately prevent this optimization
970 in many cases. If there could be tasks queued, the barrier
971 might be needed to let the tasks run before some local
972 variable of the parallel that the task uses as shared
973 runs out of scope. The task can be spawned either
 974 from within the current function (this would be easy to check)
975 or from some function it calls and gets passed an address
976 of such a variable. */
977 if (any_addressable_vars < 0)
978 {
979 gomp_parallel *parallel_stmt
980 = as_a <gomp_parallel *> (last_stmt (region->entry));
981 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
982 tree local_decls, block, decl;
983 unsigned ix;
984
985 any_addressable_vars = 0;
986 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
987 if (TREE_ADDRESSABLE (decl))
988 {
989 any_addressable_vars = 1;
990 break;
991 }
992 for (block = gimple_block (stmt);
993 !any_addressable_vars
994 && block
995 && TREE_CODE (block) == BLOCK;
996 block = BLOCK_SUPERCONTEXT (block))
997 {
998 for (local_decls = BLOCK_VARS (block);
999 local_decls;
1000 local_decls = DECL_CHAIN (local_decls))
1001 if (TREE_ADDRESSABLE (local_decls))
1002 {
1003 any_addressable_vars = 1;
1004 break;
1005 }
1006 if (block == gimple_block (parallel_stmt))
1007 break;
1008 }
1009 }
1010 if (!any_addressable_vars)
1011 gimple_omp_return_set_nowait (stmt);
1012 }
1013 }
1014}
1015
1016static void
1017remove_exit_barriers (struct omp_region *region)
1018{
1019 if (region->type == GIMPLE_OMP_PARALLEL)
1020 remove_exit_barrier (region);
1021
1022 if (region->inner)
1023 {
1024 region = region->inner;
1025 remove_exit_barriers (region);
1026 while (region->next)
1027 {
1028 region = region->next;
1029 remove_exit_barriers (region);
1030 }
1031 }
1032}
1033
1034/* Optimize omp_get_thread_num () and omp_get_num_threads ()
1035 calls. These can't be declared as const functions, but
1036 within one parallel body they are constant, so they can be
1037 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
 1038 which are declared const. Similarly for a task body, except
 1039 that in an untied task omp_get_thread_num () can change at any task
 1040 scheduling point. */
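/* For instance (editor's illustration), a statement like

     _1 = omp_get_num_threads ();

   inside the outlined body is redirected to the const
   __builtin_omp_get_num_threads declaration, so later passes can CSE
   repeated calls instead of re-invoking the library each time.  */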
1041
1042static void
1043optimize_omp_library_calls (gimple *entry_stmt)
1044{
1045 basic_block bb;
1046 gimple_stmt_iterator gsi;
1047 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1048 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1049 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1050 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1051 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1052 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1053 OMP_CLAUSE_UNTIED) != NULL);
1054
1055 FOR_EACH_BB_FN (bb, cfun)
1056 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1057 {
1058 gimple *call = gsi_stmt (gsi);
1059 tree decl;
1060
1061 if (is_gimple_call (call)
1062 && (decl = gimple_call_fndecl (call))
1063 && DECL_EXTERNAL (decl)
1064 && TREE_PUBLIC (decl)
1065 && DECL_INITIAL (decl) == NULL)
1066 {
1067 tree built_in;
1068
1069 if (DECL_NAME (decl) == thr_num_id)
1070 {
1071 /* In #pragma omp task untied omp_get_thread_num () can change
1072 during the execution of the task region. */
1073 if (untied_task)
1074 continue;
1075 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1076 }
1077 else if (DECL_NAME (decl) == num_thr_id)
1078 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1079 else
1080 continue;
1081
1082 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1083 || gimple_call_num_args (call) != 0)
1084 continue;
1085
1086 if (flag_exceptions && !TREE_NOTHROW (decl))
1087 continue;
1088
1089 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1090 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1091 TREE_TYPE (TREE_TYPE (built_in))))
1092 continue;
1093
1094 gimple_call_set_fndecl (call, built_in);
1095 }
1096 }
1097}
1098
1099/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1100 regimplified. */
1101
1102static tree
1103expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1104{
1105 tree t = *tp;
1106
1107 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1108 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1109 return t;
1110
1111 if (TREE_CODE (t) == ADDR_EXPR)
1112 recompute_tree_invariant_for_addr_expr (t);
1113
1114 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1115 return NULL_TREE;
1116}
1117
1118/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1119
1120static void
1121expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1122 bool after)
1123{
1124 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1125 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1126 !after, after ? GSI_CONTINUE_LINKING
1127 : GSI_SAME_STMT);
1128 gimple *stmt = gimple_build_assign (to, from);
1129 if (after)
1130 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1131 else
1132 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1133 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1134 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1135 {
1136 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1137 gimple_regimplify_operands (stmt, &gsi);
1138 }
1139}
1140
1141/* Expand the OpenMP parallel or task directive starting at REGION. */
1142
1143static void
1144expand_omp_taskreg (struct omp_region *region)
1145{
1146 basic_block entry_bb, exit_bb, new_bb;
1147 struct function *child_cfun;
1148 tree child_fn, block, t;
1149 gimple_stmt_iterator gsi;
1150 gimple *entry_stmt, *stmt;
1151 edge e;
1152 vec<tree, va_gc> *ws_args;
1153
1154 entry_stmt = last_stmt (region->entry);
1155 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1156 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1157
1158 entry_bb = region->entry;
1159 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1160 exit_bb = region->cont;
1161 else
1162 exit_bb = region->exit;
1163
1164 bool is_cilk_for
1165 = (flag_cilkplus
1166 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1167 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1168 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1169
1170 if (is_cilk_for)
1171 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1172 and the inner statement contains the name of the built-in function
1173 and grain. */
1174 ws_args = region->inner->ws_args;
1175 else if (is_combined_parallel (region))
1176 ws_args = region->ws_args;
1177 else
1178 ws_args = NULL;
1179
1180 if (child_cfun->cfg)
1181 {
1182 /* Due to inlining, it may happen that we have already outlined
1183 the region, in which case all we need to do is make the
1184 sub-graph unreachable and emit the parallel call. */
1185 edge entry_succ_e, exit_succ_e;
1186
1187 entry_succ_e = single_succ_edge (entry_bb);
1188
1189 gsi = gsi_last_bb (entry_bb);
1190 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1191 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1192 gsi_remove (&gsi, true);
1193
1194 new_bb = entry_bb;
1195 if (exit_bb)
1196 {
1197 exit_succ_e = single_succ_edge (exit_bb);
1198 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1199 }
1200 remove_edge_and_dominated_blocks (entry_succ_e);
1201 }
1202 else
1203 {
1204 unsigned srcidx, dstidx, num;
1205
1206 /* If the parallel region needs data sent from the parent
1207 function, then the very first statement (except possible
1208 tree profile counter updates) of the parallel body
1209 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1210 &.OMP_DATA_O is passed as an argument to the child function,
1211 we need to replace it with the argument as seen by the child
1212 function.
1213
1214 In most cases, this will end up being the identity assignment
1215 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1216 a function call that has been inlined, the original PARM_DECL
1217 .OMP_DATA_I may have been converted into a different local
1218 variable. In which case, we need to keep the assignment. */
1219 if (gimple_omp_taskreg_data_arg (entry_stmt))
1220 {
1221 basic_block entry_succ_bb
1222 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1223 : FALLTHRU_EDGE (entry_bb)->dest;
1224 tree arg;
1225 gimple *parcopy_stmt = NULL;
1226
1227 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1228 {
1229 gimple *stmt;
1230
1231 gcc_assert (!gsi_end_p (gsi));
1232 stmt = gsi_stmt (gsi);
1233 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1234 continue;
1235
1236 if (gimple_num_ops (stmt) == 2)
1237 {
1238 tree arg = gimple_assign_rhs1 (stmt);
1239
 1240 /* We're ignoring the subcode because we're
1241 effectively doing a STRIP_NOPS. */
1242
1243 if (TREE_CODE (arg) == ADDR_EXPR
1244 && TREE_OPERAND (arg, 0)
 1245 == gimple_omp_taskreg_data_arg (entry_stmt))
1246 {
1247 parcopy_stmt = stmt;
1248 break;
1249 }
1250 }
1251 }
1252
1253 gcc_assert (parcopy_stmt != NULL);
1254 arg = DECL_ARGUMENTS (child_fn);
1255
1256 if (!gimple_in_ssa_p (cfun))
1257 {
1258 if (gimple_assign_lhs (parcopy_stmt) == arg)
1259 gsi_remove (&gsi, true);
1260 else
1261 {
 1262 /* ?? Is setting the subcode really necessary ?? */
1263 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1264 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1265 }
1266 }
1267 else
1268 {
1269 tree lhs = gimple_assign_lhs (parcopy_stmt);
1270 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1271 /* We'd like to set the rhs to the default def in the child_fn,
1272 but it's too early to create ssa names in the child_fn.
1273 Instead, we set the rhs to the parm. In
1274 move_sese_region_to_fn, we introduce a default def for the
 1275 parm, map the parm to its default def, and once we encounter
1276 this stmt, replace the parm with the default def. */
1277 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1278 update_stmt (parcopy_stmt);
1279 }
1280 }
1281
1282 /* Declare local variables needed in CHILD_CFUN. */
1283 block = DECL_INITIAL (child_fn);
1284 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1285 /* The gimplifier could record temporaries in parallel/task block
1286 rather than in containing function's local_decls chain,
1287 which would mean cgraph missed finalizing them. Do it now. */
1288 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1289 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1290 varpool_node::finalize_decl (t);
1291 DECL_SAVED_TREE (child_fn) = NULL;
1292 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1293 gimple_set_body (child_fn, NULL);
1294 TREE_USED (block) = 1;
1295
1296 /* Reset DECL_CONTEXT on function arguments. */
1297 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1298 DECL_CONTEXT (t) = child_fn;
1299
1300 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1301 so that it can be moved to the child function. */
1302 gsi = gsi_last_bb (entry_bb);
1303 stmt = gsi_stmt (gsi);
1304 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1305 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1306 e = split_block (entry_bb, stmt);
1307 gsi_remove (&gsi, true);
1308 entry_bb = e->dest;
1309 edge e2 = NULL;
1310 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1311 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1312 else
1313 {
1314 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1315 gcc_assert (e2->dest == region->exit);
1316 remove_edge (BRANCH_EDGE (entry_bb));
1317 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1318 gsi = gsi_last_bb (region->exit);
1319 gcc_assert (!gsi_end_p (gsi)
1320 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1321 gsi_remove (&gsi, true);
1322 }
1323
1324 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1325 if (exit_bb)
1326 {
1327 gsi = gsi_last_bb (exit_bb);
1328 gcc_assert (!gsi_end_p (gsi)
1329 && (gimple_code (gsi_stmt (gsi))
1330 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1331 stmt = gimple_build_return (NULL);
1332 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1333 gsi_remove (&gsi, true);
1334 }
1335
1336 /* Move the parallel region into CHILD_CFUN. */
1337
1338 if (gimple_in_ssa_p (cfun))
1339 {
1340 init_tree_ssa (child_cfun);
1341 init_ssa_operands (child_cfun);
1342 child_cfun->gimple_df->in_ssa_p = true;
1343 block = NULL_TREE;
1344 }
1345 else
1346 block = gimple_block (entry_stmt);
1347
1348 /* Make sure to generate early debug for the function before
1349 outlining anything. */
1350 if (! gimple_in_ssa_p (cfun))
1351 (*debug_hooks->early_global_decl) (cfun->decl);
1352
1353 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1354 if (exit_bb)
1355 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1356 if (e2)
1357 {
1358 basic_block dest_bb = e2->dest;
1359 if (!exit_bb)
1360 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1361 remove_edge (e2);
1362 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1363 }
1364 /* When the OMP expansion process cannot guarantee an up-to-date
 1365 loop tree, arrange for the child function to fix up loops. */
1366 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1367 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1368
1369 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1370 num = vec_safe_length (child_cfun->local_decls);
1371 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1372 {
1373 t = (*child_cfun->local_decls)[srcidx];
1374 if (DECL_CONTEXT (t) == cfun->decl)
1375 continue;
1376 if (srcidx != dstidx)
1377 (*child_cfun->local_decls)[dstidx] = t;
1378 dstidx++;
1379 }
1380 if (dstidx != num)
1381 vec_safe_truncate (child_cfun->local_decls, dstidx);
1382
1383 /* Inform the callgraph about the new function. */
1384 child_cfun->curr_properties = cfun->curr_properties;
1385 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1386 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1387 cgraph_node *node = cgraph_node::get_create (child_fn);
1388 node->parallelized_function = 1;
1389 cgraph_node::add_new_function (child_fn, true);
1390
1391 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1392 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1393
1394 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1395 fixed in a following pass. */
1396 push_cfun (child_cfun);
1397 if (need_asm)
 1398 assign_assembler_name_if_needed (child_fn);
1399
1400 if (optimize)
1401 optimize_omp_library_calls (entry_stmt);
 1402 counts_to_freqs ();
1403 cgraph_edge::rebuild_edges ();
1404
1405 /* Some EH regions might become dead, see PR34608. If
1406 pass_cleanup_cfg isn't the first pass to happen with the
1407 new child, these dead EH edges might cause problems.
1408 Clean them up now. */
1409 if (flag_exceptions)
1410 {
1411 basic_block bb;
1412 bool changed = false;
1413
1414 FOR_EACH_BB_FN (bb, cfun)
1415 changed |= gimple_purge_dead_eh_edges (bb);
1416 if (changed)
1417 cleanup_tree_cfg ();
1418 }
1419 if (gimple_in_ssa_p (cfun))
1420 update_ssa (TODO_update_ssa);
1421 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1422 verify_loop_structure ();
1423 pop_cfun ();
1424
1425 if (dump_file && !gimple_in_ssa_p (cfun))
1426 {
1427 omp_any_child_fn_dumped = true;
1428 dump_function_header (dump_file, child_fn, dump_flags);
1429 dump_function_to_file (child_fn, dump_file, dump_flags);
1430 }
1431 }
1432
1433 /* Emit a library call to launch the children threads. */
1434 if (is_cilk_for)
1435 expand_cilk_for_call (new_bb,
1436 as_a <gomp_parallel *> (entry_stmt), ws_args);
1437 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1438 expand_parallel_call (region, new_bb,
1439 as_a <gomp_parallel *> (entry_stmt), ws_args);
1440 else
1441 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1442 if (gimple_in_ssa_p (cfun))
1443 update_ssa (TODO_update_ssa_only_virtuals);
1444}
1445
1446/* Information about members of an OpenACC collapsed loop nest. */
1447
1448struct oacc_collapse
1449{
 1450 tree base; /* Base value. */
 1451 tree iters; /* Number of steps. */
1452 tree step; /* Step size. */
1453 tree tile; /* Tile increment (if tiled). */
1454 tree outer; /* Tile iterator var. */
1455};
1456
1457/* Helper for expand_oacc_for. Determine collapsed loop information.
1458 Fill in COUNTS array. Emit any initialization code before GSI.
1459 Return the calculated outer loop bound of BOUND_TYPE. */
1460
1461static tree
1462expand_oacc_collapse_init (const struct omp_for_data *fd,
1463 gimple_stmt_iterator *gsi,
 1464 oacc_collapse *counts, tree bound_type,
 1465 location_t loc)
 1466{
 1467 tree tiling = fd->tiling;
1468 tree total = build_int_cst (bound_type, 1);
1469 int ix;
1470
1471 gcc_assert (integer_onep (fd->loop.step));
1472 gcc_assert (integer_zerop (fd->loop.n1));
1473
1474 /* When tiling, the first operand of the tile clause applies to the
1475 innermost loop, and we work outwards from there. Seems
1476 backwards, but whatever. */
1477 for (ix = fd->collapse; ix--;)
1478 {
1479 const omp_for_data_loop *loop = &fd->loops[ix];
1480
1481 tree iter_type = TREE_TYPE (loop->v);
1482 tree diff_type = iter_type;
1483 tree plus_type = iter_type;
1484
1485 gcc_assert (loop->cond_code == fd->loop.cond_code);
1486
1487 if (POINTER_TYPE_P (iter_type))
1488 plus_type = sizetype;
1489 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1490 diff_type = signed_type_for (diff_type);
1491
1492 if (tiling)
1493 {
1494 tree num = build_int_cst (integer_type_node, fd->collapse);
1495 tree loop_no = build_int_cst (integer_type_node, ix);
1496 tree tile = TREE_VALUE (tiling);
1497 gcall *call
1498 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1499 /* gwv-outer=*/integer_zero_node,
1500 /* gwv-inner=*/integer_zero_node);
1501
1502 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1503 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1504 gimple_call_set_lhs (call, counts[ix].tile);
1505 gimple_set_location (call, loc);
1506 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1507
1508 tiling = TREE_CHAIN (tiling);
1509 }
1510 else
1511 {
1512 counts[ix].tile = NULL;
1513 counts[ix].outer = loop->v;
1514 }
1515
1516 tree b = loop->n1;
1517 tree e = loop->n2;
1518 tree s = loop->step;
1519 bool up = loop->cond_code == LT_EXPR;
1520 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1521 bool negating;
1522 tree expr;
1523
1524 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1525 true, GSI_SAME_STMT);
1526 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1527 true, GSI_SAME_STMT);
1528
 1529 /* Convert the step, avoiding possible unsigned->signed overflow. */
1530 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1531 if (negating)
1532 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1533 s = fold_convert (diff_type, s);
1534 if (negating)
1535 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1536 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1537 true, GSI_SAME_STMT);
1538
 1539 /* Determine the range, avoiding possible unsigned->signed overflow. */
1540 negating = !up && TYPE_UNSIGNED (iter_type);
1541 expr = fold_build2 (MINUS_EXPR, plus_type,
1542 fold_convert (plus_type, negating ? b : e),
1543 fold_convert (plus_type, negating ? e : b));
1544 expr = fold_convert (diff_type, expr);
1545 if (negating)
1546 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1547 tree range = force_gimple_operand_gsi
1548 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1549
1550 /* Determine number of iterations. */
1551 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1552 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1553 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1554
1555 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1556 true, GSI_SAME_STMT);
1557
1558 counts[ix].base = b;
1559 counts[ix].iters = iters;
1560 counts[ix].step = s;
1561
1562 total = fold_build2 (MULT_EXPR, bound_type, total,
1563 fold_convert (bound_type, iters));
1564 }
1565
1566 return total;
1567}
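/* Worked example for the iteration-count computation above (editor's
   illustration): for an upward loop with b == 0, e == 10, s == 3,

     range = 10 - 0 = 10
     iters = (range - 1 + 3) / 3 = 4

   matching the iterations 0, 3, 6 and 9.  */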
1568
1569/* Emit initializers for collapsed loop members. INNER is true if
1570 this is for the element loop of a TILE. IVAR is the outer
1571 loop iteration variable, from which collapsed loop iteration values
1572 are calculated. COUNTS array has been initialized by
 1573 expand_oacc_collapse_init. */
1574
1575static void
 1576expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1577 gimple_stmt_iterator *gsi,
1578 const oacc_collapse *counts, tree ivar)
1579{
1580 tree ivar_type = TREE_TYPE (ivar);
1581
1582 /* The most rapidly changing iteration variable is the innermost
1583 one. */
1584 for (int ix = fd->collapse; ix--;)
1585 {
1586 const omp_for_data_loop *loop = &fd->loops[ix];
1587 const oacc_collapse *collapse = &counts[ix];
1588 tree v = inner ? loop->v : collapse->outer;
1589 tree iter_type = TREE_TYPE (v);
1590 tree diff_type = TREE_TYPE (collapse->step);
1591 tree plus_type = iter_type;
1592 enum tree_code plus_code = PLUS_EXPR;
1593 tree expr;
1594
1595 if (POINTER_TYPE_P (iter_type))
1596 {
1597 plus_code = POINTER_PLUS_EXPR;
1598 plus_type = sizetype;
1599 }
1600
1601 expr = ivar;
1602 if (ix)
1603 {
1604 tree mod = fold_convert (ivar_type, collapse->iters);
1605 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1606 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1607 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1608 true, GSI_SAME_STMT);
1609 }
1610
1611 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1612 collapse->step);
1613 expr = fold_build2 (plus_code, iter_type,
1614 inner ? collapse->outer : collapse->base,
1615 fold_convert (plus_type, expr));
1616 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1617 true, GSI_SAME_STMT);
 1618 gassign *ass = gimple_build_assign (v, expr);
 1619 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1620 }
1621}
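/* Illustration (editor's sketch): with collapse == 2 and iteration counts
   { 4, 5 }, the single outer IVAR runs over [0, 20) and is decomposed
   working from the innermost loop outwards as

     inner index = ivar % 5;   outer index = ivar / 5;

   each collapsed loop variable is then assigned base + index * step
   (or outer + index * step for the element loop of a tile).  */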
1622
1623/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1624 of the combined collapse > 1 loop constructs, generate code like:
1625 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1626 if (cond3 is <)
1627 adj = STEP3 - 1;
1628 else
1629 adj = STEP3 + 1;
1630 count3 = (adj + N32 - N31) / STEP3;
1631 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1632 if (cond2 is <)
1633 adj = STEP2 - 1;
1634 else
1635 adj = STEP2 + 1;
1636 count2 = (adj + N22 - N21) / STEP2;
1637 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1638 if (cond1 is <)
1639 adj = STEP1 - 1;
1640 else
1641 adj = STEP1 + 1;
1642 count1 = (adj + N12 - N11) / STEP1;
1643 count = count1 * count2 * count3;
1644 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1645 count = 0;
1646 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1647 of the combined loop constructs, just initialize COUNTS array
1648 from the _looptemp_ clauses. */
1649
1650/* NOTE: It *could* be better to moosh all of the BBs together,
1651 creating one larger BB with all the computation and the unexpected
1652 jump at the end. I.e.
1653
1654 bool zero3, zero2, zero1, zero;
1655
1656 zero3 = N32 c3 N31;
1657 count3 = (N32 - N31) /[cl] STEP3;
1658 zero2 = N22 c2 N21;
1659 count2 = (N22 - N21) /[cl] STEP2;
1660 zero1 = N12 c1 N11;
1661 count1 = (N12 - N11) /[cl] STEP1;
1662 zero = zero3 || zero2 || zero1;
1663 count = count1 * count2 * count3;
1664 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1665
1666 After all, we expect the zero=false, and thus we expect to have to
1667 evaluate all of the comparison expressions, so short-circuiting
1668 oughtn't be a win. Since the condition isn't protecting a
1669 denominator, we're not concerned about divide-by-zero, so we can
1670 fully evaluate count even if a numerator turned out to be wrong.
1671
1672 It seems like putting this all together would create much better
1673 scheduling opportunities, and less pressure on the chip's branch
1674 predictor. */
1675
1676static void
1677expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1678 basic_block &entry_bb, tree *counts,
1679 basic_block &zero_iter1_bb, int &first_zero_iter1,
1680 basic_block &zero_iter2_bb, int &first_zero_iter2,
1681 basic_block &l2_dom_bb)
1682{
1683 tree t, type = TREE_TYPE (fd->loop.v);
1684 edge e, ne;
1685 int i;
1686
1687 /* Collapsed loops need work for expansion into SSA form. */
1688 gcc_assert (!gimple_in_ssa_p (cfun));
1689
1690 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1691 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1692 {
1693 gcc_assert (fd->ordered == 0);
1694 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1695 isn't supposed to be handled, as the inner loop doesn't
1696 use it. */
1697 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1698 OMP_CLAUSE__LOOPTEMP_);
1699 gcc_assert (innerc);
1700 for (i = 0; i < fd->collapse; i++)
1701 {
1702 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1703 OMP_CLAUSE__LOOPTEMP_);
1704 gcc_assert (innerc);
1705 if (i)
1706 counts[i] = OMP_CLAUSE_DECL (innerc);
1707 else
1708 counts[0] = NULL_TREE;
1709 }
1710 return;
1711 }
1712
1713 for (i = fd->collapse; i < fd->ordered; i++)
1714 {
1715 tree itype = TREE_TYPE (fd->loops[i].v);
1716 counts[i] = NULL_TREE;
1717 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1718 fold_convert (itype, fd->loops[i].n1),
1719 fold_convert (itype, fd->loops[i].n2));
1720 if (t && integer_zerop (t))
1721 {
1722 for (i = fd->collapse; i < fd->ordered; i++)
1723 counts[i] = build_int_cst (type, 0);
1724 break;
1725 }
1726 }
1727 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1728 {
1729 tree itype = TREE_TYPE (fd->loops[i].v);
1730
1731 if (i >= fd->collapse && counts[i])
1732 continue;
1733 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1734 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1735 fold_convert (itype, fd->loops[i].n1),
1736 fold_convert (itype, fd->loops[i].n2)))
1737 == NULL_TREE || !integer_onep (t)))
1738 {
1739 gcond *cond_stmt;
1740 tree n1, n2;
1741 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1742 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1743 true, GSI_SAME_STMT);
1744 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1745 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1746 true, GSI_SAME_STMT);
1747 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1748 NULL_TREE, NULL_TREE);
1749 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1750 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1751 expand_omp_regimplify_p, NULL, NULL)
1752 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1753 expand_omp_regimplify_p, NULL, NULL))
1754 {
1755 *gsi = gsi_for_stmt (cond_stmt);
1756 gimple_regimplify_operands (cond_stmt, gsi);
1757 }
1758 e = split_block (entry_bb, cond_stmt);
1759 basic_block &zero_iter_bb
1760 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1761 int &first_zero_iter
1762 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1763 if (zero_iter_bb == NULL)
1764 {
1765 gassign *assign_stmt;
1766 first_zero_iter = i;
1767 zero_iter_bb = create_empty_bb (entry_bb);
1768 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1769 *gsi = gsi_after_labels (zero_iter_bb);
1770 if (i < fd->collapse)
1771 assign_stmt = gimple_build_assign (fd->loop.n2,
1772 build_zero_cst (type));
1773 else
1774 {
1775 counts[i] = create_tmp_reg (type, ".count");
1776 assign_stmt
1777 = gimple_build_assign (counts[i], build_zero_cst (type));
1778 }
1779 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1780 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1781 entry_bb);
1782 }
1783 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
 1784 ne->probability = profile_probability::very_unlikely ();
 1785 e->flags = EDGE_TRUE_VALUE;
 1786 e->probability = ne->probability.invert ();
1787 if (l2_dom_bb == NULL)
1788 l2_dom_bb = entry_bb;
1789 entry_bb = e->dest;
1790 *gsi = gsi_last_bb (entry_bb);
1791 }
1792
1793 if (POINTER_TYPE_P (itype))
1794 itype = signed_type_for (itype);
1795 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1796 ? -1 : 1));
1797 t = fold_build2 (PLUS_EXPR, itype,
1798 fold_convert (itype, fd->loops[i].step), t);
1799 t = fold_build2 (PLUS_EXPR, itype, t,
1800 fold_convert (itype, fd->loops[i].n2));
1801 t = fold_build2 (MINUS_EXPR, itype, t,
1802 fold_convert (itype, fd->loops[i].n1));
 1803 /* ?? We could probably use CEIL_DIV_EXPR instead of
 1804 TRUNC_DIV_EXPR plus adjusting by hand. Unless we can't
 1805 generate the same code in the end because generically we
 1806 don't know that the values involved must be negative for
 1807 GT?? */
1808 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1809 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1810 fold_build1 (NEGATE_EXPR, itype, t),
1811 fold_build1 (NEGATE_EXPR, itype,
1812 fold_convert (itype,
1813 fd->loops[i].step)));
1814 else
1815 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1816 fold_convert (itype, fd->loops[i].step));
1817 t = fold_convert (type, t);
1818 if (TREE_CODE (t) == INTEGER_CST)
1819 counts[i] = t;
1820 else
1821 {
1822 if (i < fd->collapse || i != first_zero_iter2)
1823 counts[i] = create_tmp_reg (type, ".count");
1824 expand_omp_build_assign (gsi, counts[i], t);
1825 }
1826 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1827 {
1828 if (i == 0)
1829 t = counts[0];
1830 else
1831 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1832 expand_omp_build_assign (gsi, fd->loop.n2, t);
1833 }
1834 }
1835}
1836
1837/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1838 T = V;
1839 V3 = N31 + (T % count3) * STEP3;
1840 T = T / count3;
1841 V2 = N21 + (T % count2) * STEP2;
1842 T = T / count2;
1843 V1 = N11 + T * STEP1;
1844 if this loop doesn't have an inner loop construct combined with it.
1845 If it does have an inner loop construct combined with it and the
1846 iteration count isn't known constant, store values from counts array
1847 into its _looptemp_ temporaries instead. */
1848
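/* Worked example (illustrative only): with count3 == 4 and count2 == 5,
   a logical iteration number T == 23 is decoded as
     V3 = N31 + (23 % 4) * STEP3;   T = 23 / 4;   now T == 5
     V2 = N21 + (5 % 5) * STEP2;    T = 5 / 5;    now T == 1
     V1 = N11 + 1 * STEP1;
   i.e. the flattened iteration number is decomposed innermost-first,
   like the digits of a mixed-radix number.  */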
1849static void
1850expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1851 tree *counts, gimple *inner_stmt, tree startvar)
1852{
1853 int i;
1854 if (gimple_omp_for_combined_p (fd->for_stmt))
1855 {
1856 /* If fd->loop.n2 is constant, then no propagation of the counts
1857 is needed, they are constant. */
1858 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1859 return;
1860
1861 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1862 ? gimple_omp_taskreg_clauses (inner_stmt)
1863 : gimple_omp_for_clauses (inner_stmt);
1864 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1865 isn't supposed to be handled, as the inner loop doesn't
1866 use it. */
1867 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1868 gcc_assert (innerc);
1869 for (i = 0; i < fd->collapse; i++)
1870 {
1871 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1872 OMP_CLAUSE__LOOPTEMP_);
1873 gcc_assert (innerc);
1874 if (i)
1875 {
1876 tree tem = OMP_CLAUSE_DECL (innerc);
1877 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1878 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1879 false, GSI_CONTINUE_LINKING);
1880 gassign *stmt = gimple_build_assign (tem, t);
1881 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1882 }
1883 }
1884 return;
1885 }
1886
1887 tree type = TREE_TYPE (fd->loop.v);
1888 tree tem = create_tmp_reg (type, ".tem");
1889 gassign *stmt = gimple_build_assign (tem, startvar);
1890 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1891
1892 for (i = fd->collapse - 1; i >= 0; i--)
1893 {
1894 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1895 itype = vtype;
1896 if (POINTER_TYPE_P (vtype))
1897 itype = signed_type_for (vtype);
1898 if (i != 0)
1899 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1900 else
1901 t = tem;
1902 t = fold_convert (itype, t);
1903 t = fold_build2 (MULT_EXPR, itype, t,
1904 fold_convert (itype, fd->loops[i].step));
1905 if (POINTER_TYPE_P (vtype))
1906 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1907 else
1908 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1909 t = force_gimple_operand_gsi (gsi, t,
1910 DECL_P (fd->loops[i].v)
1911 && TREE_ADDRESSABLE (fd->loops[i].v),
1912 NULL_TREE, false,
1913 GSI_CONTINUE_LINKING);
1914 stmt = gimple_build_assign (fd->loops[i].v, t);
1915 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1916 if (i != 0)
1917 {
1918 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1919 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1920 false, GSI_CONTINUE_LINKING);
1921 stmt = gimple_build_assign (tem, t);
1922 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1923 }
1924 }
1925}
1926
1927/* Helper function for expand_omp_for_*. Generate code like:
1928 L10:
1929 V3 += STEP3;
1930 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1931 L11:
1932 V3 = N31;
1933 V2 += STEP2;
1934 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1935 L12:
1936 V2 = N21;
1937 V1 += STEP1;
1938 goto BODY_BB; */
1939
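/* Note (added for clarity): this is the increment counterpart of
   expand_omp_for_init_vars above.  The innermost index is bumped first;
   when it runs past its bound it is reset to its lower bound and the
   next outer index is bumped instead, exactly like carrying in a
   mixed-radix counter.  */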
1940static basic_block
1941extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1942 basic_block body_bb)
1943{
1944 basic_block last_bb, bb, collapse_bb = NULL;
1945 int i;
1946 gimple_stmt_iterator gsi;
1947 edge e;
1948 tree t;
1949 gimple *stmt;
1950
1951 last_bb = cont_bb;
1952 for (i = fd->collapse - 1; i >= 0; i--)
1953 {
1954 tree vtype = TREE_TYPE (fd->loops[i].v);
1955
1956 bb = create_empty_bb (last_bb);
1957 add_bb_to_loop (bb, last_bb->loop_father);
1958 gsi = gsi_start_bb (bb);
1959
1960 if (i < fd->collapse - 1)
1961 {
1962 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
 1963 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1964
1965 t = fd->loops[i + 1].n1;
1966 t = force_gimple_operand_gsi (&gsi, t,
1967 DECL_P (fd->loops[i + 1].v)
1968 && TREE_ADDRESSABLE (fd->loops[i
1969 + 1].v),
1970 NULL_TREE, false,
1971 GSI_CONTINUE_LINKING);
1972 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1973 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1974 }
1975 else
1976 collapse_bb = bb;
1977
1978 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1979
1980 if (POINTER_TYPE_P (vtype))
1981 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1982 else
1983 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1984 t = force_gimple_operand_gsi (&gsi, t,
1985 DECL_P (fd->loops[i].v)
1986 && TREE_ADDRESSABLE (fd->loops[i].v),
1987 NULL_TREE, false, GSI_CONTINUE_LINKING);
1988 stmt = gimple_build_assign (fd->loops[i].v, t);
1989 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1990
1991 if (i > 0)
1992 {
1993 t = fd->loops[i].n2;
1994 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1995 false, GSI_CONTINUE_LINKING);
1996 tree v = fd->loops[i].v;
1997 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1998 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1999 false, GSI_CONTINUE_LINKING);
2000 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2001 stmt = gimple_build_cond_empty (t);
2002 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2003 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
 2004 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2005 }
2006 else
2007 make_edge (bb, body_bb, EDGE_FALLTHRU);
2008 last_bb = bb;
2009 }
2010
2011 return collapse_bb;
2012}
2013
2014/* Expand #pragma omp ordered depend(source). */
2015
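/* For illustration (assumed user code; n, m and the body are
   placeholders): in

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend (sink: i - 1, j) depend (sink: i, j - 1)
	   ... use results of iterations (i - 1, j) and (i, j - 1) ...
	   #pragma omp ordered depend (source)
	 }

   each depend(source) expands to the GOMP_doacross_post (or _ull_post)
   call built here, and each depend(sink) to a GOMP_doacross_wait (or
   _ull_wait) call built by expand_omp_ordered_sink below.  */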
2016static void
2017expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2018 tree *counts, location_t loc)
2019{
2020 enum built_in_function source_ix
2021 = fd->iter_type == long_integer_type_node
2022 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2023 gimple *g
2024 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2025 build_fold_addr_expr (counts[fd->ordered]));
2026 gimple_set_location (g, loc);
2027 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2028}
2029
2030/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2031
2032static void
2033expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2034 tree *counts, tree c, location_t loc)
2035{
2036 auto_vec<tree, 10> args;
2037 enum built_in_function sink_ix
2038 = fd->iter_type == long_integer_type_node
2039 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2040 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2041 int i;
2042 gimple_stmt_iterator gsi2 = *gsi;
2043 bool warned_step = false;
2044
2045 for (i = 0; i < fd->ordered; i++)
2046 {
2047 tree step = NULL_TREE;
2048 off = TREE_PURPOSE (deps);
2049 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2050 {
2051 step = TREE_OPERAND (off, 1);
2052 off = TREE_OPERAND (off, 0);
2053 }
2054 if (!integer_zerop (off))
2055 {
2056 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2057 || fd->loops[i].cond_code == GT_EXPR);
2058 bool forward = fd->loops[i].cond_code == LT_EXPR;
2059 if (step)
2060 {
 2061 /* Non-simple Fortran DO loops. If step is variable,
 2062 we don't know even the direction at compile time, so
 2063 we can't warn. */
2064 if (TREE_CODE (step) != INTEGER_CST)
2065 break;
2066 forward = tree_int_cst_sgn (step) != -1;
2067 }
2068 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2069 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2070 "lexically later iteration");
2071 break;
2072 }
2073 deps = TREE_CHAIN (deps);
2074 }
2075 /* If all offsets corresponding to the collapsed loops are zero,
2076 this depend clause can be ignored. FIXME: but there is still a
2077 flush needed. We need to emit one __sync_synchronize () for it
2078 though (perhaps conditionally)? Solve this together with the
2079 conservative dependence folding optimization.
2080 if (i >= fd->collapse)
2081 return; */
2082
2083 deps = OMP_CLAUSE_DECL (c);
2084 gsi_prev (&gsi2);
2085 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2086 edge e2 = split_block_after_labels (e1->dest);
2087
2088 gsi2 = gsi_after_labels (e1->dest);
2089 *gsi = gsi_last_bb (e1->src);
2090 for (i = 0; i < fd->ordered; i++)
2091 {
2092 tree itype = TREE_TYPE (fd->loops[i].v);
2093 tree step = NULL_TREE;
2094 tree orig_off = NULL_TREE;
2095 if (POINTER_TYPE_P (itype))
2096 itype = sizetype;
2097 if (i)
2098 deps = TREE_CHAIN (deps);
2099 off = TREE_PURPOSE (deps);
2100 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2101 {
2102 step = TREE_OPERAND (off, 1);
2103 off = TREE_OPERAND (off, 0);
2104 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2105 && integer_onep (fd->loops[i].step)
2106 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2107 }
2108 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2109 if (step)
2110 {
2111 off = fold_convert_loc (loc, itype, off);
2112 orig_off = off;
2113 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2114 }
2115
2116 if (integer_zerop (off))
2117 t = boolean_true_node;
2118 else
2119 {
2120 tree a;
2121 tree co = fold_convert_loc (loc, itype, off);
2122 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2123 {
2124 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2125 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2126 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2127 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2128 co);
2129 }
2130 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2131 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2132 fd->loops[i].v, co);
2133 else
2134 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2135 fd->loops[i].v, co);
2136 if (step)
2137 {
2138 tree t1, t2;
2139 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2140 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2141 fd->loops[i].n1);
2142 else
2143 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2144 fd->loops[i].n2);
2145 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2146 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2147 fd->loops[i].n2);
2148 else
2149 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2150 fd->loops[i].n1);
2151 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2152 step, build_int_cst (TREE_TYPE (step), 0));
2153 if (TREE_CODE (step) != INTEGER_CST)
2154 {
2155 t1 = unshare_expr (t1);
2156 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2157 false, GSI_CONTINUE_LINKING);
2158 t2 = unshare_expr (t2);
2159 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2160 false, GSI_CONTINUE_LINKING);
2161 }
2162 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2163 t, t2, t1);
2164 }
2165 else if (fd->loops[i].cond_code == LT_EXPR)
2166 {
2167 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2168 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2169 fd->loops[i].n1);
2170 else
2171 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2172 fd->loops[i].n2);
2173 }
2174 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2175 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2176 fd->loops[i].n2);
2177 else
2178 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2179 fd->loops[i].n1);
2180 }
2181 if (cond)
2182 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2183 else
2184 cond = t;
2185
2186 off = fold_convert_loc (loc, itype, off);
2187
2188 if (step
2189 || (fd->loops[i].cond_code == LT_EXPR
2190 ? !integer_onep (fd->loops[i].step)
2191 : !integer_minus_onep (fd->loops[i].step)))
2192 {
2193 if (step == NULL_TREE
2194 && TYPE_UNSIGNED (itype)
2195 && fd->loops[i].cond_code == GT_EXPR)
2196 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2197 fold_build1_loc (loc, NEGATE_EXPR, itype,
2198 s));
2199 else
2200 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2201 orig_off ? orig_off : off, s);
2202 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2203 build_int_cst (itype, 0));
2204 if (integer_zerop (t) && !warned_step)
2205 {
2206 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2207 "in the iteration space");
2208 warned_step = true;
2209 }
2210 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2211 cond, t);
2212 }
2213
2214 if (i <= fd->collapse - 1 && fd->collapse > 1)
2215 t = fd->loop.v;
2216 else if (counts[i])
2217 t = counts[i];
2218 else
2219 {
2220 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2221 fd->loops[i].v, fd->loops[i].n1);
2222 t = fold_convert_loc (loc, fd->iter_type, t);
2223 }
2224 if (step)
2225 /* We have divided off by step already earlier. */;
2226 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2227 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2228 fold_build1_loc (loc, NEGATE_EXPR, itype,
2229 s));
2230 else
2231 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2232 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2233 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2234 off = fold_convert_loc (loc, fd->iter_type, off);
2235 if (i <= fd->collapse - 1 && fd->collapse > 1)
2236 {
2237 if (i)
2238 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2239 off);
2240 if (i < fd->collapse - 1)
2241 {
2242 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2243 counts[i]);
2244 continue;
2245 }
2246 }
2247 off = unshare_expr (off);
2248 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2249 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 args.safe_push (t);
2252 }
2253 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2254 gimple_set_location (g, loc);
2255 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2256
2257 cond = unshare_expr (cond);
2258 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2259 GSI_CONTINUE_LINKING);
2260 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2261 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2262 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2263 e1->probability = e3->probability.invert ();
2264 e1->flags = EDGE_TRUE_VALUE;
2265 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2266
2267 *gsi = gsi_after_labels (e2->dest);
2268}
2269
2270/* Expand all #pragma omp ordered depend(source) and
2271 #pragma omp ordered depend(sink:...) constructs in the current
2272 #pragma omp for ordered(n) region. */
2273
2274static void
2275expand_omp_ordered_source_sink (struct omp_region *region,
2276 struct omp_for_data *fd, tree *counts,
2277 basic_block cont_bb)
2278{
2279 struct omp_region *inner;
2280 int i;
2281 for (i = fd->collapse - 1; i < fd->ordered; i++)
2282 if (i == fd->collapse - 1 && fd->collapse > 1)
2283 counts[i] = NULL_TREE;
2284 else if (i >= fd->collapse && !cont_bb)
2285 counts[i] = build_zero_cst (fd->iter_type);
2286 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2287 && integer_onep (fd->loops[i].step))
2288 counts[i] = NULL_TREE;
2289 else
2290 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2291 tree atype
2292 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2293 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2294 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2295
2296 for (inner = region->inner; inner; inner = inner->next)
2297 if (inner->type == GIMPLE_OMP_ORDERED)
2298 {
2299 gomp_ordered *ord_stmt = inner->ord_stmt;
2300 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2301 location_t loc = gimple_location (ord_stmt);
2302 tree c;
2303 for (c = gimple_omp_ordered_clauses (ord_stmt);
2304 c; c = OMP_CLAUSE_CHAIN (c))
2305 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2306 break;
2307 if (c)
2308 expand_omp_ordered_source (&gsi, fd, counts, loc);
2309 for (c = gimple_omp_ordered_clauses (ord_stmt);
2310 c; c = OMP_CLAUSE_CHAIN (c))
2311 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2312 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2313 gsi_remove (&gsi, true);
2314 }
2315}
2316
2317/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2318 collapsed. */
2319
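/* Illustrative example (n and m are placeholders): for

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 { ... }

   only the outer loop is handed to the work-sharing construct
   (fd->collapse == 1, fd->ordered == 2), so the j loop has to be
   rebuilt here around the loop body.  */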
2320static basic_block
2321expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2322 basic_block cont_bb, basic_block body_bb,
2323 bool ordered_lastprivate)
2324{
2325 if (fd->ordered == fd->collapse)
2326 return cont_bb;
2327
2328 if (!cont_bb)
2329 {
2330 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2331 for (int i = fd->collapse; i < fd->ordered; i++)
2332 {
2333 tree type = TREE_TYPE (fd->loops[i].v);
2334 tree n1 = fold_convert (type, fd->loops[i].n1);
2335 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2336 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2337 size_int (i - fd->collapse + 1),
2338 NULL_TREE, NULL_TREE);
2339 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2340 }
2341 return NULL;
2342 }
2343
2344 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2345 {
2346 tree t, type = TREE_TYPE (fd->loops[i].v);
2347 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2348 expand_omp_build_assign (&gsi, fd->loops[i].v,
2349 fold_convert (type, fd->loops[i].n1));
2350 if (counts[i])
2351 expand_omp_build_assign (&gsi, counts[i],
2352 build_zero_cst (fd->iter_type));
2353 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2354 size_int (i - fd->collapse + 1),
2355 NULL_TREE, NULL_TREE);
2356 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2357 if (!gsi_end_p (gsi))
2358 gsi_prev (&gsi);
2359 else
2360 gsi = gsi_last_bb (body_bb);
2361 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2362 basic_block new_body = e1->dest;
2363 if (body_bb == cont_bb)
2364 cont_bb = new_body;
2365 edge e2 = NULL;
2366 basic_block new_header;
2367 if (EDGE_COUNT (cont_bb->preds) > 0)
2368 {
2369 gsi = gsi_last_bb (cont_bb);
2370 if (POINTER_TYPE_P (type))
2371 t = fold_build_pointer_plus (fd->loops[i].v,
2372 fold_convert (sizetype,
2373 fd->loops[i].step));
2374 else
2375 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2376 fold_convert (type, fd->loops[i].step));
2377 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2378 if (counts[i])
2379 {
2380 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2381 build_int_cst (fd->iter_type, 1));
2382 expand_omp_build_assign (&gsi, counts[i], t);
2383 t = counts[i];
2384 }
2385 else
2386 {
2387 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2388 fd->loops[i].v, fd->loops[i].n1);
2389 t = fold_convert (fd->iter_type, t);
2390 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2391 true, GSI_SAME_STMT);
2392 }
2393 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2394 size_int (i - fd->collapse + 1),
2395 NULL_TREE, NULL_TREE);
2396 expand_omp_build_assign (&gsi, aref, t);
2397 gsi_prev (&gsi);
2398 e2 = split_block (cont_bb, gsi_stmt (gsi));
2399 new_header = e2->dest;
2400 }
2401 else
2402 new_header = cont_bb;
2403 gsi = gsi_after_labels (new_header);
2404 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2405 true, GSI_SAME_STMT);
2406 tree n2
2407 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2408 true, NULL_TREE, true, GSI_SAME_STMT);
2409 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2410 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2411 edge e3 = split_block (new_header, gsi_stmt (gsi));
2412 cont_bb = e3->dest;
2413 remove_edge (e1);
2414 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2415 e3->flags = EDGE_FALSE_VALUE;
 2416 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
 2417 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
 2418 e1->probability = e3->probability.invert ();
2419
2420 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2421 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2422
2423 if (e2)
2424 {
2425 struct loop *loop = alloc_loop ();
2426 loop->header = new_header;
2427 loop->latch = e2->src;
2428 add_loop (loop, body_bb->loop_father);
2429 }
2430 }
2431
2432 /* If there are any lastprivate clauses and it is possible some loops
2433 might have zero iterations, ensure all the decls are initialized,
2434 otherwise we could crash evaluating C++ class iterators with lastprivate
2435 clauses. */
2436 bool need_inits = false;
2437 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2438 if (need_inits)
2439 {
2440 tree type = TREE_TYPE (fd->loops[i].v);
2441 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2442 expand_omp_build_assign (&gsi, fd->loops[i].v,
2443 fold_convert (type, fd->loops[i].n1));
2444 }
2445 else
2446 {
2447 tree type = TREE_TYPE (fd->loops[i].v);
2448 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2449 boolean_type_node,
2450 fold_convert (type, fd->loops[i].n1),
2451 fold_convert (type, fd->loops[i].n2));
2452 if (!integer_onep (this_cond))
2453 need_inits = true;
2454 }
2455
2456 return cont_bb;
2457}
2458
2459/* A subroutine of expand_omp_for. Generate code for a parallel
2460 loop with any schedule. Given parameters:
2461
2462 for (V = N1; V cond N2; V += STEP) BODY;
2463
2464 where COND is "<" or ">", we generate pseudocode
2465
2466 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2467 if (more) goto L0; else goto L3;
2468 L0:
2469 V = istart0;
2470 iend = iend0;
2471 L1:
2472 BODY;
2473 V += STEP;
2474 if (V cond iend) goto L1; else goto L2;
2475 L2:
2476 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2477 L3:
2478
2479 If this is a combined omp parallel loop, instead of the call to
2480 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2481 If this is gimple_omp_for_combined_p loop, then instead of assigning
2482 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2483 inner GIMPLE_OMP_FOR and V += STEP; and
2484 if (V cond iend) goto L1; else goto L2; are removed.
2485
2486 For collapsed loops, given parameters:
2487 collapse(3)
2488 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2489 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2490 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2491 BODY;
2492
2493 we generate pseudocode
2494
2495 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2496 if (cond3 is <)
2497 adj = STEP3 - 1;
2498 else
2499 adj = STEP3 + 1;
2500 count3 = (adj + N32 - N31) / STEP3;
2501 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2502 if (cond2 is <)
2503 adj = STEP2 - 1;
2504 else
2505 adj = STEP2 + 1;
2506 count2 = (adj + N22 - N21) / STEP2;
2507 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2508 if (cond1 is <)
2509 adj = STEP1 - 1;
2510 else
2511 adj = STEP1 + 1;
2512 count1 = (adj + N12 - N11) / STEP1;
2513 count = count1 * count2 * count3;
2514 goto Z1;
2515 Z0:
2516 count = 0;
2517 Z1:
2518 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2519 if (more) goto L0; else goto L3;
2520 L0:
2521 V = istart0;
2522 T = V;
2523 V3 = N31 + (T % count3) * STEP3;
2524 T = T / count3;
2525 V2 = N21 + (T % count2) * STEP2;
2526 T = T / count2;
2527 V1 = N11 + T * STEP1;
2528 iend = iend0;
2529 L1:
2530 BODY;
2531 V += 1;
2532 if (V < iend) goto L10; else goto L2;
2533 L10:
2534 V3 += STEP3;
2535 if (V3 cond3 N32) goto L1; else goto L11;
2536 L11:
2537 V3 = N31;
2538 V2 += STEP2;
2539 if (V2 cond2 N22) goto L1; else goto L12;
2540 L12:
2541 V2 = N21;
2542 V1 += STEP1;
2543 goto L1;
2544 L2:
2545 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2546 L3:
2547
2548 */
2549
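/* Concrete (illustrative) instantiation: for schedule(dynamic, CHUNK)
   the caller typically passes BUILT_IN_GOMP_LOOP_DYNAMIC_START and
   BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT as START_FN and NEXT_FN, so the
   pseudocode above becomes a loop that repeatedly fetches half-open
   chunks [istart0, iend0) from libgomp until the next call returns
   false.  */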
2550static void
2551expand_omp_for_generic (struct omp_region *region,
2552 struct omp_for_data *fd,
2553 enum built_in_function start_fn,
2554 enum built_in_function next_fn,
2555 gimple *inner_stmt)
2556{
2557 tree type, istart0, iend0, iend;
2558 tree t, vmain, vback, bias = NULL_TREE;
2559 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2560 basic_block l2_bb = NULL, l3_bb = NULL;
2561 gimple_stmt_iterator gsi;
2562 gassign *assign_stmt;
2563 bool in_combined_parallel = is_combined_parallel (region);
2564 bool broken_loop = region->cont == NULL;
2565 edge e, ne;
2566 tree *counts = NULL;
2567 int i;
2568 bool ordered_lastprivate = false;
2569
2570 gcc_assert (!broken_loop || !in_combined_parallel);
2571 gcc_assert (fd->iter_type == long_integer_type_node
2572 || !in_combined_parallel);
2573
2574 entry_bb = region->entry;
2575 cont_bb = region->cont;
2576 collapse_bb = NULL;
2577 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2578 gcc_assert (broken_loop
2579 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2580 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2581 l1_bb = single_succ (l0_bb);
2582 if (!broken_loop)
2583 {
2584 l2_bb = create_empty_bb (cont_bb);
2585 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2586 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2587 == l1_bb));
2588 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2589 }
2590 else
2591 l2_bb = NULL;
2592 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2593 exit_bb = region->exit;
2594
2595 gsi = gsi_last_bb (entry_bb);
2596
2597 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2598 if (fd->ordered
2599 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2600 OMP_CLAUSE_LASTPRIVATE))
 2601 ordered_lastprivate = true;
2602 if (fd->collapse > 1 || fd->ordered)
2603 {
2604 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2605 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2606
2607 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2608 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2609 zero_iter1_bb, first_zero_iter1,
2610 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2611
2612 if (zero_iter1_bb)
2613 {
2614 /* Some counts[i] vars might be uninitialized if
2615 some loop has zero iterations. But the body shouldn't
2616 be executed in that case, so just avoid uninit warnings. */
2617 for (i = first_zero_iter1;
2618 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2619 if (SSA_VAR_P (counts[i]))
2620 TREE_NO_WARNING (counts[i]) = 1;
2621 gsi_prev (&gsi);
2622 e = split_block (entry_bb, gsi_stmt (gsi));
2623 entry_bb = e->dest;
2624 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2625 gsi = gsi_last_bb (entry_bb);
2626 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2627 get_immediate_dominator (CDI_DOMINATORS,
2628 zero_iter1_bb));
2629 }
2630 if (zero_iter2_bb)
2631 {
2632 /* Some counts[i] vars might be uninitialized if
2633 some loop has zero iterations. But the body shouldn't
2634 be executed in that case, so just avoid uninit warnings. */
2635 for (i = first_zero_iter2; i < fd->ordered; i++)
2636 if (SSA_VAR_P (counts[i]))
2637 TREE_NO_WARNING (counts[i]) = 1;
2638 if (zero_iter1_bb)
2639 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2640 else
2641 {
2642 gsi_prev (&gsi);
2643 e = split_block (entry_bb, gsi_stmt (gsi));
2644 entry_bb = e->dest;
2645 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2646 gsi = gsi_last_bb (entry_bb);
2647 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2648 get_immediate_dominator
2649 (CDI_DOMINATORS, zero_iter2_bb));
2650 }
2651 }
2652 if (fd->collapse == 1)
2653 {
2654 counts[0] = fd->loop.n2;
2655 fd->loop = fd->loops[0];
2656 }
2657 }
2658
2659 type = TREE_TYPE (fd->loop.v);
2660 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2661 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2662 TREE_ADDRESSABLE (istart0) = 1;
2663 TREE_ADDRESSABLE (iend0) = 1;
2664
2665 /* See if we need to bias by LLONG_MIN. */
2666 if (fd->iter_type == long_long_unsigned_type_node
2667 && TREE_CODE (type) == INTEGER_TYPE
2668 && !TYPE_UNSIGNED (type)
2669 && fd->ordered == 0)
2670 {
2671 tree n1, n2;
2672
2673 if (fd->loop.cond_code == LT_EXPR)
2674 {
2675 n1 = fd->loop.n1;
2676 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2677 }
2678 else
2679 {
2680 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2681 n2 = fd->loop.n1;
2682 }
2683 if (TREE_CODE (n1) != INTEGER_CST
2684 || TREE_CODE (n2) != INTEGER_CST
2685 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2686 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2687 }
2688
2689 gimple_stmt_iterator gsif = gsi;
2690 gsi_prev (&gsif);
2691
2692 tree arr = NULL_TREE;
2693 if (in_combined_parallel)
2694 {
2695 gcc_assert (fd->ordered == 0);
2696 /* In a combined parallel loop, emit a call to
2697 GOMP_loop_foo_next. */
2698 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2699 build_fold_addr_expr (istart0),
2700 build_fold_addr_expr (iend0));
2701 }
2702 else
2703 {
2704 tree t0, t1, t2, t3, t4;
2705 /* If this is not a combined parallel loop, emit a call to
2706 GOMP_loop_foo_start in ENTRY_BB. */
2707 t4 = build_fold_addr_expr (iend0);
2708 t3 = build_fold_addr_expr (istart0);
2709 if (fd->ordered)
2710 {
2711 t0 = build_int_cst (unsigned_type_node,
2712 fd->ordered - fd->collapse + 1);
2713 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2714 fd->ordered
2715 - fd->collapse + 1),
2716 ".omp_counts");
2717 DECL_NAMELESS (arr) = 1;
2718 TREE_ADDRESSABLE (arr) = 1;
2719 TREE_STATIC (arr) = 1;
2720 vec<constructor_elt, va_gc> *v;
2721 vec_alloc (v, fd->ordered - fd->collapse + 1);
2722 int idx;
2723
2724 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2725 {
2726 tree c;
2727 if (idx == 0 && fd->collapse > 1)
2728 c = fd->loop.n2;
2729 else
2730 c = counts[idx + fd->collapse - 1];
2731 tree purpose = size_int (idx);
2732 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2733 if (TREE_CODE (c) != INTEGER_CST)
2734 TREE_STATIC (arr) = 0;
2735 }
2736
2737 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2738 if (!TREE_STATIC (arr))
2739 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2740 void_type_node, arr),
2741 true, NULL_TREE, true, GSI_SAME_STMT);
2742 t1 = build_fold_addr_expr (arr);
2743 t2 = NULL_TREE;
2744 }
2745 else
2746 {
2747 t2 = fold_convert (fd->iter_type, fd->loop.step);
2748 t1 = fd->loop.n2;
2749 t0 = fd->loop.n1;
2750 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2751 {
2752 tree innerc
2753 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2754 OMP_CLAUSE__LOOPTEMP_);
2755 gcc_assert (innerc);
2756 t0 = OMP_CLAUSE_DECL (innerc);
2757 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2758 OMP_CLAUSE__LOOPTEMP_);
2759 gcc_assert (innerc);
2760 t1 = OMP_CLAUSE_DECL (innerc);
2761 }
2762 if (POINTER_TYPE_P (TREE_TYPE (t0))
2763 && TYPE_PRECISION (TREE_TYPE (t0))
2764 != TYPE_PRECISION (fd->iter_type))
2765 {
2766 /* Avoid casting pointers to integer of a different size. */
2767 tree itype = signed_type_for (type);
2768 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2769 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2770 }
2771 else
2772 {
2773 t1 = fold_convert (fd->iter_type, t1);
2774 t0 = fold_convert (fd->iter_type, t0);
2775 }
2776 if (bias)
2777 {
2778 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2779 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2780 }
2781 }
2782 if (fd->iter_type == long_integer_type_node || fd->ordered)
2783 {
2784 if (fd->chunk_size)
2785 {
2786 t = fold_convert (fd->iter_type, fd->chunk_size);
2787 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2788 if (fd->ordered)
2789 t = build_call_expr (builtin_decl_explicit (start_fn),
2790 5, t0, t1, t, t3, t4);
2791 else
2792 t = build_call_expr (builtin_decl_explicit (start_fn),
2793 6, t0, t1, t2, t, t3, t4);
2794 }
2795 else if (fd->ordered)
2796 t = build_call_expr (builtin_decl_explicit (start_fn),
2797 4, t0, t1, t3, t4);
2798 else
2799 t = build_call_expr (builtin_decl_explicit (start_fn),
2800 5, t0, t1, t2, t3, t4);
2801 }
2802 else
2803 {
2804 tree t5;
2805 tree c_bool_type;
2806 tree bfn_decl;
2807
2808 /* The GOMP_loop_ull_*start functions have additional boolean
2809 argument, true for < loops and false for > loops.
2810 In Fortran, the C bool type can be different from
2811 boolean_type_node. */
2812 bfn_decl = builtin_decl_explicit (start_fn);
2813 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2814 t5 = build_int_cst (c_bool_type,
2815 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2816 if (fd->chunk_size)
2817 {
2818 tree bfn_decl = builtin_decl_explicit (start_fn);
2819 t = fold_convert (fd->iter_type, fd->chunk_size);
2820 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2821 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2822 }
2823 else
2824 t = build_call_expr (builtin_decl_explicit (start_fn),
2825 6, t5, t0, t1, t2, t3, t4);
2826 }
2827 }
2828 if (TREE_TYPE (t) != boolean_type_node)
2829 t = fold_build2 (NE_EXPR, boolean_type_node,
2830 t, build_int_cst (TREE_TYPE (t), 0));
2831 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
 2832 true, GSI_SAME_STMT);
2833 if (arr && !TREE_STATIC (arr))
2834 {
2835 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2836 TREE_THIS_VOLATILE (clobber) = 1;
2837 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2838 GSI_SAME_STMT);
2839 }
2840 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2841
2842 /* Remove the GIMPLE_OMP_FOR statement. */
2843 gsi_remove (&gsi, true);
2844
2845 if (gsi_end_p (gsif))
2846 gsif = gsi_after_labels (gsi_bb (gsif));
2847 gsi_next (&gsif);
2848
2849 /* Iteration setup for sequential loop goes in L0_BB. */
2850 tree startvar = fd->loop.v;
2851 tree endvar = NULL_TREE;
2852
2853 if (gimple_omp_for_combined_p (fd->for_stmt))
2854 {
2855 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2856 && gimple_omp_for_kind (inner_stmt)
2857 == GF_OMP_FOR_KIND_SIMD);
2858 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2859 OMP_CLAUSE__LOOPTEMP_);
2860 gcc_assert (innerc);
2861 startvar = OMP_CLAUSE_DECL (innerc);
2862 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2863 OMP_CLAUSE__LOOPTEMP_);
2864 gcc_assert (innerc);
2865 endvar = OMP_CLAUSE_DECL (innerc);
2866 }
2867
2868 gsi = gsi_start_bb (l0_bb);
2869 t = istart0;
2870 if (fd->ordered && fd->collapse == 1)
2871 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2872 fold_convert (fd->iter_type, fd->loop.step));
2873 else if (bias)
2874 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2875 if (fd->ordered && fd->collapse == 1)
2876 {
2877 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2878 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2879 fd->loop.n1, fold_convert (sizetype, t));
2880 else
2881 {
2882 t = fold_convert (TREE_TYPE (startvar), t);
2883 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2884 fd->loop.n1, t);
2885 }
2886 }
2887 else
2888 {
2889 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2890 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2891 t = fold_convert (TREE_TYPE (startvar), t);
2892 }
2893 t = force_gimple_operand_gsi (&gsi, t,
2894 DECL_P (startvar)
2895 && TREE_ADDRESSABLE (startvar),
2896 NULL_TREE, false, GSI_CONTINUE_LINKING);
2897 assign_stmt = gimple_build_assign (startvar, t);
2898 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2899
2900 t = iend0;
2901 if (fd->ordered && fd->collapse == 1)
2902 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2903 fold_convert (fd->iter_type, fd->loop.step));
2904 else if (bias)
2905 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2906 if (fd->ordered && fd->collapse == 1)
2907 {
2908 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2909 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2910 fd->loop.n1, fold_convert (sizetype, t));
2911 else
2912 {
2913 t = fold_convert (TREE_TYPE (startvar), t);
2914 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2915 fd->loop.n1, t);
2916 }
2917 }
2918 else
2919 {
2920 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2921 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2922 t = fold_convert (TREE_TYPE (startvar), t);
2923 }
2924 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2925 false, GSI_CONTINUE_LINKING);
2926 if (endvar)
2927 {
2928 assign_stmt = gimple_build_assign (endvar, iend);
2929 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2930 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2931 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2932 else
2933 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2934 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2935 }
2936 /* Handle linear clause adjustments. */
2937 tree itercnt = NULL_TREE;
2938 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2939 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2940 c; c = OMP_CLAUSE_CHAIN (c))
2941 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2942 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2943 {
2944 tree d = OMP_CLAUSE_DECL (c);
2945 bool is_ref = omp_is_reference (d);
2946 tree t = d, a, dest;
2947 if (is_ref)
2948 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2949 tree type = TREE_TYPE (t);
2950 if (POINTER_TYPE_P (type))
2951 type = sizetype;
2952 dest = unshare_expr (t);
2953 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2954 expand_omp_build_assign (&gsif, v, t);
2955 if (itercnt == NULL_TREE)
2956 {
2957 itercnt = startvar;
2958 tree n1 = fd->loop.n1;
2959 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2960 {
2961 itercnt
2962 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2963 itercnt);
2964 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2965 }
2966 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2967 itercnt, n1);
2968 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2969 itercnt, fd->loop.step);
2970 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2971 NULL_TREE, false,
2972 GSI_CONTINUE_LINKING);
2973 }
2974 a = fold_build2 (MULT_EXPR, type,
2975 fold_convert (type, itercnt),
2976 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2977 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2978 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2979 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2980 false, GSI_CONTINUE_LINKING);
2981 assign_stmt = gimple_build_assign (dest, t);
2982 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2983 }
2984 if (fd->collapse > 1)
2985 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2986
2987 if (fd->ordered)
2988 {
 2989 /* Until now, the counts array contained the number of iterations
 2990 (or a variable holding it) for the ith loop. From now on, we need
 2991 those counts only for the collapsed loops, and only for the 2nd
 2992 through the last collapsed one. Move them one element earlier;
 2993 we'll use counts[fd->collapse - 1] for the first source/sink
 2994 iteration counter and so on, and counts[fd->ordered]
 2995 as the array holding the current counter values for
 2996 depend(source). */
2997 if (fd->collapse > 1)
2998 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2999 if (broken_loop)
3000 {
3001 int i;
3002 for (i = fd->collapse; i < fd->ordered; i++)
3003 {
3004 tree type = TREE_TYPE (fd->loops[i].v);
3005 tree this_cond
3006 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3007 fold_convert (type, fd->loops[i].n1),
3008 fold_convert (type, fd->loops[i].n2));
3009 if (!integer_onep (this_cond))
3010 break;
3011 }
3012 if (i < fd->ordered)
3013 {
3014 cont_bb
3015 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3016 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3017 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3018 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3019 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3020 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3021 make_edge (cont_bb, l1_bb, 0);
3022 l2_bb = create_empty_bb (cont_bb);
3023 broken_loop = false;
3024 }
3025 }
3026 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3027 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3028 ordered_lastprivate);
3029 if (counts[fd->collapse - 1])
3030 {
3031 gcc_assert (fd->collapse == 1);
3032 gsi = gsi_last_bb (l0_bb);
3033 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3034 istart0, true);
3035 gsi = gsi_last_bb (cont_bb);
3036 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3037 build_int_cst (fd->iter_type, 1));
3038 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3039 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3040 size_zero_node, NULL_TREE, NULL_TREE);
3041 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3042 t = counts[fd->collapse - 1];
3043 }
3044 else if (fd->collapse > 1)
3045 t = fd->loop.v;
3046 else
3047 {
3048 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3049 fd->loops[0].v, fd->loops[0].n1);
3050 t = fold_convert (fd->iter_type, t);
3051 }
3052 gsi = gsi_last_bb (l0_bb);
3053 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3054 size_zero_node, NULL_TREE, NULL_TREE);
3055 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3056 false, GSI_CONTINUE_LINKING);
3057 expand_omp_build_assign (&gsi, aref, t, true);
3058 }
3059
3060 if (!broken_loop)
3061 {
3062 /* Code to control the increment and predicate for the sequential
3063 loop goes in the CONT_BB. */
3064 gsi = gsi_last_bb (cont_bb);
3065 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3066 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3067 vmain = gimple_omp_continue_control_use (cont_stmt);
3068 vback = gimple_omp_continue_control_def (cont_stmt);
3069
3070 if (!gimple_omp_for_combined_p (fd->for_stmt))
3071 {
3072 if (POINTER_TYPE_P (type))
3073 t = fold_build_pointer_plus (vmain, fd->loop.step);
3074 else
3075 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3076 t = force_gimple_operand_gsi (&gsi, t,
3077 DECL_P (vback)
3078 && TREE_ADDRESSABLE (vback),
3079 NULL_TREE, true, GSI_SAME_STMT);
3080 assign_stmt = gimple_build_assign (vback, t);
3081 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3082
3083 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3084 {
3085 if (fd->collapse > 1)
3086 t = fd->loop.v;
3087 else
3088 {
3089 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3090 fd->loops[0].v, fd->loops[0].n1);
3091 t = fold_convert (fd->iter_type, t);
3092 }
3093 tree aref = build4 (ARRAY_REF, fd->iter_type,
3094 counts[fd->ordered], size_zero_node,
3095 NULL_TREE, NULL_TREE);
3096 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3097 true, GSI_SAME_STMT);
3098 expand_omp_build_assign (&gsi, aref, t);
3099 }
3100
3101 t = build2 (fd->loop.cond_code, boolean_type_node,
3102 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3103 iend);
3104 gcond *cond_stmt = gimple_build_cond_empty (t);
3105 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3106 }
3107
3108 /* Remove GIMPLE_OMP_CONTINUE. */
3109 gsi_remove (&gsi, true);
3110
3111 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3112 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3113
3114 /* Emit code to get the next parallel iteration in L2_BB. */
3115 gsi = gsi_start_bb (l2_bb);
3116
3117 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3118 build_fold_addr_expr (istart0),
3119 build_fold_addr_expr (iend0));
3120 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3121 false, GSI_CONTINUE_LINKING);
3122 if (TREE_TYPE (t) != boolean_type_node)
3123 t = fold_build2 (NE_EXPR, boolean_type_node,
3124 t, build_int_cst (TREE_TYPE (t), 0));
3125 gcond *cond_stmt = gimple_build_cond_empty (t);
3126 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3127 }
3128
3129 /* Add the loop cleanup function. */
3130 gsi = gsi_last_bb (exit_bb);
3131 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3132 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3133 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3134 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3135 else
3136 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3137 gcall *call_stmt = gimple_build_call (t, 0);
3138 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3139 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3140 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3141 if (fd->ordered)
3142 {
3143 tree arr = counts[fd->ordered];
3144 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3145 TREE_THIS_VOLATILE (clobber) = 1;
3146 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3147 GSI_SAME_STMT);
3148 }
3149 gsi_remove (&gsi, true);
3150
3151 /* Connect the new blocks. */
3152 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3153 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3154
3155 if (!broken_loop)
3156 {
3157 gimple_seq phis;
3158
3159 e = find_edge (cont_bb, l3_bb);
3160 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3161
3162 phis = phi_nodes (l3_bb);
3163 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3164 {
3165 gimple *phi = gsi_stmt (gsi);
3166 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3167 PHI_ARG_DEF_FROM_EDGE (phi, e));
3168 }
3169 remove_edge (e);
3170
3171 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3172 e = find_edge (cont_bb, l1_bb);
3173 if (e == NULL)
3174 {
3175 e = BRANCH_EDGE (cont_bb);
3176 gcc_assert (single_succ (e->dest) == l1_bb);
3177 }
3178 if (gimple_omp_for_combined_p (fd->for_stmt))
3179 {
3180 remove_edge (e);
3181 e = NULL;
3182 }
3183 else if (fd->collapse > 1)
3184 {
3185 remove_edge (e);
3186 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3187 }
3188 else
3189 e->flags = EDGE_TRUE_VALUE;
3190 if (e)
3191 {
3192 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3193 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3194 }
3195 else
3196 {
3197 e = find_edge (cont_bb, l2_bb);
3198 e->flags = EDGE_FALLTHRU;
3199 }
3200 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3201
3202 if (gimple_in_ssa_p (cfun))
3203 {
3204 /* Add phis to the outer loop that connect to the phis in the inner,
3205 original loop, and move the loop entry value of the inner phi to
3206 the loop entry value of the outer phi. */
3207 gphi_iterator psi;
3208 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3209 {
3210 source_location locus;
3211 gphi *nphi;
3212 gphi *exit_phi = psi.phi ();
3213
3214 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3215 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3216
3217 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3218 edge latch_to_l1 = find_edge (latch, l1_bb);
3219 gphi *inner_phi
3220 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3221
3222 tree t = gimple_phi_result (exit_phi);
3223 tree new_res = copy_ssa_name (t, NULL);
3224 nphi = create_phi_node (new_res, l0_bb);
3225
3226 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3227 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3228 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3229 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3230 add_phi_arg (nphi, t, entry_to_l0, locus);
3231
3232 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3233 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3234
3235 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3236 };
3237 }
3238
3239 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3240 recompute_dominator (CDI_DOMINATORS, l2_bb));
3241 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3242 recompute_dominator (CDI_DOMINATORS, l3_bb));
3243 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3244 recompute_dominator (CDI_DOMINATORS, l0_bb));
3245 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3246 recompute_dominator (CDI_DOMINATORS, l1_bb));
3247
3248 /* We enter expand_omp_for_generic with a loop. This original loop may
3249 have its own loop struct, or it may be part of an outer loop struct
3250 (which may be the fake loop). */
3251 struct loop *outer_loop = entry_bb->loop_father;
3252 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3253
3254 add_bb_to_loop (l2_bb, outer_loop);
3255
3256 /* We've added a new loop around the original loop. Allocate the
3257 corresponding loop struct. */
3258 struct loop *new_loop = alloc_loop ();
3259 new_loop->header = l0_bb;
3260 new_loop->latch = l2_bb;
3261 add_loop (new_loop, outer_loop);
3262
3263 /* Allocate a loop structure for the original loop unless we already
3264 had one. */
3265 if (!orig_loop_has_loop_struct
3266 && !gimple_omp_for_combined_p (fd->for_stmt))
3267 {
3268 struct loop *orig_loop = alloc_loop ();
3269 orig_loop->header = l1_bb;
3270 /* The loop may have multiple latches. */
3271 add_loop (orig_loop, new_loop);
3272 }
3273 }
3274}
3275
3276/* A subroutine of expand_omp_for. Generate code for a parallel
3277 loop with static schedule and no specified chunk size. Given
3278 parameters:
3279
3280 for (V = N1; V cond N2; V += STEP) BODY;
3281
3282 where COND is "<" or ">", we generate pseudocode
3283
3284 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3285 if (cond is <)
3286 adj = STEP - 1;
3287 else
3288 adj = STEP + 1;
3289 if ((__typeof (V)) -1 > 0 && cond is >)
3290 n = -(adj + N2 - N1) / -STEP;
3291 else
3292 n = (adj + N2 - N1) / STEP;
3293 q = n / nthreads;
3294 tt = n % nthreads;
3295 if (threadid < tt) goto L3; else goto L4;
3296 L3:
3297 tt = 0;
3298 q = q + 1;
3299 L4:
3300 s0 = q * threadid + tt;
3301 e0 = s0 + q;
3302 V = s0 * STEP + N1;
3303 if (s0 >= e0) goto L2; else goto L0;
3304 L0:
3305 e = e0 * STEP + N1;
3306 L1:
3307 BODY;
3308 V += STEP;
3309 if (V cond e) goto L1;
3310 L2:
3311*/
3312
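/* Worked example (illustrative): with n == 10 iterations and
   nthreads == 4 we get q == 2 and tt == 2, so threads 0 and 1 take
   q + 1 == 3 iterations each ([0,3) and [3,6)) while threads 2 and 3
   take 2 each ([6,8) and [8,10)); every thread computes its own range
   without any communication.  */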
3313static void
3314expand_omp_for_static_nochunk (struct omp_region *region,
3315 struct omp_for_data *fd,
3316 gimple *inner_stmt)
3317{
3318 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3319 tree type, itype, vmain, vback;
3320 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3321 basic_block body_bb, cont_bb, collapse_bb = NULL;
3322 basic_block fin_bb;
3323 gimple_stmt_iterator gsi;
3324 edge ep;
3325 bool broken_loop = region->cont == NULL;
3326 tree *counts = NULL;
3327 tree n1, n2, step;
3328
3329 itype = type = TREE_TYPE (fd->loop.v);
3330 if (POINTER_TYPE_P (type))
3331 itype = signed_type_for (type);
3332
3333 entry_bb = region->entry;
3334 cont_bb = region->cont;
3335 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3336 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3337 gcc_assert (broken_loop
3338 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3339 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3340 body_bb = single_succ (seq_start_bb);
3341 if (!broken_loop)
3342 {
3343 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3344 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3345 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3346 }
3347 exit_bb = region->exit;
3348
3349 /* Iteration space partitioning goes in ENTRY_BB. */
3350 gsi = gsi_last_bb (entry_bb);
3351 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3352
3353 if (fd->collapse > 1)
3354 {
3355 int first_zero_iter = -1, dummy = -1;
3356 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3357
3358 counts = XALLOCAVEC (tree, fd->collapse);
3359 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3360 fin_bb, first_zero_iter,
3361 dummy_bb, dummy, l2_dom_bb);
3362 t = NULL_TREE;
3363 }
3364 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3365 t = integer_one_node;
3366 else
3367 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3368 fold_convert (type, fd->loop.n1),
3369 fold_convert (type, fd->loop.n2));
3370 if (fd->collapse == 1
3371 && TYPE_UNSIGNED (type)
3372 && (t == NULL_TREE || !integer_onep (t)))
3373 {
3374 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3375 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3376 true, GSI_SAME_STMT);
3377 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3378 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3379 true, GSI_SAME_STMT);
3380 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3381 NULL_TREE, NULL_TREE);
3382 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3383 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3384 expand_omp_regimplify_p, NULL, NULL)
3385 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3386 expand_omp_regimplify_p, NULL, NULL))
3387 {
3388 gsi = gsi_for_stmt (cond_stmt);
3389 gimple_regimplify_operands (cond_stmt, &gsi);
3390 }
3391 ep = split_block (entry_bb, cond_stmt);
3392 ep->flags = EDGE_TRUE_VALUE;
3393 entry_bb = ep->dest;
 3394 ep->probability = profile_probability::very_likely ();
 3395 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
 3396 ep->probability = profile_probability::very_unlikely ();
3397 if (gimple_in_ssa_p (cfun))
3398 {
3399 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3400 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3401 !gsi_end_p (gpi); gsi_next (&gpi))
3402 {
3403 gphi *phi = gpi.phi ();
3404 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3405 ep, UNKNOWN_LOCATION);
3406 }
3407 }
3408 gsi = gsi_last_bb (entry_bb);
3409 }
3410
3411 switch (gimple_omp_for_kind (fd->for_stmt))
3412 {
3413 case GF_OMP_FOR_KIND_FOR:
3414 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3415 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3416 break;
3417 case GF_OMP_FOR_KIND_DISTRIBUTE:
3418 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3419 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3420 break;
3421 default:
3422 gcc_unreachable ();
3423 }
3424 nthreads = build_call_expr (nthreads, 0);
3425 nthreads = fold_convert (itype, nthreads);
3426 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3427 true, GSI_SAME_STMT);
3428 threadid = build_call_expr (threadid, 0);
3429 threadid = fold_convert (itype, threadid);
3430 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3431 true, GSI_SAME_STMT);
3432
3433 n1 = fd->loop.n1;
3434 n2 = fd->loop.n2;
3435 step = fd->loop.step;
3436 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3437 {
3438 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3439 OMP_CLAUSE__LOOPTEMP_);
3440 gcc_assert (innerc);
3441 n1 = OMP_CLAUSE_DECL (innerc);
3442 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3443 OMP_CLAUSE__LOOPTEMP_);
3444 gcc_assert (innerc);
3445 n2 = OMP_CLAUSE_DECL (innerc);
3446 }
3447 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3448 true, NULL_TREE, true, GSI_SAME_STMT);
3449 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3450 true, NULL_TREE, true, GSI_SAME_STMT);
3451 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3452 true, NULL_TREE, true, GSI_SAME_STMT);
3453
3454 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3455 t = fold_build2 (PLUS_EXPR, itype, step, t);
3456 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3457 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3458 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3459 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3460 fold_build1 (NEGATE_EXPR, itype, t),
3461 fold_build1 (NEGATE_EXPR, itype, step));
3462 else
3463 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3464 t = fold_convert (itype, t);
3465 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
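 /* Worked example of the iteration count above: for
    for (V = 0; V < 10; V += 3) the LT_EXPR case gives
    n = (STEP - 1 + N2 - N1) / STEP = (2 + 10 - 0) / 3 = 4 iterations.  */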
3466
3467 q = create_tmp_reg (itype, "q");
3468 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3469 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3470 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3471
3472 tt = create_tmp_reg (itype, "tt");
3473 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3474 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3475 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3476
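 /* Threads with THREADID < N % NTHREADS take one extra iteration each;
    for those, TT is zeroed and Q is incremented in SECOND_BB below.  */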
3477 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3478 gcond *cond_stmt = gimple_build_cond_empty (t);
3479 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3480
3481 second_bb = split_block (entry_bb, cond_stmt)->dest;
3482 gsi = gsi_last_bb (second_bb);
3483 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3484
3485 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3486 GSI_SAME_STMT);
3487 gassign *assign_stmt
3488 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3489 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3490
3491 third_bb = split_block (second_bb, assign_stmt)->dest;
3492 gsi = gsi_last_bb (third_bb);
3493 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3494
3495 t = build2 (MULT_EXPR, itype, q, threadid);
3496 t = build2 (PLUS_EXPR, itype, t, tt);
3497 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3498
3499 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3500 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3501
3502 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3503 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3504
3505 /* Remove the GIMPLE_OMP_FOR statement. */
3506 gsi_remove (&gsi, true);
3507
3508 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3509 gsi = gsi_start_bb (seq_start_bb);
3510
3511 tree startvar = fd->loop.v;
3512 tree endvar = NULL_TREE;
3513
3514 if (gimple_omp_for_combined_p (fd->for_stmt))
3515 {
3516 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3517 ? gimple_omp_parallel_clauses (inner_stmt)
3518 : gimple_omp_for_clauses (inner_stmt);
3519 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3520 gcc_assert (innerc);
3521 startvar = OMP_CLAUSE_DECL (innerc);
3522 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3523 OMP_CLAUSE__LOOPTEMP_);
3524 gcc_assert (innerc);
3525 endvar = OMP_CLAUSE_DECL (innerc);
3526 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3527 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3528 {
3529 int i;
3530 for (i = 1; i < fd->collapse; i++)
3531 {
3532 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3533 OMP_CLAUSE__LOOPTEMP_);
3534 gcc_assert (innerc);
3535 }
3536 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3537 OMP_CLAUSE__LOOPTEMP_);
3538 if (innerc)
3539 {
3540 /* If needed (distribute parallel for with lastprivate),
3541 propagate down the total number of iterations. */
3542 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3543 fd->loop.n2);
3544 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3545 GSI_CONTINUE_LINKING);
3546 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3547 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3548 }
3549 }
3550 }
3551 t = fold_convert (itype, s0);
3552 t = fold_build2 (MULT_EXPR, itype, t, step);
3553 if (POINTER_TYPE_P (type))
3554 t = fold_build_pointer_plus (n1, t);
3555 else
3556 t = fold_build2 (PLUS_EXPR, type, t, n1);
3557 t = fold_convert (TREE_TYPE (startvar), t);
3558 t = force_gimple_operand_gsi (&gsi, t,
3559 DECL_P (startvar)
3560 && TREE_ADDRESSABLE (startvar),
3561 NULL_TREE, false, GSI_CONTINUE_LINKING);
3562 assign_stmt = gimple_build_assign (startvar, t);
3563 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3564
3565 t = fold_convert (itype, e0);
3566 t = fold_build2 (MULT_EXPR, itype, t, step);
3567 if (POINTER_TYPE_P (type))
3568 t = fold_build_pointer_plus (n1, t);
3569 else
3570 t = fold_build2 (PLUS_EXPR, type, t, n1);
3571 t = fold_convert (TREE_TYPE (startvar), t);
3572 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3573 false, GSI_CONTINUE_LINKING);
3574 if (endvar)
3575 {
3576 assign_stmt = gimple_build_assign (endvar, e);
3577 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3578 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3579 assign_stmt = gimple_build_assign (fd->loop.v, e);
3580 else
3581 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3582 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3583 }
3584 /* Handle linear clause adjustments. */
3585 tree itercnt = NULL_TREE;
3586 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3587 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3588 c; c = OMP_CLAUSE_CHAIN (c))
3589 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3590 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3591 {
3592 tree d = OMP_CLAUSE_DECL (c);
3593 bool is_ref = omp_is_reference (d);
3594 tree t = d, a, dest;
3595 if (is_ref)
3596 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3597 if (itercnt == NULL_TREE)
3598 {
3599 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3600 {
3601 itercnt = fold_build2 (MINUS_EXPR, itype,
3602 fold_convert (itype, n1),
3603 fold_convert (itype, fd->loop.n1));
3604 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3605 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3606 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3607 NULL_TREE, false,
3608 GSI_CONTINUE_LINKING);
3609 }
3610 else
3611 itercnt = s0;
3612 }
3613 tree type = TREE_TYPE (t);
3614 if (POINTER_TYPE_P (type))
3615 type = sizetype;
3616 a = fold_build2 (MULT_EXPR, type,
3617 fold_convert (type, itercnt),
3618 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3619 dest = unshare_expr (t);
3620 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3621 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3622 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3623 false, GSI_CONTINUE_LINKING);
3624 assign_stmt = gimple_build_assign (dest, t);
3625 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3626 }
3627 if (fd->collapse > 1)
3628 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3629
3630 if (!broken_loop)
3631 {
3632 /* The code controlling the sequential loop replaces the
3633 GIMPLE_OMP_CONTINUE. */
3634 gsi = gsi_last_bb (cont_bb);
3635 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3636 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3637 vmain = gimple_omp_continue_control_use (cont_stmt);
3638 vback = gimple_omp_continue_control_def (cont_stmt);
3639
3640 if (!gimple_omp_for_combined_p (fd->for_stmt))
3641 {
3642 if (POINTER_TYPE_P (type))
3643 t = fold_build_pointer_plus (vmain, step);
3644 else
3645 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3646 t = force_gimple_operand_gsi (&gsi, t,
3647 DECL_P (vback)
3648 && TREE_ADDRESSABLE (vback),
3649 NULL_TREE, true, GSI_SAME_STMT);
3650 assign_stmt = gimple_build_assign (vback, t);
3651 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3652
3653 t = build2 (fd->loop.cond_code, boolean_type_node,
3654 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3655 ? t : vback, e);
3656 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3657 }
3658
3659 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3660 gsi_remove (&gsi, true);
3661
3662 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3663 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3664 }
3665
3666 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3667 gsi = gsi_last_bb (exit_bb);
3668 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3669 {
3670 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3671 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3672 }
3673 gsi_remove (&gsi, true);
3674
3675 /* Connect all the blocks. */
3676 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
357067f2 3677 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3678 ep = find_edge (entry_bb, second_bb);
3679 ep->flags = EDGE_TRUE_VALUE;
357067f2 3680 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3681 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3682 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3683
3684 if (!broken_loop)
3685 {
3686 ep = find_edge (cont_bb, body_bb);
3687 if (ep == NULL)
3688 {
3689 ep = BRANCH_EDGE (cont_bb);
3690 gcc_assert (single_succ (ep->dest) == body_bb);
3691 }
3692 if (gimple_omp_for_combined_p (fd->for_stmt))
3693 {
3694 remove_edge (ep);
3695 ep = NULL;
3696 }
3697 else if (fd->collapse > 1)
3698 {
3699 remove_edge (ep);
3700 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3701 }
3702 else
3703 ep->flags = EDGE_TRUE_VALUE;
3704 find_edge (cont_bb, fin_bb)->flags
3705 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3706 }
3707
3708 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3709 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3710 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3711
3712 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3713 recompute_dominator (CDI_DOMINATORS, body_bb));
3714 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3715 recompute_dominator (CDI_DOMINATORS, fin_bb));
3716
3717 struct loop *loop = body_bb->loop_father;
3718 if (loop != entry_bb->loop_father)
3719 {
3720 gcc_assert (broken_loop || loop->header == body_bb);
3721 gcc_assert (broken_loop
3722 || loop->latch == region->cont
3723 || single_pred (loop->latch) == region->cont);
3724 return;
3725 }
3726
3727 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3728 {
3729 loop = alloc_loop ();
3730 loop->header = body_bb;
3731 if (collapse_bb == NULL)
3732 loop->latch = cont_bb;
3733 add_loop (loop, body_bb->loop_father);
3734 }
3735}
3736
3737/* Return phi in E->DEST with ARG on edge E. */
3738
3739static gphi *
3740find_phi_with_arg_on_edge (tree arg, edge e)
3741{
3742 basic_block bb = e->dest;
3743
3744 for (gphi_iterator gpi = gsi_start_phis (bb);
3745 !gsi_end_p (gpi);
3746 gsi_next (&gpi))
3747 {
3748 gphi *phi = gpi.phi ();
3749 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3750 return phi;
3751 }
3752
3753 return NULL;
3754}
3755
3756/* A subroutine of expand_omp_for. Generate code for a parallel
3757 loop with static schedule and a specified chunk size. Given
3758 parameters:
3759
3760 for (V = N1; V cond N2; V += STEP) BODY;
3761
3762 where COND is "<" or ">", we generate pseudocode
3763
3764 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
3765 if (cond is <)
3766 adj = STEP - 1;
3767 else
3768 adj = STEP + 1;
3769 if ((__typeof (V)) -1 > 0 && cond is >)
3770 n = -(adj + N2 - N1) / -STEP;
3771 else
3772 n = (adj + N2 - N1) / STEP;
3773 trip = 0;
3774 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3775 here so that V is defined
3776 if the loop is not entered
3777 L0:
3778 s0 = (trip * nthreads + threadid) * CHUNK;
01914336 3779 e0 = min (s0 + CHUNK, n);
3780 if (s0 < n) goto L1; else goto L4;
3781 L1:
3782 V = s0 * STEP + N1;
3783 e = e0 * STEP + N1;
3784 L2:
3785 BODY;
3786 V += STEP;
3787 if (V cond e) goto L2; else goto L3;
3788 L3:
3789 trip += 1;
3790 goto L0;
3791 L4:
3792*/
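/* Editorial illustration, not part of the original source: one trip of
   the static, chunked partition above in plain C, assuming integral
   types.  With n = 10, nthreads = 2 and chunk = 3, thread 0 gets [0,3)
   then [6,9) and thread 1 gets [3,6) then [9,10).  */

static void
static_chunk_range_example (long long n, long long nthreads,
                            long long threadid, long long chunk,
                            long long trip,
                            long long *s0, long long *e0)
{
  *s0 = (trip * nthreads + threadid) * chunk;
  *e0 = *s0 + chunk;
  if (*e0 > n)
    *e0 = n;                        /* e0 = min (s0 + CHUNK, n) */
  /* The caller loops, incrementing TRIP, until *s0 >= n.  */
}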
3793
3794static void
3795expand_omp_for_static_chunk (struct omp_region *region,
3796 struct omp_for_data *fd, gimple *inner_stmt)
3797{
3798 tree n, s0, e0, e, t;
3799 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3800 tree type, itype, vmain, vback, vextra;
3801 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3802 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3803 gimple_stmt_iterator gsi;
3804 edge se;
3805 bool broken_loop = region->cont == NULL;
3806 tree *counts = NULL;
3807 tree n1, n2, step;
3808
3809 itype = type = TREE_TYPE (fd->loop.v);
3810 if (POINTER_TYPE_P (type))
3811 itype = signed_type_for (type);
3812
3813 entry_bb = region->entry;
3814 se = split_block (entry_bb, last_stmt (entry_bb));
3815 entry_bb = se->src;
3816 iter_part_bb = se->dest;
3817 cont_bb = region->cont;
3818 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3819 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3820 gcc_assert (broken_loop
3821 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3822 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3823 body_bb = single_succ (seq_start_bb);
3824 if (!broken_loop)
3825 {
3826 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3827 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3828 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3829 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3830 }
3831 exit_bb = region->exit;
3832
3833 /* Trip and adjustment setup goes in ENTRY_BB. */
3834 gsi = gsi_last_bb (entry_bb);
3835 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3836
3837 if (fd->collapse > 1)
3838 {
3839 int first_zero_iter = -1, dummy = -1;
3840 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3841
3842 counts = XALLOCAVEC (tree, fd->collapse);
3843 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3844 fin_bb, first_zero_iter,
3845 dummy_bb, dummy, l2_dom_bb);
3846 t = NULL_TREE;
3847 }
3848 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3849 t = integer_one_node;
3850 else
3851 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3852 fold_convert (type, fd->loop.n1),
3853 fold_convert (type, fd->loop.n2));
3854 if (fd->collapse == 1
3855 && TYPE_UNSIGNED (type)
3856 && (t == NULL_TREE || !integer_onep (t)))
3857 {
3858 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3859 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3860 true, GSI_SAME_STMT);
3861 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3862 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3863 true, GSI_SAME_STMT);
3864 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3865 NULL_TREE, NULL_TREE);
3866 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3867 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3868 expand_omp_regimplify_p, NULL, NULL)
3869 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3870 expand_omp_regimplify_p, NULL, NULL))
3871 {
3872 gsi = gsi_for_stmt (cond_stmt);
3873 gimple_regimplify_operands (cond_stmt, &gsi);
3874 }
3875 se = split_block (entry_bb, cond_stmt);
3876 se->flags = EDGE_TRUE_VALUE;
3877 entry_bb = se->dest;
357067f2 3878 se->probability = profile_probability::very_likely ();
629b3d75 3879 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 3880 se->probability = profile_probability::very_unlikely ();
3881 if (gimple_in_ssa_p (cfun))
3882 {
3883 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3884 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3885 !gsi_end_p (gpi); gsi_next (&gpi))
3886 {
3887 gphi *phi = gpi.phi ();
3888 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3889 se, UNKNOWN_LOCATION);
3890 }
3891 }
3892 gsi = gsi_last_bb (entry_bb);
3893 }
3894
3895 switch (gimple_omp_for_kind (fd->for_stmt))
3896 {
3897 case GF_OMP_FOR_KIND_FOR:
3898 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3899 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3900 break;
3901 case GF_OMP_FOR_KIND_DISTRIBUTE:
3902 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3903 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3904 break;
3905 default:
3906 gcc_unreachable ();
3907 }
3908 nthreads = build_call_expr (nthreads, 0);
3909 nthreads = fold_convert (itype, nthreads);
3910 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3911 true, GSI_SAME_STMT);
3912 threadid = build_call_expr (threadid, 0);
3913 threadid = fold_convert (itype, threadid);
3914 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3915 true, GSI_SAME_STMT);
3916
3917 n1 = fd->loop.n1;
3918 n2 = fd->loop.n2;
3919 step = fd->loop.step;
3920 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3921 {
3922 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3923 OMP_CLAUSE__LOOPTEMP_);
3924 gcc_assert (innerc);
3925 n1 = OMP_CLAUSE_DECL (innerc);
3926 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3927 OMP_CLAUSE__LOOPTEMP_);
3928 gcc_assert (innerc);
3929 n2 = OMP_CLAUSE_DECL (innerc);
3930 }
3931 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3932 true, NULL_TREE, true, GSI_SAME_STMT);
3933 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3934 true, NULL_TREE, true, GSI_SAME_STMT);
3935 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3936 true, NULL_TREE, true, GSI_SAME_STMT);
3937 tree chunk_size = fold_convert (itype, fd->chunk_size);
3938 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3939 chunk_size
3940 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3941 GSI_SAME_STMT);
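 /* When the simd schedule modifier is present, omp_adjust_chunk_size
    rounds the chunk size up to a multiple of the vectorization factor,
    so that whole vectors are handed out per chunk.  */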
3942
3943 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3944 t = fold_build2 (PLUS_EXPR, itype, step, t);
3945 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3946 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3947 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3948 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3949 fold_build1 (NEGATE_EXPR, itype, t),
3950 fold_build1 (NEGATE_EXPR, itype, step));
3951 else
3952 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3953 t = fold_convert (itype, t);
3954 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3955 true, GSI_SAME_STMT);
3956
3957 trip_var = create_tmp_reg (itype, ".trip");
3958 if (gimple_in_ssa_p (cfun))
3959 {
3960 trip_init = make_ssa_name (trip_var);
3961 trip_main = make_ssa_name (trip_var);
3962 trip_back = make_ssa_name (trip_var);
3963 }
3964 else
3965 {
3966 trip_init = trip_var;
3967 trip_main = trip_var;
3968 trip_back = trip_var;
3969 }
3970
3971 gassign *assign_stmt
3972 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3973 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3974
3975 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3976 t = fold_build2 (MULT_EXPR, itype, t, step);
3977 if (POINTER_TYPE_P (type))
3978 t = fold_build_pointer_plus (n1, t);
3979 else
3980 t = fold_build2 (PLUS_EXPR, type, t, n1);
3981 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3982 true, GSI_SAME_STMT);
3983
3984 /* Remove the GIMPLE_OMP_FOR. */
3985 gsi_remove (&gsi, true);
3986
3987 gimple_stmt_iterator gsif = gsi;
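 /* GSIF remembers the spot in ENTRY_BB left by the removed GIMPLE_OMP_FOR;
    the linear clause handling below inserts loop-invariant setup there.  */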
3988
3989 /* Iteration space partitioning goes in ITER_PART_BB. */
3990 gsi = gsi_last_bb (iter_part_bb);
3991
3992 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3993 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3994 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3995 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3996 false, GSI_CONTINUE_LINKING);
3997
3998 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3999 t = fold_build2 (MIN_EXPR, itype, t, n);
4000 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4001 false, GSI_CONTINUE_LINKING);
4002
4003 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4004 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4005
4006 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4007 gsi = gsi_start_bb (seq_start_bb);
4008
4009 tree startvar = fd->loop.v;
4010 tree endvar = NULL_TREE;
4011
4012 if (gimple_omp_for_combined_p (fd->for_stmt))
4013 {
4014 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4015 ? gimple_omp_parallel_clauses (inner_stmt)
4016 : gimple_omp_for_clauses (inner_stmt);
4017 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4018 gcc_assert (innerc);
4019 startvar = OMP_CLAUSE_DECL (innerc);
4020 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4021 OMP_CLAUSE__LOOPTEMP_);
4022 gcc_assert (innerc);
4023 endvar = OMP_CLAUSE_DECL (innerc);
4024 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4025 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4026 {
4027 int i;
4028 for (i = 1; i < fd->collapse; i++)
4029 {
4030 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4031 OMP_CLAUSE__LOOPTEMP_);
4032 gcc_assert (innerc);
4033 }
4034 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4035 OMP_CLAUSE__LOOPTEMP_);
4036 if (innerc)
4037 {
4038 /* If needed (distribute parallel for with lastprivate),
4039 propagate down the total number of iterations. */
4040 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4041 fd->loop.n2);
4042 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4043 GSI_CONTINUE_LINKING);
4044 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4045 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4046 }
4047 }
4048 }
4049
4050 t = fold_convert (itype, s0);
4051 t = fold_build2 (MULT_EXPR, itype, t, step);
4052 if (POINTER_TYPE_P (type))
4053 t = fold_build_pointer_plus (n1, t);
4054 else
4055 t = fold_build2 (PLUS_EXPR, type, t, n1);
4056 t = fold_convert (TREE_TYPE (startvar), t);
4057 t = force_gimple_operand_gsi (&gsi, t,
4058 DECL_P (startvar)
4059 && TREE_ADDRESSABLE (startvar),
4060 NULL_TREE, false, GSI_CONTINUE_LINKING);
4061 assign_stmt = gimple_build_assign (startvar, t);
4062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4063
4064 t = fold_convert (itype, e0);
4065 t = fold_build2 (MULT_EXPR, itype, t, step);
4066 if (POINTER_TYPE_P (type))
4067 t = fold_build_pointer_plus (n1, t);
4068 else
4069 t = fold_build2 (PLUS_EXPR, type, t, n1);
4070 t = fold_convert (TREE_TYPE (startvar), t);
4071 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4072 false, GSI_CONTINUE_LINKING);
4073 if (endvar)
4074 {
4075 assign_stmt = gimple_build_assign (endvar, e);
4076 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4077 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4078 assign_stmt = gimple_build_assign (fd->loop.v, e);
4079 else
4080 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4081 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4082 }
4083 /* Handle linear clause adjustments. */
4084 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4085 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4086 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4087 c; c = OMP_CLAUSE_CHAIN (c))
4088 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4089 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4090 {
4091 tree d = OMP_CLAUSE_DECL (c);
4092 bool is_ref = omp_is_reference (d);
4093 tree t = d, a, dest;
4094 if (is_ref)
4095 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4096 tree type = TREE_TYPE (t);
4097 if (POINTER_TYPE_P (type))
4098 type = sizetype;
4099 dest = unshare_expr (t);
4100 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4101 expand_omp_build_assign (&gsif, v, t);
4102 if (itercnt == NULL_TREE)
4103 {
4104 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4105 {
4106 itercntbias
4107 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4108 fold_convert (itype, fd->loop.n1));
4109 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4110 itercntbias, step);
4111 itercntbias
4112 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4113 NULL_TREE, true,
4114 GSI_SAME_STMT);
4115 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4116 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4117 NULL_TREE, false,
4118 GSI_CONTINUE_LINKING);
4119 }
4120 else
4121 itercnt = s0;
4122 }
4123 a = fold_build2 (MULT_EXPR, type,
4124 fold_convert (type, itercnt),
4125 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4126 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4127 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4128 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4129 false, GSI_CONTINUE_LINKING);
4130 assign_stmt = gimple_build_assign (dest, t);
4131 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4132 }
4133 if (fd->collapse > 1)
4134 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4135
4136 if (!broken_loop)
4137 {
4138 /* The code controlling the sequential loop goes in CONT_BB,
4139 replacing the GIMPLE_OMP_CONTINUE. */
4140 gsi = gsi_last_bb (cont_bb);
4141 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4142 vmain = gimple_omp_continue_control_use (cont_stmt);
4143 vback = gimple_omp_continue_control_def (cont_stmt);
4144
4145 if (!gimple_omp_for_combined_p (fd->for_stmt))
4146 {
4147 if (POINTER_TYPE_P (type))
4148 t = fold_build_pointer_plus (vmain, step);
4149 else
4150 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4151 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4152 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4153 true, GSI_SAME_STMT);
4154 assign_stmt = gimple_build_assign (vback, t);
4155 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4156
4157 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4158 t = build2 (EQ_EXPR, boolean_type_node,
4159 build_int_cst (itype, 0),
4160 build_int_cst (itype, 1));
4161 else
4162 t = build2 (fd->loop.cond_code, boolean_type_node,
4163 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4164 ? t : vback, e);
4165 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4166 }
4167
4168 /* Remove GIMPLE_OMP_CONTINUE. */
4169 gsi_remove (&gsi, true);
4170
4171 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4172 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4173
4174 /* Trip update code goes into TRIP_UPDATE_BB. */
4175 gsi = gsi_start_bb (trip_update_bb);
4176
4177 t = build_int_cst (itype, 1);
4178 t = build2 (PLUS_EXPR, itype, trip_main, t);
4179 assign_stmt = gimple_build_assign (trip_back, t);
4180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4181 }
4182
4183 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4184 gsi = gsi_last_bb (exit_bb);
4185 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4186 {
4187 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4188 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4189 }
4190 gsi_remove (&gsi, true);
4191
4192 /* Connect the new blocks. */
4193 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4194 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4195
4196 if (!broken_loop)
4197 {
4198 se = find_edge (cont_bb, body_bb);
4199 if (se == NULL)
4200 {
4201 se = BRANCH_EDGE (cont_bb);
4202 gcc_assert (single_succ (se->dest) == body_bb);
4203 }
4204 if (gimple_omp_for_combined_p (fd->for_stmt))
4205 {
4206 remove_edge (se);
4207 se = NULL;
4208 }
4209 else if (fd->collapse > 1)
4210 {
4211 remove_edge (se);
4212 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4213 }
4214 else
4215 se->flags = EDGE_TRUE_VALUE;
4216 find_edge (cont_bb, trip_update_bb)->flags
4217 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4218
4219 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4220 iter_part_bb);
4221 }
4222
4223 if (gimple_in_ssa_p (cfun))
4224 {
4225 gphi_iterator psi;
4226 gphi *phi;
4227 edge re, ene;
4228 edge_var_map *vm;
4229 size_t i;
4230
4231 gcc_assert (fd->collapse == 1 && !broken_loop);
4232
4233 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4234 remove arguments of the phi nodes in fin_bb. We need to create
4235 appropriate phi nodes in iter_part_bb instead. */
4236 se = find_edge (iter_part_bb, fin_bb);
4237 re = single_succ_edge (trip_update_bb);
4238 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4239 ene = single_succ_edge (entry_bb);
4240
4241 psi = gsi_start_phis (fin_bb);
4242 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4243 gsi_next (&psi), ++i)
4244 {
4245 gphi *nphi;
4246 source_location locus;
4247
4248 phi = psi.phi ();
4249 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4250 redirect_edge_var_map_def (vm), 0))
4251 continue;
4252
4253 t = gimple_phi_result (phi);
4254 gcc_assert (t == redirect_edge_var_map_result (vm));
4255
4256 if (!single_pred_p (fin_bb))
4257 t = copy_ssa_name (t, phi);
4258
4259 nphi = create_phi_node (t, iter_part_bb);
4260
4261 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4262 locus = gimple_phi_arg_location_from_edge (phi, se);
4263
4264 /* A special case -- fd->loop.v is not yet computed in
4265 iter_part_bb, we need to use vextra instead. */
4266 if (t == fd->loop.v)
4267 t = vextra;
4268 add_phi_arg (nphi, t, ene, locus);
4269 locus = redirect_edge_var_map_location (vm);
4270 tree back_arg = redirect_edge_var_map_def (vm);
4271 add_phi_arg (nphi, back_arg, re, locus);
4272 edge ce = find_edge (cont_bb, body_bb);
4273 if (ce == NULL)
4274 {
4275 ce = BRANCH_EDGE (cont_bb);
4276 gcc_assert (single_succ (ce->dest) == body_bb);
4277 ce = single_succ_edge (ce->dest);
4278 }
4279 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4280 gcc_assert (inner_loop_phi != NULL);
4281 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4282 find_edge (seq_start_bb, body_bb), locus);
4283
4284 if (!single_pred_p (fin_bb))
4285 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4286 }
4287 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4288 redirect_edge_var_map_clear (re);
4289 if (single_pred_p (fin_bb))
4290 while (1)
4291 {
4292 psi = gsi_start_phis (fin_bb);
4293 if (gsi_end_p (psi))
4294 break;
4295 remove_phi_node (&psi, false);
4296 }
4297
4298 /* Make phi node for trip. */
4299 phi = create_phi_node (trip_main, iter_part_bb);
4300 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4301 UNKNOWN_LOCATION);
4302 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4303 UNKNOWN_LOCATION);
4304 }
4305
4306 if (!broken_loop)
4307 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4308 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4309 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4310 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4311 recompute_dominator (CDI_DOMINATORS, fin_bb));
4312 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4313 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4314 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4315 recompute_dominator (CDI_DOMINATORS, body_bb));
4316
4317 if (!broken_loop)
4318 {
4319 struct loop *loop = body_bb->loop_father;
4320 struct loop *trip_loop = alloc_loop ();
4321 trip_loop->header = iter_part_bb;
4322 trip_loop->latch = trip_update_bb;
4323 add_loop (trip_loop, iter_part_bb->loop_father);
4324
4325 if (loop != entry_bb->loop_father)
4326 {
4327 gcc_assert (loop->header == body_bb);
4328 gcc_assert (loop->latch == region->cont
4329 || single_pred (loop->latch) == region->cont);
4330 trip_loop->inner = loop;
4331 return;
4332 }
4333
4334 if (!gimple_omp_for_combined_p (fd->for_stmt))
4335 {
4336 loop = alloc_loop ();
4337 loop->header = body_bb;
4338 if (collapse_bb == NULL)
4339 loop->latch = cont_bb;
4340 add_loop (loop, trip_loop);
4341 }
4342 }
4343}
4344
4345/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4346 Given parameters:
4347 for (V = N1; V cond N2; V += STEP) BODY;
4348
4349 where COND is "<" or ">" or "!=", we generate pseudocode
4350
4351 for (ind_var = low; ind_var < high; ind_var++)
4352 {
4353 V = n1 + (ind_var * STEP)
4354
4355 <BODY>
4356 }
4357
4358 In the above pseudocode, low and high are function parameters of the
4359 child function. In the function below we insert a temporary
4360 variable that makes calls to two OMP functions that would not otherwise be
4361 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4362 with _Cilk_for). These calls are replaced with low and high
4363 by the function that handles taskreg. */
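/* Editorial illustration, not part of the original source: the shape of
   the per-worker loop described above, assuming __low/__high are the
   child-function parameters mentioned in the comment (names here are
   illustrative only).  */

static void
cilk_for_worker_example (long long low, long long high,
                         long long n1, long long step)
{
  for (long long ind_var = low; ind_var < high; ind_var++)
    {
      long long v = n1 + ind_var * step;   /* V = N1 + (ind_var * STEP) */
      (void) v;                            /* <BODY> uses V here.  */
    }
}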
4364
4365
4366static void
4367expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4368{
4369 bool broken_loop = region->cont == NULL;
4370 basic_block entry_bb = region->entry;
4371 basic_block cont_bb = region->cont;
4372
4373 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4374 gcc_assert (broken_loop
4375 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4376 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4377 basic_block l1_bb, l2_bb;
4378
4379 if (!broken_loop)
4380 {
4381 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4382 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4383 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4384 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4385 }
4386 else
4387 {
4388 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4389 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4390 l2_bb = single_succ (l1_bb);
4391 }
4392 basic_block exit_bb = region->exit;
4393 basic_block l2_dom_bb = NULL;
4394
4395 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4396
4397 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4398 statements used to pass information to expand_omp_taskreg.
4399 low_val and high_val will be replaced by the __low and __high
4400 parameters from the child function.
4401
4402 The call_exprs part is a place-holder; it is mainly used
4403 to tell the top-level part that this is
4404 where low and high should be put (reasoning given in the header
4405 comment). */
4406
4407 gomp_parallel *par_stmt
4408 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4409 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4410 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4411 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4412 {
a01f151f 4413 if (id_equal (DECL_NAME (t), "__high"))
629b3d75 4414 high_val = t;
a01f151f 4415 else if (id_equal (DECL_NAME (t), "__low"))
4416 low_val = t;
4417 }
4418 gcc_assert (low_val && high_val);
4419
4420 tree type = TREE_TYPE (low_val);
4421 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4422 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4423
4424 /* Not needed in SSA form right now. */
4425 gcc_assert (!gimple_in_ssa_p (cfun));
4426 if (l2_dom_bb == NULL)
4427 l2_dom_bb = l1_bb;
4428
4429 tree n1 = low_val;
4430 tree n2 = high_val;
4431
4432 gimple *stmt = gimple_build_assign (ind_var, n1);
4433
4434 /* Replace the GIMPLE_OMP_FOR statement. */
4435 gsi_replace (&gsi, stmt, true);
4436
4437 if (!broken_loop)
4438 {
4439 /* Code to control the increment goes in the CONT_BB. */
4440 gsi = gsi_last_bb (cont_bb);
4441 stmt = gsi_stmt (gsi);
4442 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4443 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4444 build_one_cst (type));
4445
4446 /* Replace GIMPLE_OMP_CONTINUE. */
4447 gsi_replace (&gsi, stmt, true);
4448 }
4449
4450 /* Emit the condition in L1_BB. */
4451 gsi = gsi_after_labels (l1_bb);
4452 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4453 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4454 fd->loop.step);
4455 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4456 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4457 fd->loop.n1, fold_convert (sizetype, t));
4458 else
4459 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4460 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4461 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4462 expand_omp_build_assign (&gsi, fd->loop.v, t);
4463
4464 /* The condition is always '<' since the runtime will fill in the low
4465 and high values. */
4466 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4467 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4468
4469 /* Remove GIMPLE_OMP_RETURN. */
4470 gsi = gsi_last_bb (exit_bb);
4471 gsi_remove (&gsi, true);
4472
4473 /* Connect the new blocks. */
4474 remove_edge (FALLTHRU_EDGE (entry_bb));
4475
4476 edge e, ne;
4477 if (!broken_loop)
4478 {
4479 remove_edge (BRANCH_EDGE (entry_bb));
4480 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4481
4482 e = BRANCH_EDGE (l1_bb);
4483 ne = FALLTHRU_EDGE (l1_bb);
4484 e->flags = EDGE_TRUE_VALUE;
4485 }
4486 else
4487 {
4488 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4489
4490 ne = single_succ_edge (l1_bb);
4491 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4492
4493 }
4494 ne->flags = EDGE_FALSE_VALUE;
4495 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4496 ne->probability = e->probability.invert ();
4497
4498 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4499 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4500 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4501
4502 if (!broken_loop)
4503 {
4504 struct loop *loop = alloc_loop ();
4505 loop->header = l1_bb;
4506 loop->latch = cont_bb;
4507 add_loop (loop, l1_bb->loop_father);
4508 loop->safelen = INT_MAX;
4509 }
4510
4511 /* Pick the correct library function based on the precision of the
4512 induction variable type. */
4513 tree lib_fun = NULL_TREE;
4514 if (TYPE_PRECISION (type) == 32)
4515 lib_fun = cilk_for_32_fndecl;
4516 else if (TYPE_PRECISION (type) == 64)
4517 lib_fun = cilk_for_64_fndecl;
4518 else
4519 gcc_unreachable ();
4520
4521 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4522
4523 /* WS_ARGS contains the library function flavor to call:
4524 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4525 user-defined grain value. If the user does not define one, then zero
4526 is passed in by the parser. */
4527 vec_alloc (region->ws_args, 2);
4528 region->ws_args->quick_push (lib_fun);
4529 region->ws_args->quick_push (fd->chunk_size);
4530}
4531
4532/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4533 loop. Given parameters:
4534
4535 for (V = N1; V cond N2; V += STEP) BODY;
4536
4537 where COND is "<" or ">", we generate pseudocode
4538
4539 V = N1;
4540 goto L1;
4541 L0:
4542 BODY;
4543 V += STEP;
4544 L1:
4545 if (V cond N2) goto L0; else goto L2;
4546 L2:
4547
4548 For collapsed loops, given parameters:
4549 collapse(3)
4550 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4551 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4552 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4553 BODY;
4554
4555 we generate pseudocode
4556
4557 if (cond3 is <)
4558 adj = STEP3 - 1;
4559 else
4560 adj = STEP3 + 1;
4561 count3 = (adj + N32 - N31) / STEP3;
4562 if (cond2 is <)
4563 adj = STEP2 - 1;
4564 else
4565 adj = STEP2 + 1;
4566 count2 = (adj + N22 - N21) / STEP2;
4567 if (cond1 is <)
4568 adj = STEP1 - 1;
4569 else
4570 adj = STEP1 + 1;
4571 count1 = (adj + N12 - N11) / STEP1;
4572 count = count1 * count2 * count3;
4573 V = 0;
4574 V1 = N11;
4575 V2 = N21;
4576 V3 = N31;
4577 goto L1;
4578 L0:
4579 BODY;
4580 V += 1;
4581 V3 += STEP3;
4582 V2 += (V3 cond3 N32) ? 0 : STEP2;
4583 V3 = (V3 cond3 N32) ? V3 : N31;
4584 V1 += (V2 cond2 N22) ? 0 : STEP1;
4585 V2 = (V2 cond2 N22) ? V2 : N21;
4586 L1:
4587 if (V < count) goto L0; else goto L2;
4588 L2:
4589
4590 */
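/* Editorial illustration, not part of the original source: the collapse(2)
   flavour of the update above in plain C, assuming "<" conditions and
   positive steps; illustrative names only.  */

static void
simd_collapse2_example (long long n11, long long n12, long long step1,
                        long long n21, long long n22, long long step2)
{
  long long count1 = (step1 - 1 + n12 - n11) / step1;
  long long count2 = (step2 - 1 + n22 - n21) / step2;
  long long count = count1 * count2;
  long long v1 = n11, v2 = n21;
  for (long long v = 0; v < count; v++)
    {
      /* BODY uses v1 and v2 here.  */
      v2 += step2;
      if (!(v2 < n22))
        {
          /* The inner induction variable wrapped: step the outer one
             and reset the inner one, as in the pseudocode above.  */
          v1 += step1;
          v2 = n21;
        }
    }
}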
4591
4592static void
4593expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4594{
4595 tree type, t;
4596 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4597 gimple_stmt_iterator gsi;
4598 gimple *stmt;
4599 gcond *cond_stmt;
4600 bool broken_loop = region->cont == NULL;
4601 edge e, ne;
4602 tree *counts = NULL;
4603 int i;
4604 int safelen_int = INT_MAX;
4605 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4606 OMP_CLAUSE_SAFELEN);
4607 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4608 OMP_CLAUSE__SIMDUID_);
4609 tree n1, n2;
4610
4611 if (safelen)
4612 {
4613 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4614 if (TREE_CODE (safelen) != INTEGER_CST)
4615 safelen_int = 0;
4616 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4617 safelen_int = tree_to_uhwi (safelen);
4618 if (safelen_int == 1)
4619 safelen_int = 0;
4620 }
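 /* A non-constant SAFELEN expression or SAFELEN (1) gives no usable
    guarantee and is recorded as 0; constant values of INT_MAX or more
    keep the unlimited default.  */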
4621 type = TREE_TYPE (fd->loop.v);
4622 entry_bb = region->entry;
4623 cont_bb = region->cont;
4624 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4625 gcc_assert (broken_loop
4626 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4627 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4628 if (!broken_loop)
4629 {
4630 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4631 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4632 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4633 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4634 }
4635 else
4636 {
4637 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4638 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4639 l2_bb = single_succ (l1_bb);
4640 }
4641 exit_bb = region->exit;
4642 l2_dom_bb = NULL;
4643
4644 gsi = gsi_last_bb (entry_bb);
4645
4646 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4647 /* Not needed in SSA form right now. */
4648 gcc_assert (!gimple_in_ssa_p (cfun));
4649 if (fd->collapse > 1)
4650 {
4651 int first_zero_iter = -1, dummy = -1;
4652 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4653
4654 counts = XALLOCAVEC (tree, fd->collapse);
4655 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4656 zero_iter_bb, first_zero_iter,
4657 dummy_bb, dummy, l2_dom_bb);
4658 }
4659 if (l2_dom_bb == NULL)
4660 l2_dom_bb = l1_bb;
4661
4662 n1 = fd->loop.n1;
4663 n2 = fd->loop.n2;
4664 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4665 {
4666 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4667 OMP_CLAUSE__LOOPTEMP_);
4668 gcc_assert (innerc);
4669 n1 = OMP_CLAUSE_DECL (innerc);
4670 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4671 OMP_CLAUSE__LOOPTEMP_);
4672 gcc_assert (innerc);
4673 n2 = OMP_CLAUSE_DECL (innerc);
4674 }
4675 tree step = fd->loop.step;
4676
4677 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4678 OMP_CLAUSE__SIMT_);
4679 if (is_simt)
4680 {
4681 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4682 is_simt = safelen_int > 1;
4683 }
4684 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4685 if (is_simt)
4686 {
4687 simt_lane = create_tmp_var (unsigned_type_node);
4688 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4689 gimple_call_set_lhs (g, simt_lane);
4690 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4691 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4692 fold_convert (TREE_TYPE (step), simt_lane));
4693 n1 = fold_convert (type, n1);
4694 if (POINTER_TYPE_P (type))
4695 n1 = fold_build_pointer_plus (n1, offset);
4696 else
4697 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4698
4699 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4700 if (fd->collapse > 1)
4701 simt_maxlane = build_one_cst (unsigned_type_node);
4702 else if (safelen_int < omp_max_simt_vf ())
4703 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4704 tree vf
4705 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4706 unsigned_type_node, 0);
4707 if (simt_maxlane)
4708 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4709 vf = fold_convert (TREE_TYPE (step), vf);
4710 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4711 }
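 /* After the adjustment above, each SIMT lane starts at N1 + LANE * STEP
    and advances by STEP * VF, so the lanes cover interleaved subsets of
    the iteration space.  */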
4712
4713 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4714 if (fd->collapse > 1)
4715 {
4716 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4717 {
4718 gsi_prev (&gsi);
4719 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4720 gsi_next (&gsi);
4721 }
4722 else
4723 for (i = 0; i < fd->collapse; i++)
4724 {
4725 tree itype = TREE_TYPE (fd->loops[i].v);
4726 if (POINTER_TYPE_P (itype))
4727 itype = signed_type_for (itype);
4728 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4729 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4730 }
4731 }
4732
4733 /* Remove the GIMPLE_OMP_FOR statement. */
4734 gsi_remove (&gsi, true);
4735
4736 if (!broken_loop)
4737 {
4738 /* Code to control the increment goes in the CONT_BB. */
4739 gsi = gsi_last_bb (cont_bb);
4740 stmt = gsi_stmt (gsi);
4741 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4742
4743 if (POINTER_TYPE_P (type))
4744 t = fold_build_pointer_plus (fd->loop.v, step);
4745 else
4746 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4747 expand_omp_build_assign (&gsi, fd->loop.v, t);
4748
4749 if (fd->collapse > 1)
4750 {
4751 i = fd->collapse - 1;
4752 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4753 {
4754 t = fold_convert (sizetype, fd->loops[i].step);
4755 t = fold_build_pointer_plus (fd->loops[i].v, t);
4756 }
4757 else
4758 {
4759 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4760 fd->loops[i].step);
4761 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4762 fd->loops[i].v, t);
4763 }
4764 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4765
4766 for (i = fd->collapse - 1; i > 0; i--)
4767 {
4768 tree itype = TREE_TYPE (fd->loops[i].v);
4769 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4770 if (POINTER_TYPE_P (itype2))
4771 itype2 = signed_type_for (itype2);
4772 t = fold_convert (itype2, fd->loops[i - 1].step);
4773 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4774 GSI_SAME_STMT);
4775 t = build3 (COND_EXPR, itype2,
4776 build2 (fd->loops[i].cond_code, boolean_type_node,
4777 fd->loops[i].v,
4778 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 4779 build_int_cst (itype2, 0), t);
4780 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4781 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4782 else
4783 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4784 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4785
4786 t = fold_convert (itype, fd->loops[i].n1);
4787 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4788 GSI_SAME_STMT);
4789 t = build3 (COND_EXPR, itype,
4790 build2 (fd->loops[i].cond_code, boolean_type_node,
4791 fd->loops[i].v,
4792 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 4793 fd->loops[i].v, t);
4794 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4795 }
4796 }
4797
4798 /* Remove GIMPLE_OMP_CONTINUE. */
4799 gsi_remove (&gsi, true);
4800 }
4801
4802 /* Emit the condition in L1_BB. */
4803 gsi = gsi_start_bb (l1_bb);
4804
4805 t = fold_convert (type, n2);
4806 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4807 false, GSI_CONTINUE_LINKING);
4808 tree v = fd->loop.v;
4809 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4810 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4811 false, GSI_CONTINUE_LINKING);
4812 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4813 cond_stmt = gimple_build_cond_empty (t);
4814 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4815 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4816 NULL, NULL)
4817 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4818 NULL, NULL))
4819 {
4820 gsi = gsi_for_stmt (cond_stmt);
4821 gimple_regimplify_operands (cond_stmt, &gsi);
4822 }
4823
4824 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4825 if (is_simt)
4826 {
4827 gsi = gsi_start_bb (l2_bb);
4828 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4829 if (POINTER_TYPE_P (type))
4830 t = fold_build_pointer_plus (fd->loop.v, step);
4831 else
4832 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4833 expand_omp_build_assign (&gsi, fd->loop.v, t);
4834 }
4835
4836 /* Remove GIMPLE_OMP_RETURN. */
4837 gsi = gsi_last_bb (exit_bb);
4838 gsi_remove (&gsi, true);
4839
4840 /* Connect the new blocks. */
4841 remove_edge (FALLTHRU_EDGE (entry_bb));
4842
4843 if (!broken_loop)
4844 {
4845 remove_edge (BRANCH_EDGE (entry_bb));
4846 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4847
4848 e = BRANCH_EDGE (l1_bb);
4849 ne = FALLTHRU_EDGE (l1_bb);
4850 e->flags = EDGE_TRUE_VALUE;
4851 }
4852 else
4853 {
4854 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4855
4856 ne = single_succ_edge (l1_bb);
4857 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4858
4859 }
4860 ne->flags = EDGE_FALSE_VALUE;
4861 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4862 ne->probability = e->probability.invert ();
4863
4864 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4865 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4866
4867 if (simt_maxlane)
4868 {
4869 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4870 NULL_TREE, NULL_TREE);
4871 gsi = gsi_last_bb (entry_bb);
4872 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4873 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4874 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4875 FALLTHRU_EDGE (entry_bb)->probability
4876 = profile_probability::guessed_always ().apply_scale (7, 8);
4877 BRANCH_EDGE (entry_bb)->probability
4878 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4879 l2_dom_bb = entry_bb;
4880 }
4881 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4882
4883 if (!broken_loop)
4884 {
4885 struct loop *loop = alloc_loop ();
4886 loop->header = l1_bb;
4887 loop->latch = cont_bb;
4888 add_loop (loop, l1_bb->loop_father);
4889 loop->safelen = safelen_int;
4890 if (simduid)
4891 {
4892 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4893 cfun->has_simduid_loops = true;
4894 }
4895 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4896 the loop. */
4897 if ((flag_tree_loop_vectorize
26d476cd 4898 || !global_options_set.x_flag_tree_loop_vectorize)
4899 && flag_tree_loop_optimize
4900 && loop->safelen > 1)
4901 {
4902 loop->force_vectorize = true;
4903 cfun->has_force_vectorize_loops = true;
4904 }
4905 }
4906 else if (simduid)
4907 cfun->has_simduid_loops = true;
4908}
4909
4910/* After gimplification, a taskloop construct is represented as
4911 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4912 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4913 which should just compute all the loop temporaries needed
4914 by GIMPLE_OMP_TASK. */
4915
4916static void
4917expand_omp_taskloop_for_outer (struct omp_region *region,
4918 struct omp_for_data *fd,
4919 gimple *inner_stmt)
4920{
4921 tree type, bias = NULL_TREE;
4922 basic_block entry_bb, cont_bb, exit_bb;
4923 gimple_stmt_iterator gsi;
4924 gassign *assign_stmt;
4925 tree *counts = NULL;
4926 int i;
4927
4928 gcc_assert (inner_stmt);
4929 gcc_assert (region->cont);
4930 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4931 && gimple_omp_task_taskloop_p (inner_stmt));
4932 type = TREE_TYPE (fd->loop.v);
4933
4934 /* See if we need to bias by LLONG_MIN. */
4935 if (fd->iter_type == long_long_unsigned_type_node
4936 && TREE_CODE (type) == INTEGER_TYPE
4937 && !TYPE_UNSIGNED (type))
4938 {
4939 tree n1, n2;
4940
4941 if (fd->loop.cond_code == LT_EXPR)
4942 {
4943 n1 = fd->loop.n1;
4944 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4945 }
4946 else
4947 {
4948 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4949 n2 = fd->loop.n1;
4950 }
4951 if (TREE_CODE (n1) != INTEGER_CST
4952 || TREE_CODE (n2) != INTEGER_CST
4953 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4954 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4955 }
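 /* The bias, TYPE_MIN_VALUE of the signed type reinterpreted in the
    unsigned iterator type, is added to both bounds below so that a signed
    range whose bounds differ in sign still maps onto the unsigned
    iteration space used by GOMP_taskloop_ull.  */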
4956
4957 entry_bb = region->entry;
4958 cont_bb = region->cont;
4959 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4960 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4961 exit_bb = region->exit;
4962
4963 gsi = gsi_last_bb (entry_bb);
4964 gimple *for_stmt = gsi_stmt (gsi);
4965 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4966 if (fd->collapse > 1)
4967 {
4968 int first_zero_iter = -1, dummy = -1;
4969 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4970
4971 counts = XALLOCAVEC (tree, fd->collapse);
4972 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4973 zero_iter_bb, first_zero_iter,
4974 dummy_bb, dummy, l2_dom_bb);
4975
4976 if (zero_iter_bb)
4977 {
4978 /* Some counts[i] vars might be uninitialized if
4979 some loop has zero iterations. But the body shouldn't
4980 be executed in that case, so just avoid uninit warnings. */
4981 for (i = first_zero_iter; i < fd->collapse; i++)
4982 if (SSA_VAR_P (counts[i]))
4983 TREE_NO_WARNING (counts[i]) = 1;
4984 gsi_prev (&gsi);
4985 edge e = split_block (entry_bb, gsi_stmt (gsi));
4986 entry_bb = e->dest;
4987 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4988 gsi = gsi_last_bb (entry_bb);
4989 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4990 get_immediate_dominator (CDI_DOMINATORS,
4991 zero_iter_bb));
4992 }
4993 }
4994
4995 tree t0, t1;
4996 t1 = fd->loop.n2;
4997 t0 = fd->loop.n1;
4998 if (POINTER_TYPE_P (TREE_TYPE (t0))
4999 && TYPE_PRECISION (TREE_TYPE (t0))
5000 != TYPE_PRECISION (fd->iter_type))
5001 {
5002 /* Avoid casting pointers to integer of a different size. */
5003 tree itype = signed_type_for (type);
5004 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5005 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5006 }
5007 else
5008 {
5009 t1 = fold_convert (fd->iter_type, t1);
5010 t0 = fold_convert (fd->iter_type, t0);
5011 }
5012 if (bias)
5013 {
5014 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5015 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5016 }
5017
5018 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5019 OMP_CLAUSE__LOOPTEMP_);
5020 gcc_assert (innerc);
5021 tree startvar = OMP_CLAUSE_DECL (innerc);
5022 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5023 gcc_assert (innerc);
5024 tree endvar = OMP_CLAUSE_DECL (innerc);
5025 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5026 {
5027 gcc_assert (innerc);
5028 for (i = 1; i < fd->collapse; i++)
5029 {
5030 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5031 OMP_CLAUSE__LOOPTEMP_);
5032 gcc_assert (innerc);
5033 }
5034 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5035 OMP_CLAUSE__LOOPTEMP_);
5036 if (innerc)
5037 {
5038 /* If needed (inner taskloop has lastprivate clause), propagate
5039 down the total number of iterations. */
5040 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5041 NULL_TREE, false,
5042 GSI_CONTINUE_LINKING);
5043 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5044 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5045 }
5046 }
5047
5048 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5049 GSI_CONTINUE_LINKING);
5050 assign_stmt = gimple_build_assign (startvar, t0);
5051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5052
5053 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5054 GSI_CONTINUE_LINKING);
5055 assign_stmt = gimple_build_assign (endvar, t1);
5056 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5057 if (fd->collapse > 1)
5058 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5059
5060 /* Remove the GIMPLE_OMP_FOR statement. */
5061 gsi = gsi_for_stmt (for_stmt);
5062 gsi_remove (&gsi, true);
5063
5064 gsi = gsi_last_bb (cont_bb);
5065 gsi_remove (&gsi, true);
5066
5067 gsi = gsi_last_bb (exit_bb);
5068 gsi_remove (&gsi, true);
5069
5070 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5071 remove_edge (BRANCH_EDGE (entry_bb));
5072 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5073 remove_edge (BRANCH_EDGE (cont_bb));
5074 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5075 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5076 recompute_dominator (CDI_DOMINATORS, region->entry));
5077}
5078
5079/* Taskloop construct is represented after gimplification with
5080 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5081 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5082 GOMP_taskloop{,_ull} function arranges for each task to be given just
5083 a single range of iterations. */
5084
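/* Illustrative sketch (the user-level source below is hypothetical):

       #pragma omp taskloop
       for (long i = a; i < b; i += s)
         body (i);

   is represented as an outer GIMPLE_OMP_FOR plus GIMPLE_OMP_TASK that
   end up calling GOMP_taskloop (or GOMP_taskloop_ull), and an inner
   GIMPLE_OMP_FOR that each spawned task runs over the single sub-range
   it is handed.  The function below expands that inner loop; its
   per-task bounds arrive through the _looptemp_ clauses inspected
   further down.  */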
5085static void
5086expand_omp_taskloop_for_inner (struct omp_region *region,
5087 struct omp_for_data *fd,
5088 gimple *inner_stmt)
5089{
5090 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5091 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5092 basic_block fin_bb;
5093 gimple_stmt_iterator gsi;
5094 edge ep;
5095 bool broken_loop = region->cont == NULL;
5096 tree *counts = NULL;
5097 tree n1, n2, step;
5098
5099 itype = type = TREE_TYPE (fd->loop.v);
5100 if (POINTER_TYPE_P (type))
5101 itype = signed_type_for (type);
5102
5103 /* See if we need to bias by LLONG_MIN. */
5104 if (fd->iter_type == long_long_unsigned_type_node
5105 && TREE_CODE (type) == INTEGER_TYPE
5106 && !TYPE_UNSIGNED (type))
5107 {
5108 tree n1, n2;
5109
5110 if (fd->loop.cond_code == LT_EXPR)
5111 {
5112 n1 = fd->loop.n1;
5113 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5114 }
5115 else
5116 {
5117 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5118 n2 = fd->loop.n1;
5119 }
5120 if (TREE_CODE (n1) != INTEGER_CST
5121 || TREE_CODE (n2) != INTEGER_CST
5122 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5123 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5124 }
5125
5126 entry_bb = region->entry;
5127 cont_bb = region->cont;
5128 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5129 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5130 gcc_assert (broken_loop
5131 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5132 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5133 if (!broken_loop)
5134 {
5135 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5136 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5137 }
5138 exit_bb = region->exit;
5139
5140 /* Iteration space partitioning goes in ENTRY_BB. */
5141 gsi = gsi_last_bb (entry_bb);
5142 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5143
5144 if (fd->collapse > 1)
5145 {
5146 int first_zero_iter = -1, dummy = -1;
5147 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5148
5149 counts = XALLOCAVEC (tree, fd->collapse);
5150 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5151 fin_bb, first_zero_iter,
5152 dummy_bb, dummy, l2_dom_bb);
5153 t = NULL_TREE;
5154 }
5155 else
5156 t = integer_one_node;
5157
5158 step = fd->loop.step;
5159 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5160 OMP_CLAUSE__LOOPTEMP_);
5161 gcc_assert (innerc);
5162 n1 = OMP_CLAUSE_DECL (innerc);
5163 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5164 gcc_assert (innerc);
5165 n2 = OMP_CLAUSE_DECL (innerc);
5166 if (bias)
5167 {
5168 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5169 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5170 }
5171 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5172 true, NULL_TREE, true, GSI_SAME_STMT);
5173 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5174 true, NULL_TREE, true, GSI_SAME_STMT);
5175 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5176 true, NULL_TREE, true, GSI_SAME_STMT);
5177
5178 tree startvar = fd->loop.v;
5179 tree endvar = NULL_TREE;
5180
5181 if (gimple_omp_for_combined_p (fd->for_stmt))
5182 {
5183 tree clauses = gimple_omp_for_clauses (inner_stmt);
5184 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5185 gcc_assert (innerc);
5186 startvar = OMP_CLAUSE_DECL (innerc);
5187 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5188 OMP_CLAUSE__LOOPTEMP_);
5189 gcc_assert (innerc);
5190 endvar = OMP_CLAUSE_DECL (innerc);
5191 }
5192 t = fold_convert (TREE_TYPE (startvar), n1);
5193 t = force_gimple_operand_gsi (&gsi, t,
5194 DECL_P (startvar)
5195 && TREE_ADDRESSABLE (startvar),
5196 NULL_TREE, false, GSI_CONTINUE_LINKING);
5197 gimple *assign_stmt = gimple_build_assign (startvar, t);
5198 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5199
5200 t = fold_convert (TREE_TYPE (startvar), n2);
5201 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5202 false, GSI_CONTINUE_LINKING);
5203 if (endvar)
5204 {
5205 assign_stmt = gimple_build_assign (endvar, e);
5206 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5207 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5208 assign_stmt = gimple_build_assign (fd->loop.v, e);
5209 else
5210 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5211 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5212 }
5213 if (fd->collapse > 1)
5214 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5215
5216 if (!broken_loop)
5217 {
5218 /* The code controlling the sequential loop replaces the
5219 GIMPLE_OMP_CONTINUE. */
5220 gsi = gsi_last_bb (cont_bb);
5221 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5222 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5223 vmain = gimple_omp_continue_control_use (cont_stmt);
5224 vback = gimple_omp_continue_control_def (cont_stmt);
5225
5226 if (!gimple_omp_for_combined_p (fd->for_stmt))
5227 {
5228 if (POINTER_TYPE_P (type))
5229 t = fold_build_pointer_plus (vmain, step);
5230 else
5231 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5232 t = force_gimple_operand_gsi (&gsi, t,
5233 DECL_P (vback)
5234 && TREE_ADDRESSABLE (vback),
5235 NULL_TREE, true, GSI_SAME_STMT);
5236 assign_stmt = gimple_build_assign (vback, t);
5237 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5238
5239 t = build2 (fd->loop.cond_code, boolean_type_node,
5240 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5241 ? t : vback, e);
5242 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5243 }
5244
5245 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5246 gsi_remove (&gsi, true);
5247
5248 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5249 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5250 }
5251
5252 /* Remove the GIMPLE_OMP_FOR statement. */
5253 gsi = gsi_for_stmt (fd->for_stmt);
5254 gsi_remove (&gsi, true);
5255
5256 /* Remove the GIMPLE_OMP_RETURN statement. */
5257 gsi = gsi_last_bb (exit_bb);
5258 gsi_remove (&gsi, true);
5259
5260 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5261 if (!broken_loop)
5262 remove_edge (BRANCH_EDGE (entry_bb));
5263 else
5264 {
5265 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5266 region->outer->cont = NULL;
5267 }
5268
5269 /* Connect all the blocks. */
5270 if (!broken_loop)
5271 {
5272 ep = find_edge (cont_bb, body_bb);
5273 if (gimple_omp_for_combined_p (fd->for_stmt))
5274 {
5275 remove_edge (ep);
5276 ep = NULL;
5277 }
5278 else if (fd->collapse > 1)
5279 {
5280 remove_edge (ep);
5281 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5282 }
5283 else
5284 ep->flags = EDGE_TRUE_VALUE;
5285 find_edge (cont_bb, fin_bb)->flags
5286 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5287 }
5288
5289 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5290 recompute_dominator (CDI_DOMINATORS, body_bb));
5291 if (!broken_loop)
5292 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5293 recompute_dominator (CDI_DOMINATORS, fin_bb));
5294
5295 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5296 {
5297 struct loop *loop = alloc_loop ();
5298 loop->header = body_bb;
5299 if (collapse_bb == NULL)
5300 loop->latch = cont_bb;
5301 add_loop (loop, body_bb->loop_father);
5302 }
5303}
5304
5305/* A subroutine of expand_omp_for. Generate code for an OpenACC
5306 partitioned loop. The lowering here is abstracted, in that the
5307 loop parameters are passed through internal functions, which are
5308 further lowered by oacc_device_lower, once we get to the target
5309 compiler. The loop is of the form:
5310
5311 for (V = B; V LTGT E; V += S) {BODY}
5312
5313 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5314 (constant 0 for no chunking) and we will have a GWV partitioning
5315 mask, specifying dimensions over which the loop is to be
5316 partitioned (see note below). We generate code that looks like
5317 (this ignores tiling):
5318
5319 <entry_bb> [incoming FALL->body, BRANCH->exit]
5320 typedef signedintify (typeof (V)) T; // underlying signed integral type
5321 T range = E - B;
5322 T chunk_no = 0;
5323 T DIR = LTGT == '<' ? +1 : -1;
5324 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5325 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5326
5327 <head_bb> [created by splitting end of entry_bb]
5328 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5329 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5330 if (!(offset LTGT bound)) goto bottom_bb;
5331
5332 <body_bb> [incoming]
5333 V = B + offset;
5334 {BODY}
5335
5336 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5337 offset += step;
5338 if (offset LTGT bound) goto body_bb; [*]
5339
5340 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5341 chunk_no++;
5342 if (chunk_no < chunk_max) goto head_bb;
5343
5344 <exit_bb> [incoming]
5345 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5346
5347 [*] Needed if V live at end of loop. */
5348
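/* Illustrative sketch (hypothetical user source): a loop such as

       #pragma acc loop gang
       for (int i = 0; i < n; i++)
         body (i);

   reaches this routine with B = 0, E = n, S = 1 and cond_code LT_EXPR.
   The IFN_GOACC_LOOP_{CHUNKS,STEP,OFFSET,BOUND} internal calls emitted
   below are placeholders at this stage; oacc_device_lower later
   replaces them with target-specific code once the gang/worker/vector
   (GWV) partitioning of the offload device is known.  */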
5349static void
5350expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5351{
5352 tree v = fd->loop.v;
5353 enum tree_code cond_code = fd->loop.cond_code;
5354 enum tree_code plus_code = PLUS_EXPR;
5355
5356 tree chunk_size = integer_minus_one_node;
5357 tree gwv = integer_zero_node;
5358 tree iter_type = TREE_TYPE (v);
5359 tree diff_type = iter_type;
5360 tree plus_type = iter_type;
5361 struct oacc_collapse *counts = NULL;
5362
5363 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5364 == GF_OMP_FOR_KIND_OACC_LOOP);
5365 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5366 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5367
5368 if (POINTER_TYPE_P (iter_type))
5369 {
5370 plus_code = POINTER_PLUS_EXPR;
5371 plus_type = sizetype;
5372 }
5373 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5374 diff_type = signed_type_for (diff_type);
5375 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5376 diff_type = integer_type_node;
5377
5378 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5379 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5380 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5381 basic_block bottom_bb = NULL;
5382
5383 /* entry_bb has two successors; the branch edge is to the exit
5384 block, fallthrough edge to body. */
5385 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5386 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5387
5388 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5389 body_bb, or to a block whose only successor is the body_bb. Its
5390 fallthrough successor is the final block (same as the branch
5391 successor of the entry_bb). */
5392 if (cont_bb)
5393 {
5394 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5395 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5396
5397 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5398 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5399 }
5400 else
5401 gcc_assert (!gimple_in_ssa_p (cfun));
5402
5403 /* The exit block only has entry_bb and cont_bb as predecessors. */
5404 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5405
5406 tree chunk_no;
5407 tree chunk_max = NULL_TREE;
5408 tree bound, offset;
5409 tree step = create_tmp_var (diff_type, ".step");
5410 bool up = cond_code == LT_EXPR;
5411 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5412 bool chunking = !gimple_in_ssa_p (cfun);
5413 bool negating;
5414
5415 /* Tiling vars. */
5416 tree tile_size = NULL_TREE;
5417 tree element_s = NULL_TREE;
5418 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5419 basic_block elem_body_bb = NULL;
5420 basic_block elem_cont_bb = NULL;
5421
5422 /* SSA instances. */
5423 tree offset_incr = NULL_TREE;
5424 tree offset_init = NULL_TREE;
5425
5426 gimple_stmt_iterator gsi;
5427 gassign *ass;
5428 gcall *call;
5429 gimple *stmt;
5430 tree expr;
5431 location_t loc;
5432 edge split, be, fte;
5433
5434 /* Split the end of entry_bb to create head_bb. */
5435 split = split_block (entry_bb, last_stmt (entry_bb));
5436 basic_block head_bb = split->dest;
5437 entry_bb = split->src;
5438
5439 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5440 gsi = gsi_last_bb (entry_bb);
5441 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5442 loc = gimple_location (for_stmt);
5443
5444 if (gimple_in_ssa_p (cfun))
5445 {
5446 offset_init = gimple_omp_for_index (for_stmt, 0);
5447 gcc_assert (integer_zerop (fd->loop.n1));
5448 /* The SSA parallelizer does gang parallelism. */
5449 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5450 }
5451
5452 if (fd->collapse > 1 || fd->tiling)
5453 {
5454 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5455 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5456 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5457 TREE_TYPE (fd->loop.n2), loc);
5458
5459 if (SSA_VAR_P (fd->loop.n2))
5460 {
5461 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5462 true, GSI_SAME_STMT);
5463 ass = gimple_build_assign (fd->loop.n2, total);
5464 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5465 }
5466 }
5467
5468 tree b = fd->loop.n1;
5469 tree e = fd->loop.n2;
5470 tree s = fd->loop.step;
5471
5472 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5473 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5474
5475 /* Convert the step, avoiding possible unsigned->signed overflow. */
5476 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5477 if (negating)
5478 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5479 s = fold_convert (diff_type, s);
5480 if (negating)
5481 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5482 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5483
5484 if (!chunking)
5485 chunk_size = integer_zero_node;
5486 expr = fold_convert (diff_type, chunk_size);
5487 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5488 NULL_TREE, true, GSI_SAME_STMT);
5489
5490 if (fd->tiling)
5491 {
5492 /* Determine the tile size and element step,
5493 modify the outer loop step size. */
5494 tile_size = create_tmp_var (diff_type, ".tile_size");
5495 expr = build_int_cst (diff_type, 1);
5496 for (int ix = 0; ix < fd->collapse; ix++)
5497 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5498 expr = force_gimple_operand_gsi (&gsi, expr, true,
5499 NULL_TREE, true, GSI_SAME_STMT);
5500 ass = gimple_build_assign (tile_size, expr);
5501 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5502
5503 element_s = create_tmp_var (diff_type, ".element_s");
5504 ass = gimple_build_assign (element_s, s);
5505 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5506
5507 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5508 s = force_gimple_operand_gsi (&gsi, expr, true,
5509 NULL_TREE, true, GSI_SAME_STMT);
5510 }
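/* Illustrative numbers (hypothetical): for a collapse(2) loop tiled as
   tile(2,3), the block above computes .tile_size = 2 * 3 = 6, saves the
   original step in .element_s, and scales the outer step by the tile
   size, so each outer iteration advances by one whole tile while the
   element loop built further down walks the (up to) six iterations
   inside that tile.  */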
5511
5512 /* Determine the range, avoiding possible unsigned->signed overflow. */
5513 negating = !up && TYPE_UNSIGNED (iter_type);
5514 expr = fold_build2 (MINUS_EXPR, plus_type,
5515 fold_convert (plus_type, negating ? b : e),
5516 fold_convert (plus_type, negating ? e : b));
5517 expr = fold_convert (diff_type, expr);
5518 if (negating)
5519 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5520 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5521 NULL_TREE, true, GSI_SAME_STMT);
5522
5523 chunk_no = build_int_cst (diff_type, 0);
5524 if (chunking)
5525 {
5526 gcc_assert (!gimple_in_ssa_p (cfun));
5527
5528 expr = chunk_no;
5529 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5530 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5531
5532 ass = gimple_build_assign (chunk_no, expr);
5533 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5534
5535 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5536 build_int_cst (integer_type_node,
5537 IFN_GOACC_LOOP_CHUNKS),
5538 dir, range, s, chunk_size, gwv);
5539 gimple_call_set_lhs (call, chunk_max);
5540 gimple_set_location (call, loc);
5541 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5542 }
5543 else
5544 chunk_size = chunk_no;
5545
5546 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5547 build_int_cst (integer_type_node,
5548 IFN_GOACC_LOOP_STEP),
5549 dir, range, s, chunk_size, gwv);
5550 gimple_call_set_lhs (call, step);
5551 gimple_set_location (call, loc);
5552 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5553
5554 /* Remove the GIMPLE_OMP_FOR. */
5555 gsi_remove (&gsi, true);
5556
5557 /* Fixup edges from head_bb. */
5558 be = BRANCH_EDGE (head_bb);
5559 fte = FALLTHRU_EDGE (head_bb);
5560 be->flags |= EDGE_FALSE_VALUE;
5561 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5562
5563 basic_block body_bb = fte->dest;
5564
5565 if (gimple_in_ssa_p (cfun))
5566 {
5567 gsi = gsi_last_bb (cont_bb);
5568 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5569
5570 offset = gimple_omp_continue_control_use (cont_stmt);
5571 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5572 }
5573 else
5574 {
5575 offset = create_tmp_var (diff_type, ".offset");
5576 offset_init = offset_incr = offset;
5577 }
5578 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5579
5580 /* Loop offset & bound go into head_bb. */
5581 gsi = gsi_start_bb (head_bb);
5582
5583 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5584 build_int_cst (integer_type_node,
5585 IFN_GOACC_LOOP_OFFSET),
5586 dir, range, s,
5587 chunk_size, gwv, chunk_no);
5588 gimple_call_set_lhs (call, offset_init);
5589 gimple_set_location (call, loc);
5590 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5591
5592 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5593 build_int_cst (integer_type_node,
5594 IFN_GOACC_LOOP_BOUND),
5595 dir, range, s,
5596 chunk_size, gwv, offset_init);
5597 gimple_call_set_lhs (call, bound);
5598 gimple_set_location (call, loc);
5599 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5600
5601 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5602 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5603 GSI_CONTINUE_LINKING);
5604
5605 /* V assignment goes into body_bb. */
5606 if (!gimple_in_ssa_p (cfun))
5607 {
5608 gsi = gsi_start_bb (body_bb);
5609
5610 expr = build2 (plus_code, iter_type, b,
5611 fold_convert (plus_type, offset));
5612 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5613 true, GSI_SAME_STMT);
5614 ass = gimple_build_assign (v, expr);
5615 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5616
5617 if (fd->collapse > 1 || fd->tiling)
5618 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5619
5620 if (fd->tiling)
5621 {
5622 /* Determine the range of the element loop -- usually simply
5623 the tile_size, but could be smaller if the final
5624 iteration of the outer loop is a partial tile. */
5625 tree e_range = create_tmp_var (diff_type, ".e_range");
5626
5627 expr = build2 (MIN_EXPR, diff_type,
5628 build2 (MINUS_EXPR, diff_type, bound, offset),
5629 build2 (MULT_EXPR, diff_type, tile_size,
5630 element_s));
5631 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5632 true, GSI_SAME_STMT);
5633 ass = gimple_build_assign (e_range, expr);
5634 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5635
5636 /* Determine bound, offset & step of inner loop. */
5637 e_bound = create_tmp_var (diff_type, ".e_bound");
5638 e_offset = create_tmp_var (diff_type, ".e_offset");
5639 e_step = create_tmp_var (diff_type, ".e_step");
5640
5641 /* Mark these as element loops. */
5642 tree t, e_gwv = integer_minus_one_node;
5643 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5644
5645 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5646 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5647 element_s, chunk, e_gwv, chunk);
5648 gimple_call_set_lhs (call, e_offset);
5649 gimple_set_location (call, loc);
5650 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5651
5652 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5653 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5654 element_s, chunk, e_gwv, e_offset);
5655 gimple_call_set_lhs (call, e_bound);
5656 gimple_set_location (call, loc);
5657 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5658
5659 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5660 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5661 element_s, chunk, e_gwv);
5662 gimple_call_set_lhs (call, e_step);
5663 gimple_set_location (call, loc);
5664 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5665
5666 /* Add test and split block. */
5667 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5668 stmt = gimple_build_cond_empty (expr);
5669 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5670 split = split_block (body_bb, stmt);
5671 elem_body_bb = split->dest;
5672 if (cont_bb == body_bb)
5673 cont_bb = elem_body_bb;
5674 body_bb = split->src;
5675
5676 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5677
5678 /* Initialize the user's loop vars. */
5679 gsi = gsi_start_bb (elem_body_bb);
5680 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5681 }
5682 }
5683
5684 /* Loop increment goes into cont_bb. If this is not a loop, we
5685 will have spawned threads as if it was, and each one will
5686 execute one iteration. The specification is not explicit about
5687 whether such constructs are ill-formed or not, and they can
5688 occur, especially when noreturn routines are involved. */
5689 if (cont_bb)
5690 {
5691 gsi = gsi_last_bb (cont_bb);
5692 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5693 loc = gimple_location (cont_stmt);
5694
5695 if (fd->tiling)
5696 {
5697 /* Insert element loop increment and test. */
5698 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5699 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5700 true, GSI_SAME_STMT);
5701 ass = gimple_build_assign (e_offset, expr);
5702 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5703 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5704
5705 stmt = gimple_build_cond_empty (expr);
5706 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5707 split = split_block (cont_bb, stmt);
5708 elem_cont_bb = split->src;
5709 cont_bb = split->dest;
5710
5711 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5712 split->probability = profile_probability::unlikely ().guessed ();
5713 edge latch_edge
5714 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5715 latch_edge->probability = profile_probability::likely ().guessed ();
5716
5717 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5718 skip_edge->probability = profile_probability::unlikely ().guessed ();
5719 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5720 loop_entry_edge->probability
5721 = profile_probability::likely ().guessed ();
5722
5723 gsi = gsi_for_stmt (cont_stmt);
5724 }
5725
5726 /* Increment offset. */
5727 if (gimple_in_ssa_p (cfun))
5728 expr = build2 (plus_code, iter_type, offset,
5729 fold_convert (plus_type, step));
5730 else
5731 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5732 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5733 true, GSI_SAME_STMT);
5734 ass = gimple_build_assign (offset_incr, expr);
5735 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5736 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5737 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5738
5739 /* Remove the GIMPLE_OMP_CONTINUE. */
5740 gsi_remove (&gsi, true);
5741
5742 /* Fixup edges from cont_bb. */
5743 be = BRANCH_EDGE (cont_bb);
5744 fte = FALLTHRU_EDGE (cont_bb);
5745 be->flags |= EDGE_TRUE_VALUE;
5746 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5747
5748 if (chunking)
5749 {
5750 /* Split the beginning of exit_bb to make bottom_bb. We
5751 need to insert a nop at the start, because splitting is
5752 after a stmt, not before. */
5753 gsi = gsi_start_bb (exit_bb);
5754 stmt = gimple_build_nop ();
5755 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5756 split = split_block (exit_bb, stmt);
5757 bottom_bb = split->src;
5758 exit_bb = split->dest;
5759 gsi = gsi_last_bb (bottom_bb);
5760
5761 /* Chunk increment and test goes into bottom_bb. */
5762 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5763 build_int_cst (diff_type, 1));
5764 ass = gimple_build_assign (chunk_no, expr);
5765 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5766
5767 /* Chunk test at end of bottom_bb. */
5768 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5769 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5770 GSI_CONTINUE_LINKING);
5771
5772 /* Fixup edges from bottom_bb. */
5773 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5774 split->probability = profile_probability::unlikely ().guessed ();
5775 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5776 latch_edge->probability = profile_probability::likely ().guessed ();
5777 }
5778 }
5779
5780 gsi = gsi_last_bb (exit_bb);
5781 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5782 loc = gimple_location (gsi_stmt (gsi));
5783
5784 if (!gimple_in_ssa_p (cfun))
5785 {
5786 /* Insert the final value of V, in case it is live. This is the
5787 value for the only thread that survives past the join. */
5788 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5789 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5790 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5791 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5792 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5793 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5794 true, GSI_SAME_STMT);
5795 ass = gimple_build_assign (v, expr);
5796 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5797 }
5798
5799 /* Remove the OMP_RETURN. */
5800 gsi_remove (&gsi, true);
5801
5802 if (cont_bb)
5803 {
5804 /* We now have one, two or three nested loops. Update the loop
5805 structures. */
5806 struct loop *parent = entry_bb->loop_father;
5807 struct loop *body = body_bb->loop_father;
5808
5809 if (chunking)
5810 {
5811 struct loop *chunk_loop = alloc_loop ();
5812 chunk_loop->header = head_bb;
5813 chunk_loop->latch = bottom_bb;
5814 add_loop (chunk_loop, parent);
5815 parent = chunk_loop;
5816 }
5817 else if (parent != body)
5818 {
5819 gcc_assert (body->header == body_bb);
5820 gcc_assert (body->latch == cont_bb
5821 || single_pred (body->latch) == cont_bb);
5822 parent = NULL;
5823 }
5824
5825 if (parent)
5826 {
5827 struct loop *body_loop = alloc_loop ();
5828 body_loop->header = body_bb;
5829 body_loop->latch = cont_bb;
5830 add_loop (body_loop, parent);
5831
5832 if (fd->tiling)
5833 {
5834 /* Insert tiling's element loop. */
5835 struct loop *inner_loop = alloc_loop ();
5836 inner_loop->header = elem_body_bb;
5837 inner_loop->latch = elem_cont_bb;
5838 add_loop (inner_loop, body_loop);
5839 }
5840 }
5841 }
5842}
5843
5844/* Expand the OMP loop defined by REGION. */
5845
5846static void
5847expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5848{
5849 struct omp_for_data fd;
5850 struct omp_for_data_loop *loops;
5851
5852 loops
5853 = (struct omp_for_data_loop *)
5854 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5855 * sizeof (struct omp_for_data_loop));
5856 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5857 &fd, loops);
5858 region->sched_kind = fd.sched_kind;
5859 region->sched_modifiers = fd.sched_modifiers;
5860
5861 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5862 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5863 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5864 if (region->cont)
5865 {
5866 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5867 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5868 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5869 }
5870 else
5871 /* If there isn't a continue then this is a degenerate case where
5872 the introduction of abnormal edges during lowering will prevent
5873 original loops from being detected. Fix that up. */
5874 loops_state_set (LOOPS_NEED_FIXUP);
5875
5876 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5877 expand_omp_simd (region, &fd);
5878 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5879 expand_cilk_for (region, &fd);
5880 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5881 {
5882 gcc_assert (!inner_stmt);
5883 expand_oacc_for (region, &fd);
5884 }
5885 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5886 {
5887 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5888 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5889 else
5890 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5891 }
5892 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5893 && !fd.have_ordered)
5894 {
5895 if (fd.chunk_size == NULL)
5896 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5897 else
5898 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5899 }
5900 else
5901 {
5902 int fn_index, start_ix, next_ix;
5903
5904 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5905 == GF_OMP_FOR_KIND_FOR);
5906 if (fd.chunk_size == NULL
5907 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5908 fd.chunk_size = integer_zero_node;
5909 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
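/* The switch below picks an index into the GOMP_loop_*_start/_next
   builtin families: static, dynamic and guided map directly from
   fd.sched_kind, runtime and the nonmonotonic dynamic/guided variants
   take the higher indices, a plain ordered clause offsets the index
   into the ordered variants of the same family, doacross loops use the
   GOMP_loop_doacross_* entry points, and loops whose iteration type is
   unsigned long long are redirected to the _ull counterparts.  */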
5910 switch (fd.sched_kind)
5911 {
5912 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5913 fn_index = 3;
5914 break;
5915 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5916 case OMP_CLAUSE_SCHEDULE_GUIDED:
5917 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5918 && !fd.ordered
5919 && !fd.have_ordered)
5920 {
5921 fn_index = 3 + fd.sched_kind;
5922 break;
5923 }
5924 /* FALLTHRU */
5925 default:
5926 fn_index = fd.sched_kind;
5927 break;
5928 }
5929 if (!fd.ordered)
5930 fn_index += fd.have_ordered * 6;
5931 if (fd.ordered)
5932 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5933 else
5934 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5935 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5936 if (fd.iter_type == long_long_unsigned_type_node)
5937 {
5938 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5939 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5940 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5941 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5942 }
5943 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5944 (enum built_in_function) next_ix, inner_stmt);
5945 }
5946
5947 if (gimple_in_ssa_p (cfun))
5948 update_ssa (TODO_update_ssa_only_virtuals);
5949}
5950
5951/* Expand code for an OpenMP sections directive. In pseudo code, we generate
5952
5953 v = GOMP_sections_start (n);
5954 L0:
5955 switch (v)
5956 {
5957 case 0:
5958 goto L2;
5959 case 1:
5960 section 1;
5961 goto L1;
5962 case 2:
5963 ...
5964 case n:
5965 ...
5966 default:
5967 abort ();
5968 }
5969 L1:
5970 v = GOMP_sections_next ();
5971 goto L0;
5972 L2:
5973 reduction;
5974
5975 If this is a combined parallel sections, replace the call to
5976 GOMP_sections_start with call to GOMP_sections_next. */
5977
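/* Illustrative sketch (hypothetical user source):

       #pragma omp sections
       {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
       }

   roughly expands to the switch shown above with cases 1 and 2 jumping
   to the foo and bar blocks, case 0 leaving the construct and the
   default case trapping; a thread keeps asking GOMP_sections_next for
   the next section number to run until it is handed 0.  */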
5978static void
5979expand_omp_sections (struct omp_region *region)
5980{
5981 tree t, u, vin = NULL, vmain, vnext, l2;
5982 unsigned len;
5983 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5984 gimple_stmt_iterator si, switch_si;
5985 gomp_sections *sections_stmt;
5986 gimple *stmt;
5987 gomp_continue *cont;
5988 edge_iterator ei;
5989 edge e;
5990 struct omp_region *inner;
5991 unsigned i, casei;
5992 bool exit_reachable = region->cont != NULL;
5993
5994 gcc_assert (region->exit != NULL);
5995 entry_bb = region->entry;
5996 l0_bb = single_succ (entry_bb);
5997 l1_bb = region->cont;
5998 l2_bb = region->exit;
5999 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6000 l2 = gimple_block_label (l2_bb);
6001 else
6002 {
6003 /* This can happen if there are reductions. */
6004 len = EDGE_COUNT (l0_bb->succs);
6005 gcc_assert (len > 0);
6006 e = EDGE_SUCC (l0_bb, len - 1);
6007 si = gsi_last_bb (e->dest);
6008 l2 = NULL_TREE;
6009 if (gsi_end_p (si)
6010 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6011 l2 = gimple_block_label (e->dest);
6012 else
6013 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6014 {
6015 si = gsi_last_bb (e->dest);
6016 if (gsi_end_p (si)
6017 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6018 {
6019 l2 = gimple_block_label (e->dest);
6020 break;
6021 }
6022 }
6023 }
6024 if (exit_reachable)
6025 default_bb = create_empty_bb (l1_bb->prev_bb);
6026 else
6027 default_bb = create_empty_bb (l0_bb);
6028
6029 /* We will build a switch() with enough cases for all the
6030 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6031 and a default case to abort if something goes wrong. */
6032 len = EDGE_COUNT (l0_bb->succs);
6033
6034 /* Use vec::quick_push on label_vec throughout, since we know the size
6035 in advance. */
6036 auto_vec<tree> label_vec (len);
6037
6038 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6039 GIMPLE_OMP_SECTIONS statement. */
6040 si = gsi_last_bb (entry_bb);
6041 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6042 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6043 vin = gimple_omp_sections_control (sections_stmt);
6044 if (!is_combined_parallel (region))
6045 {
6046 /* If we are not inside a combined parallel+sections region,
6047 call GOMP_sections_start. */
6048 t = build_int_cst (unsigned_type_node, len - 1);
6049 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6050 stmt = gimple_build_call (u, 1, t);
6051 }
6052 else
6053 {
6054 /* Otherwise, call GOMP_sections_next. */
6055 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6056 stmt = gimple_build_call (u, 0);
6057 }
6058 gimple_call_set_lhs (stmt, vin);
6059 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6060 gsi_remove (&si, true);
6061
6062 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6063 L0_BB. */
6064 switch_si = gsi_last_bb (l0_bb);
6065 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6066 if (exit_reachable)
6067 {
6068 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6069 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6070 vmain = gimple_omp_continue_control_use (cont);
6071 vnext = gimple_omp_continue_control_def (cont);
6072 }
6073 else
6074 {
6075 vmain = vin;
6076 vnext = NULL_TREE;
6077 }
6078
6079 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6080 label_vec.quick_push (t);
6081 i = 1;
6082
6083 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6084 for (inner = region->inner, casei = 1;
6085 inner;
6086 inner = inner->next, i++, casei++)
6087 {
6088 basic_block s_entry_bb, s_exit_bb;
6089
6090 /* Skip optional reduction region. */
6091 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6092 {
6093 --i;
6094 --casei;
6095 continue;
6096 }
6097
6098 s_entry_bb = inner->entry;
6099 s_exit_bb = inner->exit;
6100
6101 t = gimple_block_label (s_entry_bb);
6102 u = build_int_cst (unsigned_type_node, casei);
6103 u = build_case_label (u, NULL, t);
6104 label_vec.quick_push (u);
6105
6106 si = gsi_last_bb (s_entry_bb);
6107 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6108 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6109 gsi_remove (&si, true);
6110 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6111
6112 if (s_exit_bb == NULL)
6113 continue;
6114
6115 si = gsi_last_bb (s_exit_bb);
6116 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6117 gsi_remove (&si, true);
6118
6119 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6120 }
6121
6122 /* Error handling code goes in DEFAULT_BB. */
6123 t = gimple_block_label (default_bb);
6124 u = build_case_label (NULL, NULL, t);
6125 make_edge (l0_bb, default_bb, 0);
6126 add_bb_to_loop (default_bb, current_loops->tree_root);
6127
6128 stmt = gimple_build_switch (vmain, u, label_vec);
6129 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6130 gsi_remove (&switch_si, true);
6131
6132 si = gsi_start_bb (default_bb);
6133 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6134 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6135
6136 if (exit_reachable)
6137 {
6138 tree bfn_decl;
6139
6140 /* Code to get the next section goes in L1_BB. */
6141 si = gsi_last_bb (l1_bb);
6142 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6143
6144 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6145 stmt = gimple_build_call (bfn_decl, 0);
6146 gimple_call_set_lhs (stmt, vnext);
6147 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6148 gsi_remove (&si, true);
6149
6150 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6151 }
6152
6153 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6154 si = gsi_last_bb (l2_bb);
6155 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6156 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6157 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6158 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6159 else
6160 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6161 stmt = gimple_build_call (t, 0);
6162 if (gimple_omp_return_lhs (gsi_stmt (si)))
6163 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6164 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6165 gsi_remove (&si, true);
6166
6167 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6168}
6169
6170/* Expand code for an OpenMP single directive. We've already expanded
6171 much of the code, here we simply place the GOMP_barrier call. */
6172
6173static void
6174expand_omp_single (struct omp_region *region)
6175{
6176 basic_block entry_bb, exit_bb;
6177 gimple_stmt_iterator si;
6178
6179 entry_bb = region->entry;
6180 exit_bb = region->exit;
6181
6182 si = gsi_last_bb (entry_bb);
6183 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6184 gsi_remove (&si, true);
6185 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6186
6187 si = gsi_last_bb (exit_bb);
6188 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6189 {
6190 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6191 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6192 }
6193 gsi_remove (&si, true);
6194 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6195}
6196
6197/* Generic expansion for OpenMP synchronization directives: master,
6198 ordered and critical. All we need to do here is remove the entry
6199 and exit markers for REGION. */
6200
6201static void
6202expand_omp_synch (struct omp_region *region)
6203{
6204 basic_block entry_bb, exit_bb;
6205 gimple_stmt_iterator si;
6206
6207 entry_bb = region->entry;
6208 exit_bb = region->exit;
6209
6210 si = gsi_last_bb (entry_bb);
6211 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6212 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6213 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6214 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6215 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6216 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6217 gsi_remove (&si, true);
6218 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6219
6220 if (exit_bb)
6221 {
6222 si = gsi_last_bb (exit_bb);
6223 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6224 gsi_remove (&si, true);
6225 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6226 }
6227}
6228
6229/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6230 operation as a normal volatile load. */
6231
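/* Illustrative sketch (hypothetical user source): for

       #pragma omp atomic read
       v = x;

   this routine replaces the GIMPLE_OMP_ATOMIC_LOAD with a call to the
   __atomic_load_N builtin of the matching width -- conceptually
   v = __atomic_load_8 (&x, MEMMODEL_RELAXED) for an 8-byte x, or
   MEMMODEL_SEQ_CST when the seq_cst clause is present -- and removes
   the paired GIMPLE_OMP_ATOMIC_STORE in the successor block.  */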
6232static bool
6233expand_omp_atomic_load (basic_block load_bb, tree addr,
6234 tree loaded_val, int index)
6235{
6236 enum built_in_function tmpbase;
6237 gimple_stmt_iterator gsi;
6238 basic_block store_bb;
6239 location_t loc;
6240 gimple *stmt;
6241 tree decl, call, type, itype;
6242
6243 gsi = gsi_last_bb (load_bb);
6244 stmt = gsi_stmt (gsi);
6245 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6246 loc = gimple_location (stmt);
6247
6248 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6249 is smaller than word size, then expand_atomic_load assumes that the load
6250 is atomic. We could avoid the builtin entirely in this case. */
6251
6252 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6253 decl = builtin_decl_explicit (tmpbase);
6254 if (decl == NULL_TREE)
6255 return false;
6256
6257 type = TREE_TYPE (loaded_val);
6258 itype = TREE_TYPE (TREE_TYPE (decl));
6259
6260 call = build_call_expr_loc (loc, decl, 2, addr,
6261 build_int_cst (NULL,
6262 gimple_omp_atomic_seq_cst_p (stmt)
6263 ? MEMMODEL_SEQ_CST
6264 : MEMMODEL_RELAXED));
6265 if (!useless_type_conversion_p (type, itype))
6266 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6267 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6268
6269 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6270 gsi_remove (&gsi, true);
6271
6272 store_bb = single_succ (load_bb);
6273 gsi = gsi_last_bb (store_bb);
6274 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6275 gsi_remove (&gsi, true);
6276
6277 if (gimple_in_ssa_p (cfun))
6278 update_ssa (TODO_update_ssa_no_phi);
6279
6280 return true;
6281}
6282
6283/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6284 operation as a normal volatile store. */
6285
6286static bool
6287expand_omp_atomic_store (basic_block load_bb, tree addr,
6288 tree loaded_val, tree stored_val, int index)
6289{
6290 enum built_in_function tmpbase;
6291 gimple_stmt_iterator gsi;
6292 basic_block store_bb = single_succ (load_bb);
6293 location_t loc;
6294 gimple *stmt;
6295 tree decl, call, type, itype;
6296 machine_mode imode;
6297 bool exchange;
6298
6299 gsi = gsi_last_bb (load_bb);
6300 stmt = gsi_stmt (gsi);
6301 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6302
6303 /* If the load value is needed, then this isn't a store but an exchange. */
6304 exchange = gimple_omp_atomic_need_value_p (stmt);
6305
6306 gsi = gsi_last_bb (store_bb);
6307 stmt = gsi_stmt (gsi);
6308 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6309 loc = gimple_location (stmt);
6310
6311 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6312 is smaller than word size, then expand_atomic_store assumes that the store
6313 is atomic. We could avoid the builtin entirely in this case. */
6314
6315 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6316 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6317 decl = builtin_decl_explicit (tmpbase);
6318 if (decl == NULL_TREE)
6319 return false;
6320
6321 type = TREE_TYPE (stored_val);
6322
6323 /* Dig out the type of the function's second argument. */
6324 itype = TREE_TYPE (decl);
6325 itype = TYPE_ARG_TYPES (itype);
6326 itype = TREE_CHAIN (itype);
6327 itype = TREE_VALUE (itype);
6328 imode = TYPE_MODE (itype);
6329
6330 if (exchange && !can_atomic_exchange_p (imode, true))
6331 return false;
6332
6333 if (!useless_type_conversion_p (itype, type))
6334 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6335 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6336 build_int_cst (NULL,
6337 gimple_omp_atomic_seq_cst_p (stmt)
6338 ? MEMMODEL_SEQ_CST
6339 : MEMMODEL_RELAXED));
6340 if (exchange)
6341 {
6342 if (!useless_type_conversion_p (type, itype))
6343 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6344 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6345 }
6346
6347 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6348 gsi_remove (&gsi, true);
6349
6350 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6351 gsi = gsi_last_bb (load_bb);
6352 gsi_remove (&gsi, true);
6353
6354 if (gimple_in_ssa_p (cfun))
6355 update_ssa (TODO_update_ssa_no_phi);
6356
6357 return true;
6358}
6359
6360/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6361 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6362 size of the data type, and thus usable to find the index of the builtin
6363 decl. Returns false if the expression is not of the proper form. */
6364
6365static bool
6366expand_omp_atomic_fetch_op (basic_block load_bb,
6367 tree addr, tree loaded_val,
6368 tree stored_val, int index)
6369{
6370 enum built_in_function oldbase, newbase, tmpbase;
6371 tree decl, itype, call;
6372 tree lhs, rhs;
6373 basic_block store_bb = single_succ (load_bb);
6374 gimple_stmt_iterator gsi;
6375 gimple *stmt;
6376 location_t loc;
6377 enum tree_code code;
6378 bool need_old, need_new;
6379 machine_mode imode;
6380 bool seq_cst;
6381
6382 /* We expect to find the following sequences:
6383
6384 load_bb:
6385 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6386
6387 store_bb:
6388 val = tmp OP something; (or: something OP tmp)
6389 GIMPLE_OMP_STORE (val)
6390
6391 ???FIXME: Allow a more flexible sequence.
6392 Perhaps use data flow to pick the statements.
6393
6394 */
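/* Illustrative sketch (hypothetical user source): an update such as

       #pragma omp atomic
       x += y;

   matches the pattern above (stored_val = loaded_val + y) and becomes a
   single __atomic_fetch_add call of the right width on &x, using the
   RELAXED memory model (SEQ_CST when the seq_cst clause is present).
   Captured updates pick __atomic_fetch_OP when the old value is needed
   and __atomic_OP_fetch when the new value is needed, as selected via
   need_old / need_new below.  */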
6395
6396 gsi = gsi_after_labels (store_bb);
6397 stmt = gsi_stmt (gsi);
6398 loc = gimple_location (stmt);
6399 if (!is_gimple_assign (stmt))
6400 return false;
6401 gsi_next (&gsi);
6402 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6403 return false;
6404 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6405 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6406 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6407 gcc_checking_assert (!need_old || !need_new);
6408
6409 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6410 return false;
6411
6412 /* Check for one of the supported fetch-op operations. */
6413 code = gimple_assign_rhs_code (stmt);
6414 switch (code)
6415 {
6416 case PLUS_EXPR:
6417 case POINTER_PLUS_EXPR:
6418 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6419 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6420 break;
6421 case MINUS_EXPR:
6422 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6423 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6424 break;
6425 case BIT_AND_EXPR:
6426 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6427 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6428 break;
6429 case BIT_IOR_EXPR:
6430 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6431 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6432 break;
6433 case BIT_XOR_EXPR:
6434 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6435 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6436 break;
6437 default:
6438 return false;
6439 }
6440
6441 /* Make sure the expression is of the proper form. */
6442 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6443 rhs = gimple_assign_rhs2 (stmt);
6444 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6445 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6446 rhs = gimple_assign_rhs1 (stmt);
6447 else
6448 return false;
6449
6450 tmpbase = ((enum built_in_function)
6451 ((need_new ? newbase : oldbase) + index + 1));
6452 decl = builtin_decl_explicit (tmpbase);
6453 if (decl == NULL_TREE)
6454 return false;
6455 itype = TREE_TYPE (TREE_TYPE (decl));
6456 imode = TYPE_MODE (itype);
6457
6458 /* We could test all of the various optabs involved, but the fact of the
6459 matter is that (with the exception of i486 vs i586 and xadd) all targets
6460 that support any atomic operation optab also implement compare-and-swap.
6461 Let optabs.c take care of expanding any compare-and-swap loop. */
6462 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6463 return false;
6464
6465 gsi = gsi_last_bb (load_bb);
6466 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6467
6468 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6469 It only requires that the operation happen atomically. Thus we can
6470 use the RELAXED memory model. */
6471 call = build_call_expr_loc (loc, decl, 3, addr,
6472 fold_convert_loc (loc, itype, rhs),
6473 build_int_cst (NULL,
6474 seq_cst ? MEMMODEL_SEQ_CST
6475 : MEMMODEL_RELAXED));
6476
6477 if (need_old || need_new)
6478 {
6479 lhs = need_old ? loaded_val : stored_val;
6480 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6481 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6482 }
6483 else
6484 call = fold_convert_loc (loc, void_type_node, call);
6485 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6486 gsi_remove (&gsi, true);
6487
6488 gsi = gsi_last_bb (store_bb);
6489 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6490 gsi_remove (&gsi, true);
6491 gsi = gsi_last_bb (store_bb);
6492 stmt = gsi_stmt (gsi);
6493 gsi_remove (&gsi, true);
6494
6495 if (gimple_in_ssa_p (cfun))
6496 {
6497 release_defs (stmt);
6498 update_ssa (TODO_update_ssa_no_phi);
6499 }
6500
6501 return true;
6502}
6503
6504/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6505
6506 oldval = *addr;
6507 repeat:
6508 newval = rhs; // with oldval replacing *addr in rhs
6509 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6510 if (oldval != newval)
6511 goto repeat;
6512
6513 INDEX is log2 of the size of the data type, and thus usable to find the
6514 index of the builtin decl. */
6515
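/* Illustrative sketch (hypothetical user source): an update that is not
   a simple fetch-op form, e.g.

       #pragma omp atomic
       x = x * 2;

   cannot be handled by expand_omp_atomic_fetch_op, so it becomes the
   loop sketched above: load *addr once, compute the new value, and call
   __sync_val_compare_and_swap; if the value returned differs from the
   one the update was computed from, another thread intervened and the
   loop retries with the returned value.  Floating-point operands are
   VIEW_CONVERTed to a same-width integer type so both the swap and the
   retry comparison work on raw bits (see the NaN / -0.0 note below).  */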
6516static bool
6517expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6518 tree addr, tree loaded_val, tree stored_val,
6519 int index)
6520{
6521 tree loadedi, storedi, initial, new_storedi, old_vali;
6522 tree type, itype, cmpxchg, iaddr;
6523 gimple_stmt_iterator si;
6524 basic_block loop_header = single_succ (load_bb);
6525 gimple *phi, *stmt;
6526 edge e;
6527 enum built_in_function fncode;
6528
6529 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6530 order to use the RELAXED memory model effectively. */
6531 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6532 + index + 1);
6533 cmpxchg = builtin_decl_explicit (fncode);
6534 if (cmpxchg == NULL_TREE)
6535 return false;
6536 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6537 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6538
6539 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6540 || !can_atomic_load_p (TYPE_MODE (itype)))
6541 return false;
6542
6543 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6544 si = gsi_last_bb (load_bb);
6545 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6546
6547 /* For floating-point values, we'll need to view-convert them to integers
6548 so that we can perform the atomic compare and swap. Simplify the
6549 following code by always setting up the "i"ntegral variables. */
6550 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6551 {
6552 tree iaddr_val;
6553
6554 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6555 true));
6556 iaddr_val
6557 = force_gimple_operand_gsi (&si,
6558 fold_convert (TREE_TYPE (iaddr), addr),
6559 false, NULL_TREE, true, GSI_SAME_STMT);
6560 stmt = gimple_build_assign (iaddr, iaddr_val);
6561 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6562 loadedi = create_tmp_var (itype);
6563 if (gimple_in_ssa_p (cfun))
6564 loadedi = make_ssa_name (loadedi);
6565 }
6566 else
6567 {
6568 iaddr = addr;
6569 loadedi = loaded_val;
6570 }
6571
6572 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6573 tree loaddecl = builtin_decl_explicit (fncode);
6574 if (loaddecl)
6575 initial
6576 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6577 build_call_expr (loaddecl, 2, iaddr,
6578 build_int_cst (NULL_TREE,
6579 MEMMODEL_RELAXED)));
6580 else
6581 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6582 build_int_cst (TREE_TYPE (iaddr), 0));
6583
6584 initial
6585 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6586 GSI_SAME_STMT);
6587
6588 /* Move the value to the LOADEDI temporary. */
6589 if (gimple_in_ssa_p (cfun))
6590 {
6591 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6592 phi = create_phi_node (loadedi, loop_header);
6593 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6594 initial);
6595 }
6596 else
6597 gsi_insert_before (&si,
6598 gimple_build_assign (loadedi, initial),
6599 GSI_SAME_STMT);
6600 if (loadedi != loaded_val)
6601 {
6602 gimple_stmt_iterator gsi2;
6603 tree x;
6604
6605 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6606 gsi2 = gsi_start_bb (loop_header);
6607 if (gimple_in_ssa_p (cfun))
6608 {
6609 gassign *stmt;
6610 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6611 true, GSI_SAME_STMT);
6612 stmt = gimple_build_assign (loaded_val, x);
6613 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6614 }
6615 else
6616 {
6617 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6618 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6619 true, GSI_SAME_STMT);
6620 }
6621 }
6622 gsi_remove (&si, true);
6623
6624 si = gsi_last_bb (store_bb);
6625 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6626
6627 if (iaddr == addr)
6628 storedi = stored_val;
6629 else
6630 storedi
6631 = force_gimple_operand_gsi (&si,
6632 build1 (VIEW_CONVERT_EXPR, itype,
6633 stored_val), true, NULL_TREE, true,
6634 GSI_SAME_STMT);
6635
6636 /* Build the compare&swap statement. */
6637 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6638 new_storedi = force_gimple_operand_gsi (&si,
6639 fold_convert (TREE_TYPE (loadedi),
6640 new_storedi),
6641 true, NULL_TREE,
6642 true, GSI_SAME_STMT);
6643
6644 if (gimple_in_ssa_p (cfun))
6645 old_vali = loadedi;
6646 else
6647 {
6648 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6649 stmt = gimple_build_assign (old_vali, loadedi);
6650 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6651
6652 stmt = gimple_build_assign (loadedi, new_storedi);
6653 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6654 }
6655
6656 /* Note that we always perform the comparison as an integer, even for
6657 floating point. This allows the atomic operation to properly
6658 succeed even with NaNs and -0.0. */
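  /* Annotation (rationale sketch): if this condition compared the values as
     floats, a NaN stored at *ADDR would never compare equal to itself and
     the retry loop could spin forever, while -0.0 == 0.0 could make a failed
     exchange look successful.  Comparing the integer images instead gives
     exact bit-for-bit equality semantics.  */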
6659 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6660 stmt = gimple_build_cond_empty (ne);
6661 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6662
6663 /* Update cfg. */
6664 e = single_succ_edge (store_bb);
6665 e->flags &= ~EDGE_FALLTHRU;
6666 e->flags |= EDGE_FALSE_VALUE;
6667 /* Expect no looping. */
6668 e->probability = profile_probability::guessed_always ();
6669
6670 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
 6671	  e->probability = profile_probability::guessed_never ();
6672
6673 /* Copy the new value to loadedi (we already did that before the condition
6674 if we are not in SSA). */
6675 if (gimple_in_ssa_p (cfun))
6676 {
6677 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6678 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6679 }
6680
6681 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6682 gsi_remove (&si, true);
6683
6684 struct loop *loop = alloc_loop ();
6685 loop->header = loop_header;
6686 loop->latch = store_bb;
6687 add_loop (loop, loop_header->loop_father);
6688
6689 if (gimple_in_ssa_p (cfun))
6690 update_ssa (TODO_update_ssa_no_phi);
6691
6692 return true;
6693}
6694
6695/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6696
6697 GOMP_atomic_start ();
6698 *addr = rhs;
6699 GOMP_atomic_end ();
6700
6701 The result is not globally atomic, but works so long as all parallel
6702 references are within #pragma omp atomic directives. According to
6703 responses received from omp@openmp.org, appears to be within spec.
6704 Which makes sense, since that's how several other compilers handle
6705 this situation as well.
6706 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6707 expanding. STORED_VAL is the operand of the matching
6708 GIMPLE_OMP_ATOMIC_STORE.
6709
6710 We replace
6711 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6712 loaded_val = *addr;
6713
6714 and replace
6715 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6716 *addr = stored_val;
6717*/
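/* Illustrative example (annotation; the concrete type is assumed): an
   operand with no suitable __atomic/__sync support, e.g. a 16-byte long
   double on some targets, ends up in this fallback, so

       #pragma omp atomic
       x += 1.0L;

   is expanded roughly as

       GOMP_atomic_start ();
       x = x + 1.0L;
       GOMP_atomic_end ();

   with libgomp serializing all such regions on a single global lock.  */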
6718
6719static bool
6720expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6721 tree addr, tree loaded_val, tree stored_val)
6722{
6723 gimple_stmt_iterator si;
6724 gassign *stmt;
6725 tree t;
6726
6727 si = gsi_last_bb (load_bb);
6728 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6729
6730 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6731 t = build_call_expr (t, 0);
6732 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6733
6734 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6735 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6736 gsi_remove (&si, true);
6737
6738 si = gsi_last_bb (store_bb);
6739 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6740
6741 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6742 stored_val);
6743 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6744
6745 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6746 t = build_call_expr (t, 0);
6747 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6748 gsi_remove (&si, true);
6749
6750 if (gimple_in_ssa_p (cfun))
6751 update_ssa (TODO_update_ssa_no_phi);
6752 return true;
6753}
6754
 6755/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
 6756   using expand_omp_atomic_fetch_op.  If that fails, we try to
6757 call expand_omp_atomic_pipeline, and if it fails too, the
6758 ultimate fallback is wrapping the operation in a mutex
6759 (expand_omp_atomic_mutex). REGION is the atomic region built
6760 by build_omp_regions_1(). */
6761
6762static void
6763expand_omp_atomic (struct omp_region *region)
6764{
6765 basic_block load_bb = region->entry, store_bb = region->exit;
6766 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6767 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6768 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6769 tree addr = gimple_omp_atomic_load_rhs (load);
6770 tree stored_val = gimple_omp_atomic_store_val (store);
6771 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6772 HOST_WIDE_INT index;
6773
6774 /* Make sure the type is one of the supported sizes. */
6775 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6776 index = exact_log2 (index);
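  /* Annotation: INDEX is the log2 of the operand size in bytes, so the
     accepted sizes below are 1, 2, 4, 8 and 16 bytes (INDEX 0..4); a 4-byte
     int, for instance, gives INDEX == 2.  A non-power-of-two size makes
     exact_log2 return a negative value and the operation falls through to
     the mutex fallback.  */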
6777 if (index >= 0 && index <= 4)
6778 {
6779 unsigned int align = TYPE_ALIGN_UNIT (type);
6780
6781 /* __sync builtins require strict data alignment. */
6782 if (exact_log2 (align) >= index)
6783 {
6784 /* Atomic load. */
 6785	  scalar_mode smode;
 6786	  if (loaded_val == stored_val
6787 && (is_int_mode (TYPE_MODE (type), &smode)
6788 || is_float_mode (TYPE_MODE (type), &smode))
6789 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6790 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6791 return;
6792
6793 /* Atomic store. */
6794 if ((is_int_mode (TYPE_MODE (type), &smode)
6795 || is_float_mode (TYPE_MODE (type), &smode))
6796 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6797 && store_bb == single_succ (load_bb)
6798 && first_stmt (store_bb) == store
6799 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6800 stored_val, index))
6801 return;
6802
6803 /* When possible, use specialized atomic update functions. */
6804 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6805 && store_bb == single_succ (load_bb)
6806 && expand_omp_atomic_fetch_op (load_bb, addr,
6807 loaded_val, stored_val, index))
6808 return;
6809
6810 /* If we don't have specialized __sync builtins, try and implement
6811 as a compare and swap loop. */
6812 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6813 loaded_val, stored_val, index))
6814 return;
6815 }
6816 }
6817
6818 /* The ultimate fallback is wrapping the operation in a mutex. */
6819 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6820}
6821
6822/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6823 at REGION_EXIT. */
6824
6825static void
6826mark_loops_in_oacc_kernels_region (basic_block region_entry,
6827 basic_block region_exit)
6828{
6829 struct loop *outer = region_entry->loop_father;
6830 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6831
6832 /* Don't parallelize the kernels region if it contains more than one outer
6833 loop. */
6834 unsigned int nr_outer_loops = 0;
6835 struct loop *single_outer = NULL;
6836 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6837 {
6838 gcc_assert (loop_outer (loop) == outer);
6839
6840 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6841 continue;
6842
6843 if (region_exit != NULL
6844 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6845 continue;
6846
6847 nr_outer_loops++;
6848 single_outer = loop;
6849 }
6850 if (nr_outer_loops != 1)
6851 return;
6852
6853 for (struct loop *loop = single_outer->inner;
6854 loop != NULL;
6855 loop = loop->inner)
6856 if (loop->next)
6857 return;
6858
6859 /* Mark the loops in the region. */
6860 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6861 loop->in_oacc_kernels_region = true;
6862}
6863
 6864/* Types used to pass grid and workgroup sizes to kernel invocation.  */
6865
6866struct GTY(()) grid_launch_attributes_trees
6867{
6868 tree kernel_dim_array_type;
6869 tree kernel_lattrs_dimnum_decl;
6870 tree kernel_lattrs_grid_decl;
6871 tree kernel_lattrs_group_decl;
6872 tree kernel_launch_attributes_type;
6873};
6874
6875static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6876
6877/* Create types used to pass kernel launch attributes to target. */
6878
6879static void
6880grid_create_kernel_launch_attr_types (void)
6881{
6882 if (grid_attr_trees)
6883 return;
6884 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6885
6886 tree dim_arr_index_type
6887 = build_index_type (build_int_cst (integer_type_node, 2));
6888 grid_attr_trees->kernel_dim_array_type
6889 = build_array_type (uint32_type_node, dim_arr_index_type);
6890
6891 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6892 grid_attr_trees->kernel_lattrs_dimnum_decl
6893 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6894 uint32_type_node);
6895 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6896
6897 grid_attr_trees->kernel_lattrs_grid_decl
6898 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6899 grid_attr_trees->kernel_dim_array_type);
6900 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6901 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6902 grid_attr_trees->kernel_lattrs_group_decl
6903 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6904 grid_attr_trees->kernel_dim_array_type);
6905 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6906 = grid_attr_trees->kernel_lattrs_grid_decl;
6907 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6908 "__gomp_kernel_launch_attributes",
6909 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6910}
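/* Annotation: the record built above corresponds, in plain C terms, roughly
   to

       struct __gomp_kernel_launch_attributes
       {
	 unsigned int ndim;
	 unsigned int grid_size[3];
	 unsigned int group_size[3];
       };

   the dimension arrays have three elements because the index type above is
   built for indices 0..2.  */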
6911
6912/* Insert before the current statement in GSI a store of VALUE to INDEX of
6913 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6914 of type uint32_type_node. */
6915
6916static void
6917grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6918 tree fld_decl, int index, tree value)
6919{
6920 tree ref = build4 (ARRAY_REF, uint32_type_node,
6921 build3 (COMPONENT_REF,
6922 grid_attr_trees->kernel_dim_array_type,
6923 range_var, fld_decl, NULL_TREE),
6924 build_int_cst (integer_type_node, index),
6925 NULL_TREE, NULL_TREE);
6926 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6927}
6928
6929/* Return a tree representation of a pointer to a structure with grid and
6930 work-group size information. Statements filling that information will be
 6931   inserted before GSI; TGT_STMT is the target statement which has the
6932 necessary information in it. */
6933
6934static tree
6935grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6936 gomp_target *tgt_stmt)
6937{
6938 grid_create_kernel_launch_attr_types ();
6939 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6940 "__kernel_launch_attrs");
6941
6942 unsigned max_dim = 0;
6943 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6944 clause;
6945 clause = OMP_CLAUSE_CHAIN (clause))
6946 {
6947 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6948 continue;
6949
6950 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6951 max_dim = MAX (dim, max_dim);
6952
6953 grid_insert_store_range_dim (gsi, lattrs,
6954 grid_attr_trees->kernel_lattrs_grid_decl,
6955 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6956 grid_insert_store_range_dim (gsi, lattrs,
6957 grid_attr_trees->kernel_lattrs_group_decl,
6958 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6959 }
6960
6961 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6962 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6963 gcc_checking_assert (max_dim <= 2);
6964 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6965 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6966 GSI_SAME_STMT);
6967 TREE_ADDRESSABLE (lattrs) = 1;
6968 return build_fold_addr_expr (lattrs);
6969}
6970
6971/* Build target argument identifier from the DEVICE identifier, value
6972 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6973
6974static tree
6975get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6976{
6977 tree t = build_int_cst (integer_type_node, device);
6978 if (subseqent_param)
6979 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6980 build_int_cst (integer_type_node,
6981 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6982 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6983 build_int_cst (integer_type_node, id));
6984 return t;
6985}
6986
 6987/* Like above but return it in a type that can be directly stored as an element
6988 of the argument array. */
6989
6990static tree
6991get_target_argument_identifier (int device, bool subseqent_param, int id)
6992{
6993 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6994 return fold_convert (ptr_type_node, t);
6995}
6996
6997/* Return a target argument consisting of DEVICE identifier, value identifier
6998 ID, and the actual VALUE. */
6999
7000static tree
7001get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7002 tree value)
7003{
7004 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7005 fold_convert (integer_type_node, value),
7006 build_int_cst (unsigned_type_node,
7007 GOMP_TARGET_ARG_VALUE_SHIFT));
7008 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7009 get_target_argument_identifier_1 (device, false, id));
7010 t = fold_convert (ptr_type_node, t);
7011 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7012}
7013
7014/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
 7015   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
7016 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7017 arguments. */
7018
7019static void
7020push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7021 int id, tree value, vec <tree> *args)
7022{
7023 if (tree_fits_shwi_p (value)
7024 && tree_to_shwi (value) > -(1 << 15)
7025 && tree_to_shwi (value) < (1 << 15))
7026 args->quick_push (get_target_argument_value (gsi, device, id, value));
7027 else
7028 {
7029 args->quick_push (get_target_argument_identifier (device, true, id));
7030 value = fold_convert (ptr_type_node, value);
7031 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7032 GSI_SAME_STMT);
7033 args->quick_push (value);
7034 }
7035}
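/* Worked example (annotation; the exact bit layout is given by the
   GOMP_TARGET_ARG_* constants in gomp-constants.h and is assumed here): a
   literal clause such as num_teams(4) fits the signed 16-bit range and is
   encoded as one element,

       (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
	 | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   whereas a run-time expression is pushed as two consecutive elements: an
   identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the
   value itself converted to a pointer.  */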
7036
 7037/* Create an array of arguments that is then passed to GOMP_target.  */
7038
7039static tree
7040get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7041{
7042 auto_vec <tree, 6> args;
7043 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7044 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7045 if (c)
7046 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7047 else
7048 t = integer_minus_one_node;
7049 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7050 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7051
7052 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7053 if (c)
7054 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7055 else
7056 t = integer_minus_one_node;
7057 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7058 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7059 &args);
7060
7061 /* Add HSA-specific grid sizes, if available. */
7062 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7063 OMP_CLAUSE__GRIDDIM_))
7064 {
7065 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7066 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7067 args.quick_push (t);
7068 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7069 }
7070
7071 /* Produce more, perhaps device specific, arguments here. */
7072
7073 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7074 args.length () + 1),
7075 ".omp_target_args");
7076 for (unsigned i = 0; i < args.length (); i++)
7077 {
7078 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7079 build_int_cst (integer_type_node, i),
7080 NULL_TREE, NULL_TREE);
7081 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7082 GSI_SAME_STMT);
7083 }
7084 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7085 build_int_cst (integer_type_node, args.length ()),
7086 NULL_TREE, NULL_TREE);
7087 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7088 GSI_SAME_STMT);
7089 TREE_ADDRESSABLE (argarray) = 1;
7090 return build_fold_addr_expr (argarray);
7091}
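/* Annotation: for a plain "#pragma omp target" without num_teams or
   thread_limit clauses, the array built above contains just the two encoded
   -1 defaults plus the terminating NULL pointer, i.e. three elements.  */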
7092
7093/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7094
7095static void
7096expand_omp_target (struct omp_region *region)
7097{
7098 basic_block entry_bb, exit_bb, new_bb;
7099 struct function *child_cfun;
7100 tree child_fn, block, t;
7101 gimple_stmt_iterator gsi;
7102 gomp_target *entry_stmt;
7103 gimple *stmt;
7104 edge e;
7105 bool offloaded, data_region;
7106
7107 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7108 new_bb = region->entry;
7109
7110 offloaded = is_gimple_omp_offloaded (entry_stmt);
7111 switch (gimple_omp_target_kind (entry_stmt))
7112 {
7113 case GF_OMP_TARGET_KIND_REGION:
7114 case GF_OMP_TARGET_KIND_UPDATE:
7115 case GF_OMP_TARGET_KIND_ENTER_DATA:
7116 case GF_OMP_TARGET_KIND_EXIT_DATA:
7117 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7118 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7119 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7120 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7121 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7122 data_region = false;
7123 break;
7124 case GF_OMP_TARGET_KIND_DATA:
7125 case GF_OMP_TARGET_KIND_OACC_DATA:
7126 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7127 data_region = true;
7128 break;
7129 default:
7130 gcc_unreachable ();
7131 }
7132
7133 child_fn = NULL_TREE;
7134 child_cfun = NULL;
7135 if (offloaded)
7136 {
7137 child_fn = gimple_omp_target_child_fn (entry_stmt);
7138 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7139 }
7140
7141 /* Supported by expand_omp_taskreg, but not here. */
7142 if (child_cfun != NULL)
7143 gcc_checking_assert (!child_cfun->cfg);
7144 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7145
7146 entry_bb = region->entry;
7147 exit_bb = region->exit;
7148
7149 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7150 {
7151 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7152
7153 /* Further down, both OpenACC kernels and OpenACC parallel constructs
 7154	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7155 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7156 DECL_ATTRIBUTES (child_fn)
7157 = tree_cons (get_identifier ("oacc kernels"),
7158 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7159 }
7160
7161 if (offloaded)
7162 {
7163 unsigned srcidx, dstidx, num;
7164
7165 /* If the offloading region needs data sent from the parent
7166 function, then the very first statement (except possible
7167 tree profile counter updates) of the offloading body
7168 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7169 &.OMP_DATA_O is passed as an argument to the child function,
7170 we need to replace it with the argument as seen by the child
7171 function.
7172
7173 In most cases, this will end up being the identity assignment
7174 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7175 a function call that has been inlined, the original PARM_DECL
7176 .OMP_DATA_I may have been converted into a different local
7177 variable. In which case, we need to keep the assignment. */
7178 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7179 if (data_arg)
7180 {
7181 basic_block entry_succ_bb = single_succ (entry_bb);
7182 gimple_stmt_iterator gsi;
7183 tree arg;
7184 gimple *tgtcopy_stmt = NULL;
7185 tree sender = TREE_VEC_ELT (data_arg, 0);
7186
7187 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7188 {
7189 gcc_assert (!gsi_end_p (gsi));
7190 stmt = gsi_stmt (gsi);
7191 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7192 continue;
7193
7194 if (gimple_num_ops (stmt) == 2)
7195 {
7196 tree arg = gimple_assign_rhs1 (stmt);
7197
7198 /* We're ignoring the subcode because we're
7199 effectively doing a STRIP_NOPS. */
7200
7201 if (TREE_CODE (arg) == ADDR_EXPR
7202 && TREE_OPERAND (arg, 0) == sender)
7203 {
7204 tgtcopy_stmt = stmt;
7205 break;
7206 }
7207 }
7208 }
7209
7210 gcc_assert (tgtcopy_stmt != NULL);
7211 arg = DECL_ARGUMENTS (child_fn);
7212
7213 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7214 gsi_remove (&gsi, true);
7215 }
7216
7217 /* Declare local variables needed in CHILD_CFUN. */
7218 block = DECL_INITIAL (child_fn);
7219 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7220 /* The gimplifier could record temporaries in the offloading block
7221 rather than in containing function's local_decls chain,
7222 which would mean cgraph missed finalizing them. Do it now. */
7223 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7224 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7225 varpool_node::finalize_decl (t);
7226 DECL_SAVED_TREE (child_fn) = NULL;
7227 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7228 gimple_set_body (child_fn, NULL);
7229 TREE_USED (block) = 1;
7230
7231 /* Reset DECL_CONTEXT on function arguments. */
7232 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7233 DECL_CONTEXT (t) = child_fn;
7234
7235 /* Split ENTRY_BB at GIMPLE_*,
7236 so that it can be moved to the child function. */
7237 gsi = gsi_last_bb (entry_bb);
7238 stmt = gsi_stmt (gsi);
7239 gcc_assert (stmt
7240 && gimple_code (stmt) == gimple_code (entry_stmt));
7241 e = split_block (entry_bb, stmt);
7242 gsi_remove (&gsi, true);
7243 entry_bb = e->dest;
7244 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7245
7246 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7247 if (exit_bb)
7248 {
7249 gsi = gsi_last_bb (exit_bb);
7250 gcc_assert (!gsi_end_p (gsi)
7251 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7252 stmt = gimple_build_return (NULL);
7253 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7254 gsi_remove (&gsi, true);
7255 }
7256
7257 /* Make sure to generate early debug for the function before
7258 outlining anything. */
7259 if (! gimple_in_ssa_p (cfun))
7260 (*debug_hooks->early_global_decl) (cfun->decl);
7261
7262 /* Move the offloading region into CHILD_CFUN. */
7263
7264 block = gimple_block (entry_stmt);
7265
7266 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7267 if (exit_bb)
7268 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7269 /* When the OMP expansion process cannot guarantee an up-to-date
 7270	 loop tree, arrange for the child function to fix up loops.  */
7271 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7272 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7273
7274 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7275 num = vec_safe_length (child_cfun->local_decls);
7276 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7277 {
7278 t = (*child_cfun->local_decls)[srcidx];
7279 if (DECL_CONTEXT (t) == cfun->decl)
7280 continue;
7281 if (srcidx != dstidx)
7282 (*child_cfun->local_decls)[dstidx] = t;
7283 dstidx++;
7284 }
7285 if (dstidx != num)
7286 vec_safe_truncate (child_cfun->local_decls, dstidx);
7287
7288 /* Inform the callgraph about the new function. */
7289 child_cfun->curr_properties = cfun->curr_properties;
7290 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7291 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7292 cgraph_node *node = cgraph_node::get_create (child_fn);
7293 node->parallelized_function = 1;
7294 cgraph_node::add_new_function (child_fn, true);
7295
7296 /* Add the new function to the offload table. */
7297 if (ENABLE_OFFLOADING)
7298 vec_safe_push (offload_funcs, child_fn);
7299
7300 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7301 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7302
7303 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7304 fixed in a following pass. */
7305 push_cfun (child_cfun);
7306 if (need_asm)
 7307	    assign_assembler_name_if_needed (child_fn);
7308 cgraph_edge::rebuild_edges ();
7309
7310 /* Some EH regions might become dead, see PR34608. If
7311 pass_cleanup_cfg isn't the first pass to happen with the
7312 new child, these dead EH edges might cause problems.
7313 Clean them up now. */
7314 if (flag_exceptions)
7315 {
7316 basic_block bb;
7317 bool changed = false;
7318
7319 FOR_EACH_BB_FN (bb, cfun)
7320 changed |= gimple_purge_dead_eh_edges (bb);
7321 if (changed)
7322 cleanup_tree_cfg ();
7323 }
7324 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7325 verify_loop_structure ();
7326 pop_cfun ();
7327
7328 if (dump_file && !gimple_in_ssa_p (cfun))
7329 {
7330 omp_any_child_fn_dumped = true;
7331 dump_function_header (dump_file, child_fn, dump_flags);
7332 dump_function_to_file (child_fn, dump_file, dump_flags);
7333 }
7334 }
7335
7336 /* Emit a library call to launch the offloading region, or do data
7337 transfers. */
7338 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7339 enum built_in_function start_ix;
7340 location_t clause_loc;
7341 unsigned int flags_i = 0;
7342
7343 switch (gimple_omp_target_kind (entry_stmt))
7344 {
7345 case GF_OMP_TARGET_KIND_REGION:
7346 start_ix = BUILT_IN_GOMP_TARGET;
7347 break;
7348 case GF_OMP_TARGET_KIND_DATA:
7349 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7350 break;
7351 case GF_OMP_TARGET_KIND_UPDATE:
7352 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7353 break;
7354 case GF_OMP_TARGET_KIND_ENTER_DATA:
7355 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7356 break;
7357 case GF_OMP_TARGET_KIND_EXIT_DATA:
7358 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7359 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7360 break;
7361 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7362 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7363 start_ix = BUILT_IN_GOACC_PARALLEL;
7364 break;
7365 case GF_OMP_TARGET_KIND_OACC_DATA:
7366 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7367 start_ix = BUILT_IN_GOACC_DATA_START;
7368 break;
7369 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7370 start_ix = BUILT_IN_GOACC_UPDATE;
7371 break;
7372 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7373 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7374 break;
7375 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7376 start_ix = BUILT_IN_GOACC_DECLARE;
7377 break;
7378 default:
7379 gcc_unreachable ();
7380 }
7381
7382 clauses = gimple_omp_target_clauses (entry_stmt);
7383
7384 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7385 library choose) and there is no conditional. */
7386 cond = NULL_TREE;
7387 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7388
7389 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7390 if (c)
7391 cond = OMP_CLAUSE_IF_EXPR (c);
7392
7393 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7394 if (c)
7395 {
7396 /* Even if we pass it to all library function calls, it is currently only
7397 defined/used for the OpenMP target ones. */
7398 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7399 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7400 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7401 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7402
7403 device = OMP_CLAUSE_DEVICE_ID (c);
7404 clause_loc = OMP_CLAUSE_LOCATION (c);
7405 }
7406 else
7407 clause_loc = gimple_location (entry_stmt);
7408
7409 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7410 if (c)
7411 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7412
7413 /* Ensure 'device' is of the correct type. */
7414 device = fold_convert_loc (clause_loc, integer_type_node, device);
7415
7416 /* If we found the clause 'if (cond)', build
7417 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
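  /* Sketch of the emitted control flow (annotation only):

	 if (cond)
	   tmp_var = device;
	 else
	   tmp_var = GOMP_DEVICE_HOST_FALLBACK;
	 ...
	 <START_IX launch routine> (tmp_var, ...);

     so the device-versus-host-fallback decision is made at run time.  */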
7418 if (cond)
7419 {
7420 cond = gimple_boolify (cond);
7421
7422 basic_block cond_bb, then_bb, else_bb;
7423 edge e;
7424 tree tmp_var;
7425
7426 tmp_var = create_tmp_var (TREE_TYPE (device));
7427 if (offloaded)
7428 e = split_block_after_labels (new_bb);
7429 else
7430 {
7431 gsi = gsi_last_bb (new_bb);
7432 gsi_prev (&gsi);
7433 e = split_block (new_bb, gsi_stmt (gsi));
7434 }
7435 cond_bb = e->src;
7436 new_bb = e->dest;
7437 remove_edge (e);
7438
7439 then_bb = create_empty_bb (cond_bb);
7440 else_bb = create_empty_bb (then_bb);
7441 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7442 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7443
7444 stmt = gimple_build_cond_empty (cond);
7445 gsi = gsi_last_bb (cond_bb);
7446 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7447
7448 gsi = gsi_start_bb (then_bb);
7449 stmt = gimple_build_assign (tmp_var, device);
7450 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7451
7452 gsi = gsi_start_bb (else_bb);
7453 stmt = gimple_build_assign (tmp_var,
7454 build_int_cst (integer_type_node,
7455 GOMP_DEVICE_HOST_FALLBACK));
7456 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7457
7458 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7459 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7460 add_bb_to_loop (then_bb, cond_bb->loop_father);
7461 add_bb_to_loop (else_bb, cond_bb->loop_father);
7462 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7463 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7464
7465 device = tmp_var;
7466 gsi = gsi_last_bb (new_bb);
7467 }
7468 else
7469 {
7470 gsi = gsi_last_bb (new_bb);
7471 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7472 true, GSI_SAME_STMT);
7473 }
7474
7475 t = gimple_omp_target_data_arg (entry_stmt);
7476 if (t == NULL)
7477 {
7478 t1 = size_zero_node;
7479 t2 = build_zero_cst (ptr_type_node);
7480 t3 = t2;
7481 t4 = t2;
7482 }
7483 else
7484 {
7485 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7486 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7487 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7488 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7489 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7490 }
7491
7492 gimple *g;
7493 bool tagging = false;
7494 /* The maximum number used by any start_ix, without varargs. */
7495 auto_vec<tree, 11> args;
7496 args.quick_push (device);
7497 if (offloaded)
7498 args.quick_push (build_fold_addr_expr (child_fn));
7499 args.quick_push (t1);
7500 args.quick_push (t2);
7501 args.quick_push (t3);
7502 args.quick_push (t4);
7503 switch (start_ix)
7504 {
7505 case BUILT_IN_GOACC_DATA_START:
7506 case BUILT_IN_GOACC_DECLARE:
7507 case BUILT_IN_GOMP_TARGET_DATA:
7508 break;
7509 case BUILT_IN_GOMP_TARGET:
7510 case BUILT_IN_GOMP_TARGET_UPDATE:
7511 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7512 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7513 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7514 if (c)
7515 depend = OMP_CLAUSE_DECL (c);
7516 else
7517 depend = build_int_cst (ptr_type_node, 0);
7518 args.quick_push (depend);
7519 if (start_ix == BUILT_IN_GOMP_TARGET)
7520 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7521 break;
7522 case BUILT_IN_GOACC_PARALLEL:
7523 oacc_set_fn_attrib (child_fn, clauses, &args);
7524 tagging = true;
7525 /* FALLTHRU */
7526 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7527 case BUILT_IN_GOACC_UPDATE:
7528 {
7529 tree t_async = NULL_TREE;
7530
7531 /* If present, use the value specified by the respective
7532 clause, making sure that is of the correct type. */
7533 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7534 if (c)
7535 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7536 integer_type_node,
7537 OMP_CLAUSE_ASYNC_EXPR (c));
7538 else if (!tagging)
7539 /* Default values for t_async. */
7540 t_async = fold_convert_loc (gimple_location (entry_stmt),
7541 integer_type_node,
7542 build_int_cst (integer_type_node,
7543 GOMP_ASYNC_SYNC));
7544 if (tagging && t_async)
7545 {
7546 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7547
7548 if (TREE_CODE (t_async) == INTEGER_CST)
7549 {
7550 /* See if we can pack the async arg in to the tag's
7551 operand. */
7552 i_async = TREE_INT_CST_LOW (t_async);
7553 if (i_async < GOMP_LAUNCH_OP_MAX)
7554 t_async = NULL_TREE;
7555 else
7556 i_async = GOMP_LAUNCH_OP_MAX;
7557 }
7558 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7559 i_async));
7560 }
7561 if (t_async)
7562 args.safe_push (t_async);
7563
7564 /* Save the argument index, and ... */
7565 unsigned t_wait_idx = args.length ();
7566 unsigned num_waits = 0;
7567 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7568 if (!tagging || c)
7569 /* ... push a placeholder. */
7570 args.safe_push (integer_zero_node);
7571
7572 for (; c; c = OMP_CLAUSE_CHAIN (c))
7573 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7574 {
7575 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7576 integer_type_node,
7577 OMP_CLAUSE_WAIT_EXPR (c)));
7578 num_waits++;
7579 }
7580
7581 if (!tagging || num_waits)
7582 {
7583 tree len;
7584
7585 /* Now that we know the number, update the placeholder. */
7586 if (tagging)
7587 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7588 else
7589 len = build_int_cst (integer_type_node, num_waits);
7590 len = fold_convert_loc (gimple_location (entry_stmt),
7591 unsigned_type_node, len);
7592 args[t_wait_idx] = len;
7593 }
7594 }
7595 break;
7596 default:
7597 gcc_unreachable ();
7598 }
7599 if (tagging)
7600 /* Push terminal marker - zero. */
7601 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7602
7603 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7604 gimple_set_location (g, gimple_location (entry_stmt));
7605 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7606 if (!offloaded)
7607 {
7608 g = gsi_stmt (gsi);
7609 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7610 gsi_remove (&gsi, true);
7611 }
7612 if (data_region && region->exit)
7613 {
7614 gsi = gsi_last_bb (region->exit);
7615 g = gsi_stmt (gsi);
7616 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7617 gsi_remove (&gsi, true);
7618 }
7619}
7620
 7621/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
 7622   the iteration variable derived from the thread number.  INTRA_GROUP means this
7623 is an expansion of a loop iterating over work-items within a separate
 7624   iteration over groups.  */
7625
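/* Annotation: for each collapsed dimension the loop below emits, in essence,

       V = N1 + threadid * STEP;

   where threadid comes from BUILT_IN_HSA_WORKITEMABSID (or
   BUILT_IN_HSA_WORKGROUPID and BUILT_IN_HSA_WORKITEMID for the grouped and
   intra-group cases), so each work-item executes exactly one logical
   iteration of the original loop.  */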
7626static void
7627grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7628{
7629 gimple_stmt_iterator gsi;
7630 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7631 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7632 == GF_OMP_FOR_KIND_GRID_LOOP);
7633 size_t collapse = gimple_omp_for_collapse (for_stmt);
7634 struct omp_for_data_loop *loops
7635 = XALLOCAVEC (struct omp_for_data_loop,
 7636		  gimple_omp_for_collapse (for_stmt));
7637 struct omp_for_data fd;
7638
7639 remove_edge (BRANCH_EDGE (kfor->entry));
7640 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7641
7642 gcc_assert (kfor->cont);
7643 omp_extract_for_data (for_stmt, &fd, loops);
7644
7645 gsi = gsi_start_bb (body_bb);
7646
7647 for (size_t dim = 0; dim < collapse; dim++)
7648 {
7649 tree type, itype;
7650 itype = type = TREE_TYPE (fd.loops[dim].v);
7651 if (POINTER_TYPE_P (type))
7652 itype = signed_type_for (type);
7653
7654 tree n1 = fd.loops[dim].n1;
7655 tree step = fd.loops[dim].step;
7656 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7657 true, NULL_TREE, true, GSI_SAME_STMT);
7658 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7659 true, NULL_TREE, true, GSI_SAME_STMT);
7660 tree threadid;
7661 if (gimple_omp_for_grid_group_iter (for_stmt))
7662 {
7663 gcc_checking_assert (!intra_group);
7664 threadid = build_call_expr (builtin_decl_explicit
7665 (BUILT_IN_HSA_WORKGROUPID), 1,
7666 build_int_cstu (unsigned_type_node, dim));
7667 }
7668 else if (intra_group)
7669 threadid = build_call_expr (builtin_decl_explicit
7670 (BUILT_IN_HSA_WORKITEMID), 1,
7671 build_int_cstu (unsigned_type_node, dim));
7672 else
7673 threadid = build_call_expr (builtin_decl_explicit
7674 (BUILT_IN_HSA_WORKITEMABSID), 1,
7675 build_int_cstu (unsigned_type_node, dim));
7676 threadid = fold_convert (itype, threadid);
7677 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7678 true, GSI_SAME_STMT);
7679
7680 tree startvar = fd.loops[dim].v;
7681 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7682 if (POINTER_TYPE_P (type))
7683 t = fold_build_pointer_plus (n1, t);
7684 else
7685 t = fold_build2 (PLUS_EXPR, type, t, n1);
7686 t = fold_convert (type, t);
7687 t = force_gimple_operand_gsi (&gsi, t,
7688 DECL_P (startvar)
7689 && TREE_ADDRESSABLE (startvar),
7690 NULL_TREE, true, GSI_SAME_STMT);
7691 gassign *assign_stmt = gimple_build_assign (startvar, t);
7692 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7693 }
 7694  /* Remove the omp for statement.  */
7695 gsi = gsi_last_bb (kfor->entry);
7696 gsi_remove (&gsi, true);
7697
7698 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7699 gsi = gsi_last_bb (kfor->cont);
7700 gcc_assert (!gsi_end_p (gsi)
7701 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7702 gsi_remove (&gsi, true);
7703
7704 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7705 gsi = gsi_last_bb (kfor->exit);
7706 gcc_assert (!gsi_end_p (gsi)
7707 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7708 if (intra_group)
7709 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7710 gsi_remove (&gsi, true);
7711
7712 /* Fixup the much simpler CFG. */
7713 remove_edge (find_edge (kfor->cont, body_bb));
7714
7715 if (kfor->cont != body_bb)
7716 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7717 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7718}
7719
7720/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7721 argument_decls. */
7722
7723struct grid_arg_decl_map
7724{
7725 tree old_arg;
7726 tree new_arg;
7727};
7728
7729/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7730 pertaining to kernel function. */
7731
7732static tree
7733grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7734{
7735 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7736 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7737 tree t = *tp;
7738
7739 if (t == adm->old_arg)
7740 *tp = adm->new_arg;
7741 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7742 return NULL_TREE;
7743}
7744
7745/* If TARGET region contains a kernel body for loop, remove its region from the
 7746   TARGET and expand it in HSA gridified kernel fashion.  */
7747
7748static void
7749grid_expand_target_grid_body (struct omp_region *target)
7750{
7751 if (!hsa_gen_requested_p ())
7752 return;
7753
7754 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7755 struct omp_region **pp;
7756
7757 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7758 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7759 break;
7760
7761 struct omp_region *gpukernel = *pp;
7762
7763 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7764 if (!gpukernel)
7765 {
7766 /* HSA cannot handle OACC stuff. */
7767 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7768 return;
7769 gcc_checking_assert (orig_child_fndecl);
7770 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7771 OMP_CLAUSE__GRIDDIM_));
7772 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7773
7774 hsa_register_kernel (n);
7775 return;
7776 }
7777
7778 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7779 OMP_CLAUSE__GRIDDIM_));
7780 tree inside_block
7781 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7782 *pp = gpukernel->next;
7783 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7784 if ((*pp)->type == GIMPLE_OMP_FOR)
7785 break;
7786
7787 struct omp_region *kfor = *pp;
7788 gcc_assert (kfor);
7789 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7790 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7791 *pp = kfor->next;
7792 if (kfor->inner)
7793 {
7794 if (gimple_omp_for_grid_group_iter (for_stmt))
7795 {
7796 struct omp_region **next_pp;
7797 for (pp = &kfor->inner; *pp; pp = next_pp)
7798 {
7799 next_pp = &(*pp)->next;
7800 if ((*pp)->type != GIMPLE_OMP_FOR)
7801 continue;
7802 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7803 gcc_assert (gimple_omp_for_kind (inner)
7804 == GF_OMP_FOR_KIND_GRID_LOOP);
7805 grid_expand_omp_for_loop (*pp, true);
7806 *pp = (*pp)->next;
7807 next_pp = pp;
7808 }
7809 }
7810 expand_omp (kfor->inner);
7811 }
7812 if (gpukernel->inner)
7813 expand_omp (gpukernel->inner);
7814
7815 tree kern_fndecl = copy_node (orig_child_fndecl);
7816 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7817 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7818 tree tgtblock = gimple_block (tgt_stmt);
7819 tree fniniblock = make_node (BLOCK);
7820 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7821 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7822 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7823 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7824 DECL_INITIAL (kern_fndecl) = fniniblock;
7825 push_struct_function (kern_fndecl);
7826 cfun->function_end_locus = gimple_location (tgt_stmt);
7827 init_tree_ssa (cfun);
7828 pop_cfun ();
7829
7830 /* Make sure to generate early debug for the function before
7831 outlining anything. */
7832 if (! gimple_in_ssa_p (cfun))
7833 (*debug_hooks->early_global_decl) (cfun->decl);
7834
7835 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7836 gcc_assert (!DECL_CHAIN (old_parm_decl));
7837 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7838 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7839 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7840 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7841 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7842 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7843 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7844 kern_cfun->curr_properties = cfun->curr_properties;
7845
7846 grid_expand_omp_for_loop (kfor, false);
7847
 7848  /* Remove the omp for statement.  */
7849 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7850 gsi_remove (&gsi, true);
7851 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7852 return. */
7853 gsi = gsi_last_bb (gpukernel->exit);
7854 gcc_assert (!gsi_end_p (gsi)
7855 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7856 gimple *ret_stmt = gimple_build_return (NULL);
7857 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7858 gsi_remove (&gsi, true);
7859
7860 /* Statements in the first BB in the target construct have been produced by
7861 target lowering and must be copied inside the GPUKERNEL, with the two
7862 exceptions of the first OMP statement and the OMP_DATA assignment
7863 statement. */
7864 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7865 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7866 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7867 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7868 !gsi_end_p (tsi); gsi_next (&tsi))
7869 {
7870 gimple *stmt = gsi_stmt (tsi);
7871 if (is_gimple_omp (stmt))
7872 break;
7873 if (sender
7874 && is_gimple_assign (stmt)
7875 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7876 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7877 continue;
7878 gimple *copy = gimple_copy (stmt);
7879 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7880 gimple_set_block (copy, fniniblock);
7881 }
7882
7883 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7884 gpukernel->exit, inside_block);
7885
7886 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7887 kcn->mark_force_output ();
7888 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7889
7890 hsa_register_kernel (kcn, orig_child);
7891
7892 cgraph_node::add_new_function (kern_fndecl, true);
7893 push_cfun (kern_cfun);
7894 cgraph_edge::rebuild_edges ();
7895
7896 /* Re-map any mention of the PARM_DECL of the original function to the
7897 PARM_DECL of the new one.
7898
7899 TODO: It would be great if lowering produced references into the GPU
7900 kernel decl straight away and we did not have to do this. */
7901 struct grid_arg_decl_map adm;
7902 adm.old_arg = old_parm_decl;
7903 adm.new_arg = new_parm_decl;
7904 basic_block bb;
7905 FOR_EACH_BB_FN (bb, kern_cfun)
7906 {
7907 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7908 {
7909 gimple *stmt = gsi_stmt (gsi);
7910 struct walk_stmt_info wi;
7911 memset (&wi, 0, sizeof (wi));
7912 wi.info = &adm;
7913 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7914 }
7915 }
7916 pop_cfun ();
7917
7918 return;
7919}
7920
7921/* Expand the parallel region tree rooted at REGION. Expansion
7922 proceeds in depth-first order. Innermost regions are expanded
7923 first. This way, parallel regions that require a new function to
7924 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7925 internal dependencies in their body. */
7926
7927static void
7928expand_omp (struct omp_region *region)
7929{
7930 omp_any_child_fn_dumped = false;
7931 while (region)
7932 {
7933 location_t saved_location;
7934 gimple *inner_stmt = NULL;
7935
7936 /* First, determine whether this is a combined parallel+workshare
 7937	 region.  */
7938 if (region->type == GIMPLE_OMP_PARALLEL)
7939 determine_parallel_type (region);
7940 else if (region->type == GIMPLE_OMP_TARGET)
7941 grid_expand_target_grid_body (region);
7942
7943 if (region->type == GIMPLE_OMP_FOR
7944 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7945 inner_stmt = last_stmt (region->inner->entry);
7946
7947 if (region->inner)
7948 expand_omp (region->inner);
7949
7950 saved_location = input_location;
7951 if (gimple_has_location (last_stmt (region->entry)))
7952 input_location = gimple_location (last_stmt (region->entry));
7953
7954 switch (region->type)
7955 {
7956 case GIMPLE_OMP_PARALLEL:
7957 case GIMPLE_OMP_TASK:
7958 expand_omp_taskreg (region);
7959 break;
7960
7961 case GIMPLE_OMP_FOR:
7962 expand_omp_for (region, inner_stmt);
7963 break;
7964
7965 case GIMPLE_OMP_SECTIONS:
7966 expand_omp_sections (region);
7967 break;
7968
7969 case GIMPLE_OMP_SECTION:
7970 /* Individual omp sections are handled together with their
7971 parent GIMPLE_OMP_SECTIONS region. */
7972 break;
7973
7974 case GIMPLE_OMP_SINGLE:
7975 expand_omp_single (region);
7976 break;
7977
7978 case GIMPLE_OMP_ORDERED:
7979 {
7980 gomp_ordered *ord_stmt
7981 = as_a <gomp_ordered *> (last_stmt (region->entry));
7982 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7983 OMP_CLAUSE_DEPEND))
7984 {
7985 /* We'll expand these when expanding corresponding
7986 worksharing region with ordered(n) clause. */
7987 gcc_assert (region->outer
7988 && region->outer->type == GIMPLE_OMP_FOR);
7989 region->ord_stmt = ord_stmt;
7990 break;
7991 }
7992 }
7993 /* FALLTHRU */
7994 case GIMPLE_OMP_MASTER:
7995 case GIMPLE_OMP_TASKGROUP:
7996 case GIMPLE_OMP_CRITICAL:
7997 case GIMPLE_OMP_TEAMS:
7998 expand_omp_synch (region);
7999 break;
8000
8001 case GIMPLE_OMP_ATOMIC_LOAD:
8002 expand_omp_atomic (region);
8003 break;
8004
8005 case GIMPLE_OMP_TARGET:
8006 expand_omp_target (region);
8007 break;
8008
8009 default:
8010 gcc_unreachable ();
8011 }
8012
8013 input_location = saved_location;
8014 region = region->next;
8015 }
8016 if (omp_any_child_fn_dumped)
8017 {
8018 if (dump_file)
8019 dump_function_header (dump_file, current_function_decl, dump_flags);
8020 omp_any_child_fn_dumped = false;
8021 }
8022}
8023
8024/* Helper for build_omp_regions. Scan the dominator tree starting at
8025 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
 8026   true, the function ends once a single tree is built (otherwise, a whole
8027 forest of OMP constructs may be built). */
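/* Annotation (example only): for

       #pragma omp parallel
       #pragma omp for
       for (...) { ... }

   this builds a GIMPLE_OMP_PARALLEL region whose ->inner is the
   GIMPLE_OMP_FOR region, while stand-alone directives such as
   "#pragma omp target update" produce no region at all (see the switch
   below).  */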
8028
8029static void
8030build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8031 bool single_tree)
8032{
8033 gimple_stmt_iterator gsi;
8034 gimple *stmt;
8035 basic_block son;
8036
8037 gsi = gsi_last_bb (bb);
8038 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8039 {
8040 struct omp_region *region;
8041 enum gimple_code code;
8042
8043 stmt = gsi_stmt (gsi);
8044 code = gimple_code (stmt);
8045 if (code == GIMPLE_OMP_RETURN)
8046 {
8047 /* STMT is the return point out of region PARENT. Mark it
8048 as the exit point and make PARENT the immediately
8049 enclosing region. */
8050 gcc_assert (parent);
8051 region = parent;
8052 region->exit = bb;
8053 parent = parent->outer;
8054 }
8055 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8056 {
 8057	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8058 GIMPLE_OMP_RETURN, but matches with
8059 GIMPLE_OMP_ATOMIC_LOAD. */
8060 gcc_assert (parent);
8061 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8062 region = parent;
8063 region->exit = bb;
8064 parent = parent->outer;
8065 }
8066 else if (code == GIMPLE_OMP_CONTINUE)
8067 {
8068 gcc_assert (parent);
8069 parent->cont = bb;
8070 }
8071 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8072 {
8073 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8074 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8075 }
8076 else
8077 {
8078 region = new_omp_region (bb, code, parent);
8079 /* Otherwise... */
8080 if (code == GIMPLE_OMP_TARGET)
8081 {
8082 switch (gimple_omp_target_kind (stmt))
8083 {
8084 case GF_OMP_TARGET_KIND_REGION:
8085 case GF_OMP_TARGET_KIND_DATA:
8086 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8087 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8088 case GF_OMP_TARGET_KIND_OACC_DATA:
8089 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8090 break;
8091 case GF_OMP_TARGET_KIND_UPDATE:
8092 case GF_OMP_TARGET_KIND_ENTER_DATA:
8093 case GF_OMP_TARGET_KIND_EXIT_DATA:
8094 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8095 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8096 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8097 /* ..., other than for those stand-alone directives... */
8098 region = NULL;
8099 break;
8100 default:
8101 gcc_unreachable ();
8102 }
8103 }
8104 else if (code == GIMPLE_OMP_ORDERED
8105 && omp_find_clause (gimple_omp_ordered_clauses
8106 (as_a <gomp_ordered *> (stmt)),
8107 OMP_CLAUSE_DEPEND))
8108 /* #pragma omp ordered depend is also just a stand-alone
8109 directive. */
8110 region = NULL;
8111 /* ..., this directive becomes the parent for a new region. */
8112 if (region)
8113 parent = region;
8114 }
8115 }
8116
8117 if (single_tree && !parent)
8118 return;
8119
8120 for (son = first_dom_son (CDI_DOMINATORS, bb);
8121 son;
8122 son = next_dom_son (CDI_DOMINATORS, son))
8123 build_omp_regions_1 (son, parent, single_tree);
8124}
8125
8126/* Builds the tree of OMP regions rooted at ROOT, storing it to
8127 root_omp_region. */
8128
8129static void
8130build_omp_regions_root (basic_block root)
8131{
8132 gcc_assert (root_omp_region == NULL);
8133 build_omp_regions_1 (root, NULL, true);
8134 gcc_assert (root_omp_region != NULL);
8135}
8136
8137/* Expands omp construct (and its subconstructs) starting in HEAD. */
8138
8139void
8140omp_expand_local (basic_block head)
8141{
8142 build_omp_regions_root (head);
8143 if (dump_file && (dump_flags & TDF_DETAILS))
8144 {
8145 fprintf (dump_file, "\nOMP region tree\n\n");
8146 dump_omp_region (dump_file, root_omp_region, 0);
8147 fprintf (dump_file, "\n");
8148 }
8149
8150 remove_exit_barriers (root_omp_region);
8151 expand_omp (root_omp_region);
8152
8153 omp_free_regions ();
8154}
8155
 8156/* Scan the CFG and build a tree of OMP regions, storing the root of
 8157   the OMP region tree in root_omp_region.  */
8158
8159static void
8160build_omp_regions (void)
8161{
8162 gcc_assert (root_omp_region == NULL);
8163 calculate_dominance_info (CDI_DOMINATORS);
8164 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8165}
8166
8167/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8168
8169static unsigned int
8170execute_expand_omp (void)
8171{
8172 build_omp_regions ();
8173
8174 if (!root_omp_region)
8175 return 0;
8176
8177 if (dump_file)
8178 {
8179 fprintf (dump_file, "\nOMP region tree\n\n");
8180 dump_omp_region (dump_file, root_omp_region, 0);
8181 fprintf (dump_file, "\n");
8182 }
8183
8184 remove_exit_barriers (root_omp_region);
8185
8186 expand_omp (root_omp_region);
8187
8188 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8189 verify_loop_structure ();
8190 cleanup_tree_cfg ();
8191
8192 omp_free_regions ();
8193
8194 return 0;
8195}
8196
8197/* OMP expansion -- the default pass, run before creation of SSA form. */
8198
8199namespace {
8200
8201const pass_data pass_data_expand_omp =
8202{
8203 GIMPLE_PASS, /* type */
8204 "ompexp", /* name */
 8205  OPTGROUP_OMP, /* optinfo_flags */
8206 TV_NONE, /* tv_id */
8207 PROP_gimple_any, /* properties_required */
8208 PROP_gimple_eomp, /* properties_provided */
8209 0, /* properties_destroyed */
8210 0, /* todo_flags_start */
8211 0, /* todo_flags_finish */
8212};
8213
8214class pass_expand_omp : public gimple_opt_pass
8215{
8216public:
8217 pass_expand_omp (gcc::context *ctxt)
8218 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8219 {}
8220
8221 /* opt_pass methods: */
8222 virtual unsigned int execute (function *)
8223 {
8224 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8225 || flag_openmp_simd != 0)
8226 && !seen_error ());
8227
8228 /* This pass always runs, to provide PROP_gimple_eomp.
8229 But often, there is nothing to do. */
8230 if (!gate)
8231 return 0;
8232
8233 return execute_expand_omp ();
8234 }
8235
8236}; // class pass_expand_omp
8237
8238} // anon namespace
8239
8240gimple_opt_pass *
8241make_pass_expand_omp (gcc::context *ctxt)
8242{
8243 return new pass_expand_omp (ctxt);
8244}
8245
8246namespace {
8247
8248const pass_data pass_data_expand_omp_ssa =
8249{
8250 GIMPLE_PASS, /* type */
8251 "ompexpssa", /* name */
 8252  OPTGROUP_OMP, /* optinfo_flags */
8253 TV_NONE, /* tv_id */
8254 PROP_cfg | PROP_ssa, /* properties_required */
8255 PROP_gimple_eomp, /* properties_provided */
8256 0, /* properties_destroyed */
8257 0, /* todo_flags_start */
8258 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8259};
8260
8261class pass_expand_omp_ssa : public gimple_opt_pass
8262{
8263public:
8264 pass_expand_omp_ssa (gcc::context *ctxt)
8265 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8266 {}
8267
8268 /* opt_pass methods: */
8269 virtual bool gate (function *fun)
8270 {
8271 return !(fun->curr_properties & PROP_gimple_eomp);
8272 }
8273 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8274 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8275
8276}; // class pass_expand_omp_ssa
8277
8278} // anon namespace
8279
8280gimple_opt_pass *
8281make_pass_expand_omp_ssa (gcc::context *ctxt)
8282{
8283 return new pass_expand_omp_ssa (ctxt);
8284}
8285
8286/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8287 GIMPLE_* codes. */
8288
8289bool
8290omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8291 int *region_idx)
8292{
8293 gimple *last = last_stmt (bb);
8294 enum gimple_code code = gimple_code (last);
8295 struct omp_region *cur_region = *region;
8296 bool fallthru = false;
8297
8298 switch (code)
8299 {
8300 case GIMPLE_OMP_PARALLEL:
8301 case GIMPLE_OMP_TASK:
8302 case GIMPLE_OMP_FOR:
8303 case GIMPLE_OMP_SINGLE:
8304 case GIMPLE_OMP_TEAMS:
8305 case GIMPLE_OMP_MASTER:
8306 case GIMPLE_OMP_TASKGROUP:
8307 case GIMPLE_OMP_CRITICAL:
8308 case GIMPLE_OMP_SECTION:
8309 case GIMPLE_OMP_GRID_BODY:
8310 cur_region = new_omp_region (bb, code, cur_region);
8311 fallthru = true;
8312 break;
8313
8314 case GIMPLE_OMP_ORDERED:
8315 cur_region = new_omp_region (bb, code, cur_region);
8316 fallthru = true;
8317 if (omp_find_clause (gimple_omp_ordered_clauses
8318 (as_a <gomp_ordered *> (last)),
8319 OMP_CLAUSE_DEPEND))
8320 cur_region = cur_region->outer;
8321 break;
8322
8323 case GIMPLE_OMP_TARGET:
8324 cur_region = new_omp_region (bb, code, cur_region);
8325 fallthru = true;
8326 switch (gimple_omp_target_kind (last))
8327 {
8328 case GF_OMP_TARGET_KIND_REGION:
8329 case GF_OMP_TARGET_KIND_DATA:
8330 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8331 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8332 case GF_OMP_TARGET_KIND_OACC_DATA:
8333 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8334 break;
8335 case GF_OMP_TARGET_KIND_UPDATE:
8336 case GF_OMP_TARGET_KIND_ENTER_DATA:
8337 case GF_OMP_TARGET_KIND_EXIT_DATA:
8338 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8339 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8340 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8341 cur_region = cur_region->outer;
8342 break;
8343 default:
8344 gcc_unreachable ();
8345 }
8346 break;
8347
8348 case GIMPLE_OMP_SECTIONS:
8349 cur_region = new_omp_region (bb, code, cur_region);
8350 fallthru = true;
8351 break;
8352
8353 case GIMPLE_OMP_SECTIONS_SWITCH:
8354 fallthru = false;
8355 break;
8356
8357 case GIMPLE_OMP_ATOMIC_LOAD:
8358 case GIMPLE_OMP_ATOMIC_STORE:
8359 fallthru = true;
8360 break;
8361
8362 case GIMPLE_OMP_RETURN:
8363 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8364 somewhere other than the next block. This will be
8365 created later. */
8366 cur_region->exit = bb;
8367 if (cur_region->type == GIMPLE_OMP_TASK)
8368 /* Add an edge corresponding to not scheduling the task
8369 immediately. */
8370 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8371 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8372 cur_region = cur_region->outer;
8373 break;
8374
8375 case GIMPLE_OMP_CONTINUE:
8376 cur_region->cont = bb;
8377 switch (cur_region->type)
8378 {
8379 case GIMPLE_OMP_FOR:
8380 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8381 succs edges as abnormal to prevent splitting
8382 them. */
8383 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8384 /* Make the loopback edge. */
8385 make_edge (bb, single_succ (cur_region->entry),
8386 EDGE_ABNORMAL);
8387
8388 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8389 corresponds to the case that the body of the loop
8390 is not executed at all. */
8391 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8392 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8393 fallthru = false;
8394 break;
8395
8396 case GIMPLE_OMP_SECTIONS:
8397 /* Wire up the edges into and out of the nested sections. */
8398 {
8399 basic_block switch_bb = single_succ (cur_region->entry);
8400
8401 struct omp_region *i;
8402 for (i = cur_region->inner; i ; i = i->next)
8403 {
8404 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8405 make_edge (switch_bb, i->entry, 0);
8406 make_edge (i->exit, bb, EDGE_FALLTHRU);
8407 }
8408
8409 /* Make the loopback edge to the block with
8410 GIMPLE_OMP_SECTIONS_SWITCH. */
8411 make_edge (bb, switch_bb, 0);
8412
8413 /* Make the edge from the switch to exit. */
8414 make_edge (switch_bb, bb->next_bb, 0);
8415 fallthru = false;
8416 }
8417 break;
8418
8419 case GIMPLE_OMP_TASK:
8420 fallthru = true;
8421 break;
8422
8423 default:
8424 gcc_unreachable ();
8425 }
8426 break;
8427
8428 default:
8429 gcc_unreachable ();
8430 }
8431
8432 if (*region != cur_region)
8433 {
8434 *region = cur_region;
8435 if (cur_region)
8436 *region_idx = cur_region->entry->index;
8437 else
8438 *region_idx = 0;
8439 }
8440
8441 return fallthru;
8442}
8443
8444#include "gt-omp-expand.h"