gcc/omp-expand.c
1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5Copyright (C) 2005-2017 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "cilk.h"
57#include "gomp-constants.h"
58#include "gimple-pretty-print.h"
59#include "hsa-common.h"
60#include "debug.h"
61
62
63/* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67struct omp_region
68{
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
107};
108
109static struct omp_region *root_omp_region;
110static bool omp_any_child_fn_dumped;
111
112static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114static gphi *find_phi_with_arg_on_edge (tree, edge);
115static void expand_omp (struct omp_region *region);
116
117/* Return true if REGION is a combined parallel+workshare region. */
118
119static inline bool
120is_combined_parallel (struct omp_region *region)
121{
122 return region->is_combined_parallel;
123}
124
125/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
135
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
138
139 Is lowered into:
140
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
150
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
155
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
161
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
166
167static bool
168workshare_safe_to_combine_p (basic_block ws_entry_bb)
169{
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
172
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
175
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
184
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
195
196 return true;
197}
198
199/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
200 presence (SIMD_SCHEDULE). */
201
202static tree
203omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204{
205 if (!simd_schedule)
206 return chunk_size;
207
208 int vf = omp_max_vf ();
209 if (vf == 1)
210 return chunk_size;
211
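 /* The expression built below rounds CHUNK_SIZE up to the next multiple
    of the vectorization factor: chunk_size = (chunk_size + vf - 1) & -vf.
    E.g. with vf == 4 a requested chunk of 10 becomes 12; the BIT_AND_EXPR
    mask assumes VF is a power of two.  */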
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
217}
218
219/* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
222
223static vec<tree, va_gc> *
224get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225{
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
229
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 {
232 struct omp_for_data fd;
233 tree n1, n2;
234
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
238
239 if (gimple_omp_for_combined_into_p (for_stmt))
240 {
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
250 }
251
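 /* The arguments collected for a loop are spliced into the combined
    GOMP_parallel_loop_* call built in expand_parallel_call: the loop
    start, end and increment, plus the chunk size when one was given.  */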
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
256
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
259
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
262
263 if (fd.chunk_size)
264 {
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
268 }
269
270 return ws_args;
271 }
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 {
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
282 }
283
284 gcc_unreachable ();
285}
286
287/* Discover whether REGION is a combined parallel+workshare region. */
288
289static void
290determine_parallel_type (struct omp_region *region)
291{
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
294
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
299
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
305
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
312
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
319 {
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
322
323 if (region->inner->type == GIMPLE_OMP_FOR)
324 {
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 {
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
344 }
345 }
346
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350 }
351}
352
353/* Debugging dumps for parallel regions. */
354void dump_omp_region (FILE *, struct omp_region *, int);
355void debug_omp_region (struct omp_region *);
356void debug_all_omp_regions (void);
357
358/* Dump the parallel region tree rooted at REGION. */
359
360void
361dump_omp_region (FILE *file, struct omp_region *region, int indent)
362{
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
365
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
368
369 if (region->cont)
370 {
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
373 }
374
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
380
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
383}
384
385DEBUG_FUNCTION void
386debug_omp_region (struct omp_region *region)
387{
388 dump_omp_region (stderr, region, 0);
389}
390
391DEBUG_FUNCTION void
392debug_all_omp_regions (void)
393{
394 dump_omp_region (stderr, root_omp_region, 0);
395}
396
397/* Create a new parallel region starting at STMT inside region PARENT. */
398
399static struct omp_region *
400new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
402{
403 struct omp_region *region = XCNEW (struct omp_region);
404
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
408
409 if (parent)
410 {
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
415 }
416 else
417 {
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
422 }
423
424 return region;
425}
426
427/* Release the memory associated with the region tree rooted at REGION. */
428
429static void
430free_omp_region_1 (struct omp_region *region)
431{
432 struct omp_region *i, *n;
433
434 for (i = region->inner; i ; i = n)
435 {
436 n = i->next;
437 free_omp_region_1 (i);
438 }
439
440 free (region);
441}
442
443/* Release the memory for the entire omp region tree. */
444
445void
446omp_free_regions (void)
447{
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
450 {
451 n = r->next;
452 free_omp_region_1 (r);
453 }
454 root_omp_region = NULL;
455}
456
457/* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
459
460static gcond *
461gimple_build_cond_empty (tree cond)
462{
463 enum tree_code pred_code;
464 tree lhs, rhs;
465
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468}
469
470/* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
472
473static bool
474parallel_needs_hsa_kernel_p (struct omp_region *region)
475{
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
478 {
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
482 {
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
485
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
491 }
492 }
493
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
497
498 return false;
499}
500
501/* Build the function calls to GOMP_parallel_start etc to actually
502 generate the parallel operation. REGION is the parallel region
503 being expanded. BB is the block where to insert the code. WS_ARGS
504 will be set if this is a call to a combined parallel+workshare
505 construct, it contains the list of additional arguments needed by
506 the workshare construct. */
507
508static void
509expand_parallel_call (struct omp_region *region, basic_block bb,
510 gomp_parallel *entry_stmt,
511 vec<tree, va_gc> *ws_args)
512{
513 tree t, t1, t2, val, cond, c, clauses, flags;
514 gimple_stmt_iterator gsi;
515 gimple *stmt;
516 enum built_in_function start_ix;
517 int start_ix2;
518 location_t clause_loc;
519 vec<tree, va_gc> *args;
520
521 clauses = gimple_omp_parallel_clauses (entry_stmt);
522
523 /* Determine what flavor of GOMP_parallel we will be
524 emitting. */
525 start_ix = BUILT_IN_GOMP_PARALLEL;
526 if (is_combined_parallel (region))
527 {
528 switch (region->inner->type)
529 {
530 case GIMPLE_OMP_FOR:
531 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
532 switch (region->inner->sched_kind)
533 {
534 case OMP_CLAUSE_SCHEDULE_RUNTIME:
535 start_ix2 = 3;
536 break;
537 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
538 case OMP_CLAUSE_SCHEDULE_GUIDED:
539 if (region->inner->sched_modifiers
540 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
541 {
542 start_ix2 = 3 + region->inner->sched_kind;
543 break;
544 }
545 /* FALLTHRU */
546 default:
547 start_ix2 = region->inner->sched_kind;
548 break;
549 }
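 /* START_IX2 is now an offset from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC:
    the basic static/dynamic/guided kinds map directly, runtime maps to 3,
    and the nonmonotonic dynamic/guided variants to 3 + kind.  */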
550 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
551 start_ix = (enum built_in_function) start_ix2;
552 break;
553 case GIMPLE_OMP_SECTIONS:
554 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
555 break;
556 default:
557 gcc_unreachable ();
558 }
559 }
560
561 /* By default, the value of NUM_THREADS is zero (selected at run time)
562 and there is no conditional. */
563 cond = NULL_TREE;
564 val = build_int_cst (unsigned_type_node, 0);
565 flags = build_int_cst (unsigned_type_node, 0);
566
567 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
568 if (c)
569 cond = OMP_CLAUSE_IF_EXPR (c);
570
571 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
572 if (c)
573 {
574 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
575 clause_loc = OMP_CLAUSE_LOCATION (c);
576 }
577 else
578 clause_loc = gimple_location (entry_stmt);
579
580 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
581 if (c)
582 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
583
584 /* Ensure 'val' is of the correct type. */
585 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
586
587 /* If we found the clause 'if (cond)', build either
588 (cond != 0) or (cond ? val : 1u). */
589 if (cond)
590 {
591 cond = gimple_boolify (cond);
592
593 if (integer_zerop (val))
594 val = fold_build2_loc (clause_loc,
595 EQ_EXPR, unsigned_type_node, cond,
596 build_int_cst (TREE_TYPE (cond), 0));
597 else
598 {
599 basic_block cond_bb, then_bb, else_bb;
600 edge e, e_then, e_else;
601 tree tmp_then, tmp_else, tmp_join, tmp_var;
602
603 tmp_var = create_tmp_var (TREE_TYPE (val));
604 if (gimple_in_ssa_p (cfun))
605 {
606 tmp_then = make_ssa_name (tmp_var);
607 tmp_else = make_ssa_name (tmp_var);
608 tmp_join = make_ssa_name (tmp_var);
609 }
610 else
611 {
612 tmp_then = tmp_var;
613 tmp_else = tmp_var;
614 tmp_join = tmp_var;
615 }
616
617 e = split_block_after_labels (bb);
618 cond_bb = e->src;
619 bb = e->dest;
620 remove_edge (e);
621
622 then_bb = create_empty_bb (cond_bb);
623 else_bb = create_empty_bb (then_bb);
624 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
625 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
626
627 stmt = gimple_build_cond_empty (cond);
628 gsi = gsi_start_bb (cond_bb);
629 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
630
631 gsi = gsi_start_bb (then_bb);
632 expand_omp_build_assign (&gsi, tmp_then, val, true);
633
634 gsi = gsi_start_bb (else_bb);
635 expand_omp_build_assign (&gsi, tmp_else,
636 build_int_cst (unsigned_type_node, 1),
637 true);
638
639 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
640 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
641 add_bb_to_loop (then_bb, cond_bb->loop_father);
642 add_bb_to_loop (else_bb, cond_bb->loop_father);
643 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
644 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
645
646 if (gimple_in_ssa_p (cfun))
647 {
648 gphi *phi = create_phi_node (tmp_join, bb);
649 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
650 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
651 }
652
653 val = tmp_join;
654 }
655
656 gsi = gsi_start_bb (bb);
657 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
658 false, GSI_CONTINUE_LINKING);
659 }
660
661 gsi = gsi_last_bb (bb);
662 t = gimple_omp_parallel_data_arg (entry_stmt);
663 if (t == NULL)
664 t1 = null_pointer_node;
665 else
666 t1 = build_fold_addr_expr (t);
667 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
668 t2 = build_fold_addr_expr (child_fndecl);
669
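 /* Assemble the launch call; for a plain parallel region this is roughly
    GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags), while a
    combined parallel+workshare region splices WS_ARGS in before FLAGS.  */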
670 vec_alloc (args, 4 + vec_safe_length (ws_args));
671 args->quick_push (t2);
672 args->quick_push (t1);
673 args->quick_push (val);
674 if (ws_args)
675 args->splice (*ws_args);
676 args->quick_push (flags);
677
678 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
679 builtin_decl_explicit (start_ix), args);
680
681 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
682 false, GSI_CONTINUE_LINKING);
683
684 if (hsa_gen_requested_p ()
685 && parallel_needs_hsa_kernel_p (region))
686 {
687 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
688 hsa_register_kernel (child_cnode);
689 }
690}
691
692/* Insert a function call whose name is FUNC_NAME with the information from
693 ENTRY_STMT into the basic_block BB. */
694
695static void
696expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
697 vec <tree, va_gc> *ws_args)
698{
699 tree t, t1, t2;
700 gimple_stmt_iterator gsi;
701 vec <tree, va_gc> *args;
702
703 gcc_assert (vec_safe_length (ws_args) == 2);
704 tree func_name = (*ws_args)[0];
705 tree grain = (*ws_args)[1];
706
707 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
708 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
709 gcc_assert (count != NULL_TREE);
710 count = OMP_CLAUSE_OPERAND (count, 0);
711
712 gsi = gsi_last_bb (bb);
713 t = gimple_omp_parallel_data_arg (entry_stmt);
714 if (t == NULL)
715 t1 = null_pointer_node;
716 else
717 t1 = build_fold_addr_expr (t);
718 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
719
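 /* The runtime function named by FUNC_NAME (taken from WS_ARGS) is invoked
    as FUNC_NAME (child_fn, data, count, grain).  */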
720 vec_alloc (args, 4);
721 args->quick_push (t2);
722 args->quick_push (t1);
723 args->quick_push (count);
724 args->quick_push (grain);
725 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
726
727 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
728 GSI_CONTINUE_LINKING);
729}
730
731/* Build the function call to GOMP_task to actually
732 generate the task operation. BB is the block where to insert the code. */
733
734static void
735expand_task_call (struct omp_region *region, basic_block bb,
736 gomp_task *entry_stmt)
737{
738 tree t1, t2, t3;
739 gimple_stmt_iterator gsi;
740 location_t loc = gimple_location (entry_stmt);
741
742 tree clauses = gimple_omp_task_clauses (entry_stmt);
743
744 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
745 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
746 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
747 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
748 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
749 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750
751 unsigned int iflags
752 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
753 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
754 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755
756 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
757 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
758 tree num_tasks = NULL_TREE;
759 bool ull = false;
760 if (taskloop_p)
761 {
762 gimple *g = last_stmt (region->outer->entry);
763 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
764 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
765 struct omp_for_data fd;
766 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
767 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
768 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
769 OMP_CLAUSE__LOOPTEMP_);
770 startvar = OMP_CLAUSE_DECL (startvar);
771 endvar = OMP_CLAUSE_DECL (endvar);
772 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
773 if (fd.loop.cond_code == LT_EXPR)
774 iflags |= GOMP_TASK_FLAG_UP;
775 tree tclauses = gimple_omp_for_clauses (g);
776 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
777 if (num_tasks)
778 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
779 else
780 {
781 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
782 if (num_tasks)
783 {
784 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
785 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 }
787 else
788 num_tasks = integer_zero_node;
789 }
790 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
791 if (ifc == NULL_TREE)
792 iflags |= GOMP_TASK_FLAG_IF;
793 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
794 iflags |= GOMP_TASK_FLAG_NOGROUP;
795 ull = fd.iter_type == long_long_unsigned_type_node;
796 }
797 else if (priority)
798 iflags |= GOMP_TASK_FLAG_PRIORITY;
799
800 tree flags = build_int_cst (unsigned_type_node, iflags);
801
802 tree cond = boolean_true_node;
803 if (ifc)
804 {
805 if (taskloop_p)
806 {
807 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
808 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
809 build_int_cst (unsigned_type_node,
810 GOMP_TASK_FLAG_IF),
811 build_int_cst (unsigned_type_node, 0));
812 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
813 flags, t);
814 }
815 else
816 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
817 }
818
819 if (finalc)
820 {
821 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
822 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
823 build_int_cst (unsigned_type_node,
824 GOMP_TASK_FLAG_FINAL),
825 build_int_cst (unsigned_type_node, 0));
826 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 }
828 if (depend)
829 depend = OMP_CLAUSE_DECL (depend);
830 else
831 depend = build_int_cst (ptr_type_node, 0);
832 if (priority)
833 priority = fold_convert (integer_type_node,
834 OMP_CLAUSE_PRIORITY_EXPR (priority));
835 else
836 priority = integer_zero_node;
837
838 gsi = gsi_last_bb (bb);
839 tree t = gimple_omp_task_data_arg (entry_stmt);
840 if (t == NULL)
841 t2 = null_pointer_node;
842 else
843 t2 = build_fold_addr_expr_loc (loc, t);
844 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
845 t = gimple_omp_task_copy_fn (entry_stmt);
846 if (t == NULL)
847 t3 = null_pointer_node;
848 else
849 t3 = build_fold_addr_expr_loc (loc, t);
850
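 /* The call built below mirrors the libgomp entry points:
    GOMP_taskloop{,_ull} (fn, data, cpyfn, arg_size, arg_align, flags,
    num_tasks, priority, start, end, step) for taskloops, and
    GOMP_task (fn, data, cpyfn, arg_size, arg_align, if_cond, flags,
    depend, priority) for plain tasks.  */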
851 if (taskloop_p)
852 t = build_call_expr (ull
853 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
854 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
855 11, t1, t2, t3,
856 gimple_omp_task_arg_size (entry_stmt),
857 gimple_omp_task_arg_align (entry_stmt), flags,
858 num_tasks, priority, startvar, endvar, step);
859 else
860 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
861 9, t1, t2, t3,
862 gimple_omp_task_arg_size (entry_stmt),
863 gimple_omp_task_arg_align (entry_stmt), cond, flags,
864 depend, priority);
865
866 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
867 false, GSI_CONTINUE_LINKING);
868}
869
870/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871
872static tree
873vec2chain (vec<tree, va_gc> *v)
874{
875 tree chain = NULL_TREE, t;
876 unsigned ix;
877
878 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 {
880 DECL_CHAIN (t) = chain;
881 chain = t;
882 }
883
884 return chain;
885}
886
887/* Remove barriers in REGION->EXIT's block. Note that this is only
888 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
889 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
890 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
891 removed. */
892
893static void
894remove_exit_barrier (struct omp_region *region)
895{
896 gimple_stmt_iterator gsi;
897 basic_block exit_bb;
898 edge_iterator ei;
899 edge e;
900 gimple *stmt;
901 int any_addressable_vars = -1;
902
903 exit_bb = region->exit;
904
905 /* If the parallel region doesn't return, we don't have REGION->EXIT
906 block at all. */
907 if (! exit_bb)
908 return;
909
910 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
911 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
912 statements that can appear in between are extremely limited -- no
913 memory operations at all. Here, we allow nothing at all, so the
914 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
915 gsi = gsi_last_bb (exit_bb);
916 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
917 gsi_prev (&gsi);
918 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
919 return;
920
921 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 {
923 gsi = gsi_last_bb (e->src);
924 if (gsi_end_p (gsi))
925 continue;
926 stmt = gsi_stmt (gsi);
927 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
928 && !gimple_omp_return_nowait_p (stmt))
929 {
930 /* OpenMP 3.0 tasks unfortunately prevent this optimization
931 in many cases. If there could be tasks queued, the barrier
932 might be needed to let the tasks run before some local
933 variable of the parallel that the task uses as shared
934 runs out of scope. The task can be spawned either
935 from within current function (this would be easy to check)
936 or from some function it calls and gets passed an address
937 of such a variable. */
938 if (any_addressable_vars < 0)
939 {
940 gomp_parallel *parallel_stmt
941 = as_a <gomp_parallel *> (last_stmt (region->entry));
942 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
943 tree local_decls, block, decl;
944 unsigned ix;
945
946 any_addressable_vars = 0;
947 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
948 if (TREE_ADDRESSABLE (decl))
949 {
950 any_addressable_vars = 1;
951 break;
952 }
953 for (block = gimple_block (stmt);
954 !any_addressable_vars
955 && block
956 && TREE_CODE (block) == BLOCK;
957 block = BLOCK_SUPERCONTEXT (block))
958 {
959 for (local_decls = BLOCK_VARS (block);
960 local_decls;
961 local_decls = DECL_CHAIN (local_decls))
962 if (TREE_ADDRESSABLE (local_decls))
963 {
964 any_addressable_vars = 1;
965 break;
966 }
967 if (block == gimple_block (parallel_stmt))
968 break;
969 }
970 }
971 if (!any_addressable_vars)
972 gimple_omp_return_set_nowait (stmt);
973 }
974 }
975}
976
977static void
978remove_exit_barriers (struct omp_region *region)
979{
980 if (region->type == GIMPLE_OMP_PARALLEL)
981 remove_exit_barrier (region);
982
983 if (region->inner)
984 {
985 region = region->inner;
986 remove_exit_barriers (region);
987 while (region->next)
988 {
989 region = region->next;
990 remove_exit_barriers (region);
991 }
992 }
993}
994
995/* Optimize omp_get_thread_num () and omp_get_num_threads ()
996 calls. These can't be declared as const functions, but
997 within one parallel body they are constant, so they can be
998 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
999 which are declared const. Similarly for task body, except
1000 that in untied task omp_get_thread_num () can change at any task
1001 scheduling point. */
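 /* Only calls that resolve to the external library functions (DECL_EXTERNAL,
    TREE_PUBLIC, no DECL_INITIAL) and whose type matches the builtin are
    rewritten, so a user-defined function of the same name is left alone.  */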
1002
1003static void
1004optimize_omp_library_calls (gimple *entry_stmt)
1005{
1006 basic_block bb;
1007 gimple_stmt_iterator gsi;
1008 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1009 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1010 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1011 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1012 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1013 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1014 OMP_CLAUSE_UNTIED) != NULL);
1015
1016 FOR_EACH_BB_FN (bb, cfun)
1017 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 {
1019 gimple *call = gsi_stmt (gsi);
1020 tree decl;
1021
1022 if (is_gimple_call (call)
1023 && (decl = gimple_call_fndecl (call))
1024 && DECL_EXTERNAL (decl)
1025 && TREE_PUBLIC (decl)
1026 && DECL_INITIAL (decl) == NULL)
1027 {
1028 tree built_in;
1029
1030 if (DECL_NAME (decl) == thr_num_id)
1031 {
1032 /* In #pragma omp task untied omp_get_thread_num () can change
1033 during the execution of the task region. */
1034 if (untied_task)
1035 continue;
1036 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 }
1038 else if (DECL_NAME (decl) == num_thr_id)
1039 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1040 else
1041 continue;
1042
1043 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1044 || gimple_call_num_args (call) != 0)
1045 continue;
1046
1047 if (flag_exceptions && !TREE_NOTHROW (decl))
1048 continue;
1049
1050 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1051 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1052 TREE_TYPE (TREE_TYPE (built_in))))
1053 continue;
1054
1055 gimple_call_set_fndecl (call, built_in);
1056 }
1057 }
1058}
1059
1060/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1061 regimplified. */
1062
1063static tree
1064expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065{
1066 tree t = *tp;
1067
1068 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1069 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1070 return t;
1071
1072 if (TREE_CODE (t) == ADDR_EXPR)
1073 recompute_tree_invariant_for_addr_expr (t);
1074
1075 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1076 return NULL_TREE;
1077}
1078
1079/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080
1081static void
1082expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1083 bool after)
1084{
1085 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1086 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1087 !after, after ? GSI_CONTINUE_LINKING
1088 : GSI_SAME_STMT);
1089 gimple *stmt = gimple_build_assign (to, from);
1090 if (after)
1091 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1092 else
1093 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1094 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1095 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 {
1097 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1098 gimple_regimplify_operands (stmt, &gsi);
1099 }
1100}
1101
1102/* Expand the OpenMP parallel or task directive starting at REGION. */
1103
1104static void
1105expand_omp_taskreg (struct omp_region *region)
1106{
1107 basic_block entry_bb, exit_bb, new_bb;
1108 struct function *child_cfun;
1109 tree child_fn, block, t;
1110 gimple_stmt_iterator gsi;
1111 gimple *entry_stmt, *stmt;
1112 edge e;
1113 vec<tree, va_gc> *ws_args;
1114
1115 entry_stmt = last_stmt (region->entry);
1116 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1117 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118
1119 entry_bb = region->entry;
1120 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1121 exit_bb = region->cont;
1122 else
1123 exit_bb = region->exit;
1124
1125 bool is_cilk_for
1126 = (flag_cilkplus
1127 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1128 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1129 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1130
1131 if (is_cilk_for)
1132 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1133 and the inner statement contains the name of the built-in function
1134 and grain. */
1135 ws_args = region->inner->ws_args;
1136 else if (is_combined_parallel (region))
1137 ws_args = region->ws_args;
1138 else
1139 ws_args = NULL;
1140
1141 if (child_cfun->cfg)
1142 {
1143 /* Due to inlining, it may happen that we have already outlined
1144 the region, in which case all we need to do is make the
1145 sub-graph unreachable and emit the parallel call. */
1146 edge entry_succ_e, exit_succ_e;
1147
1148 entry_succ_e = single_succ_edge (entry_bb);
1149
1150 gsi = gsi_last_bb (entry_bb);
1151 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1152 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1153 gsi_remove (&gsi, true);
1154
1155 new_bb = entry_bb;
1156 if (exit_bb)
1157 {
1158 exit_succ_e = single_succ_edge (exit_bb);
1159 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1160 }
1161 remove_edge_and_dominated_blocks (entry_succ_e);
1162 }
1163 else
1164 {
1165 unsigned srcidx, dstidx, num;
1166
1167 /* If the parallel region needs data sent from the parent
1168 function, then the very first statement (except possible
1169 tree profile counter updates) of the parallel body
1170 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1171 &.OMP_DATA_O is passed as an argument to the child function,
1172 we need to replace it with the argument as seen by the child
1173 function.
1174
1175 In most cases, this will end up being the identity assignment
1176 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1177 a function call that has been inlined, the original PARM_DECL
1178 .OMP_DATA_I may have been converted into a different local
1179 variable. In which case, we need to keep the assignment. */
1180 if (gimple_omp_taskreg_data_arg (entry_stmt))
1181 {
1182 basic_block entry_succ_bb
1183 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1184 : FALLTHRU_EDGE (entry_bb)->dest;
1185 tree arg;
1186 gimple *parcopy_stmt = NULL;
1187
1188 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1189 {
1190 gimple *stmt;
1191
1192 gcc_assert (!gsi_end_p (gsi));
1193 stmt = gsi_stmt (gsi);
1194 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1195 continue;
1196
1197 if (gimple_num_ops (stmt) == 2)
1198 {
1199 tree arg = gimple_assign_rhs1 (stmt);
1200
1201 /* We ignore the subcode because we're
1202 effectively doing a STRIP_NOPS. */
1203
1204 if (TREE_CODE (arg) == ADDR_EXPR
1205 && TREE_OPERAND (arg, 0)
1206 == gimple_omp_taskreg_data_arg (entry_stmt))
1207 {
1208 parcopy_stmt = stmt;
1209 break;
1210 }
1211 }
1212 }
1213
1214 gcc_assert (parcopy_stmt != NULL);
1215 arg = DECL_ARGUMENTS (child_fn);
1216
1217 if (!gimple_in_ssa_p (cfun))
1218 {
1219 if (gimple_assign_lhs (parcopy_stmt) == arg)
1220 gsi_remove (&gsi, true);
1221 else
1222 {
1223 /* ?? Is setting the subcode really necessary ?? */
1224 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1225 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1226 }
1227 }
1228 else
1229 {
1230 tree lhs = gimple_assign_lhs (parcopy_stmt);
1231 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1232 /* We'd like to set the rhs to the default def in the child_fn,
1233 but it's too early to create ssa names in the child_fn.
1234 Instead, we set the rhs to the parm. In
1235 move_sese_region_to_fn, we introduce a default def for the
1236 parm, map the parm to its default def, and once we encounter
1237 this stmt, replace the parm with the default def. */
1238 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1239 update_stmt (parcopy_stmt);
1240 }
1241 }
1242
1243 /* Declare local variables needed in CHILD_CFUN. */
1244 block = DECL_INITIAL (child_fn);
1245 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1246 /* The gimplifier could record temporaries in parallel/task block
1247 rather than in containing function's local_decls chain,
1248 which would mean cgraph missed finalizing them. Do it now. */
1249 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1250 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1251 varpool_node::finalize_decl (t);
1252 DECL_SAVED_TREE (child_fn) = NULL;
1253 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1254 gimple_set_body (child_fn, NULL);
1255 TREE_USED (block) = 1;
1256
1257 /* Reset DECL_CONTEXT on function arguments. */
1258 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1259 DECL_CONTEXT (t) = child_fn;
1260
1261 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1262 so that it can be moved to the child function. */
1263 gsi = gsi_last_bb (entry_bb);
1264 stmt = gsi_stmt (gsi);
1265 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1266 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1267 e = split_block (entry_bb, stmt);
1268 gsi_remove (&gsi, true);
1269 entry_bb = e->dest;
1270 edge e2 = NULL;
1271 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1272 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1273 else
1274 {
1275 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1276 gcc_assert (e2->dest == region->exit);
1277 remove_edge (BRANCH_EDGE (entry_bb));
1278 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1279 gsi = gsi_last_bb (region->exit);
1280 gcc_assert (!gsi_end_p (gsi)
1281 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1282 gsi_remove (&gsi, true);
1283 }
1284
1285 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1286 if (exit_bb)
1287 {
1288 gsi = gsi_last_bb (exit_bb);
1289 gcc_assert (!gsi_end_p (gsi)
1290 && (gimple_code (gsi_stmt (gsi))
1291 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1292 stmt = gimple_build_return (NULL);
1293 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1294 gsi_remove (&gsi, true);
1295 }
1296
1297 /* Move the parallel region into CHILD_CFUN. */
1298
1299 if (gimple_in_ssa_p (cfun))
1300 {
1301 init_tree_ssa (child_cfun);
1302 init_ssa_operands (child_cfun);
1303 child_cfun->gimple_df->in_ssa_p = true;
1304 block = NULL_TREE;
1305 }
1306 else
1307 block = gimple_block (entry_stmt);
1308
1309 /* Make sure to generate early debug for the function before
1310 outlining anything. */
1311 if (! gimple_in_ssa_p (cfun))
1312 (*debug_hooks->early_global_decl) (cfun->decl);
1313
1314 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1315 if (exit_bb)
1316 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1317 if (e2)
1318 {
1319 basic_block dest_bb = e2->dest;
1320 if (!exit_bb)
1321 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1322 remove_edge (e2);
1323 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1324 }
1325 /* When the OMP expansion process cannot guarantee an up-to-date
1326 loop tree, arrange for the child function to fix up loops. */
1327 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1328 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1329
1330 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1331 num = vec_safe_length (child_cfun->local_decls);
1332 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1333 {
1334 t = (*child_cfun->local_decls)[srcidx];
1335 if (DECL_CONTEXT (t) == cfun->decl)
1336 continue;
1337 if (srcidx != dstidx)
1338 (*child_cfun->local_decls)[dstidx] = t;
1339 dstidx++;
1340 }
1341 if (dstidx != num)
1342 vec_safe_truncate (child_cfun->local_decls, dstidx);
1343
1344 /* Inform the callgraph about the new function. */
1345 child_cfun->curr_properties = cfun->curr_properties;
1346 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1347 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1348 cgraph_node *node = cgraph_node::get_create (child_fn);
1349 node->parallelized_function = 1;
1350 cgraph_node::add_new_function (child_fn, true);
1351
1352 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1353 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1354
1355 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1356 fixed in a following pass. */
1357 push_cfun (child_cfun);
1358 if (need_asm)
1359 assign_assembler_name_if_needed (child_fn);
1360
1361 if (optimize)
1362 optimize_omp_library_calls (entry_stmt);
1363 cgraph_edge::rebuild_edges ();
1364
1365 /* Some EH regions might become dead, see PR34608. If
1366 pass_cleanup_cfg isn't the first pass to happen with the
1367 new child, these dead EH edges might cause problems.
1368 Clean them up now. */
1369 if (flag_exceptions)
1370 {
1371 basic_block bb;
1372 bool changed = false;
1373
1374 FOR_EACH_BB_FN (bb, cfun)
1375 changed |= gimple_purge_dead_eh_edges (bb);
1376 if (changed)
1377 cleanup_tree_cfg ();
1378 }
1379 if (gimple_in_ssa_p (cfun))
1380 update_ssa (TODO_update_ssa);
1381 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1382 verify_loop_structure ();
1383 pop_cfun ();
1384
1385 if (dump_file && !gimple_in_ssa_p (cfun))
1386 {
1387 omp_any_child_fn_dumped = true;
1388 dump_function_header (dump_file, child_fn, dump_flags);
1389 dump_function_to_file (child_fn, dump_file, dump_flags);
1390 }
1391 }
1392
1393 /* Emit a library call to launch the children threads. */
1394 if (is_cilk_for)
1395 expand_cilk_for_call (new_bb,
1396 as_a <gomp_parallel *> (entry_stmt), ws_args);
1397 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1398 expand_parallel_call (region, new_bb,
1399 as_a <gomp_parallel *> (entry_stmt), ws_args);
1400 else
1401 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1402 if (gimple_in_ssa_p (cfun))
1403 update_ssa (TODO_update_ssa_only_virtuals);
1404}
1405
1406/* Information about members of an OpenACC collapsed loop nest. */
1407
1408struct oacc_collapse
1409{
1410 tree base; /* Base value. */
1411 tree iters; /* Number of steps. */
1412 tree step; /* Step size. */
1413 tree tile; /* Tile increment (if tiled). */
1414 tree outer; /* Tile iterator var. */
1415};
1416
1417/* Helper for expand_oacc_for. Determine collapsed loop information.
1418 Fill in COUNTS array. Emit any initialization code before GSI.
1419 Return the calculated outer loop bound of BOUND_TYPE. */
1420
1421static tree
1422expand_oacc_collapse_init (const struct omp_for_data *fd,
1423 gimple_stmt_iterator *gsi,
1424 oacc_collapse *counts, tree bound_type,
1425 location_t loc)
1426{
1427 tree tiling = fd->tiling;
1428 tree total = build_int_cst (bound_type, 1);
1429 int ix;
1430
1431 gcc_assert (integer_onep (fd->loop.step));
1432 gcc_assert (integer_zerop (fd->loop.n1));
1433
1434 /* When tiling, the first operand of the tile clause applies to the
1435 innermost loop, and we work outwards from there. Seems
1436 backwards, but whatever. */
1437 for (ix = fd->collapse; ix--;)
1438 {
1439 const omp_for_data_loop *loop = &fd->loops[ix];
1440
1441 tree iter_type = TREE_TYPE (loop->v);
1442 tree diff_type = iter_type;
1443 tree plus_type = iter_type;
1444
1445 gcc_assert (loop->cond_code == fd->loop.cond_code);
1446
1447 if (POINTER_TYPE_P (iter_type))
1448 plus_type = sizetype;
1449 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1450 diff_type = signed_type_for (diff_type);
1451
1452 if (tiling)
1453 {
1454 tree num = build_int_cst (integer_type_node, fd->collapse);
1455 tree loop_no = build_int_cst (integer_type_node, ix);
1456 tree tile = TREE_VALUE (tiling);
1457 gcall *call
1458 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1459 /* gwv-outer=*/integer_zero_node,
1460 /* gwv-inner=*/integer_zero_node);
1461
1462 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1463 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1464 gimple_call_set_lhs (call, counts[ix].tile);
1465 gimple_set_location (call, loc);
1466 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1467
1468 tiling = TREE_CHAIN (tiling);
1469 }
1470 else
1471 {
1472 counts[ix].tile = NULL;
1473 counts[ix].outer = loop->v;
1474 }
1475
1476 tree b = loop->n1;
1477 tree e = loop->n2;
1478 tree s = loop->step;
1479 bool up = loop->cond_code == LT_EXPR;
1480 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1481 bool negating;
1482 tree expr;
1483
1484 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1485 true, GSI_SAME_STMT);
1486 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1487 true, GSI_SAME_STMT);
1488
1489 /* Convert the step, avoiding possible unsigned->signed overflow. */
1490 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1491 if (negating)
1492 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1493 s = fold_convert (diff_type, s);
1494 if (negating)
1495 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1496 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1497 true, GSI_SAME_STMT);
1498
1499 /* Determine the range, avoiding possible unsigned->signed overflow. */
1500 negating = !up && TYPE_UNSIGNED (iter_type);
1501 expr = fold_build2 (MINUS_EXPR, plus_type,
1502 fold_convert (plus_type, negating ? b : e),
1503 fold_convert (plus_type, negating ? e : b));
1504 expr = fold_convert (diff_type, expr);
1505 if (negating)
1506 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1507 tree range = force_gimple_operand_gsi
1508 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1509
1510 /* Determine number of iterations. */
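 /* I.e. iters = (range - dir + step) / step, which is a ceiling division
    of RANGE by STEP given that DIR is +/-1 and RANGE and STEP were given
    matching signs above.  */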
1511 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1512 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1513 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1514
1515 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1516 true, GSI_SAME_STMT);
1517
1518 counts[ix].base = b;
1519 counts[ix].iters = iters;
1520 counts[ix].step = s;
1521
1522 total = fold_build2 (MULT_EXPR, bound_type, total,
1523 fold_convert (bound_type, iters));
1524 }
1525
1526 return total;
1527}
1528
1529/* Emit initializers for collapsed loop members. INNER is true if
1530 this is for the element loop of a TILE. IVAR is the outer
1531 loop iteration variable, from which collapsed loop iteration values
1532 are calculated. COUNTS array has been initialized by
1533 expand_oacc_collapse_init. */
1534
1535static void
1536expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1537 gimple_stmt_iterator *gsi,
1538 const oacc_collapse *counts, tree ivar)
1539{
1540 tree ivar_type = TREE_TYPE (ivar);
1541
1542 /* The most rapidly changing iteration variable is the innermost
1543 one. */
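 /* Each loop's value is recovered as (IVAR % ITERS) * STEP added to its
    base (or to the tile's outer iterator for an element loop), after which
    IVAR is divided by ITERS to move to the next-outer loop.  */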
1544 for (int ix = fd->collapse; ix--;)
1545 {
1546 const omp_for_data_loop *loop = &fd->loops[ix];
1547 const oacc_collapse *collapse = &counts[ix];
1548 tree v = inner ? loop->v : collapse->outer;
1549 tree iter_type = TREE_TYPE (v);
1550 tree diff_type = TREE_TYPE (collapse->step);
1551 tree plus_type = iter_type;
1552 enum tree_code plus_code = PLUS_EXPR;
1553 tree expr;
1554
1555 if (POINTER_TYPE_P (iter_type))
1556 {
1557 plus_code = POINTER_PLUS_EXPR;
1558 plus_type = sizetype;
1559 }
1560
1561 expr = ivar;
1562 if (ix)
1563 {
1564 tree mod = fold_convert (ivar_type, collapse->iters);
1565 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1566 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1567 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1568 true, GSI_SAME_STMT);
1569 }
1570
1571 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1572 collapse->step);
1573 expr = fold_build2 (plus_code, iter_type,
1574 inner ? collapse->outer : collapse->base,
1575 fold_convert (plus_type, expr));
1576 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1577 true, GSI_SAME_STMT);
1578 gassign *ass = gimple_build_assign (v, expr);
1579 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1580 }
1581}
1582
1583/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1584 of the combined collapse > 1 loop constructs, generate code like:
1585 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1586 if (cond3 is <)
1587 adj = STEP3 - 1;
1588 else
1589 adj = STEP3 + 1;
1590 count3 = (adj + N32 - N31) / STEP3;
1591 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1592 if (cond2 is <)
1593 adj = STEP2 - 1;
1594 else
1595 adj = STEP2 + 1;
1596 count2 = (adj + N22 - N21) / STEP2;
1597 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1598 if (cond1 is <)
1599 adj = STEP1 - 1;
1600 else
1601 adj = STEP1 + 1;
1602 count1 = (adj + N12 - N11) / STEP1;
1603 count = count1 * count2 * count3;
1604 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1605 count = 0;
1606 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1607 of the combined loop constructs, just initialize COUNTS array
1608 from the _looptemp_ clauses. */
1609
1610/* NOTE: It *could* be better to moosh all of the BBs together,
1611 creating one larger BB with all the computation and the unexpected
1612 jump at the end. I.e.
1613
1614 bool zero3, zero2, zero1, zero;
1615
1616 zero3 = N32 c3 N31;
1617 count3 = (N32 - N31) /[cl] STEP3;
1618 zero2 = N22 c2 N21;
1619 count2 = (N22 - N21) /[cl] STEP2;
1620 zero1 = N12 c1 N11;
1621 count1 = (N12 - N11) /[cl] STEP1;
1622 zero = zero3 || zero2 || zero1;
1623 count = count1 * count2 * count3;
1624 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1625
1626 After all, we expect the zero=false, and thus we expect to have to
1627 evaluate all of the comparison expressions, so short-circuiting
1628 oughtn't be a win. Since the condition isn't protecting a
1629 denominator, we're not concerned about divide-by-zero, so we can
1630 fully evaluate count even if a numerator turned out to be wrong.
1631
1632 It seems like putting this all together would create much better
1633 scheduling opportunities, and less pressure on the chip's branch
1634 predictor. */
1635
1636static void
1637expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1638 basic_block &entry_bb, tree *counts,
1639 basic_block &zero_iter1_bb, int &first_zero_iter1,
1640 basic_block &zero_iter2_bb, int &first_zero_iter2,
1641 basic_block &l2_dom_bb)
1642{
1643 tree t, type = TREE_TYPE (fd->loop.v);
1644 edge e, ne;
1645 int i;
1646
1647 /* Collapsed loops need work for expansion into SSA form. */
1648 gcc_assert (!gimple_in_ssa_p (cfun));
1649
1650 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1651 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1652 {
1653 gcc_assert (fd->ordered == 0);
1654 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1655 isn't supposed to be handled, as the inner loop doesn't
1656 use it. */
1657 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1658 OMP_CLAUSE__LOOPTEMP_);
1659 gcc_assert (innerc);
1660 for (i = 0; i < fd->collapse; i++)
1661 {
1662 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1663 OMP_CLAUSE__LOOPTEMP_);
1664 gcc_assert (innerc);
1665 if (i)
1666 counts[i] = OMP_CLAUSE_DECL (innerc);
1667 else
1668 counts[0] = NULL_TREE;
1669 }
1670 return;
1671 }
1672
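 /* For a doacross loop (ordered > collapse), if any of the additional
    ordered dimensions can be folded to a zero-iteration loop, simply zero
    all of those counts up front.  */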
1673 for (i = fd->collapse; i < fd->ordered; i++)
1674 {
1675 tree itype = TREE_TYPE (fd->loops[i].v);
1676 counts[i] = NULL_TREE;
1677 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1678 fold_convert (itype, fd->loops[i].n1),
1679 fold_convert (itype, fd->loops[i].n2));
1680 if (t && integer_zerop (t))
1681 {
1682 for (i = fd->collapse; i < fd->ordered; i++)
1683 counts[i] = build_int_cst (type, 0);
1684 break;
1685 }
1686 }
1687 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1688 {
1689 tree itype = TREE_TYPE (fd->loops[i].v);
1690
1691 if (i >= fd->collapse && counts[i])
1692 continue;
1693 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1694 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1695 fold_convert (itype, fd->loops[i].n1),
1696 fold_convert (itype, fd->loops[i].n2)))
1697 == NULL_TREE || !integer_onep (t)))
1698 {
1699 gcond *cond_stmt;
1700 tree n1, n2;
1701 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1702 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1703 true, GSI_SAME_STMT);
1704 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1705 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1706 true, GSI_SAME_STMT);
1707 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1708 NULL_TREE, NULL_TREE);
1709 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1710 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1711 expand_omp_regimplify_p, NULL, NULL)
1712 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1713 expand_omp_regimplify_p, NULL, NULL))
1714 {
1715 *gsi = gsi_for_stmt (cond_stmt);
1716 gimple_regimplify_operands (cond_stmt, gsi);
1717 }
1718 e = split_block (entry_bb, cond_stmt);
1719 basic_block &zero_iter_bb
1720 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1721 int &first_zero_iter
1722 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1723 if (zero_iter_bb == NULL)
1724 {
1725 gassign *assign_stmt;
1726 first_zero_iter = i;
1727 zero_iter_bb = create_empty_bb (entry_bb);
1728 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1729 *gsi = gsi_after_labels (zero_iter_bb);
1730 if (i < fd->collapse)
1731 assign_stmt = gimple_build_assign (fd->loop.n2,
1732 build_zero_cst (type));
1733 else
1734 {
1735 counts[i] = create_tmp_reg (type, ".count");
1736 assign_stmt
1737 = gimple_build_assign (counts[i], build_zero_cst (type));
1738 }
1739 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1740 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1741 entry_bb);
1742 }
1743 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1744	  ne->probability = profile_probability::very_unlikely ();
1745	  e->flags = EDGE_TRUE_VALUE;
1746	  e->probability = ne->probability.invert ();
1747 if (l2_dom_bb == NULL)
1748 l2_dom_bb = entry_bb;
1749 entry_bb = e->dest;
1750 *gsi = gsi_last_bb (entry_bb);
1751 }
1752
1753 if (POINTER_TYPE_P (itype))
1754 itype = signed_type_for (itype);
1755 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1756 ? -1 : 1));
1757 t = fold_build2 (PLUS_EXPR, itype,
1758 fold_convert (itype, fd->loops[i].step), t);
1759 t = fold_build2 (PLUS_EXPR, itype, t,
1760 fold_convert (itype, fd->loops[i].n2));
1761 t = fold_build2 (MINUS_EXPR, itype, t,
1762 fold_convert (itype, fd->loops[i].n1));
1763	  /* ?? We could probably use CEIL_DIV_EXPR instead of
1764	     TRUNC_DIV_EXPR and adjust by hand.  Unless we can't
1765	     generate the same code in the end because generically we
1766	     don't know that the values involved must be negative for
1767	     GT?? */
1768 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1769 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1770 fold_build1 (NEGATE_EXPR, itype, t),
1771 fold_build1 (NEGATE_EXPR, itype,
1772 fold_convert (itype,
1773 fd->loops[i].step)));
1774 else
1775 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1776 fold_convert (itype, fd->loops[i].step));
1777 t = fold_convert (type, t);
1778 if (TREE_CODE (t) == INTEGER_CST)
1779 counts[i] = t;
1780 else
1781 {
1782 if (i < fd->collapse || i != first_zero_iter2)
1783 counts[i] = create_tmp_reg (type, ".count");
1784 expand_omp_build_assign (gsi, counts[i], t);
1785 }
1786 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1787 {
1788 if (i == 0)
1789 t = counts[0];
1790 else
1791 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1792 expand_omp_build_assign (gsi, fd->loop.n2, t);
1793 }
1794 }
1795}
1796
1797/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1798 T = V;
1799 V3 = N31 + (T % count3) * STEP3;
1800 T = T / count3;
1801 V2 = N21 + (T % count2) * STEP2;
1802 T = T / count2;
1803 V1 = N11 + T * STEP1;
1804 if this loop doesn't have an inner loop construct combined with it.
1805 If it does have an inner loop construct combined with it and the
1806 iteration count isn't known constant, store values from counts array
1807 into its _looptemp_ temporaries instead. */
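/* Illustrative example (arbitrary numbers): with collapse(3), count3 = 5
   and count2 = 4, the linear iteration number T = 17 is decoded as
     V3 = N31 + (17 % 5) * STEP3	(innermost index 2)
     T  = 17 / 5			(T becomes 3)
     V2 = N21 + (3 % 4) * STEP2		(middle index 3)
     T  = 3 / 4				(T becomes 0)
     V1 = N11 + 0 * STEP1		(outermost index 0)
   so the innermost loop varies fastest, matching the source order of the
   collapsed nest.  */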
1808
1809static void
1810expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1811 tree *counts, gimple *inner_stmt, tree startvar)
1812{
1813 int i;
1814 if (gimple_omp_for_combined_p (fd->for_stmt))
1815 {
1816 /* If fd->loop.n2 is constant, then no propagation of the counts
1817 is needed, they are constant. */
1818 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1819 return;
1820
1821 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1822 ? gimple_omp_taskreg_clauses (inner_stmt)
1823 : gimple_omp_for_clauses (inner_stmt);
1824 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1825 isn't supposed to be handled, as the inner loop doesn't
1826 use it. */
1827 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1828 gcc_assert (innerc);
1829 for (i = 0; i < fd->collapse; i++)
1830 {
1831 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1832 OMP_CLAUSE__LOOPTEMP_);
1833 gcc_assert (innerc);
1834 if (i)
1835 {
1836 tree tem = OMP_CLAUSE_DECL (innerc);
1837 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1838 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1839 false, GSI_CONTINUE_LINKING);
1840 gassign *stmt = gimple_build_assign (tem, t);
1841 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1842 }
1843 }
1844 return;
1845 }
1846
1847 tree type = TREE_TYPE (fd->loop.v);
1848 tree tem = create_tmp_reg (type, ".tem");
1849 gassign *stmt = gimple_build_assign (tem, startvar);
1850 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1851
1852 for (i = fd->collapse - 1; i >= 0; i--)
1853 {
1854 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1855 itype = vtype;
1856 if (POINTER_TYPE_P (vtype))
1857 itype = signed_type_for (vtype);
1858 if (i != 0)
1859 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1860 else
1861 t = tem;
1862 t = fold_convert (itype, t);
1863 t = fold_build2 (MULT_EXPR, itype, t,
1864 fold_convert (itype, fd->loops[i].step));
1865 if (POINTER_TYPE_P (vtype))
1866 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1867 else
1868 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1869 t = force_gimple_operand_gsi (gsi, t,
1870 DECL_P (fd->loops[i].v)
1871 && TREE_ADDRESSABLE (fd->loops[i].v),
1872 NULL_TREE, false,
1873 GSI_CONTINUE_LINKING);
1874 stmt = gimple_build_assign (fd->loops[i].v, t);
1875 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1876 if (i != 0)
1877 {
1878 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1879 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1880 false, GSI_CONTINUE_LINKING);
1881 stmt = gimple_build_assign (tem, t);
1882 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1883 }
1884 }
1885}
1886
1887/* Helper function for expand_omp_for_*. Generate code like:
1888 L10:
1889 V3 += STEP3;
1890 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1891 L11:
1892 V3 = N31;
1893 V2 += STEP2;
1894 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1895 L12:
1896 V2 = N21;
1897 V1 += STEP1;
1898 goto BODY_BB; */
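/* Illustrative trace (arbitrary bounds): with two collapsed loops where
   V2 iterates over {0, 1, 2}, the generated chain bumps V2 first and only
   when "V2 cond2 N22" fails does it reset V2 to N21 and bump V1, so the
   body sees (V1,V2) = (0,0) (0,1) (0,2) (1,0) (1,1) (1,2) ... and every
   path falls through to BODY_BB with a consistent set of indices.  */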
1899
1900static basic_block
1901extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1902 basic_block body_bb)
1903{
1904 basic_block last_bb, bb, collapse_bb = NULL;
1905 int i;
1906 gimple_stmt_iterator gsi;
1907 edge e;
1908 tree t;
1909 gimple *stmt;
1910
1911 last_bb = cont_bb;
1912 for (i = fd->collapse - 1; i >= 0; i--)
1913 {
1914 tree vtype = TREE_TYPE (fd->loops[i].v);
1915
1916 bb = create_empty_bb (last_bb);
1917 add_bb_to_loop (bb, last_bb->loop_father);
1918 gsi = gsi_start_bb (bb);
1919
1920 if (i < fd->collapse - 1)
1921 {
1922 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1923	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1924
1925 t = fd->loops[i + 1].n1;
1926 t = force_gimple_operand_gsi (&gsi, t,
1927 DECL_P (fd->loops[i + 1].v)
1928 && TREE_ADDRESSABLE (fd->loops[i
1929 + 1].v),
1930 NULL_TREE, false,
1931 GSI_CONTINUE_LINKING);
1932 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1933 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1934 }
1935 else
1936 collapse_bb = bb;
1937
1938 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1939
1940 if (POINTER_TYPE_P (vtype))
1941 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1942 else
1943 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1944 t = force_gimple_operand_gsi (&gsi, t,
1945 DECL_P (fd->loops[i].v)
1946 && TREE_ADDRESSABLE (fd->loops[i].v),
1947 NULL_TREE, false, GSI_CONTINUE_LINKING);
1948 stmt = gimple_build_assign (fd->loops[i].v, t);
1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950
1951 if (i > 0)
1952 {
1953 t = fd->loops[i].n2;
1954 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1955 false, GSI_CONTINUE_LINKING);
1956 tree v = fd->loops[i].v;
1957 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1958 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1959 false, GSI_CONTINUE_LINKING);
1960 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1961 stmt = gimple_build_cond_empty (t);
1962 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1963 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1964	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1965 }
1966 else
1967 make_edge (bb, body_bb, EDGE_FALLTHRU);
1968 last_bb = bb;
1969 }
1970
1971 return collapse_bb;
1972}
1973
1974/* Expand #pragma omp ordered depend(source). */
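/* In effect the directive becomes a single runtime call to
   GOMP_doacross_post (GOMP_doacross_ull_post for unsigned long long
   iteration types), passing the address of the .orditera array that holds
   the current iteration numbers.  */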
1975
1976static void
1977expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1978 tree *counts, location_t loc)
1979{
1980 enum built_in_function source_ix
1981 = fd->iter_type == long_integer_type_node
1982 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1983 gimple *g
1984 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1985 build_fold_addr_expr (counts[fd->ordered]));
1986 gimple_set_location (g, loc);
1987 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1988}
1989
1990/* Expand a single depend from #pragma omp ordered depend(sink:...). */
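/* Roughly, each sink dependence becomes a guarded call to
   GOMP_doacross_wait (GOMP_doacross_ull_wait for unsigned long long
   iteration types), with one argument per source/sink iteration counter
   (the collapsed dimensions fold into a single linear index).  The guard
   computed below skips the call when the requested iteration falls
   outside the iteration space.  */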
1991
1992static void
1993expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1994 tree *counts, tree c, location_t loc)
1995{
1996 auto_vec<tree, 10> args;
1997 enum built_in_function sink_ix
1998 = fd->iter_type == long_integer_type_node
1999 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2000 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2001 int i;
2002 gimple_stmt_iterator gsi2 = *gsi;
2003 bool warned_step = false;
2004
2005 for (i = 0; i < fd->ordered; i++)
2006 {
2007 tree step = NULL_TREE;
2008 off = TREE_PURPOSE (deps);
2009 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2010 {
2011 step = TREE_OPERAND (off, 1);
2012 off = TREE_OPERAND (off, 0);
2013 }
2014 if (!integer_zerop (off))
2015 {
2016 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2017 || fd->loops[i].cond_code == GT_EXPR);
2018 bool forward = fd->loops[i].cond_code == LT_EXPR;
2019 if (step)
2020 {
2021	      /* Non-simple Fortran DO loops.  If step is variable,
2022		 we don't even know the direction at compile time, so we
2023		 can't warn.  */
2024 if (TREE_CODE (step) != INTEGER_CST)
2025 break;
2026 forward = tree_int_cst_sgn (step) != -1;
2027 }
2028 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2029 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2030 "lexically later iteration");
2031 break;
2032 }
2033 deps = TREE_CHAIN (deps);
2034 }
2035 /* If all offsets corresponding to the collapsed loops are zero,
2036 this depend clause can be ignored. FIXME: but there is still a
2037 flush needed. We need to emit one __sync_synchronize () for it
2038 though (perhaps conditionally)? Solve this together with the
2039 conservative dependence folding optimization.
2040 if (i >= fd->collapse)
2041 return; */
2042
2043 deps = OMP_CLAUSE_DECL (c);
2044 gsi_prev (&gsi2);
2045 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2046 edge e2 = split_block_after_labels (e1->dest);
2047
2048 gsi2 = gsi_after_labels (e1->dest);
2049 *gsi = gsi_last_bb (e1->src);
2050 for (i = 0; i < fd->ordered; i++)
2051 {
2052 tree itype = TREE_TYPE (fd->loops[i].v);
2053 tree step = NULL_TREE;
2054 tree orig_off = NULL_TREE;
2055 if (POINTER_TYPE_P (itype))
2056 itype = sizetype;
2057 if (i)
2058 deps = TREE_CHAIN (deps);
2059 off = TREE_PURPOSE (deps);
2060 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2061 {
2062 step = TREE_OPERAND (off, 1);
2063 off = TREE_OPERAND (off, 0);
2064 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2065 && integer_onep (fd->loops[i].step)
2066 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2067 }
2068 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2069 if (step)
2070 {
2071 off = fold_convert_loc (loc, itype, off);
2072 orig_off = off;
2073 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2074 }
2075
2076 if (integer_zerop (off))
2077 t = boolean_true_node;
2078 else
2079 {
2080 tree a;
2081 tree co = fold_convert_loc (loc, itype, off);
2082 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2083 {
2084 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2085 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2086 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2087 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2088 co);
2089 }
2090 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2091 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2092 fd->loops[i].v, co);
2093 else
2094 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2095 fd->loops[i].v, co);
2096 if (step)
2097 {
2098 tree t1, t2;
2099 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2100 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2101 fd->loops[i].n1);
2102 else
2103 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2104 fd->loops[i].n2);
2105 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2106 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2107 fd->loops[i].n2);
2108 else
2109 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2110 fd->loops[i].n1);
2111 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2112 step, build_int_cst (TREE_TYPE (step), 0));
2113 if (TREE_CODE (step) != INTEGER_CST)
2114 {
2115 t1 = unshare_expr (t1);
2116 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2117 false, GSI_CONTINUE_LINKING);
2118 t2 = unshare_expr (t2);
2119 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2120 false, GSI_CONTINUE_LINKING);
2121 }
2122 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2123 t, t2, t1);
2124 }
2125 else if (fd->loops[i].cond_code == LT_EXPR)
2126 {
2127 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2128 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2129 fd->loops[i].n1);
2130 else
2131 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2132 fd->loops[i].n2);
2133 }
2134 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2135 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2136 fd->loops[i].n2);
2137 else
2138 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2139 fd->loops[i].n1);
2140 }
2141 if (cond)
2142 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2143 else
2144 cond = t;
2145
2146 off = fold_convert_loc (loc, itype, off);
2147
2148 if (step
2149 || (fd->loops[i].cond_code == LT_EXPR
2150 ? !integer_onep (fd->loops[i].step)
2151 : !integer_minus_onep (fd->loops[i].step)))
2152 {
2153 if (step == NULL_TREE
2154 && TYPE_UNSIGNED (itype)
2155 && fd->loops[i].cond_code == GT_EXPR)
2156 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2157 fold_build1_loc (loc, NEGATE_EXPR, itype,
2158 s));
2159 else
2160 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2161 orig_off ? orig_off : off, s);
2162 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2163 build_int_cst (itype, 0));
2164 if (integer_zerop (t) && !warned_step)
2165 {
2166 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2167 "in the iteration space");
2168 warned_step = true;
2169 }
2170 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2171 cond, t);
2172 }
2173
2174 if (i <= fd->collapse - 1 && fd->collapse > 1)
2175 t = fd->loop.v;
2176 else if (counts[i])
2177 t = counts[i];
2178 else
2179 {
2180 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2181 fd->loops[i].v, fd->loops[i].n1);
2182 t = fold_convert_loc (loc, fd->iter_type, t);
2183 }
2184 if (step)
2185 /* We have divided off by step already earlier. */;
2186 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2187 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2188 fold_build1_loc (loc, NEGATE_EXPR, itype,
2189 s));
2190 else
2191 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2192 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2193 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2194 off = fold_convert_loc (loc, fd->iter_type, off);
2195 if (i <= fd->collapse - 1 && fd->collapse > 1)
2196 {
2197 if (i)
2198 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2199 off);
2200 if (i < fd->collapse - 1)
2201 {
2202 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2203 counts[i]);
2204 continue;
2205 }
2206 }
2207 off = unshare_expr (off);
2208 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2209 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2210 true, GSI_SAME_STMT);
2211 args.safe_push (t);
2212 }
2213 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2214 gimple_set_location (g, loc);
2215 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2216
2217 cond = unshare_expr (cond);
2218 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2219 GSI_CONTINUE_LINKING);
2220 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2221 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2222	  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2223	  e1->probability = e3->probability.invert ();
2224 e1->flags = EDGE_TRUE_VALUE;
2225 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2226
2227 *gsi = gsi_after_labels (e2->dest);
2228}
2229
2230/* Expand all #pragma omp ordered depend(source) and
2231 #pragma omp ordered depend(sink:...) constructs in the current
2232 #pragma omp for ordered(n) region. */
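/* Illustrative source form handled here:

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   ... use results of iteration (i - 1, j) ...
	   #pragma omp ordered depend(source)
	 }
*/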
2233
2234static void
2235expand_omp_ordered_source_sink (struct omp_region *region,
2236 struct omp_for_data *fd, tree *counts,
2237 basic_block cont_bb)
2238{
2239 struct omp_region *inner;
2240 int i;
2241 for (i = fd->collapse - 1; i < fd->ordered; i++)
2242 if (i == fd->collapse - 1 && fd->collapse > 1)
2243 counts[i] = NULL_TREE;
2244 else if (i >= fd->collapse && !cont_bb)
2245 counts[i] = build_zero_cst (fd->iter_type);
2246 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2247 && integer_onep (fd->loops[i].step))
2248 counts[i] = NULL_TREE;
2249 else
2250 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2251 tree atype
2252 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2253 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2254 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2255
2256 for (inner = region->inner; inner; inner = inner->next)
2257 if (inner->type == GIMPLE_OMP_ORDERED)
2258 {
2259 gomp_ordered *ord_stmt = inner->ord_stmt;
2260 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2261 location_t loc = gimple_location (ord_stmt);
2262 tree c;
2263 for (c = gimple_omp_ordered_clauses (ord_stmt);
2264 c; c = OMP_CLAUSE_CHAIN (c))
2265 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2266 break;
2267 if (c)
2268 expand_omp_ordered_source (&gsi, fd, counts, loc);
2269 for (c = gimple_omp_ordered_clauses (ord_stmt);
2270 c; c = OMP_CLAUSE_CHAIN (c))
2271 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2272 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2273 gsi_remove (&gsi, true);
2274 }
2275}
2276
2277/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2278 collapsed. */
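/* Roughly: the collapsed loops stay under the schedule's control, while
   each remaining ordered dimension gets an explicit loop re-created here
   around the body, with its .orditer counter and the corresponding
   .orditera slot updated on every iteration so that depend(source) and
   depend(sink) can refer to it.  */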
2279
2280static basic_block
2281expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2282 basic_block cont_bb, basic_block body_bb,
2283 bool ordered_lastprivate)
2284{
2285 if (fd->ordered == fd->collapse)
2286 return cont_bb;
2287
2288 if (!cont_bb)
2289 {
2290 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2291 for (int i = fd->collapse; i < fd->ordered; i++)
2292 {
2293 tree type = TREE_TYPE (fd->loops[i].v);
2294 tree n1 = fold_convert (type, fd->loops[i].n1);
2295 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2296 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2297 size_int (i - fd->collapse + 1),
2298 NULL_TREE, NULL_TREE);
2299 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2300 }
2301 return NULL;
2302 }
2303
2304 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2305 {
2306 tree t, type = TREE_TYPE (fd->loops[i].v);
2307 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2308 expand_omp_build_assign (&gsi, fd->loops[i].v,
2309 fold_convert (type, fd->loops[i].n1));
2310 if (counts[i])
2311 expand_omp_build_assign (&gsi, counts[i],
2312 build_zero_cst (fd->iter_type));
2313 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 size_int (i - fd->collapse + 1),
2315 NULL_TREE, NULL_TREE);
2316 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2317 if (!gsi_end_p (gsi))
2318 gsi_prev (&gsi);
2319 else
2320 gsi = gsi_last_bb (body_bb);
2321 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2322 basic_block new_body = e1->dest;
2323 if (body_bb == cont_bb)
2324 cont_bb = new_body;
2325 edge e2 = NULL;
2326 basic_block new_header;
2327 if (EDGE_COUNT (cont_bb->preds) > 0)
2328 {
2329 gsi = gsi_last_bb (cont_bb);
2330 if (POINTER_TYPE_P (type))
2331 t = fold_build_pointer_plus (fd->loops[i].v,
2332 fold_convert (sizetype,
2333 fd->loops[i].step));
2334 else
2335 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2336 fold_convert (type, fd->loops[i].step));
2337 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2338 if (counts[i])
2339 {
2340 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2341 build_int_cst (fd->iter_type, 1));
2342 expand_omp_build_assign (&gsi, counts[i], t);
2343 t = counts[i];
2344 }
2345 else
2346 {
2347 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2348 fd->loops[i].v, fd->loops[i].n1);
2349 t = fold_convert (fd->iter_type, t);
2350 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2351 true, GSI_SAME_STMT);
2352 }
2353 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2354 size_int (i - fd->collapse + 1),
2355 NULL_TREE, NULL_TREE);
2356 expand_omp_build_assign (&gsi, aref, t);
2357 gsi_prev (&gsi);
2358 e2 = split_block (cont_bb, gsi_stmt (gsi));
2359 new_header = e2->dest;
2360 }
2361 else
2362 new_header = cont_bb;
2363 gsi = gsi_after_labels (new_header);
2364 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2365 true, GSI_SAME_STMT);
2366 tree n2
2367 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2368 true, NULL_TREE, true, GSI_SAME_STMT);
2369 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2370 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2371 edge e3 = split_block (new_header, gsi_stmt (gsi));
2372 cont_bb = e3->dest;
2373 remove_edge (e1);
2374 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2375 e3->flags = EDGE_FALSE_VALUE;
2376	      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2377	      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2378	      e1->probability = e3->probability.invert ();
2379
2380 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2381 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2382
2383 if (e2)
2384 {
2385 struct loop *loop = alloc_loop ();
2386 loop->header = new_header;
2387 loop->latch = e2->src;
2388 add_loop (loop, body_bb->loop_father);
2389 }
2390 }
2391
2392 /* If there are any lastprivate clauses and it is possible some loops
2393 might have zero iterations, ensure all the decls are initialized,
2394 otherwise we could crash evaluating C++ class iterators with lastprivate
2395 clauses. */
2396 bool need_inits = false;
2397 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2398 if (need_inits)
2399 {
2400 tree type = TREE_TYPE (fd->loops[i].v);
2401 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2402 expand_omp_build_assign (&gsi, fd->loops[i].v,
2403 fold_convert (type, fd->loops[i].n1));
2404 }
2405 else
2406 {
2407 tree type = TREE_TYPE (fd->loops[i].v);
2408 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2409 boolean_type_node,
2410 fold_convert (type, fd->loops[i].n1),
2411 fold_convert (type, fd->loops[i].n2));
2412 if (!integer_onep (this_cond))
2413 need_inits = true;
2414 }
2415
2416 return cont_bb;
2417}
2418
2419/* A subroutine of expand_omp_for. Generate code for a parallel
2420 loop with any schedule. Given parameters:
2421
2422 for (V = N1; V cond N2; V += STEP) BODY;
2423
2424 where COND is "<" or ">", we generate pseudocode
2425
2426 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2427 if (more) goto L0; else goto L3;
2428 L0:
2429 V = istart0;
2430 iend = iend0;
2431 L1:
2432 BODY;
2433 V += STEP;
2434 if (V cond iend) goto L1; else goto L2;
2435 L2:
2436 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2437 L3:
2438
2439 If this is a combined omp parallel loop, instead of the call to
2440 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2441 If this is gimple_omp_for_combined_p loop, then instead of assigning
2442 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2443 inner GIMPLE_OMP_FOR and V += STEP; and
2444 if (V cond iend) goto L1; else goto L2; are removed.
2445
2446 For collapsed loops, given parameters:
2447 collapse(3)
2448 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2449 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2450 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2451 BODY;
2452
2453 we generate pseudocode
2454
2455 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2456 if (cond3 is <)
2457 adj = STEP3 - 1;
2458 else
2459 adj = STEP3 + 1;
2460 count3 = (adj + N32 - N31) / STEP3;
2461 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2462 if (cond2 is <)
2463 adj = STEP2 - 1;
2464 else
2465 adj = STEP2 + 1;
2466 count2 = (adj + N22 - N21) / STEP2;
2467 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2468 if (cond1 is <)
2469 adj = STEP1 - 1;
2470 else
2471 adj = STEP1 + 1;
2472 count1 = (adj + N12 - N11) / STEP1;
2473 count = count1 * count2 * count3;
2474 goto Z1;
2475 Z0:
2476 count = 0;
2477 Z1:
2478 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2479 if (more) goto L0; else goto L3;
2480 L0:
2481 V = istart0;
2482 T = V;
2483 V3 = N31 + (T % count3) * STEP3;
2484 T = T / count3;
2485 V2 = N21 + (T % count2) * STEP2;
2486 T = T / count2;
2487 V1 = N11 + T * STEP1;
2488 iend = iend0;
2489 L1:
2490 BODY;
2491 V += 1;
2492 if (V < iend) goto L10; else goto L2;
2493 L10:
2494 V3 += STEP3;
2495 if (V3 cond3 N32) goto L1; else goto L11;
2496 L11:
2497 V3 = N31;
2498 V2 += STEP2;
2499 if (V2 cond2 N22) goto L1; else goto L12;
2500 L12:
2501 V2 = N21;
2502 V1 += STEP1;
2503 goto L1;
2504 L2:
2505 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2506 L3:
2507
2508 */
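/* Concrete (illustrative) instance: for

     #pragma omp for schedule(dynamic, 4) collapse(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 body (i, j);

   start_fn/next_fn are GOMP_loop_dynamic_start/GOMP_loop_dynamic_next,
   the collapsed count n * m is computed up front, each next call hands
   the thread a range [istart0, iend0) of that linear space, and i and j
   are recovered from it with the division/modulo sequence shown above.  */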
2509
2510static void
2511expand_omp_for_generic (struct omp_region *region,
2512 struct omp_for_data *fd,
2513 enum built_in_function start_fn,
2514 enum built_in_function next_fn,
2515 gimple *inner_stmt)
2516{
2517 tree type, istart0, iend0, iend;
2518 tree t, vmain, vback, bias = NULL_TREE;
2519 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2520 basic_block l2_bb = NULL, l3_bb = NULL;
2521 gimple_stmt_iterator gsi;
2522 gassign *assign_stmt;
2523 bool in_combined_parallel = is_combined_parallel (region);
2524 bool broken_loop = region->cont == NULL;
2525 edge e, ne;
2526 tree *counts = NULL;
2527 int i;
2528 bool ordered_lastprivate = false;
2529
2530 gcc_assert (!broken_loop || !in_combined_parallel);
2531 gcc_assert (fd->iter_type == long_integer_type_node
2532 || !in_combined_parallel);
2533
2534 entry_bb = region->entry;
2535 cont_bb = region->cont;
2536 collapse_bb = NULL;
2537 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2538 gcc_assert (broken_loop
2539 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2540 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2541 l1_bb = single_succ (l0_bb);
2542 if (!broken_loop)
2543 {
2544 l2_bb = create_empty_bb (cont_bb);
2545 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2546 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2547 == l1_bb));
2548 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2549 }
2550 else
2551 l2_bb = NULL;
2552 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2553 exit_bb = region->exit;
2554
2555 gsi = gsi_last_bb (entry_bb);
2556
2557 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2558 if (fd->ordered
2559 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2560 OMP_CLAUSE_LASTPRIVATE))
2561 ordered_lastprivate = false;
2562 if (fd->collapse > 1 || fd->ordered)
2563 {
2564 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2565 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2566
2567 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2568 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2569 zero_iter1_bb, first_zero_iter1,
2570 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2571
2572 if (zero_iter1_bb)
2573 {
2574 /* Some counts[i] vars might be uninitialized if
2575 some loop has zero iterations. But the body shouldn't
2576 be executed in that case, so just avoid uninit warnings. */
2577 for (i = first_zero_iter1;
2578 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2579 if (SSA_VAR_P (counts[i]))
2580 TREE_NO_WARNING (counts[i]) = 1;
2581 gsi_prev (&gsi);
2582 e = split_block (entry_bb, gsi_stmt (gsi));
2583 entry_bb = e->dest;
2584 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2585 gsi = gsi_last_bb (entry_bb);
2586 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2587 get_immediate_dominator (CDI_DOMINATORS,
2588 zero_iter1_bb));
2589 }
2590 if (zero_iter2_bb)
2591 {
2592 /* Some counts[i] vars might be uninitialized if
2593 some loop has zero iterations. But the body shouldn't
2594 be executed in that case, so just avoid uninit warnings. */
2595 for (i = first_zero_iter2; i < fd->ordered; i++)
2596 if (SSA_VAR_P (counts[i]))
2597 TREE_NO_WARNING (counts[i]) = 1;
2598 if (zero_iter1_bb)
2599 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2600 else
2601 {
2602 gsi_prev (&gsi);
2603 e = split_block (entry_bb, gsi_stmt (gsi));
2604 entry_bb = e->dest;
2605 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2606 gsi = gsi_last_bb (entry_bb);
2607 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2608 get_immediate_dominator
2609 (CDI_DOMINATORS, zero_iter2_bb));
2610 }
2611 }
2612 if (fd->collapse == 1)
2613 {
2614 counts[0] = fd->loop.n2;
2615 fd->loop = fd->loops[0];
2616 }
2617 }
2618
2619 type = TREE_TYPE (fd->loop.v);
2620 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2621 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2622 TREE_ADDRESSABLE (istart0) = 1;
2623 TREE_ADDRESSABLE (iend0) = 1;
2624
2625 /* See if we need to bias by LLONG_MIN. */
2626 if (fd->iter_type == long_long_unsigned_type_node
2627 && TREE_CODE (type) == INTEGER_TYPE
2628 && !TYPE_UNSIGNED (type)
2629 && fd->ordered == 0)
2630 {
2631 tree n1, n2;
2632
2633 if (fd->loop.cond_code == LT_EXPR)
2634 {
2635 n1 = fd->loop.n1;
2636 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2637 }
2638 else
2639 {
2640 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2641 n2 = fd->loop.n1;
2642 }
2643 if (TREE_CODE (n1) != INTEGER_CST
2644 || TREE_CODE (n2) != INTEGER_CST
2645 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2646 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2647 }
2648
2649 gimple_stmt_iterator gsif = gsi;
2650 gsi_prev (&gsif);
2651
2652 tree arr = NULL_TREE;
2653 if (in_combined_parallel)
2654 {
2655 gcc_assert (fd->ordered == 0);
2656 /* In a combined parallel loop, emit a call to
2657 GOMP_loop_foo_next. */
2658 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2659 build_fold_addr_expr (istart0),
2660 build_fold_addr_expr (iend0));
2661 }
2662 else
2663 {
2664 tree t0, t1, t2, t3, t4;
2665 /* If this is not a combined parallel loop, emit a call to
2666 GOMP_loop_foo_start in ENTRY_BB. */
2667 t4 = build_fold_addr_expr (iend0);
2668 t3 = build_fold_addr_expr (istart0);
2669 if (fd->ordered)
2670 {
2671 t0 = build_int_cst (unsigned_type_node,
2672 fd->ordered - fd->collapse + 1);
2673 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2674 fd->ordered
2675 - fd->collapse + 1),
2676 ".omp_counts");
2677 DECL_NAMELESS (arr) = 1;
2678 TREE_ADDRESSABLE (arr) = 1;
2679 TREE_STATIC (arr) = 1;
2680 vec<constructor_elt, va_gc> *v;
2681 vec_alloc (v, fd->ordered - fd->collapse + 1);
2682 int idx;
2683
2684 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2685 {
2686 tree c;
2687 if (idx == 0 && fd->collapse > 1)
2688 c = fd->loop.n2;
2689 else
2690 c = counts[idx + fd->collapse - 1];
2691 tree purpose = size_int (idx);
2692 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2693 if (TREE_CODE (c) != INTEGER_CST)
2694 TREE_STATIC (arr) = 0;
2695 }
2696
2697 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2698 if (!TREE_STATIC (arr))
2699 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2700 void_type_node, arr),
2701 true, NULL_TREE, true, GSI_SAME_STMT);
2702 t1 = build_fold_addr_expr (arr);
2703 t2 = NULL_TREE;
2704 }
2705 else
2706 {
2707 t2 = fold_convert (fd->iter_type, fd->loop.step);
2708 t1 = fd->loop.n2;
2709 t0 = fd->loop.n1;
2710 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2711 {
2712 tree innerc
2713 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2714 OMP_CLAUSE__LOOPTEMP_);
2715 gcc_assert (innerc);
2716 t0 = OMP_CLAUSE_DECL (innerc);
2717 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2718 OMP_CLAUSE__LOOPTEMP_);
2719 gcc_assert (innerc);
2720 t1 = OMP_CLAUSE_DECL (innerc);
2721 }
2722 if (POINTER_TYPE_P (TREE_TYPE (t0))
2723 && TYPE_PRECISION (TREE_TYPE (t0))
2724 != TYPE_PRECISION (fd->iter_type))
2725 {
2726 /* Avoid casting pointers to integer of a different size. */
2727 tree itype = signed_type_for (type);
2728 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2729 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2730 }
2731 else
2732 {
2733 t1 = fold_convert (fd->iter_type, t1);
2734 t0 = fold_convert (fd->iter_type, t0);
2735 }
2736 if (bias)
2737 {
2738 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2739 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2740 }
2741 }
2742 if (fd->iter_type == long_integer_type_node || fd->ordered)
2743 {
2744 if (fd->chunk_size)
2745 {
2746 t = fold_convert (fd->iter_type, fd->chunk_size);
2747 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2748 if (fd->ordered)
2749 t = build_call_expr (builtin_decl_explicit (start_fn),
2750 5, t0, t1, t, t3, t4);
2751 else
2752 t = build_call_expr (builtin_decl_explicit (start_fn),
2753 6, t0, t1, t2, t, t3, t4);
2754 }
2755 else if (fd->ordered)
2756 t = build_call_expr (builtin_decl_explicit (start_fn),
2757 4, t0, t1, t3, t4);
2758 else
2759 t = build_call_expr (builtin_decl_explicit (start_fn),
2760 5, t0, t1, t2, t3, t4);
2761 }
2762 else
2763 {
2764 tree t5;
2765 tree c_bool_type;
2766 tree bfn_decl;
2767
2768	      /* The GOMP_loop_ull_*start functions have an additional boolean
2769		 argument: true for < loops and false for > loops.
2770		 In Fortran, the C bool type can be different from
2771		 boolean_type_node.  */
2772 bfn_decl = builtin_decl_explicit (start_fn);
2773 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2774 t5 = build_int_cst (c_bool_type,
2775 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2776 if (fd->chunk_size)
2777 {
2778 tree bfn_decl = builtin_decl_explicit (start_fn);
2779 t = fold_convert (fd->iter_type, fd->chunk_size);
2780 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2781 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2782 }
2783 else
2784 t = build_call_expr (builtin_decl_explicit (start_fn),
2785 6, t5, t0, t1, t2, t3, t4);
2786 }
2787 }
2788 if (TREE_TYPE (t) != boolean_type_node)
2789 t = fold_build2 (NE_EXPR, boolean_type_node,
2790 t, build_int_cst (TREE_TYPE (t), 0));
2791 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2792				    true, GSI_SAME_STMT);
2793 if (arr && !TREE_STATIC (arr))
2794 {
2795 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2796 TREE_THIS_VOLATILE (clobber) = 1;
2797 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2798 GSI_SAME_STMT);
2799 }
2800 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2801
2802 /* Remove the GIMPLE_OMP_FOR statement. */
2803 gsi_remove (&gsi, true);
2804
2805 if (gsi_end_p (gsif))
2806 gsif = gsi_after_labels (gsi_bb (gsif));
2807 gsi_next (&gsif);
2808
2809 /* Iteration setup for sequential loop goes in L0_BB. */
2810 tree startvar = fd->loop.v;
2811 tree endvar = NULL_TREE;
2812
2813 if (gimple_omp_for_combined_p (fd->for_stmt))
2814 {
2815 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2816 && gimple_omp_for_kind (inner_stmt)
2817 == GF_OMP_FOR_KIND_SIMD);
2818 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2819 OMP_CLAUSE__LOOPTEMP_);
2820 gcc_assert (innerc);
2821 startvar = OMP_CLAUSE_DECL (innerc);
2822 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2823 OMP_CLAUSE__LOOPTEMP_);
2824 gcc_assert (innerc);
2825 endvar = OMP_CLAUSE_DECL (innerc);
2826 }
2827
2828 gsi = gsi_start_bb (l0_bb);
2829 t = istart0;
2830 if (fd->ordered && fd->collapse == 1)
2831 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2832 fold_convert (fd->iter_type, fd->loop.step));
2833 else if (bias)
2834 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2835 if (fd->ordered && fd->collapse == 1)
2836 {
2837 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2838 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2839 fd->loop.n1, fold_convert (sizetype, t));
2840 else
2841 {
2842 t = fold_convert (TREE_TYPE (startvar), t);
2843 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2844 fd->loop.n1, t);
2845 }
2846 }
2847 else
2848 {
2849 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2850 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2851 t = fold_convert (TREE_TYPE (startvar), t);
2852 }
2853 t = force_gimple_operand_gsi (&gsi, t,
2854 DECL_P (startvar)
2855 && TREE_ADDRESSABLE (startvar),
2856 NULL_TREE, false, GSI_CONTINUE_LINKING);
2857 assign_stmt = gimple_build_assign (startvar, t);
2858 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2859
2860 t = iend0;
2861 if (fd->ordered && fd->collapse == 1)
2862 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2863 fold_convert (fd->iter_type, fd->loop.step));
2864 else if (bias)
2865 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2866 if (fd->ordered && fd->collapse == 1)
2867 {
2868 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2869 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2870 fd->loop.n1, fold_convert (sizetype, t));
2871 else
2872 {
2873 t = fold_convert (TREE_TYPE (startvar), t);
2874 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2875 fd->loop.n1, t);
2876 }
2877 }
2878 else
2879 {
2880 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2881 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2882 t = fold_convert (TREE_TYPE (startvar), t);
2883 }
2884 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2885 false, GSI_CONTINUE_LINKING);
2886 if (endvar)
2887 {
2888 assign_stmt = gimple_build_assign (endvar, iend);
2889 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2890 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2891 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2892 else
2893 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2894 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2895 }
2896 /* Handle linear clause adjustments. */
2897 tree itercnt = NULL_TREE;
2898 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2899 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2900 c; c = OMP_CLAUSE_CHAIN (c))
2901 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2902 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2903 {
2904 tree d = OMP_CLAUSE_DECL (c);
2905 bool is_ref = omp_is_reference (d);
2906 tree t = d, a, dest;
2907 if (is_ref)
2908 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2909 tree type = TREE_TYPE (t);
2910 if (POINTER_TYPE_P (type))
2911 type = sizetype;
2912 dest = unshare_expr (t);
2913 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2914 expand_omp_build_assign (&gsif, v, t);
2915 if (itercnt == NULL_TREE)
2916 {
2917 itercnt = startvar;
2918 tree n1 = fd->loop.n1;
2919 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2920 {
2921 itercnt
2922 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2923 itercnt);
2924 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2925 }
2926 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2927 itercnt, n1);
2928 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2929 itercnt, fd->loop.step);
2930 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2931 NULL_TREE, false,
2932 GSI_CONTINUE_LINKING);
2933 }
2934 a = fold_build2 (MULT_EXPR, type,
2935 fold_convert (type, itercnt),
2936 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2937 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2938 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2939 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2940 false, GSI_CONTINUE_LINKING);
2941 assign_stmt = gimple_build_assign (dest, t);
2942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2943 }
2944 if (fd->collapse > 1)
2945 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2946
2947 if (fd->ordered)
2948 {
2949      /* Until now, the counts array contained the number of iterations
2950	 (or the variable holding it) for the ith loop.  From now on, we
2951	 need those counts only for the collapsed loops, and only for the
2952	 2nd through the last collapsed one.  Move them one element
2953	 earlier; we'll use counts[fd->collapse - 1] for the first
2954	 source/sink iteration counter and so on, and counts[fd->ordered]
2955	 as the array holding the current counter values for
2956	 depend(source).  */
2957 if (fd->collapse > 1)
2958 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2959 if (broken_loop)
2960 {
2961 int i;
2962 for (i = fd->collapse; i < fd->ordered; i++)
2963 {
2964 tree type = TREE_TYPE (fd->loops[i].v);
2965 tree this_cond
2966 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2967 fold_convert (type, fd->loops[i].n1),
2968 fold_convert (type, fd->loops[i].n2));
2969 if (!integer_onep (this_cond))
2970 break;
2971 }
2972 if (i < fd->ordered)
2973 {
2974 cont_bb
2975 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2976 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2977 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2978 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2979 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2980 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2981 make_edge (cont_bb, l1_bb, 0);
2982 l2_bb = create_empty_bb (cont_bb);
2983 broken_loop = false;
2984 }
2985 }
2986 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2987 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2988 ordered_lastprivate);
2989 if (counts[fd->collapse - 1])
2990 {
2991 gcc_assert (fd->collapse == 1);
2992 gsi = gsi_last_bb (l0_bb);
2993 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2994 istart0, true);
2995 gsi = gsi_last_bb (cont_bb);
2996 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2997 build_int_cst (fd->iter_type, 1));
2998 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2999 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3000 size_zero_node, NULL_TREE, NULL_TREE);
3001 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3002 t = counts[fd->collapse - 1];
3003 }
3004 else if (fd->collapse > 1)
3005 t = fd->loop.v;
3006 else
3007 {
3008 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3009 fd->loops[0].v, fd->loops[0].n1);
3010 t = fold_convert (fd->iter_type, t);
3011 }
3012 gsi = gsi_last_bb (l0_bb);
3013 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3014 size_zero_node, NULL_TREE, NULL_TREE);
3015 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3016 false, GSI_CONTINUE_LINKING);
3017 expand_omp_build_assign (&gsi, aref, t, true);
3018 }
3019
3020 if (!broken_loop)
3021 {
3022 /* Code to control the increment and predicate for the sequential
3023 loop goes in the CONT_BB. */
3024 gsi = gsi_last_bb (cont_bb);
3025 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3026 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3027 vmain = gimple_omp_continue_control_use (cont_stmt);
3028 vback = gimple_omp_continue_control_def (cont_stmt);
3029
3030 if (!gimple_omp_for_combined_p (fd->for_stmt))
3031 {
3032 if (POINTER_TYPE_P (type))
3033 t = fold_build_pointer_plus (vmain, fd->loop.step);
3034 else
3035 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3036 t = force_gimple_operand_gsi (&gsi, t,
3037 DECL_P (vback)
3038 && TREE_ADDRESSABLE (vback),
3039 NULL_TREE, true, GSI_SAME_STMT);
3040 assign_stmt = gimple_build_assign (vback, t);
3041 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3042
3043 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3044 {
3045 if (fd->collapse > 1)
3046 t = fd->loop.v;
3047 else
3048 {
3049 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3050 fd->loops[0].v, fd->loops[0].n1);
3051 t = fold_convert (fd->iter_type, t);
3052 }
3053 tree aref = build4 (ARRAY_REF, fd->iter_type,
3054 counts[fd->ordered], size_zero_node,
3055 NULL_TREE, NULL_TREE);
3056 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3057 true, GSI_SAME_STMT);
3058 expand_omp_build_assign (&gsi, aref, t);
3059 }
3060
3061 t = build2 (fd->loop.cond_code, boolean_type_node,
3062 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3063 iend);
3064 gcond *cond_stmt = gimple_build_cond_empty (t);
3065 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3066 }
3067
3068 /* Remove GIMPLE_OMP_CONTINUE. */
3069 gsi_remove (&gsi, true);
3070
3071 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3072 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3073
3074 /* Emit code to get the next parallel iteration in L2_BB. */
3075 gsi = gsi_start_bb (l2_bb);
3076
3077 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3078 build_fold_addr_expr (istart0),
3079 build_fold_addr_expr (iend0));
3080 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3081 false, GSI_CONTINUE_LINKING);
3082 if (TREE_TYPE (t) != boolean_type_node)
3083 t = fold_build2 (NE_EXPR, boolean_type_node,
3084 t, build_int_cst (TREE_TYPE (t), 0));
3085 gcond *cond_stmt = gimple_build_cond_empty (t);
3086 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3087 }
3088
3089 /* Add the loop cleanup function. */
3090 gsi = gsi_last_bb (exit_bb);
3091 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3092 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3093 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3094 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3095 else
3096 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3097 gcall *call_stmt = gimple_build_call (t, 0);
3098 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3099 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3100 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3101 if (fd->ordered)
3102 {
3103 tree arr = counts[fd->ordered];
3104 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3105 TREE_THIS_VOLATILE (clobber) = 1;
3106 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3107 GSI_SAME_STMT);
3108 }
3109 gsi_remove (&gsi, true);
3110
3111 /* Connect the new blocks. */
3112 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3113 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3114
3115 if (!broken_loop)
3116 {
3117 gimple_seq phis;
3118
3119 e = find_edge (cont_bb, l3_bb);
3120 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3121
3122 phis = phi_nodes (l3_bb);
3123 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3124 {
3125 gimple *phi = gsi_stmt (gsi);
3126 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3127 PHI_ARG_DEF_FROM_EDGE (phi, e));
3128 }
3129 remove_edge (e);
3130
3131 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3132 e = find_edge (cont_bb, l1_bb);
3133 if (e == NULL)
3134 {
3135 e = BRANCH_EDGE (cont_bb);
3136 gcc_assert (single_succ (e->dest) == l1_bb);
3137 }
3138 if (gimple_omp_for_combined_p (fd->for_stmt))
3139 {
3140 remove_edge (e);
3141 e = NULL;
3142 }
3143 else if (fd->collapse > 1)
3144 {
3145 remove_edge (e);
3146 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3147 }
3148 else
3149 e->flags = EDGE_TRUE_VALUE;
3150 if (e)
3151 {
3152	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3153	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3154 }
3155 else
3156 {
3157 e = find_edge (cont_bb, l2_bb);
3158 e->flags = EDGE_FALLTHRU;
3159 }
3160 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3161
3162 if (gimple_in_ssa_p (cfun))
3163 {
3164 /* Add phis to the outer loop that connect to the phis in the inner,
3165 original loop, and move the loop entry value of the inner phi to
3166 the loop entry value of the outer phi. */
3167 gphi_iterator psi;
3168 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3169 {
3170 source_location locus;
3171 gphi *nphi;
3172 gphi *exit_phi = psi.phi ();
3173
3174 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3175 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3176
3177 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3178 edge latch_to_l1 = find_edge (latch, l1_bb);
3179 gphi *inner_phi
3180 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3181
3182 tree t = gimple_phi_result (exit_phi);
3183 tree new_res = copy_ssa_name (t, NULL);
3184 nphi = create_phi_node (new_res, l0_bb);
3185
3186 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3187 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3188 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3189 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3190 add_phi_arg (nphi, t, entry_to_l0, locus);
3191
3192 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3193 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3194
3195 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3196 };
3197 }
3198
3199 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3200 recompute_dominator (CDI_DOMINATORS, l2_bb));
3201 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3202 recompute_dominator (CDI_DOMINATORS, l3_bb));
3203 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3204 recompute_dominator (CDI_DOMINATORS, l0_bb));
3205 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3206 recompute_dominator (CDI_DOMINATORS, l1_bb));
3207
3208 /* We enter expand_omp_for_generic with a loop. This original loop may
3209 have its own loop struct, or it may be part of an outer loop struct
3210 (which may be the fake loop). */
3211 struct loop *outer_loop = entry_bb->loop_father;
3212 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3213
3214 add_bb_to_loop (l2_bb, outer_loop);
3215
3216 /* We've added a new loop around the original loop. Allocate the
3217 corresponding loop struct. */
3218 struct loop *new_loop = alloc_loop ();
3219 new_loop->header = l0_bb;
3220 new_loop->latch = l2_bb;
3221 add_loop (new_loop, outer_loop);
3222
3223 /* Allocate a loop structure for the original loop unless we already
3224 had one. */
3225 if (!orig_loop_has_loop_struct
3226 && !gimple_omp_for_combined_p (fd->for_stmt))
3227 {
3228 struct loop *orig_loop = alloc_loop ();
3229 orig_loop->header = l1_bb;
3230 /* The loop may have multiple latches. */
3231 add_loop (orig_loop, new_loop);
3232 }
3233 }
3234}
3235
3236/* A subroutine of expand_omp_for. Generate code for a parallel
3237 loop with static schedule and no specified chunk size. Given
3238 parameters:
3239
3240 for (V = N1; V cond N2; V += STEP) BODY;
3241
3242 where COND is "<" or ">", we generate pseudocode
3243
3244 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3245 if (cond is <)
3246 adj = STEP - 1;
3247 else
3248 adj = STEP + 1;
3249 if ((__typeof (V)) -1 > 0 && cond is >)
3250 n = -(adj + N2 - N1) / -STEP;
3251 else
3252 n = (adj + N2 - N1) / STEP;
3253 q = n / nthreads;
3254 tt = n % nthreads;
3255 if (threadid < tt) goto L3; else goto L4;
3256 L3:
3257 tt = 0;
3258 q = q + 1;
3259 L4:
3260 s0 = q * threadid + tt;
3261 e0 = s0 + q;
3262 V = s0 * STEP + N1;
3263 if (s0 >= e0) goto L2; else goto L0;
3264 L0:
3265 e = e0 * STEP + N1;
3266 L1:
3267 BODY;
3268 V += STEP;
3269 if (V cond e) goto L1;
3270 L2:
3271*/
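/* Worked example (illustrative): n = 10 iterations and nthreads = 4 give
   q = 2 and tt = 2, so threads 0 and 1 (threadid < tt) take q + 1 = 3
   iterations while threads 2 and 3 take 2:
     thread 0: s0 = 0, e0 = 3
     thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 6, e0 = 8
     thread 3: s0 = 8, e0 = 10
   which covers [0, 10) exactly once, with the larger shares given to the
   lowest-numbered threads.  */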
3272
3273static void
3274expand_omp_for_static_nochunk (struct omp_region *region,
3275 struct omp_for_data *fd,
3276 gimple *inner_stmt)
3277{
3278 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3279 tree type, itype, vmain, vback;
3280 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3281 basic_block body_bb, cont_bb, collapse_bb = NULL;
3282 basic_block fin_bb;
3283 gimple_stmt_iterator gsi;
3284 edge ep;
3285 bool broken_loop = region->cont == NULL;
3286 tree *counts = NULL;
3287 tree n1, n2, step;
3288
3289 itype = type = TREE_TYPE (fd->loop.v);
3290 if (POINTER_TYPE_P (type))
3291 itype = signed_type_for (type);
3292
3293 entry_bb = region->entry;
3294 cont_bb = region->cont;
3295 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3296 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3297 gcc_assert (broken_loop
3298 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3299 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3300 body_bb = single_succ (seq_start_bb);
3301 if (!broken_loop)
3302 {
3303 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3304 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3305 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3306 }
3307 exit_bb = region->exit;
3308
3309 /* Iteration space partitioning goes in ENTRY_BB. */
3310 gsi = gsi_last_bb (entry_bb);
3311 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3312
3313 if (fd->collapse > 1)
3314 {
3315 int first_zero_iter = -1, dummy = -1;
3316 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3317
3318 counts = XALLOCAVEC (tree, fd->collapse);
3319 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3320 fin_bb, first_zero_iter,
3321 dummy_bb, dummy, l2_dom_bb);
3322 t = NULL_TREE;
3323 }
3324 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3325 t = integer_one_node;
3326 else
3327 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3328 fold_convert (type, fd->loop.n1),
3329 fold_convert (type, fd->loop.n2));
3330 if (fd->collapse == 1
3331 && TYPE_UNSIGNED (type)
3332 && (t == NULL_TREE || !integer_onep (t)))
3333 {
3334 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3335 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3336 true, GSI_SAME_STMT);
3337 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3338 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3339 true, GSI_SAME_STMT);
3340 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3341 NULL_TREE, NULL_TREE);
3342 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3343 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3344 expand_omp_regimplify_p, NULL, NULL)
3345 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3346 expand_omp_regimplify_p, NULL, NULL))
3347 {
3348 gsi = gsi_for_stmt (cond_stmt);
3349 gimple_regimplify_operands (cond_stmt, &gsi);
3350 }
3351 ep = split_block (entry_bb, cond_stmt);
3352 ep->flags = EDGE_TRUE_VALUE;
3353 entry_bb = ep->dest;
3354      ep->probability = profile_probability::very_likely ();
3355      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3356      ep->probability = profile_probability::very_unlikely ();
3357 if (gimple_in_ssa_p (cfun))
3358 {
3359 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3360 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3361 !gsi_end_p (gpi); gsi_next (&gpi))
3362 {
3363 gphi *phi = gpi.phi ();
3364 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3365 ep, UNKNOWN_LOCATION);
3366 }
3367 }
3368 gsi = gsi_last_bb (entry_bb);
3369 }
3370
3371 switch (gimple_omp_for_kind (fd->for_stmt))
3372 {
3373 case GF_OMP_FOR_KIND_FOR:
3374 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3375 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3376 break;
3377 case GF_OMP_FOR_KIND_DISTRIBUTE:
3378 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3379 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3380 break;
3381 default:
3382 gcc_unreachable ();
3383 }
3384 nthreads = build_call_expr (nthreads, 0);
3385 nthreads = fold_convert (itype, nthreads);
3386 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3387 true, GSI_SAME_STMT);
3388 threadid = build_call_expr (threadid, 0);
3389 threadid = fold_convert (itype, threadid);
3390 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3391 true, GSI_SAME_STMT);
3392
3393 n1 = fd->loop.n1;
3394 n2 = fd->loop.n2;
3395 step = fd->loop.step;
3396 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3397 {
3398 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3399 OMP_CLAUSE__LOOPTEMP_);
3400 gcc_assert (innerc);
3401 n1 = OMP_CLAUSE_DECL (innerc);
3402 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3403 OMP_CLAUSE__LOOPTEMP_);
3404 gcc_assert (innerc);
3405 n2 = OMP_CLAUSE_DECL (innerc);
3406 }
3407 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3408 true, NULL_TREE, true, GSI_SAME_STMT);
3409 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3410 true, NULL_TREE, true, GSI_SAME_STMT);
3411 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3412 true, NULL_TREE, true, GSI_SAME_STMT);
3413
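  /* Compute the total iteration count N = (STEP + adj + N2 - N1) / STEP,
     where adj is -1 for "<" loops and +1 for ">" loops; for unsigned
     types iterating downwards both operands are negated first so the
     truncating division stays well defined.  */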
3414 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3415 t = fold_build2 (PLUS_EXPR, itype, step, t);
3416 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3417 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3418 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3419 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3420 fold_build1 (NEGATE_EXPR, itype, t),
3421 fold_build1 (NEGATE_EXPR, itype, step));
3422 else
3423 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3424 t = fold_convert (itype, t);
3425 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3426
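  /* Split the N iterations among NTHREADS threads: each thread gets
     Q = N / NTHREADS iterations, and the first TT = N % NTHREADS threads
     get one extra (the branch below bumps Q and zeroes TT for those
     threads, so the start offset S0 = Q * THREADID + TT stays contiguous).  */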
3427 q = create_tmp_reg (itype, "q");
3428 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3429 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3430 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3431
3432 tt = create_tmp_reg (itype, "tt");
3433 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3434 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3435 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3436
3437 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3438 gcond *cond_stmt = gimple_build_cond_empty (t);
3439 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3440
3441 second_bb = split_block (entry_bb, cond_stmt)->dest;
3442 gsi = gsi_last_bb (second_bb);
3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3444
3445 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3446 GSI_SAME_STMT);
3447 gassign *assign_stmt
3448 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3449 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3450
3451 third_bb = split_block (second_bb, assign_stmt)->dest;
3452 gsi = gsi_last_bb (third_bb);
3453 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3454
3455 t = build2 (MULT_EXPR, itype, q, threadid);
3456 t = build2 (PLUS_EXPR, itype, t, tt);
3457 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3458
3459 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3460 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3461
3462 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3463 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3464
3465 /* Remove the GIMPLE_OMP_FOR statement. */
3466 gsi_remove (&gsi, true);
3467
3468 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3469 gsi = gsi_start_bb (seq_start_bb);
3470
3471 tree startvar = fd->loop.v;
3472 tree endvar = NULL_TREE;
3473
3474 if (gimple_omp_for_combined_p (fd->for_stmt))
3475 {
3476 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3477 ? gimple_omp_parallel_clauses (inner_stmt)
3478 : gimple_omp_for_clauses (inner_stmt);
3479 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3480 gcc_assert (innerc);
3481 startvar = OMP_CLAUSE_DECL (innerc);
3482 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 OMP_CLAUSE__LOOPTEMP_);
3484 gcc_assert (innerc);
3485 endvar = OMP_CLAUSE_DECL (innerc);
3486 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3487 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3488 {
3489 int i;
3490 for (i = 1; i < fd->collapse; i++)
3491 {
3492 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3493 OMP_CLAUSE__LOOPTEMP_);
3494 gcc_assert (innerc);
3495 }
3496 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3497 OMP_CLAUSE__LOOPTEMP_);
3498 if (innerc)
3499 {
3500 /* If needed (distribute parallel for with lastprivate),
3501 propagate down the total number of iterations. */
3502 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3503 fd->loop.n2);
3504 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3505 GSI_CONTINUE_LINKING);
3506 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3507 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3508 }
3509 }
3510 }
3511 t = fold_convert (itype, s0);
3512 t = fold_build2 (MULT_EXPR, itype, t, step);
3513 if (POINTER_TYPE_P (type))
3514 t = fold_build_pointer_plus (n1, t);
3515 else
3516 t = fold_build2 (PLUS_EXPR, type, t, n1);
3517 t = fold_convert (TREE_TYPE (startvar), t);
3518 t = force_gimple_operand_gsi (&gsi, t,
3519 DECL_P (startvar)
3520 && TREE_ADDRESSABLE (startvar),
3521 NULL_TREE, false, GSI_CONTINUE_LINKING);
3522 assign_stmt = gimple_build_assign (startvar, t);
3523 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3524
3525 t = fold_convert (itype, e0);
3526 t = fold_build2 (MULT_EXPR, itype, t, step);
3527 if (POINTER_TYPE_P (type))
3528 t = fold_build_pointer_plus (n1, t);
3529 else
3530 t = fold_build2 (PLUS_EXPR, type, t, n1);
3531 t = fold_convert (TREE_TYPE (startvar), t);
3532 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3533 false, GSI_CONTINUE_LINKING);
3534 if (endvar)
3535 {
3536 assign_stmt = gimple_build_assign (endvar, e);
3537 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3538 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3539 assign_stmt = gimple_build_assign (fd->loop.v, e);
3540 else
3541 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3542 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3543 }
3544 /* Handle linear clause adjustments. */
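     /* For each linear(D:S) clause with copyin, advance D by S times the
	logical iteration index of this thread's first iteration (S0, with
	an extra bias computed below when the loop is part of a combined
	construct).  */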
3545 tree itercnt = NULL_TREE;
3546 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3547 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3548 c; c = OMP_CLAUSE_CHAIN (c))
3549 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3550 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3551 {
3552 tree d = OMP_CLAUSE_DECL (c);
3553 bool is_ref = omp_is_reference (d);
3554 tree t = d, a, dest;
3555 if (is_ref)
3556 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3557 if (itercnt == NULL_TREE)
3558 {
3559 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3560 {
3561 itercnt = fold_build2 (MINUS_EXPR, itype,
3562 fold_convert (itype, n1),
3563 fold_convert (itype, fd->loop.n1));
3564 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3565 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3566 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3567 NULL_TREE, false,
3568 GSI_CONTINUE_LINKING);
3569 }
3570 else
3571 itercnt = s0;
3572 }
3573 tree type = TREE_TYPE (t);
3574 if (POINTER_TYPE_P (type))
3575 type = sizetype;
3576 a = fold_build2 (MULT_EXPR, type,
3577 fold_convert (type, itercnt),
3578 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3579 dest = unshare_expr (t);
3580 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3581 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3582 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3583 false, GSI_CONTINUE_LINKING);
3584 assign_stmt = gimple_build_assign (dest, t);
3585 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3586 }
3587 if (fd->collapse > 1)
3588 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3589
3590 if (!broken_loop)
3591 {
3592 /* The code controlling the sequential loop replaces the
3593 GIMPLE_OMP_CONTINUE. */
3594 gsi = gsi_last_bb (cont_bb);
3595 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3596 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3597 vmain = gimple_omp_continue_control_use (cont_stmt);
3598 vback = gimple_omp_continue_control_def (cont_stmt);
3599
3600 if (!gimple_omp_for_combined_p (fd->for_stmt))
3601 {
3602 if (POINTER_TYPE_P (type))
3603 t = fold_build_pointer_plus (vmain, step);
3604 else
3605 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3606 t = force_gimple_operand_gsi (&gsi, t,
3607 DECL_P (vback)
3608 && TREE_ADDRESSABLE (vback),
3609 NULL_TREE, true, GSI_SAME_STMT);
3610 assign_stmt = gimple_build_assign (vback, t);
3611 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3612
3613 t = build2 (fd->loop.cond_code, boolean_type_node,
3614 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3615 ? t : vback, e);
3616 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3617 }
3618
3619 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3620 gsi_remove (&gsi, true);
3621
3622 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3623 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3624 }
3625
3626 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3627 gsi = gsi_last_bb (exit_bb);
3628 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3629 {
3630 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3631 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3632 }
3633 gsi_remove (&gsi, true);
3634
3635 /* Connect all the blocks. */
3636 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3637 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3638 ep = find_edge (entry_bb, second_bb);
3639 ep->flags = EDGE_TRUE_VALUE;
3640 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3641 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3642 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3643
3644 if (!broken_loop)
3645 {
3646 ep = find_edge (cont_bb, body_bb);
3647 if (ep == NULL)
3648 {
3649 ep = BRANCH_EDGE (cont_bb);
3650 gcc_assert (single_succ (ep->dest) == body_bb);
3651 }
3652 if (gimple_omp_for_combined_p (fd->for_stmt))
3653 {
3654 remove_edge (ep);
3655 ep = NULL;
3656 }
3657 else if (fd->collapse > 1)
3658 {
3659 remove_edge (ep);
3660 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3661 }
3662 else
3663 ep->flags = EDGE_TRUE_VALUE;
3664 find_edge (cont_bb, fin_bb)->flags
3665 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3666 }
3667
3668 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3669 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3670 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3671
3672 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3673 recompute_dominator (CDI_DOMINATORS, body_bb));
3674 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3675 recompute_dominator (CDI_DOMINATORS, fin_bb));
3676
3677 struct loop *loop = body_bb->loop_father;
3678 if (loop != entry_bb->loop_father)
3679 {
3680 gcc_assert (broken_loop || loop->header == body_bb);
3681 gcc_assert (broken_loop
3682 || loop->latch == region->cont
3683 || single_pred (loop->latch) == region->cont);
3684 return;
3685 }
3686
3687 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3688 {
3689 loop = alloc_loop ();
3690 loop->header = body_bb;
3691 if (collapse_bb == NULL)
3692 loop->latch = cont_bb;
3693 add_loop (loop, body_bb->loop_father);
3694 }
3695}
3696
3697/* Return phi in E->DEST with ARG on edge E. */
3698
3699static gphi *
3700find_phi_with_arg_on_edge (tree arg, edge e)
3701{
3702 basic_block bb = e->dest;
3703
3704 for (gphi_iterator gpi = gsi_start_phis (bb);
3705 !gsi_end_p (gpi);
3706 gsi_next (&gpi))
3707 {
3708 gphi *phi = gpi.phi ();
3709 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3710 return phi;
3711 }
3712
3713 return NULL;
3714}
3715
3716/* A subroutine of expand_omp_for. Generate code for a parallel
3717 loop with static schedule and a specified chunk size. Given
3718 parameters:
3719
3720 for (V = N1; V cond N2; V += STEP) BODY;
3721
3722 where COND is "<" or ">", we generate pseudocode
3723
3724 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3725 if (cond is <)
3726 adj = STEP - 1;
3727 else
3728 adj = STEP + 1;
3729 if ((__typeof (V)) -1 > 0 && cond is >)
3730 n = -(adj + N2 - N1) / -STEP;
3731 else
3732 n = (adj + N2 - N1) / STEP;
3733 trip = 0;
3734 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3735 here so that V is defined
3736 if the loop is not entered
3737 L0:
3738 s0 = (trip * nthreads + threadid) * CHUNK;
3739 e0 = min (s0 + CHUNK, n);
3740 if (s0 < n) goto L1; else goto L4;
3741 L1:
3742 V = s0 * STEP + N1;
3743 e = e0 * STEP + N1;
3744 L2:
3745 BODY;
3746 V += STEP;
3747 if (V cond e) goto L2; else goto L3;
3748 L3:
3749 trip += 1;
3750 goto L0;
3751 L4:
3752*/
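/* Illustrative sketch (not part of GCC; the helper name and the plain C
   loop are made up for exposition): the chunk assignment produced by the
   pseudocode above.  Thread TID executes iterations [s0, e0) on each trip
   until its next chunk would start at or past N.  */

static void
example_static_chunk_assignment (long n, long nthreads, long chunk, long tid)
{
  for (long trip = 0; ; trip++)
    {
      long s0 = (trip * nthreads + tid) * chunk;
      if (s0 >= n)
	break;
      long e0 = s0 + chunk < n ? s0 + chunk : n;
      /* Iterations [s0, e0) correspond to V in [s0 * STEP + N1,
	 e0 * STEP + N1) of the original loop.  */
      (void) e0;
    }
}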
3753
3754static void
3755expand_omp_for_static_chunk (struct omp_region *region,
3756 struct omp_for_data *fd, gimple *inner_stmt)
3757{
3758 tree n, s0, e0, e, t;
3759 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3760 tree type, itype, vmain, vback, vextra;
3761 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3762 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3763 gimple_stmt_iterator gsi;
3764 edge se;
3765 bool broken_loop = region->cont == NULL;
3766 tree *counts = NULL;
3767 tree n1, n2, step;
3768
3769 itype = type = TREE_TYPE (fd->loop.v);
3770 if (POINTER_TYPE_P (type))
3771 itype = signed_type_for (type);
3772
3773 entry_bb = region->entry;
3774 se = split_block (entry_bb, last_stmt (entry_bb));
3775 entry_bb = se->src;
3776 iter_part_bb = se->dest;
3777 cont_bb = region->cont;
3778 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3779 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3780 gcc_assert (broken_loop
3781 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3782 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3783 body_bb = single_succ (seq_start_bb);
3784 if (!broken_loop)
3785 {
3786 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3787 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3788 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3789 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3790 }
3791 exit_bb = region->exit;
3792
3793 /* Trip and adjustment setup goes in ENTRY_BB. */
3794 gsi = gsi_last_bb (entry_bb);
3795 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3796
3797 if (fd->collapse > 1)
3798 {
3799 int first_zero_iter = -1, dummy = -1;
3800 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3801
3802 counts = XALLOCAVEC (tree, fd->collapse);
3803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3804 fin_bb, first_zero_iter,
3805 dummy_bb, dummy, l2_dom_bb);
3806 t = NULL_TREE;
3807 }
3808 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3809 t = integer_one_node;
3810 else
3811 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3812 fold_convert (type, fd->loop.n1),
3813 fold_convert (type, fd->loop.n2));
3814 if (fd->collapse == 1
3815 && TYPE_UNSIGNED (type)
3816 && (t == NULL_TREE || !integer_onep (t)))
3817 {
3818 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3820 true, GSI_SAME_STMT);
3821 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3823 true, GSI_SAME_STMT);
3824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3825 NULL_TREE, NULL_TREE);
3826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3828 expand_omp_regimplify_p, NULL, NULL)
3829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3830 expand_omp_regimplify_p, NULL, NULL))
3831 {
3832 gsi = gsi_for_stmt (cond_stmt);
3833 gimple_regimplify_operands (cond_stmt, &gsi);
3834 }
3835 se = split_block (entry_bb, cond_stmt);
3836 se->flags = EDGE_TRUE_VALUE;
3837 entry_bb = se->dest;
3838 se->probability = profile_probability::very_likely ();
3839 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3840 se->probability = profile_probability::very_unlikely ();
3841 if (gimple_in_ssa_p (cfun))
3842 {
3843 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3844 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3845 !gsi_end_p (gpi); gsi_next (&gpi))
3846 {
3847 gphi *phi = gpi.phi ();
3848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3849 se, UNKNOWN_LOCATION);
3850 }
3851 }
3852 gsi = gsi_last_bb (entry_bb);
3853 }
3854
3855 switch (gimple_omp_for_kind (fd->for_stmt))
3856 {
3857 case GF_OMP_FOR_KIND_FOR:
3858 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3859 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3860 break;
3861 case GF_OMP_FOR_KIND_DISTRIBUTE:
3862 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3863 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3864 break;
3865 default:
3866 gcc_unreachable ();
3867 }
3868 nthreads = build_call_expr (nthreads, 0);
3869 nthreads = fold_convert (itype, nthreads);
3870 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3871 true, GSI_SAME_STMT);
3872 threadid = build_call_expr (threadid, 0);
3873 threadid = fold_convert (itype, threadid);
3874 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3876
3877 n1 = fd->loop.n1;
3878 n2 = fd->loop.n2;
3879 step = fd->loop.step;
3880 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3881 {
3882 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3883 OMP_CLAUSE__LOOPTEMP_);
3884 gcc_assert (innerc);
3885 n1 = OMP_CLAUSE_DECL (innerc);
3886 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3887 OMP_CLAUSE__LOOPTEMP_);
3888 gcc_assert (innerc);
3889 n2 = OMP_CLAUSE_DECL (innerc);
3890 }
3891 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3892 true, NULL_TREE, true, GSI_SAME_STMT);
3893 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3894 true, NULL_TREE, true, GSI_SAME_STMT);
3895 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3896 true, NULL_TREE, true, GSI_SAME_STMT);
3897 tree chunk_size = fold_convert (itype, fd->chunk_size);
3898 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3899 chunk_size
3900 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3901 GSI_SAME_STMT);
3902
3903 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3904 t = fold_build2 (PLUS_EXPR, itype, step, t);
3905 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3906 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3907 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3908 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3909 fold_build1 (NEGATE_EXPR, itype, t),
3910 fold_build1 (NEGATE_EXPR, itype, step));
3911 else
3912 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3913 t = fold_convert (itype, t);
3914 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3915 true, GSI_SAME_STMT);
3916
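  /* TRIP counts how many rounds of chunk hand-out have completed so far;
     it feeds S0 = (TRIP * NTHREADS + THREADID) * CHUNK_SIZE below.  In SSA
     form it needs distinct init/main/back names so that a PHI node can be
     built in ITER_PART_BB later.  */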
3917 trip_var = create_tmp_reg (itype, ".trip");
3918 if (gimple_in_ssa_p (cfun))
3919 {
3920 trip_init = make_ssa_name (trip_var);
3921 trip_main = make_ssa_name (trip_var);
3922 trip_back = make_ssa_name (trip_var);
3923 }
3924 else
3925 {
3926 trip_init = trip_var;
3927 trip_main = trip_var;
3928 trip_back = trip_var;
3929 }
3930
3931 gassign *assign_stmt
3932 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3933 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3934
3935 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3936 t = fold_build2 (MULT_EXPR, itype, t, step);
3937 if (POINTER_TYPE_P (type))
3938 t = fold_build_pointer_plus (n1, t);
3939 else
3940 t = fold_build2 (PLUS_EXPR, type, t, n1);
3941 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3942 true, GSI_SAME_STMT);
3943
3944 /* Remove the GIMPLE_OMP_FOR. */
3945 gsi_remove (&gsi, true);
3946
3947 gimple_stmt_iterator gsif = gsi;
3948
3949 /* Iteration space partitioning goes in ITER_PART_BB. */
3950 gsi = gsi_last_bb (iter_part_bb);
3951
3952 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3953 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3954 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3955 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3956 false, GSI_CONTINUE_LINKING);
3957
3958 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3959 t = fold_build2 (MIN_EXPR, itype, t, n);
3960 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3961 false, GSI_CONTINUE_LINKING);
3962
3963 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3964 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3965
3966 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3967 gsi = gsi_start_bb (seq_start_bb);
3968
3969 tree startvar = fd->loop.v;
3970 tree endvar = NULL_TREE;
3971
3972 if (gimple_omp_for_combined_p (fd->for_stmt))
3973 {
3974 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3975 ? gimple_omp_parallel_clauses (inner_stmt)
3976 : gimple_omp_for_clauses (inner_stmt);
3977 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3978 gcc_assert (innerc);
3979 startvar = OMP_CLAUSE_DECL (innerc);
3980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 OMP_CLAUSE__LOOPTEMP_);
3982 gcc_assert (innerc);
3983 endvar = OMP_CLAUSE_DECL (innerc);
3984 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3985 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3986 {
3987 int i;
3988 for (i = 1; i < fd->collapse; i++)
3989 {
3990 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3991 OMP_CLAUSE__LOOPTEMP_);
3992 gcc_assert (innerc);
3993 }
3994 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3995 OMP_CLAUSE__LOOPTEMP_);
3996 if (innerc)
3997 {
3998 /* If needed (distribute parallel for with lastprivate),
3999 propagate down the total number of iterations. */
4000 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4001 fd->loop.n2);
4002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4003 GSI_CONTINUE_LINKING);
4004 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4006 }
4007 }
4008 }
4009
4010 t = fold_convert (itype, s0);
4011 t = fold_build2 (MULT_EXPR, itype, t, step);
4012 if (POINTER_TYPE_P (type))
4013 t = fold_build_pointer_plus (n1, t);
4014 else
4015 t = fold_build2 (PLUS_EXPR, type, t, n1);
4016 t = fold_convert (TREE_TYPE (startvar), t);
4017 t = force_gimple_operand_gsi (&gsi, t,
4018 DECL_P (startvar)
4019 && TREE_ADDRESSABLE (startvar),
4020 NULL_TREE, false, GSI_CONTINUE_LINKING);
4021 assign_stmt = gimple_build_assign (startvar, t);
4022 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4023
4024 t = fold_convert (itype, e0);
4025 t = fold_build2 (MULT_EXPR, itype, t, step);
4026 if (POINTER_TYPE_P (type))
4027 t = fold_build_pointer_plus (n1, t);
4028 else
4029 t = fold_build2 (PLUS_EXPR, type, t, n1);
4030 t = fold_convert (TREE_TYPE (startvar), t);
4031 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4032 false, GSI_CONTINUE_LINKING);
4033 if (endvar)
4034 {
4035 assign_stmt = gimple_build_assign (endvar, e);
4036 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4037 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4038 assign_stmt = gimple_build_assign (fd->loop.v, e);
4039 else
4040 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4041 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4042 }
4043 /* Handle linear clause adjustments. */
4044 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4045 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4046 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4047 c; c = OMP_CLAUSE_CHAIN (c))
4048 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4049 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4050 {
4051 tree d = OMP_CLAUSE_DECL (c);
4052 bool is_ref = omp_is_reference (d);
4053 tree t = d, a, dest;
4054 if (is_ref)
4055 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4056 tree type = TREE_TYPE (t);
4057 if (POINTER_TYPE_P (type))
4058 type = sizetype;
4059 dest = unshare_expr (t);
4060 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4061 expand_omp_build_assign (&gsif, v, t);
4062 if (itercnt == NULL_TREE)
4063 {
4064 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4065 {
4066 itercntbias
4067 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4068 fold_convert (itype, fd->loop.n1));
4069 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4070 itercntbias, step);
4071 itercntbias
4072 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4073 NULL_TREE, true,
4074 GSI_SAME_STMT);
4075 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4076 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4077 NULL_TREE, false,
4078 GSI_CONTINUE_LINKING);
4079 }
4080 else
4081 itercnt = s0;
4082 }
4083 a = fold_build2 (MULT_EXPR, type,
4084 fold_convert (type, itercnt),
4085 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4086 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4087 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4088 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4089 false, GSI_CONTINUE_LINKING);
4090 assign_stmt = gimple_build_assign (dest, t);
4091 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4092 }
4093 if (fd->collapse > 1)
4094 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4095
4096 if (!broken_loop)
4097 {
4098 /* The code controlling the sequential loop goes in CONT_BB,
4099 replacing the GIMPLE_OMP_CONTINUE. */
4100 gsi = gsi_last_bb (cont_bb);
4101 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4102 vmain = gimple_omp_continue_control_use (cont_stmt);
4103 vback = gimple_omp_continue_control_def (cont_stmt);
4104
4105 if (!gimple_omp_for_combined_p (fd->for_stmt))
4106 {
4107 if (POINTER_TYPE_P (type))
4108 t = fold_build_pointer_plus (vmain, step);
4109 else
4110 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4111 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4112 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4113 true, GSI_SAME_STMT);
4114 assign_stmt = gimple_build_assign (vback, t);
4115 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4116
4117 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4118 t = build2 (EQ_EXPR, boolean_type_node,
4119 build_int_cst (itype, 0),
4120 build_int_cst (itype, 1));
4121 else
4122 t = build2 (fd->loop.cond_code, boolean_type_node,
4123 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4124 ? t : vback, e);
4125 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4126 }
4127
4128 /* Remove GIMPLE_OMP_CONTINUE. */
4129 gsi_remove (&gsi, true);
4130
4131 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4132 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4133
4134 /* Trip update code goes into TRIP_UPDATE_BB. */
4135 gsi = gsi_start_bb (trip_update_bb);
4136
4137 t = build_int_cst (itype, 1);
4138 t = build2 (PLUS_EXPR, itype, trip_main, t);
4139 assign_stmt = gimple_build_assign (trip_back, t);
4140 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4141 }
4142
4143 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4144 gsi = gsi_last_bb (exit_bb);
4145 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4146 {
4147 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4148 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4149 }
4150 gsi_remove (&gsi, true);
4151
4152 /* Connect the new blocks. */
4153 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4154 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4155
4156 if (!broken_loop)
4157 {
4158 se = find_edge (cont_bb, body_bb);
4159 if (se == NULL)
4160 {
4161 se = BRANCH_EDGE (cont_bb);
4162 gcc_assert (single_succ (se->dest) == body_bb);
4163 }
4164 if (gimple_omp_for_combined_p (fd->for_stmt))
4165 {
4166 remove_edge (se);
4167 se = NULL;
4168 }
4169 else if (fd->collapse > 1)
4170 {
4171 remove_edge (se);
4172 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4173 }
4174 else
4175 se->flags = EDGE_TRUE_VALUE;
4176 find_edge (cont_bb, trip_update_bb)->flags
4177 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4178
4178
4179 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4180 iter_part_bb);
4181 }
4182
4183 if (gimple_in_ssa_p (cfun))
4184 {
4185 gphi_iterator psi;
4186 gphi *phi;
4187 edge re, ene;
4188 edge_var_map *vm;
4189 size_t i;
4190
4191 gcc_assert (fd->collapse == 1 && !broken_loop);
4192
4193 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4194 remove arguments of the phi nodes in fin_bb. We need to create
4195 appropriate phi nodes in iter_part_bb instead. */
4196 se = find_edge (iter_part_bb, fin_bb);
4197 re = single_succ_edge (trip_update_bb);
4198 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4199 ene = single_succ_edge (entry_bb);
4200
4201 psi = gsi_start_phis (fin_bb);
4202 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4203 gsi_next (&psi), ++i)
4204 {
4205 gphi *nphi;
4206 source_location locus;
4207
4208 phi = psi.phi ();
4209 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4210 redirect_edge_var_map_def (vm), 0))
4211 continue;
4212
4213 t = gimple_phi_result (phi);
4214 gcc_assert (t == redirect_edge_var_map_result (vm));
4215
4216 if (!single_pred_p (fin_bb))
4217 t = copy_ssa_name (t, phi);
4218
4219 nphi = create_phi_node (t, iter_part_bb);
4220
4221 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4222 locus = gimple_phi_arg_location_from_edge (phi, se);
4223
4224 /* A special case -- fd->loop.v is not yet computed in
4225 iter_part_bb; we need to use vextra instead. */
4226 if (t == fd->loop.v)
4227 t = vextra;
4228 add_phi_arg (nphi, t, ene, locus);
4229 locus = redirect_edge_var_map_location (vm);
4230 tree back_arg = redirect_edge_var_map_def (vm);
4231 add_phi_arg (nphi, back_arg, re, locus);
4232 edge ce = find_edge (cont_bb, body_bb);
4233 if (ce == NULL)
4234 {
4235 ce = BRANCH_EDGE (cont_bb);
4236 gcc_assert (single_succ (ce->dest) == body_bb);
4237 ce = single_succ_edge (ce->dest);
4238 }
4239 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4240 gcc_assert (inner_loop_phi != NULL);
4241 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4242 find_edge (seq_start_bb, body_bb), locus);
4243
4244 if (!single_pred_p (fin_bb))
4245 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4246 }
4247 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4248 redirect_edge_var_map_clear (re);
4249 if (single_pred_p (fin_bb))
4250 while (1)
4251 {
4252 psi = gsi_start_phis (fin_bb);
4253 if (gsi_end_p (psi))
4254 break;
4255 remove_phi_node (&psi, false);
4256 }
4257
4258 /* Make phi node for trip. */
4259 phi = create_phi_node (trip_main, iter_part_bb);
4260 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4261 UNKNOWN_LOCATION);
4262 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4263 UNKNOWN_LOCATION);
4264 }
4265
4266 if (!broken_loop)
4267 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4268 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4269 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4270 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4271 recompute_dominator (CDI_DOMINATORS, fin_bb));
4272 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4273 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4274 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4275 recompute_dominator (CDI_DOMINATORS, body_bb));
4276
4277 if (!broken_loop)
4278 {
4279 struct loop *loop = body_bb->loop_father;
4280 struct loop *trip_loop = alloc_loop ();
4281 trip_loop->header = iter_part_bb;
4282 trip_loop->latch = trip_update_bb;
4283 add_loop (trip_loop, iter_part_bb->loop_father);
4284
4285 if (loop != entry_bb->loop_father)
4286 {
4287 gcc_assert (loop->header == body_bb);
4288 gcc_assert (loop->latch == region->cont
4289 || single_pred (loop->latch) == region->cont);
4290 trip_loop->inner = loop;
4291 return;
4292 }
4293
4294 if (!gimple_omp_for_combined_p (fd->for_stmt))
4295 {
4296 loop = alloc_loop ();
4297 loop->header = body_bb;
4298 if (collapse_bb == NULL)
4299 loop->latch = cont_bb;
4300 add_loop (loop, trip_loop);
4301 }
4302 }
4303}
4304
4305/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4306 Given parameters:
4307 for (V = N1; V cond N2; V += STEP) BODY;
4308
4309 where COND is "<" or ">" or "!=", we generate pseudocode
4310
4311 for (ind_var = low; ind_var < high; ind_var++)
4312 {
4313 V = n1 + (ind_var * STEP)
4314
4315 <BODY>
4316 }
4317
4318 In the above pseudocode, low and high are function parameters of the
4319 child function. In the function below, we are inserting a temp.
4320 variable that will be making a call to two OMP functions that will not be
4321 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4322 with _Cilk_for). These functions are replaced with low and high
4323 by the function that handles taskreg. */
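/* Illustrative sketch (not part of GCC; names made up for exposition):
   the shape of the outlined loop after this expansion, with __low and
   __high supplied by the Cilk runtime as described above.  */

static void
example_cilk_for_shape (long __low, long __high, long n1, long step)
{
  for (long ind_var = __low; ind_var < __high; ind_var++)
    {
      long v = n1 + ind_var * step;
      /* ... BODY uses v ... */
      (void) v;
    }
}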
4324
4325
4326static void
4327expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4328{
4329 bool broken_loop = region->cont == NULL;
4330 basic_block entry_bb = region->entry;
4331 basic_block cont_bb = region->cont;
4332
4333 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4334 gcc_assert (broken_loop
4335 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4336 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4337 basic_block l1_bb, l2_bb;
4338
4339 if (!broken_loop)
4340 {
4341 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4342 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4343 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4344 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4345 }
4346 else
4347 {
4348 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4349 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4350 l2_bb = single_succ (l1_bb);
4351 }
4352 basic_block exit_bb = region->exit;
4353 basic_block l2_dom_bb = NULL;
4354
4355 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4356
4357 /* The statements below, up to the "tree high_val = ..." statement, are pseudo statements
4358 used to pass information to be used by expand_omp_taskreg.
4359 low_val and high_val will be replaced by the __low and __high
4360 parameter from the child function.
4361
4362 The call_exprs part is a place-holder; it is mainly used
4363 to distinctly identify to the top-level part that this is
4364 where we should put low and high (reasoning given in header
4365 comment). */
4366
4367 gomp_parallel *par_stmt
4368 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4369 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4370 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4371 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4372 {
4373 if (id_equal (DECL_NAME (t), "__high"))
4374 high_val = t;
4375 else if (id_equal (DECL_NAME (t), "__low"))
4376 low_val = t;
4377 }
4378 gcc_assert (low_val && high_val);
4379
4380 tree type = TREE_TYPE (low_val);
4381 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4382 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4383
4384 /* Not needed in SSA form right now. */
4385 gcc_assert (!gimple_in_ssa_p (cfun));
4386 if (l2_dom_bb == NULL)
4387 l2_dom_bb = l1_bb;
4388
4389 tree n1 = low_val;
4390 tree n2 = high_val;
4391
4392 gimple *stmt = gimple_build_assign (ind_var, n1);
4393
4394 /* Replace the GIMPLE_OMP_FOR statement. */
4395 gsi_replace (&gsi, stmt, true);
4396
4397 if (!broken_loop)
4398 {
4399 /* Code to control the increment goes in the CONT_BB. */
4400 gsi = gsi_last_bb (cont_bb);
4401 stmt = gsi_stmt (gsi);
4402 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4403 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4404 build_one_cst (type));
4405
4406 /* Replace GIMPLE_OMP_CONTINUE. */
4407 gsi_replace (&gsi, stmt, true);
4408 }
4409
4410 /* Emit the condition in L1_BB. */
4411 gsi = gsi_after_labels (l1_bb);
4412 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4413 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4414 fd->loop.step);
4415 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4416 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4417 fd->loop.n1, fold_convert (sizetype, t));
4418 else
4419 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4420 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4421 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4422 expand_omp_build_assign (&gsi, fd->loop.v, t);
4423
4424 /* The condition is always '<' since the runtime will fill in the low
4425 and high values. */
4426 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4427 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4428
4429 /* Remove GIMPLE_OMP_RETURN. */
4430 gsi = gsi_last_bb (exit_bb);
4431 gsi_remove (&gsi, true);
4432
4433 /* Connect the new blocks. */
4434 remove_edge (FALLTHRU_EDGE (entry_bb));
4435
4436 edge e, ne;
4437 if (!broken_loop)
4438 {
4439 remove_edge (BRANCH_EDGE (entry_bb));
4440 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4441
4442 e = BRANCH_EDGE (l1_bb);
4443 ne = FALLTHRU_EDGE (l1_bb);
4444 e->flags = EDGE_TRUE_VALUE;
4445 }
4446 else
4447 {
4448 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4449
4450 ne = single_succ_edge (l1_bb);
4451 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4452
4453 }
4454 ne->flags = EDGE_FALSE_VALUE;
4455 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4456 ne->probability = e->probability.invert ();
4457
4458 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4459 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4460 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4461
4462 if (!broken_loop)
4463 {
4464 struct loop *loop = alloc_loop ();
4465 loop->header = l1_bb;
4466 loop->latch = cont_bb;
4467 add_loop (loop, l1_bb->loop_father);
4468 loop->safelen = INT_MAX;
4469 }
4470
4471 /* Pick the correct library function based on the precision of the
4472 induction variable type. */
4473 tree lib_fun = NULL_TREE;
4474 if (TYPE_PRECISION (type) == 32)
4475 lib_fun = cilk_for_32_fndecl;
4476 else if (TYPE_PRECISION (type) == 64)
4477 lib_fun = cilk_for_64_fndecl;
4478 else
4479 gcc_unreachable ();
4480
4481 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4482
4483 /* WS_ARGS contains the library function flavor to call:
4484 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4485 user-defined grain value. If the user does not define one, then zero
4486 is passed in by the parser. */
4487 vec_alloc (region->ws_args, 2);
4488 region->ws_args->quick_push (lib_fun);
4489 region->ws_args->quick_push (fd->chunk_size);
4490}
4491
4492/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4493 loop. Given parameters:
4494
4495 for (V = N1; V cond N2; V += STEP) BODY;
4496
4497 where COND is "<" or ">", we generate pseudocode
4498
4499 V = N1;
4500 goto L1;
4501 L0:
4502 BODY;
4503 V += STEP;
4504 L1:
4505 if (V cond N2) goto L0; else goto L2;
4506 L2:
4507
4508 For collapsed loops, given parameters:
4509 collapse(3)
4510 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4511 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4512 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4513 BODY;
4514
4515 we generate pseudocode
4516
4517 if (cond3 is <)
4518 adj = STEP3 - 1;
4519 else
4520 adj = STEP3 + 1;
4521 count3 = (adj + N32 - N31) / STEP3;
4522 if (cond2 is <)
4523 adj = STEP2 - 1;
4524 else
4525 adj = STEP2 + 1;
4526 count2 = (adj + N22 - N21) / STEP2;
4527 if (cond1 is <)
4528 adj = STEP1 - 1;
4529 else
4530 adj = STEP1 + 1;
4531 count1 = (adj + N12 - N11) / STEP1;
4532 count = count1 * count2 * count3;
4533 V = 0;
4534 V1 = N11;
4535 V2 = N21;
4536 V3 = N31;
4537 goto L1;
4538 L0:
4539 BODY;
4540 V += 1;
4541 V3 += STEP3;
4542 V2 += (V3 cond3 N32) ? 0 : STEP2;
4543 V3 = (V3 cond3 N32) ? V3 : N31;
4544 V1 += (V2 cond2 N22) ? 0 : STEP1;
4545 V2 = (V2 cond2 N22) ? V2 : N21;
4546 L1:
4547 if (V < count) goto L0; else goto L2;
4548 L2:
4549
4550 */
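/* Illustrative sketch (not part of GCC; plain C, all conditions assumed to
   be "<"): the collapsed induction-variable update from the pseudocode
   above.  The innermost variable steps on every iteration; an outer
   variable only advances when the next inner one wraps back to its lower
   bound.  */

static void
example_collapse3_step (long *v1, long *v2, long *v3,
			long n21, long n22, long step2,
			long n31, long n32, long step3,
			long step1)
{
  *v3 += step3;
  if (!(*v3 < n32))
    {
      *v3 = n31;
      *v2 += step2;
      if (!(*v2 < n22))
	{
	  *v2 = n21;
	  *v1 += step1;
	}
    }
}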
4551
4552static void
4553expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4554{
4555 tree type, t;
4556 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4557 gimple_stmt_iterator gsi;
4558 gimple *stmt;
4559 gcond *cond_stmt;
4560 bool broken_loop = region->cont == NULL;
4561 edge e, ne;
4562 tree *counts = NULL;
4563 int i;
4564 int safelen_int = INT_MAX;
4565 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4566 OMP_CLAUSE_SAFELEN);
4567 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4568 OMP_CLAUSE__SIMDUID_);
4569 tree n1, n2;
4570
4571 if (safelen)
4572 {
4573 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4574 if (TREE_CODE (safelen) != INTEGER_CST)
4575 safelen_int = 0;
4576 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4577 safelen_int = tree_to_uhwi (safelen);
4578 if (safelen_int == 1)
4579 safelen_int = 0;
4580 }
4581 type = TREE_TYPE (fd->loop.v);
4582 entry_bb = region->entry;
4583 cont_bb = region->cont;
4584 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4585 gcc_assert (broken_loop
4586 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4587 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4588 if (!broken_loop)
4589 {
4590 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4591 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4592 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4593 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4594 }
4595 else
4596 {
4597 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4598 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4599 l2_bb = single_succ (l1_bb);
4600 }
4601 exit_bb = region->exit;
4602 l2_dom_bb = NULL;
4603
4604 gsi = gsi_last_bb (entry_bb);
4605
4606 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4607 /* Not needed in SSA form right now. */
4608 gcc_assert (!gimple_in_ssa_p (cfun));
4609 if (fd->collapse > 1)
4610 {
4611 int first_zero_iter = -1, dummy = -1;
4612 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4613
4614 counts = XALLOCAVEC (tree, fd->collapse);
4615 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4616 zero_iter_bb, first_zero_iter,
4617 dummy_bb, dummy, l2_dom_bb);
4618 }
4619 if (l2_dom_bb == NULL)
4620 l2_dom_bb = l1_bb;
4621
4622 n1 = fd->loop.n1;
4623 n2 = fd->loop.n2;
4624 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4625 {
4626 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4627 OMP_CLAUSE__LOOPTEMP_);
4628 gcc_assert (innerc);
4629 n1 = OMP_CLAUSE_DECL (innerc);
4630 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4631 OMP_CLAUSE__LOOPTEMP_);
4632 gcc_assert (innerc);
4633 n2 = OMP_CLAUSE_DECL (innerc);
4634 }
4635 tree step = fd->loop.step;
4636
4637 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4638 OMP_CLAUSE__SIMT_);
4639 if (is_simt)
4640 {
4641 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4642 is_simt = safelen_int > 1;
4643 }
4644 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4645 if (is_simt)
4646 {
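      /* Each SIMT lane privatizes the loop: lane L starts at N1 + L * STEP
	 and then advances by STEP * VF per iteration, so the lanes
	 interleave the iteration space.  */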
4647 simt_lane = create_tmp_var (unsigned_type_node);
4648 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4649 gimple_call_set_lhs (g, simt_lane);
4650 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4651 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4652 fold_convert (TREE_TYPE (step), simt_lane));
4653 n1 = fold_convert (type, n1);
4654 if (POINTER_TYPE_P (type))
4655 n1 = fold_build_pointer_plus (n1, offset);
4656 else
4657 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4658
4659 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4660 if (fd->collapse > 1)
4661 simt_maxlane = build_one_cst (unsigned_type_node);
4662 else if (safelen_int < omp_max_simt_vf ())
4663 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4664 tree vf
4665 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4666 unsigned_type_node, 0);
4667 if (simt_maxlane)
4668 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4669 vf = fold_convert (TREE_TYPE (step), vf);
4670 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4671 }
4672
4673 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4674 if (fd->collapse > 1)
4675 {
4676 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4677 {
4678 gsi_prev (&gsi);
4679 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4680 gsi_next (&gsi);
4681 }
4682 else
4683 for (i = 0; i < fd->collapse; i++)
4684 {
4685 tree itype = TREE_TYPE (fd->loops[i].v);
4686 if (POINTER_TYPE_P (itype))
4687 itype = signed_type_for (itype);
4688 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4689 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4690 }
4691 }
4692
4693 /* Remove the GIMPLE_OMP_FOR statement. */
4694 gsi_remove (&gsi, true);
4695
4696 if (!broken_loop)
4697 {
4698 /* Code to control the increment goes in the CONT_BB. */
4699 gsi = gsi_last_bb (cont_bb);
4700 stmt = gsi_stmt (gsi);
4701 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4702
4703 if (POINTER_TYPE_P (type))
4704 t = fold_build_pointer_plus (fd->loop.v, step);
4705 else
4706 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4707 expand_omp_build_assign (&gsi, fd->loop.v, t);
4708
4709 if (fd->collapse > 1)
4710 {
4711 i = fd->collapse - 1;
4712 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4713 {
4714 t = fold_convert (sizetype, fd->loops[i].step);
4715 t = fold_build_pointer_plus (fd->loops[i].v, t);
4716 }
4717 else
4718 {
4719 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4720 fd->loops[i].step);
4721 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4722 fd->loops[i].v, t);
4723 }
4724 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4725
4726 for (i = fd->collapse - 1; i > 0; i--)
4727 {
4728 tree itype = TREE_TYPE (fd->loops[i].v);
4729 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4730 if (POINTER_TYPE_P (itype2))
4731 itype2 = signed_type_for (itype2);
4732 t = build3 (COND_EXPR, itype2,
4733 build2 (fd->loops[i].cond_code, boolean_type_node,
4734 fd->loops[i].v,
4735 fold_convert (itype, fd->loops[i].n2)),
4736 build_int_cst (itype2, 0),
4737 fold_convert (itype2, fd->loops[i - 1].step));
4738 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4739 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4740 else
4741 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4742 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4743
4744 t = build3 (COND_EXPR, itype,
4745 build2 (fd->loops[i].cond_code, boolean_type_node,
4746 fd->loops[i].v,
4747 fold_convert (itype, fd->loops[i].n2)),
4748 fd->loops[i].v,
4749 fold_convert (itype, fd->loops[i].n1));
4750 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4751 }
4752 }
4753
4754 /* Remove GIMPLE_OMP_CONTINUE. */
4755 gsi_remove (&gsi, true);
4756 }
4757
4758 /* Emit the condition in L1_BB. */
4759 gsi = gsi_start_bb (l1_bb);
4760
4761 t = fold_convert (type, n2);
4762 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4763 false, GSI_CONTINUE_LINKING);
4764 tree v = fd->loop.v;
4765 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4766 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4767 false, GSI_CONTINUE_LINKING);
4768 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4769 cond_stmt = gimple_build_cond_empty (t);
4770 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4771 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4772 NULL, NULL)
4773 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4774 NULL, NULL))
4775 {
4776 gsi = gsi_for_stmt (cond_stmt);
4777 gimple_regimplify_operands (cond_stmt, &gsi);
4778 }
4779
4780 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4781 if (is_simt)
4782 {
4783 gsi = gsi_start_bb (l2_bb);
4784 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4785 if (POINTER_TYPE_P (type))
4786 t = fold_build_pointer_plus (fd->loop.v, step);
4787 else
4788 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4789 expand_omp_build_assign (&gsi, fd->loop.v, t);
4790 }
4791
4792 /* Remove GIMPLE_OMP_RETURN. */
4793 gsi = gsi_last_bb (exit_bb);
4794 gsi_remove (&gsi, true);
4795
4796 /* Connect the new blocks. */
4797 remove_edge (FALLTHRU_EDGE (entry_bb));
4798
4799 if (!broken_loop)
4800 {
4801 remove_edge (BRANCH_EDGE (entry_bb));
4802 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4803
4804 e = BRANCH_EDGE (l1_bb);
4805 ne = FALLTHRU_EDGE (l1_bb);
4806 e->flags = EDGE_TRUE_VALUE;
4807 }
4808 else
4809 {
4810 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4811
4812 ne = single_succ_edge (l1_bb);
4813 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4814
4815 }
4816 ne->flags = EDGE_FALSE_VALUE;
4817 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4818 ne->probability = e->probability.invert ();
4819
4820 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4821 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4822
4823 if (simt_maxlane)
4824 {
4825 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4826 NULL_TREE, NULL_TREE);
4827 gsi = gsi_last_bb (entry_bb);
4828 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4829 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4830 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4831 FALLTHRU_EDGE (entry_bb)->probability
4832 = profile_probability::guessed_always ().apply_scale (7, 8);
4833 BRANCH_EDGE (entry_bb)->probability
4834 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4835 l2_dom_bb = entry_bb;
4836 }
4837 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4838
4839 if (!broken_loop)
4840 {
4841 struct loop *loop = alloc_loop ();
4842 loop->header = l1_bb;
4843 loop->latch = cont_bb;
4844 add_loop (loop, l1_bb->loop_father);
4845 loop->safelen = safelen_int;
4846 if (simduid)
4847 {
4848 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4849 cfun->has_simduid_loops = true;
4850 }
4851 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4852 the loop. */
4853 if ((flag_tree_loop_vectorize
4854 || !global_options_set.x_flag_tree_loop_vectorize)
4855 && flag_tree_loop_optimize
4856 && loop->safelen > 1)
4857 {
4858 loop->force_vectorize = true;
4859 cfun->has_force_vectorize_loops = true;
4860 }
4861 }
4862 else if (simduid)
4863 cfun->has_simduid_loops = true;
4864}
4865
4866/* Taskloop construct is represented after gimplification with
4867 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4868 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4869 which should just compute all the needed loop temporaries
4870 for GIMPLE_OMP_TASK. */
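/* Illustrative sketch (not part of GCC): the gimplified nesting that this
   routine and expand_omp_taskloop_for_inner operate on:

       GIMPLE_OMP_FOR       <- outer, expanded here; only computes the
         GIMPLE_OMP_TASK       loop temporaries handed to the task
           GIMPLE_OMP_FOR   <- inner, expanded by
                               expand_omp_taskloop_for_inner  */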
4871
4872static void
4873expand_omp_taskloop_for_outer (struct omp_region *region,
4874 struct omp_for_data *fd,
4875 gimple *inner_stmt)
4876{
4877 tree type, bias = NULL_TREE;
4878 basic_block entry_bb, cont_bb, exit_bb;
4879 gimple_stmt_iterator gsi;
4880 gassign *assign_stmt;
4881 tree *counts = NULL;
4882 int i;
4883
4884 gcc_assert (inner_stmt);
4885 gcc_assert (region->cont);
4886 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4887 && gimple_omp_task_taskloop_p (inner_stmt));
4888 type = TREE_TYPE (fd->loop.v);
4889
4890 /* See if we need to bias by LLONG_MIN. */
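  /* Adding the most negative value of TYPE as an unsigned bias maps the
     signed range onto the unsigned range while preserving order, so the
     unsigned iteration arithmetic in GOMP_taskloop_ull stays correct even
     when the bounds are negative.  */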
4891 if (fd->iter_type == long_long_unsigned_type_node
4892 && TREE_CODE (type) == INTEGER_TYPE
4893 && !TYPE_UNSIGNED (type))
4894 {
4895 tree n1, n2;
4896
4897 if (fd->loop.cond_code == LT_EXPR)
4898 {
4899 n1 = fd->loop.n1;
4900 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4901 }
4902 else
4903 {
4904 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4905 n2 = fd->loop.n1;
4906 }
4907 if (TREE_CODE (n1) != INTEGER_CST
4908 || TREE_CODE (n2) != INTEGER_CST
4909 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4910 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4911 }
4912
4913 entry_bb = region->entry;
4914 cont_bb = region->cont;
4915 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4916 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4917 exit_bb = region->exit;
4918
4919 gsi = gsi_last_bb (entry_bb);
4920 gimple *for_stmt = gsi_stmt (gsi);
4921 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4922 if (fd->collapse > 1)
4923 {
4924 int first_zero_iter = -1, dummy = -1;
4925 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4926
4927 counts = XALLOCAVEC (tree, fd->collapse);
4928 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4929 zero_iter_bb, first_zero_iter,
4930 dummy_bb, dummy, l2_dom_bb);
4931
4932 if (zero_iter_bb)
4933 {
4934 /* Some counts[i] vars might be uninitialized if
4935 some loop has zero iterations. But the body shouldn't
4936 be executed in that case, so just avoid uninit warnings. */
4937 for (i = first_zero_iter; i < fd->collapse; i++)
4938 if (SSA_VAR_P (counts[i]))
4939 TREE_NO_WARNING (counts[i]) = 1;
4940 gsi_prev (&gsi);
4941 edge e = split_block (entry_bb, gsi_stmt (gsi));
4942 entry_bb = e->dest;
4943 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4944 gsi = gsi_last_bb (entry_bb);
4945 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4946 get_immediate_dominator (CDI_DOMINATORS,
4947 zero_iter_bb));
4948 }
4949 }
4950
4951 tree t0, t1;
4952 t1 = fd->loop.n2;
4953 t0 = fd->loop.n1;
4954 if (POINTER_TYPE_P (TREE_TYPE (t0))
4955 && TYPE_PRECISION (TREE_TYPE (t0))
4956 != TYPE_PRECISION (fd->iter_type))
4957 {
4958 /* Avoid casting pointers to integer of a different size. */
4959 tree itype = signed_type_for (type);
4960 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4961 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4962 }
4963 else
4964 {
4965 t1 = fold_convert (fd->iter_type, t1);
4966 t0 = fold_convert (fd->iter_type, t0);
4967 }
4968 if (bias)
4969 {
4970 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4971 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4972 }
4973
4974 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4975 OMP_CLAUSE__LOOPTEMP_);
4976 gcc_assert (innerc);
4977 tree startvar = OMP_CLAUSE_DECL (innerc);
4978 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4979 gcc_assert (innerc);
4980 tree endvar = OMP_CLAUSE_DECL (innerc);
4981 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4982 {
4983 gcc_assert (innerc);
4984 for (i = 1; i < fd->collapse; i++)
4985 {
4986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4987 OMP_CLAUSE__LOOPTEMP_);
4988 gcc_assert (innerc);
4989 }
4990 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4991 OMP_CLAUSE__LOOPTEMP_);
4992 if (innerc)
4993 {
4994 /* If needed (inner taskloop has lastprivate clause), propagate
4995 down the total number of iterations. */
4996 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4997 NULL_TREE, false,
4998 GSI_CONTINUE_LINKING);
4999 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5000 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5001 }
5002 }
5003
5004 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5005 GSI_CONTINUE_LINKING);
5006 assign_stmt = gimple_build_assign (startvar, t0);
5007 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5008
5009 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5010 GSI_CONTINUE_LINKING);
5011 assign_stmt = gimple_build_assign (endvar, t1);
5012 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5013 if (fd->collapse > 1)
5014 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5015
5016 /* Remove the GIMPLE_OMP_FOR statement. */
5017 gsi = gsi_for_stmt (for_stmt);
5018 gsi_remove (&gsi, true);
5019
5020 gsi = gsi_last_bb (cont_bb);
5021 gsi_remove (&gsi, true);
5022
5023 gsi = gsi_last_bb (exit_bb);
5024 gsi_remove (&gsi, true);
5025
5026 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5027 remove_edge (BRANCH_EDGE (entry_bb));
5028 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5029 remove_edge (BRANCH_EDGE (cont_bb));
5030 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5031 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5032 recompute_dominator (CDI_DOMINATORS, region->entry));
5033}
5034
5035/* Taskloop construct is represented after gimplification with
5036 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5037 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5038 GOMP_taskloop{,_ull} function arranges for each task to be given just
5039 a single range of iterations. */
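/* Hypothetical example (user code, not part of this file): a construct such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   is gimplified into an outer GIMPLE_OMP_FOR computing the iteration space,
   a GIMPLE_OMP_TASK creating the tasks, and this inner GIMPLE_OMP_FOR which
   each task executes over the single iteration subrange handed to it by
   GOMP_taskloop{,_ull}.  */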
5040
5041static void
5042expand_omp_taskloop_for_inner (struct omp_region *region,
5043 struct omp_for_data *fd,
5044 gimple *inner_stmt)
5045{
5046 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5047 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5048 basic_block fin_bb;
5049 gimple_stmt_iterator gsi;
5050 edge ep;
5051 bool broken_loop = region->cont == NULL;
5052 tree *counts = NULL;
5053 tree n1, n2, step;
5054
5055 itype = type = TREE_TYPE (fd->loop.v);
5056 if (POINTER_TYPE_P (type))
5057 itype = signed_type_for (type);
5058
5059 /* See if we need to bias by LLONG_MIN. */
5060 if (fd->iter_type == long_long_unsigned_type_node
5061 && TREE_CODE (type) == INTEGER_TYPE
5062 && !TYPE_UNSIGNED (type))
5063 {
5064 tree n1, n2;
5065
5066 if (fd->loop.cond_code == LT_EXPR)
5067 {
5068 n1 = fd->loop.n1;
5069 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5070 }
5071 else
5072 {
5073 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5074 n2 = fd->loop.n1;
5075 }
5076 if (TREE_CODE (n1) != INTEGER_CST
5077 || TREE_CODE (n2) != INTEGER_CST
5078 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5079 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5080 }
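  /* Illustrative note: the unsigned long long runtime interface
     (GOMP_taskloop_ull) compares bounds as unsigned values, so when the
     original loop variable is signed and a bound may be negative, adding
     LLONG_MIN as a bias maps both bounds into the unsigned range while
     preserving their relative order.  */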
5081
5082 entry_bb = region->entry;
5083 cont_bb = region->cont;
5084 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5085 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5086 gcc_assert (broken_loop
5087 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5088 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5089 if (!broken_loop)
5090 {
5091 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5092 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5093 }
5094 exit_bb = region->exit;
5095
5096 /* Iteration space partitioning goes in ENTRY_BB. */
5097 gsi = gsi_last_bb (entry_bb);
5098 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5099
5100 if (fd->collapse > 1)
5101 {
5102 int first_zero_iter = -1, dummy = -1;
5103 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5104
5105 counts = XALLOCAVEC (tree, fd->collapse);
5106 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5107 fin_bb, first_zero_iter,
5108 dummy_bb, dummy, l2_dom_bb);
5109 t = NULL_TREE;
5110 }
5111 else
5112 t = integer_one_node;
5113
5114 step = fd->loop.step;
5115 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5116 OMP_CLAUSE__LOOPTEMP_);
5117 gcc_assert (innerc);
5118 n1 = OMP_CLAUSE_DECL (innerc);
5119 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5120 gcc_assert (innerc);
5121 n2 = OMP_CLAUSE_DECL (innerc);
5122 if (bias)
5123 {
5124 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5125 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5126 }
5127 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5128 true, NULL_TREE, true, GSI_SAME_STMT);
5129 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5130 true, NULL_TREE, true, GSI_SAME_STMT);
5131 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5132 true, NULL_TREE, true, GSI_SAME_STMT);
5133
5134 tree startvar = fd->loop.v;
5135 tree endvar = NULL_TREE;
5136
5137 if (gimple_omp_for_combined_p (fd->for_stmt))
5138 {
5139 tree clauses = gimple_omp_for_clauses (inner_stmt);
5140 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5141 gcc_assert (innerc);
5142 startvar = OMP_CLAUSE_DECL (innerc);
5143 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5144 OMP_CLAUSE__LOOPTEMP_);
5145 gcc_assert (innerc);
5146 endvar = OMP_CLAUSE_DECL (innerc);
5147 }
5148 t = fold_convert (TREE_TYPE (startvar), n1);
5149 t = force_gimple_operand_gsi (&gsi, t,
5150 DECL_P (startvar)
5151 && TREE_ADDRESSABLE (startvar),
5152 NULL_TREE, false, GSI_CONTINUE_LINKING);
5153 gimple *assign_stmt = gimple_build_assign (startvar, t);
5154 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5155
5156 t = fold_convert (TREE_TYPE (startvar), n2);
5157 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5158 false, GSI_CONTINUE_LINKING);
5159 if (endvar)
5160 {
5161 assign_stmt = gimple_build_assign (endvar, e);
5162 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5163 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5164 assign_stmt = gimple_build_assign (fd->loop.v, e);
5165 else
5166 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5167 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5168 }
5169 if (fd->collapse > 1)
5170 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5171
5172 if (!broken_loop)
5173 {
5174 /* The code controlling the sequential loop replaces the
5175 GIMPLE_OMP_CONTINUE. */
5176 gsi = gsi_last_bb (cont_bb);
5177 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5178 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5179 vmain = gimple_omp_continue_control_use (cont_stmt);
5180 vback = gimple_omp_continue_control_def (cont_stmt);
5181
5182 if (!gimple_omp_for_combined_p (fd->for_stmt))
5183 {
5184 if (POINTER_TYPE_P (type))
5185 t = fold_build_pointer_plus (vmain, step);
5186 else
5187 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5188 t = force_gimple_operand_gsi (&gsi, t,
5189 DECL_P (vback)
5190 && TREE_ADDRESSABLE (vback),
5191 NULL_TREE, true, GSI_SAME_STMT);
5192 assign_stmt = gimple_build_assign (vback, t);
5193 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5194
5195 t = build2 (fd->loop.cond_code, boolean_type_node,
5196 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5197 ? t : vback, e);
5198 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5199 }
5200
5201 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5202 gsi_remove (&gsi, true);
5203
5204 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5205 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5206 }
5207
5208 /* Remove the GIMPLE_OMP_FOR statement. */
5209 gsi = gsi_for_stmt (fd->for_stmt);
5210 gsi_remove (&gsi, true);
5211
5212 /* Remove the GIMPLE_OMP_RETURN statement. */
5213 gsi = gsi_last_bb (exit_bb);
5214 gsi_remove (&gsi, true);
5215
5216 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5217 if (!broken_loop)
5218 remove_edge (BRANCH_EDGE (entry_bb));
5219 else
5220 {
5221 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5222 region->outer->cont = NULL;
5223 }
5224
5225 /* Connect all the blocks. */
5226 if (!broken_loop)
5227 {
5228 ep = find_edge (cont_bb, body_bb);
5229 if (gimple_omp_for_combined_p (fd->for_stmt))
5230 {
5231 remove_edge (ep);
5232 ep = NULL;
5233 }
5234 else if (fd->collapse > 1)
5235 {
5236 remove_edge (ep);
5237 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5238 }
5239 else
5240 ep->flags = EDGE_TRUE_VALUE;
5241 find_edge (cont_bb, fin_bb)->flags
5242 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5243 }
5244
5245 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5246 recompute_dominator (CDI_DOMINATORS, body_bb));
5247 if (!broken_loop)
5248 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5249 recompute_dominator (CDI_DOMINATORS, fin_bb));
5250
5251 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5252 {
5253 struct loop *loop = alloc_loop ();
5254 loop->header = body_bb;
5255 if (collapse_bb == NULL)
5256 loop->latch = cont_bb;
5257 add_loop (loop, body_bb->loop_father);
5258 }
5259}
5260
5261/* A subroutine of expand_omp_for. Generate code for an OpenACC
5262 partitioned loop. The lowering here is abstracted, in that the
5263 loop parameters are passed through internal functions, which are
5264 further lowered by oacc_device_lower, once we get to the target
5265 compiler. The loop is of the form:
5266
5267 for (V = B; V LTGT E; V += S) {BODY}
5268
5269 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5270 (constant 0 for no chunking) and we will have a GWV partitioning
5271 mask, specifying dimensions over which the loop is to be
5272 partitioned (see note below). We generate code that looks like
5273 (this ignores tiling):
5274
5275 <entry_bb> [incoming FALL->body, BRANCH->exit]
5276 typedef signedintify (typeof (V)) T; // underlying signed integral type
5277 T range = E - B;
5278 T chunk_no = 0;
5279 T DIR = LTGT == '<' ? +1 : -1;
5280 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5281 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5282
5283 <head_bb> [created by splitting end of entry_bb]
5284 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5285 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5286 if (!(offset LTGT bound)) goto bottom_bb;
5287
5288 <body_bb> [incoming]
5289 V = B + offset;
5290 {BODY}
5291
5292 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5293 offset += step;
5294 if (offset LTGT bound) goto body_bb; [*]
5295
5296 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5297 chunk_no++;
5298 if (chunk_no < chunk_max) goto head_bb;
5299
5300 <exit_bb> [incoming]
5301 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5302
5303 [*] Needed if V live at end of loop. */
5304
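/* Hypothetical example (user code, not part of this file): a loop such as

     #pragma acc parallel loop gang vector
     for (i = 0; i < N; i++)
       body (i);

   arrives here as a GF_OMP_FOR_KIND_OACC_LOOP; the IFN_GOACC_LOOP internal
   calls emitted below (CHUNKS, STEP, OFFSET, BOUND) carry the direction,
   range, step, chunk size and GWV mask, and oacc_device_lower later turns
   them into the target-specific partitioning.  */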
5305static void
5306expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5307{
5308 tree v = fd->loop.v;
5309 enum tree_code cond_code = fd->loop.cond_code;
5310 enum tree_code plus_code = PLUS_EXPR;
5311
5312 tree chunk_size = integer_minus_one_node;
5313 tree gwv = integer_zero_node;
5314 tree iter_type = TREE_TYPE (v);
5315 tree diff_type = iter_type;
5316 tree plus_type = iter_type;
5317 struct oacc_collapse *counts = NULL;
5318
5319 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5320 == GF_OMP_FOR_KIND_OACC_LOOP);
5321 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5322 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5323
5324 if (POINTER_TYPE_P (iter_type))
5325 {
5326 plus_code = POINTER_PLUS_EXPR;
5327 plus_type = sizetype;
5328 }
5329 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5330 diff_type = signed_type_for (diff_type);
5331
5332 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5333 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5334 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5335 basic_block bottom_bb = NULL;
5336
5337 /* entry_bb has two successors; the branch edge is to the exit
5338 block, fallthrough edge to body. */
5339 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5340 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5341
5342 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5343 body_bb, or to a block whose only successor is the body_bb. Its
5344 fallthrough successor is the final block (same as the branch
5345 successor of the entry_bb). */
5346 if (cont_bb)
5347 {
5348 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5349 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5350
5351 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5352 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5353 }
5354 else
5355 gcc_assert (!gimple_in_ssa_p (cfun));
5356
5357 /* The exit block only has entry_bb and cont_bb as predecessors. */
5358 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5359
5360 tree chunk_no;
5361 tree chunk_max = NULL_TREE;
5362 tree bound, offset;
5363 tree step = create_tmp_var (diff_type, ".step");
5364 bool up = cond_code == LT_EXPR;
5365 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5366 bool chunking = !gimple_in_ssa_p (cfun);
5367 bool negating;
5368
5369 /* Tiling vars. */
5370 tree tile_size = NULL_TREE;
5371 tree element_s = NULL_TREE;
5372 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5373 basic_block elem_body_bb = NULL;
5374 basic_block elem_cont_bb = NULL;
5375
5376 /* SSA instances. */
5377 tree offset_incr = NULL_TREE;
5378 tree offset_init = NULL_TREE;
5379
5380 gimple_stmt_iterator gsi;
5381 gassign *ass;
5382 gcall *call;
5383 gimple *stmt;
5384 tree expr;
5385 location_t loc;
5386 edge split, be, fte;
5387
5388 /* Split the end of entry_bb to create head_bb. */
5389 split = split_block (entry_bb, last_stmt (entry_bb));
5390 basic_block head_bb = split->dest;
5391 entry_bb = split->src;
5392
5393 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5394 gsi = gsi_last_bb (entry_bb);
5395 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5396 loc = gimple_location (for_stmt);
5397
5398 if (gimple_in_ssa_p (cfun))
5399 {
5400 offset_init = gimple_omp_for_index (for_stmt, 0);
5401 gcc_assert (integer_zerop (fd->loop.n1));
5402 /* The SSA parallelizer does gang parallelism. */
5403 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5404 }
5405
5406 if (fd->collapse > 1 || fd->tiling)
5407 {
5408 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5409 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5410 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5411 TREE_TYPE (fd->loop.n2), loc);
5412
5413 if (SSA_VAR_P (fd->loop.n2))
5414 {
5415 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5416 true, GSI_SAME_STMT);
5417 ass = gimple_build_assign (fd->loop.n2, total);
5418 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5419 }
5420 }
5421
5422 tree b = fd->loop.n1;
5423 tree e = fd->loop.n2;
5424 tree s = fd->loop.step;
5425
5426 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5427 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5428
5429 /* Convert the step, avoiding possible unsigned->signed overflow. */
5430 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5431 if (negating)
5432 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5433 s = fold_convert (diff_type, s);
5434 if (negating)
5435 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5436 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5437
5438 if (!chunking)
5439 chunk_size = integer_zero_node;
5440 expr = fold_convert (diff_type, chunk_size);
5441 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5442 NULL_TREE, true, GSI_SAME_STMT);
5443
5444 if (fd->tiling)
5445 {
5446 /* Determine the tile size and element step,
5447 modify the outer loop step size. */
5448 tile_size = create_tmp_var (diff_type, ".tile_size");
5449 expr = build_int_cst (diff_type, 1);
5450 for (int ix = 0; ix < fd->collapse; ix++)
5451 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5452 expr = force_gimple_operand_gsi (&gsi, expr, true,
5453 NULL_TREE, true, GSI_SAME_STMT);
5454 ass = gimple_build_assign (tile_size, expr);
5455 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5456
5457 element_s = create_tmp_var (diff_type, ".element_s");
5458 ass = gimple_build_assign (element_s, s);
5459 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5460
5461 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5462 s = force_gimple_operand_gsi (&gsi, expr, true,
5463 NULL_TREE, true, GSI_SAME_STMT);
5464 }
5465
5466 /* Determine the range, avoiding possible unsigned->signed overflow. */
5467 negating = !up && TYPE_UNSIGNED (iter_type);
5468 expr = fold_build2 (MINUS_EXPR, plus_type,
5469 fold_convert (plus_type, negating ? b : e),
5470 fold_convert (plus_type, negating ? e : b));
5471 expr = fold_convert (diff_type, expr);
5472 if (negating)
5473 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5474 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5475 NULL_TREE, true, GSI_SAME_STMT);
5476
5477 chunk_no = build_int_cst (diff_type, 0);
5478 if (chunking)
5479 {
5480 gcc_assert (!gimple_in_ssa_p (cfun));
5481
5482 expr = chunk_no;
5483 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5484 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5485
5486 ass = gimple_build_assign (chunk_no, expr);
5487 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5488
5489 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5490 build_int_cst (integer_type_node,
5491 IFN_GOACC_LOOP_CHUNKS),
5492 dir, range, s, chunk_size, gwv);
5493 gimple_call_set_lhs (call, chunk_max);
5494 gimple_set_location (call, loc);
5495 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5496 }
5497 else
5498 chunk_size = chunk_no;
5499
5500 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5501 build_int_cst (integer_type_node,
5502 IFN_GOACC_LOOP_STEP),
5503 dir, range, s, chunk_size, gwv);
5504 gimple_call_set_lhs (call, step);
5505 gimple_set_location (call, loc);
5506 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5507
5508 /* Remove the GIMPLE_OMP_FOR. */
5509 gsi_remove (&gsi, true);
5510
5511 /* Fixup edges from head_bb. */
5512 be = BRANCH_EDGE (head_bb);
5513 fte = FALLTHRU_EDGE (head_bb);
5514 be->flags |= EDGE_FALSE_VALUE;
5515 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5516
5517 basic_block body_bb = fte->dest;
5518
5519 if (gimple_in_ssa_p (cfun))
5520 {
5521 gsi = gsi_last_bb (cont_bb);
5522 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5523
5524 offset = gimple_omp_continue_control_use (cont_stmt);
5525 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5526 }
5527 else
5528 {
5529 offset = create_tmp_var (diff_type, ".offset");
5530 offset_init = offset_incr = offset;
5531 }
5532 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5533
5534 /* Loop offset & bound go into head_bb. */
5535 gsi = gsi_start_bb (head_bb);
5536
5537 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5538 build_int_cst (integer_type_node,
5539 IFN_GOACC_LOOP_OFFSET),
5540 dir, range, s,
5541 chunk_size, gwv, chunk_no);
5542 gimple_call_set_lhs (call, offset_init);
5543 gimple_set_location (call, loc);
5544 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5545
5546 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5547 build_int_cst (integer_type_node,
5548 IFN_GOACC_LOOP_BOUND),
5549 dir, range, s,
5550 chunk_size, gwv, offset_init);
5551 gimple_call_set_lhs (call, bound);
5552 gimple_set_location (call, loc);
5553 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5554
5555 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5556 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5557 GSI_CONTINUE_LINKING);
5558
5559 /* V assignment goes into body_bb. */
5560 if (!gimple_in_ssa_p (cfun))
5561 {
5562 gsi = gsi_start_bb (body_bb);
5563
5564 expr = build2 (plus_code, iter_type, b,
5565 fold_convert (plus_type, offset));
5566 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5567 true, GSI_SAME_STMT);
5568 ass = gimple_build_assign (v, expr);
5569 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5570
5571 if (fd->collapse > 1 || fd->tiling)
5572 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5573
5574 if (fd->tiling)
5575 {
5576 /* Determine the range of the element loop -- usually simply
5577 the tile_size, but could be smaller if the final
5578 iteration of the outer loop is a partial tile. */
5579 tree e_range = create_tmp_var (diff_type, ".e_range");
5580
5581 expr = build2 (MIN_EXPR, diff_type,
5582 build2 (MINUS_EXPR, diff_type, bound, offset),
5583 build2 (MULT_EXPR, diff_type, tile_size,
5584 element_s));
5585 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5586 true, GSI_SAME_STMT);
5587 ass = gimple_build_assign (e_range, expr);
5588 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5589
5590 /* Determine bound, offset & step of inner loop. */
5591 e_bound = create_tmp_var (diff_type, ".e_bound");
5592 e_offset = create_tmp_var (diff_type, ".e_offset");
5593 e_step = create_tmp_var (diff_type, ".e_step");
5594
5595 /* Mark these as element loops. */
5596 tree t, e_gwv = integer_minus_one_node;
5597 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5598
5599 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5600 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5601 element_s, chunk, e_gwv, chunk);
5602 gimple_call_set_lhs (call, e_offset);
5603 gimple_set_location (call, loc);
5604 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5605
5606 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5607 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5608 element_s, chunk, e_gwv, e_offset);
5609 gimple_call_set_lhs (call, e_bound);
5610 gimple_set_location (call, loc);
5611 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5612
5613 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5614 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5615 element_s, chunk, e_gwv);
5616 gimple_call_set_lhs (call, e_step);
5617 gimple_set_location (call, loc);
5618 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5619
5620 /* Add test and split block. */
5621 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5622 stmt = gimple_build_cond_empty (expr);
5623 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5624 split = split_block (body_bb, stmt);
5625 elem_body_bb = split->dest;
5626 if (cont_bb == body_bb)
5627 cont_bb = elem_body_bb;
5628 body_bb = split->src;
5629
5630 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5631
5632 /* Initialize the user's loop vars. */
5633 gsi = gsi_start_bb (elem_body_bb);
5634 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5635 }
5636 }
5637
5638 /* Loop increment goes into cont_bb. If this is not a loop, we
5639 will have spawned threads as if it was, and each one will
5640 execute one iteration. The specification is not explicit about
5641 whether such constructs are ill-formed or not, and they can
5642 occur, especially when noreturn routines are involved. */
5643 if (cont_bb)
5644 {
5645 gsi = gsi_last_bb (cont_bb);
5646 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5647 loc = gimple_location (cont_stmt);
5648
5649 if (fd->tiling)
5650 {
5651 /* Insert element loop increment and test. */
5652 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5653 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5654 true, GSI_SAME_STMT);
5655 ass = gimple_build_assign (e_offset, expr);
5656 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5657 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5658
5659 stmt = gimple_build_cond_empty (expr);
5660 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5661 split = split_block (cont_bb, stmt);
5662 elem_cont_bb = split->src;
5663 cont_bb = split->dest;
5664
5665 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5666 split->probability = profile_probability::unlikely ().guessed ();
5667 edge latch_edge
5668 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5669 latch_edge->probability = profile_probability::likely ().guessed ();
5670
5671 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5672 skip_edge->probability = profile_probability::unlikely ().guessed ();
5673 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5674 loop_entry_edge->probability
5675 = profile_probability::likely ().guessed ();
5676
5677 gsi = gsi_for_stmt (cont_stmt);
5678 }
5679
5680 /* Increment offset. */
5681 if (gimple_in_ssa_p (cfun))
5682 expr = build2 (plus_code, iter_type, offset,
5683 fold_convert (plus_type, step));
5684 else
5685 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5686 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5687 true, GSI_SAME_STMT);
5688 ass = gimple_build_assign (offset_incr, expr);
5689 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5690 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5691 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5692
5693 /* Remove the GIMPLE_OMP_CONTINUE. */
5694 gsi_remove (&gsi, true);
5695
5696 /* Fixup edges from cont_bb. */
5697 be = BRANCH_EDGE (cont_bb);
5698 fte = FALLTHRU_EDGE (cont_bb);
5699 be->flags |= EDGE_TRUE_VALUE;
5700 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5701
5702 if (chunking)
5703 {
5704 /* Split the beginning of exit_bb to make bottom_bb. We
5705 need to insert a nop at the start, because splitting is
5706 after a stmt, not before. */
5707 gsi = gsi_start_bb (exit_bb);
5708 stmt = gimple_build_nop ();
5709 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5710 split = split_block (exit_bb, stmt);
5711 bottom_bb = split->src;
5712 exit_bb = split->dest;
5713 gsi = gsi_last_bb (bottom_bb);
5714
5715 /* Chunk increment and test goes into bottom_bb. */
5716 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5717 build_int_cst (diff_type, 1));
5718 ass = gimple_build_assign (chunk_no, expr);
5719 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5720
5721 /* Chunk test at end of bottom_bb. */
5722 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5723 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5724 GSI_CONTINUE_LINKING);
5725
5726 /* Fixup edges from bottom_bb. */
5727 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5728 split->probability = profile_probability::unlikely ().guessed ();
5729 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5730 latch_edge->probability = profile_probability::likely ().guessed ();
5731 }
5732 }
5733
5734 gsi = gsi_last_bb (exit_bb);
5735 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5736 loc = gimple_location (gsi_stmt (gsi));
5737
5738 if (!gimple_in_ssa_p (cfun))
5739 {
5740 /* Insert the final value of V, in case it is live. This is the
5741 value for the only thread that survives past the join. */
5742 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5743 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5744 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5745 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5746 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5747 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5748 true, GSI_SAME_STMT);
5749 ass = gimple_build_assign (v, expr);
5750 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5751 }
5752
5753 /* Remove the OMP_RETURN. */
5754 gsi_remove (&gsi, true);
5755
5756 if (cont_bb)
5757 {
5758 /* We now have one, two or three nested loops. Update the loop
5759 structures. */
5760 struct loop *parent = entry_bb->loop_father;
5761 struct loop *body = body_bb->loop_father;
5762
5763 if (chunking)
5764 {
5765 struct loop *chunk_loop = alloc_loop ();
5766 chunk_loop->header = head_bb;
5767 chunk_loop->latch = bottom_bb;
5768 add_loop (chunk_loop, parent);
5769 parent = chunk_loop;
5770 }
5771 else if (parent != body)
5772 {
5773 gcc_assert (body->header == body_bb);
5774 gcc_assert (body->latch == cont_bb
5775 || single_pred (body->latch) == cont_bb);
5776 parent = NULL;
5777 }
5778
5779 if (parent)
5780 {
5781 struct loop *body_loop = alloc_loop ();
5782 body_loop->header = body_bb;
5783 body_loop->latch = cont_bb;
5784 add_loop (body_loop, parent);
5785
5786 if (fd->tiling)
5787 {
5788 /* Insert tiling's element loop. */
5789 struct loop *inner_loop = alloc_loop ();
5790 inner_loop->header = elem_body_bb;
5791 inner_loop->latch = elem_cont_bb;
5792 add_loop (inner_loop, body_loop);
5793 }
5794 }
5795 }
5796}
5797
5798/* Expand the OMP loop defined by REGION. */
5799
5800static void
5801expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5802{
5803 struct omp_for_data fd;
5804 struct omp_for_data_loop *loops;
5805
5806 loops
5807 = (struct omp_for_data_loop *)
5808 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5809 * sizeof (struct omp_for_data_loop));
5810 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5811 &fd, loops);
5812 region->sched_kind = fd.sched_kind;
5813 region->sched_modifiers = fd.sched_modifiers;
5814
5815 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5816 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5817 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5818 if (region->cont)
5819 {
5820 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5821 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5822 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5823 }
5824 else
5825 /* If there isn't a continue, then this is a degenerate case where
5826 the introduction of abnormal edges during lowering will prevent
5827 original loops from being detected. Fix that up. */
5828 loops_state_set (LOOPS_NEED_FIXUP);
5829
5830 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5831 expand_omp_simd (region, &fd);
5832 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5833 expand_cilk_for (region, &fd);
5834 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5835 {
5836 gcc_assert (!inner_stmt);
5837 expand_oacc_for (region, &fd);
5838 }
5839 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5840 {
5841 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5842 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5843 else
5844 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5845 }
5846 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5847 && !fd.have_ordered)
5848 {
5849 if (fd.chunk_size == NULL)
5850 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5851 else
5852 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5853 }
5854 else
5855 {
5856 int fn_index, start_ix, next_ix;
5857
5858 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5859 == GF_OMP_FOR_KIND_FOR);
5860 if (fd.chunk_size == NULL
5861 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5862 fd.chunk_size = integer_zero_node;
5863 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5864 switch (fd.sched_kind)
5865 {
5866 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5867 fn_index = 3;
5868 break;
5869 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5870 case OMP_CLAUSE_SCHEDULE_GUIDED:
5871 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5872 && !fd.ordered
5873 && !fd.have_ordered)
5874 {
5875 fn_index = 3 + fd.sched_kind;
5876 break;
5877 }
5878 /* FALLTHRU */
5879 default:
5880 fn_index = fd.sched_kind;
5881 break;
5882 }
5883 if (!fd.ordered)
5884 fn_index += fd.have_ordered * 6;
5885 if (fd.ordered)
5886 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5887 else
5888 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5889 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5890 if (fd.iter_type == long_long_unsigned_type_node)
5891 {
5892 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5893 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5894 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5895 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5896 }
5897 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5898 (enum built_in_function) next_ix, inner_stmt);
5899 }
5900
5901 if (gimple_in_ssa_p (cfun))
5902 update_ssa (TODO_update_ssa_only_virtuals);
5903}
5904
5905/* Expand code for an OpenMP sections directive. In pseudo code, we generate
5906
5907 v = GOMP_sections_start (n);
5908 L0:
5909 switch (v)
5910 {
5911 case 0:
5912 goto L2;
5913 case 1:
5914 section 1;
5915 goto L1;
5916 case 2:
5917 ...
5918 case n:
5919 ...
5920 default:
5921 abort ();
5922 }
5923 L1:
5924 v = GOMP_sections_next ();
5925 goto L0;
5926 L2:
5927 reduction;
5928
5929 If this is a combined parallel sections, replace the call to
5930 GOMP_sections_start with call to GOMP_sections_next. */
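/* Hypothetical example (user code, not part of this file): for

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   the switch above dispatches case 1 to foo (), case 2 to bar (), case 0
   past the construct via L2, and the default case to the error path; v
   comes from GOMP_sections_start (2) initially and from
   GOMP_sections_next () on each further trip through L0.  */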
5931
5932static void
5933expand_omp_sections (struct omp_region *region)
5934{
5935 tree t, u, vin = NULL, vmain, vnext, l2;
5936 unsigned len;
5937 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5938 gimple_stmt_iterator si, switch_si;
5939 gomp_sections *sections_stmt;
5940 gimple *stmt;
5941 gomp_continue *cont;
5942 edge_iterator ei;
5943 edge e;
5944 struct omp_region *inner;
5945 unsigned i, casei;
5946 bool exit_reachable = region->cont != NULL;
5947
5948 gcc_assert (region->exit != NULL);
5949 entry_bb = region->entry;
5950 l0_bb = single_succ (entry_bb);
5951 l1_bb = region->cont;
5952 l2_bb = region->exit;
5953 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5954 l2 = gimple_block_label (l2_bb);
5955 else
5956 {
5957 /* This can happen if there are reductions. */
5958 len = EDGE_COUNT (l0_bb->succs);
5959 gcc_assert (len > 0);
5960 e = EDGE_SUCC (l0_bb, len - 1);
5961 si = gsi_last_bb (e->dest);
5962 l2 = NULL_TREE;
5963 if (gsi_end_p (si)
5964 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5965 l2 = gimple_block_label (e->dest);
5966 else
5967 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5968 {
5969 si = gsi_last_bb (e->dest);
5970 if (gsi_end_p (si)
5971 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5972 {
5973 l2 = gimple_block_label (e->dest);
5974 break;
5975 }
5976 }
5977 }
5978 if (exit_reachable)
5979 default_bb = create_empty_bb (l1_bb->prev_bb);
5980 else
5981 default_bb = create_empty_bb (l0_bb);
5982
5983 /* We will build a switch() with enough cases for all the
5984 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5985 and a default case to abort if something goes wrong. */
5986 len = EDGE_COUNT (l0_bb->succs);
5987
5988 /* Use vec::quick_push on label_vec throughout, since we know the size
5989 in advance. */
5990 auto_vec<tree> label_vec (len);
5991
5992 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5993 GIMPLE_OMP_SECTIONS statement. */
5994 si = gsi_last_bb (entry_bb);
5995 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5996 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5997 vin = gimple_omp_sections_control (sections_stmt);
5998 if (!is_combined_parallel (region))
5999 {
6000 /* If we are not inside a combined parallel+sections region,
6001 call GOMP_sections_start. */
6002 t = build_int_cst (unsigned_type_node, len - 1);
6003 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6004 stmt = gimple_build_call (u, 1, t);
6005 }
6006 else
6007 {
6008 /* Otherwise, call GOMP_sections_next. */
6009 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6010 stmt = gimple_build_call (u, 0);
6011 }
6012 gimple_call_set_lhs (stmt, vin);
6013 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6014 gsi_remove (&si, true);
6015
6016 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6017 L0_BB. */
6018 switch_si = gsi_last_bb (l0_bb);
6019 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6020 if (exit_reachable)
6021 {
6022 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6023 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6024 vmain = gimple_omp_continue_control_use (cont);
6025 vnext = gimple_omp_continue_control_def (cont);
6026 }
6027 else
6028 {
6029 vmain = vin;
6030 vnext = NULL_TREE;
6031 }
6032
6033 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6034 label_vec.quick_push (t);
6035 i = 1;
6036
6037 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6038 for (inner = region->inner, casei = 1;
6039 inner;
6040 inner = inner->next, i++, casei++)
6041 {
6042 basic_block s_entry_bb, s_exit_bb;
6043
6044 /* Skip optional reduction region. */
6045 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6046 {
6047 --i;
6048 --casei;
6049 continue;
6050 }
6051
6052 s_entry_bb = inner->entry;
6053 s_exit_bb = inner->exit;
6054
6055 t = gimple_block_label (s_entry_bb);
6056 u = build_int_cst (unsigned_type_node, casei);
6057 u = build_case_label (u, NULL, t);
6058 label_vec.quick_push (u);
6059
6060 si = gsi_last_bb (s_entry_bb);
6061 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6062 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6063 gsi_remove (&si, true);
6064 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6065
6066 if (s_exit_bb == NULL)
6067 continue;
6068
6069 si = gsi_last_bb (s_exit_bb);
6070 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6071 gsi_remove (&si, true);
6072
6073 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6074 }
6075
6076 /* Error handling code goes in DEFAULT_BB. */
6077 t = gimple_block_label (default_bb);
6078 u = build_case_label (NULL, NULL, t);
6079 make_edge (l0_bb, default_bb, 0);
6080 add_bb_to_loop (default_bb, current_loops->tree_root);
6081
6082 stmt = gimple_build_switch (vmain, u, label_vec);
6083 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6084 gsi_remove (&switch_si, true);
6085
6086 si = gsi_start_bb (default_bb);
6087 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6088 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6089
6090 if (exit_reachable)
6091 {
6092 tree bfn_decl;
6093
6094 /* Code to get the next section goes in L1_BB. */
6095 si = gsi_last_bb (l1_bb);
6096 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6097
6098 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6099 stmt = gimple_build_call (bfn_decl, 0);
6100 gimple_call_set_lhs (stmt, vnext);
6101 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6102 gsi_remove (&si, true);
6103
6104 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6105 }
6106
6107 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6108 si = gsi_last_bb (l2_bb);
6109 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6110 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6111 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6112 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6113 else
6114 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6115 stmt = gimple_build_call (t, 0);
6116 if (gimple_omp_return_lhs (gsi_stmt (si)))
6117 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6118 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6119 gsi_remove (&si, true);
6120
6121 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6122}
6123
6124/* Expand code for an OpenMP single directive. We've already expanded
6125 much of the code, here we simply place the GOMP_barrier call. */
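/* Hypothetical example (user code, not part of this file): for

     #pragma omp single
       foo ();

   earlier lowering already arranged for only one thread to execute foo ();
   all that is left here is to drop the entry/exit markers and, unless
   nowait was specified, insert the barrier built by omp_build_barrier.  */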
6126
6127static void
6128expand_omp_single (struct omp_region *region)
6129{
6130 basic_block entry_bb, exit_bb;
6131 gimple_stmt_iterator si;
6132
6133 entry_bb = region->entry;
6134 exit_bb = region->exit;
6135
6136 si = gsi_last_bb (entry_bb);
6137 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6138 gsi_remove (&si, true);
6139 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6140
6141 si = gsi_last_bb (exit_bb);
6142 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6143 {
6144 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6145 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6146 }
6147 gsi_remove (&si, true);
6148 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6149}
6150
6151/* Generic expansion for OpenMP synchronization directives: master,
6152 ordered and critical. All we need to do here is remove the entry
6153 and exit markers for REGION. */
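/* Hypothetical example (not shown in this file): a #pragma omp critical has
   by this point already been lowered to paired GOMP_critical_start and
   GOMP_critical_end calls around its body; this routine only deletes the
   GIMPLE entry and exit markers delimiting the region.  */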
6154
6155static void
6156expand_omp_synch (struct omp_region *region)
6157{
6158 basic_block entry_bb, exit_bb;
6159 gimple_stmt_iterator si;
6160
6161 entry_bb = region->entry;
6162 exit_bb = region->exit;
6163
6164 si = gsi_last_bb (entry_bb);
6165 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6166 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6167 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6168 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6169 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6170 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6171 gsi_remove (&si, true);
6172 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6173
6174 if (exit_bb)
6175 {
6176 si = gsi_last_bb (exit_bb);
6177 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6178 gsi_remove (&si, true);
6179 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6180 }
6181}
6182
6183/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6184 operation as a normal volatile load. */
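/* Hypothetical example (user code, not part of this file): for

     #pragma omp atomic read
       v = x;

   with a 4-byte x this builds a call to __atomic_load_4 (&x, MEMMODEL_RELAXED)
   (MEMMODEL_SEQ_CST if the seq_cst clause was given) and assigns the result
   to v, provided the target supplies that builtin.  */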
6185
6186static bool
6187expand_omp_atomic_load (basic_block load_bb, tree addr,
6188 tree loaded_val, int index)
6189{
6190 enum built_in_function tmpbase;
6191 gimple_stmt_iterator gsi;
6192 basic_block store_bb;
6193 location_t loc;
6194 gimple *stmt;
6195 tree decl, call, type, itype;
6196
6197 gsi = gsi_last_bb (load_bb);
6198 stmt = gsi_stmt (gsi);
6199 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6200 loc = gimple_location (stmt);
6201
6202 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6203 is smaller than word size, then expand_atomic_load assumes that the load
6204 is atomic. We could avoid the builtin entirely in this case. */
6205
6206 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6207 decl = builtin_decl_explicit (tmpbase);
6208 if (decl == NULL_TREE)
6209 return false;
6210
6211 type = TREE_TYPE (loaded_val);
6212 itype = TREE_TYPE (TREE_TYPE (decl));
6213
6214 call = build_call_expr_loc (loc, decl, 2, addr,
6215 build_int_cst (NULL,
6216 gimple_omp_atomic_seq_cst_p (stmt)
6217 ? MEMMODEL_SEQ_CST
6218 : MEMMODEL_RELAXED));
6219 if (!useless_type_conversion_p (type, itype))
6220 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6221 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6222
6223 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6224 gsi_remove (&gsi, true);
6225
6226 store_bb = single_succ (load_bb);
6227 gsi = gsi_last_bb (store_bb);
6228 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6229 gsi_remove (&gsi, true);
6230
6231 if (gimple_in_ssa_p (cfun))
6232 update_ssa (TODO_update_ssa_no_phi);
6233
6234 return true;
6235}
6236
6237/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6238 operation as a normal volatile store. */
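/* Hypothetical example (user code, not part of this file): for

     #pragma omp atomic write
       x = expr;

   this emits __atomic_store_N (&x, expr, memmodel); in the capture form,
   where the previous value of x is also needed, __atomic_exchange_N is used
   instead, but only when can_atomic_exchange_p holds for the mode.  */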
6239
6240static bool
6241expand_omp_atomic_store (basic_block load_bb, tree addr,
6242 tree loaded_val, tree stored_val, int index)
6243{
6244 enum built_in_function tmpbase;
6245 gimple_stmt_iterator gsi;
6246 basic_block store_bb = single_succ (load_bb);
6247 location_t loc;
6248 gimple *stmt;
6249 tree decl, call, type, itype;
6250 machine_mode imode;
6251 bool exchange;
6252
6253 gsi = gsi_last_bb (load_bb);
6254 stmt = gsi_stmt (gsi);
6255 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6256
6257 /* If the load value is needed, then this isn't a store but an exchange. */
6258 exchange = gimple_omp_atomic_need_value_p (stmt);
6259
6260 gsi = gsi_last_bb (store_bb);
6261 stmt = gsi_stmt (gsi);
6262 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6263 loc = gimple_location (stmt);
6264
6265 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6266 is smaller than word size, then expand_atomic_store assumes that the store
6267 is atomic. We could avoid the builtin entirely in this case. */
6268
6269 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6270 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6271 decl = builtin_decl_explicit (tmpbase);
6272 if (decl == NULL_TREE)
6273 return false;
6274
6275 type = TREE_TYPE (stored_val);
6276
6277 /* Dig out the type of the function's second argument. */
6278 itype = TREE_TYPE (decl);
6279 itype = TYPE_ARG_TYPES (itype);
6280 itype = TREE_CHAIN (itype);
6281 itype = TREE_VALUE (itype);
6282 imode = TYPE_MODE (itype);
6283
6284 if (exchange && !can_atomic_exchange_p (imode, true))
6285 return false;
6286
6287 if (!useless_type_conversion_p (itype, type))
6288 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6289 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6290 build_int_cst (NULL,
6291 gimple_omp_atomic_seq_cst_p (stmt)
6292 ? MEMMODEL_SEQ_CST
6293 : MEMMODEL_RELAXED));
6294 if (exchange)
6295 {
6296 if (!useless_type_conversion_p (type, itype))
6297 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6298 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6299 }
6300
6301 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6302 gsi_remove (&gsi, true);
6303
6304 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6305 gsi = gsi_last_bb (load_bb);
6306 gsi_remove (&gsi, true);
6307
6308 if (gimple_in_ssa_p (cfun))
6309 update_ssa (TODO_update_ssa_no_phi);
6310
6311 return true;
6312}
6313
6314/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6315 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6316 size of the data type, and thus usable to find the index of the builtin
6317 decl. Returns false if the expression is not of the proper form. */
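/* Hypothetical example (user code, not part of this file): for

     #pragma omp atomic
       x += y;

   with a 4-byte integer x this collapses to a single call to
   __atomic_fetch_add_4 (&x, y, MEMMODEL_RELAXED)
   (__atomic_add_fetch_4 when the updated value is captured, and
   MEMMODEL_SEQ_CST when seq_cst is requested) instead of a
   compare-and-swap loop.  */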
6318
6319static bool
6320expand_omp_atomic_fetch_op (basic_block load_bb,
6321 tree addr, tree loaded_val,
6322 tree stored_val, int index)
6323{
6324 enum built_in_function oldbase, newbase, tmpbase;
6325 tree decl, itype, call;
6326 tree lhs, rhs;
6327 basic_block store_bb = single_succ (load_bb);
6328 gimple_stmt_iterator gsi;
6329 gimple *stmt;
6330 location_t loc;
6331 enum tree_code code;
6332 bool need_old, need_new;
6333 machine_mode imode;
6334 bool seq_cst;
6335
6336 /* We expect to find the following sequences:
6337
6338 load_bb:
6339 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6340
6341 store_bb:
6342 val = tmp OP something; (or: something OP tmp)
6343 GIMPLE_OMP_STORE (val)
6344
6345 ???FIXME: Allow a more flexible sequence.
6346 Perhaps use data flow to pick the statements.
6347
6348 */
6349
6350 gsi = gsi_after_labels (store_bb);
6351 stmt = gsi_stmt (gsi);
6352 loc = gimple_location (stmt);
6353 if (!is_gimple_assign (stmt))
6354 return false;
6355 gsi_next (&gsi);
6356 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6357 return false;
6358 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6359 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6360 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6361 gcc_checking_assert (!need_old || !need_new);
6362
6363 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6364 return false;
6365
6366 /* Check for one of the supported fetch-op operations. */
6367 code = gimple_assign_rhs_code (stmt);
6368 switch (code)
6369 {
6370 case PLUS_EXPR:
6371 case POINTER_PLUS_EXPR:
6372 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6373 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6374 break;
6375 case MINUS_EXPR:
6376 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6377 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6378 break;
6379 case BIT_AND_EXPR:
6380 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6381 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6382 break;
6383 case BIT_IOR_EXPR:
6384 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6385 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6386 break;
6387 case BIT_XOR_EXPR:
6388 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6389 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6390 break;
6391 default:
6392 return false;
6393 }
6394
6395 /* Make sure the expression is of the proper form. */
6396 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6397 rhs = gimple_assign_rhs2 (stmt);
6398 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6399 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6400 rhs = gimple_assign_rhs1 (stmt);
6401 else
6402 return false;
6403
6404 tmpbase = ((enum built_in_function)
6405 ((need_new ? newbase : oldbase) + index + 1));
6406 decl = builtin_decl_explicit (tmpbase);
6407 if (decl == NULL_TREE)
6408 return false;
6409 itype = TREE_TYPE (TREE_TYPE (decl));
6410 imode = TYPE_MODE (itype);
6411
6412 /* We could test all of the various optabs involved, but the fact of the
6413 matter is that (with the exception of i486 vs i586 and xadd) all targets
6414 that support any atomic operation optab also implement compare-and-swap.
6415 Let optabs.c take care of expanding any compare-and-swap loop. */
6416 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6417 return false;
6418
6419 gsi = gsi_last_bb (load_bb);
6420 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6421
6422 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6423 It only requires that the operation happen atomically. Thus we can
6424 use the RELAXED memory model. */
6425 call = build_call_expr_loc (loc, decl, 3, addr,
6426 fold_convert_loc (loc, itype, rhs),
6427 build_int_cst (NULL,
6428 seq_cst ? MEMMODEL_SEQ_CST
6429 : MEMMODEL_RELAXED));
6430
6431 if (need_old || need_new)
6432 {
6433 lhs = need_old ? loaded_val : stored_val;
6434 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6435 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6436 }
6437 else
6438 call = fold_convert_loc (loc, void_type_node, call);
6439 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6440 gsi_remove (&gsi, true);
6441
6442 gsi = gsi_last_bb (store_bb);
6443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6444 gsi_remove (&gsi, true);
6445 gsi = gsi_last_bb (store_bb);
6446 stmt = gsi_stmt (gsi);
6447 gsi_remove (&gsi, true);
6448
6449 if (gimple_in_ssa_p (cfun))
6450 {
6451 release_defs (stmt);
6452 update_ssa (TODO_update_ssa_no_phi);
6453 }
6454
6455 return true;
6456}
6457
6458/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6459
6460 oldval = *addr;
6461 repeat:
6462 newval = rhs; // with oldval replacing *addr in rhs
6463 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6464 if (oldval != newval)
6465 goto repeat;
6466
6467 INDEX is log2 of the size of the data type, and thus usable to find the
6468 index of the builtin decl. */
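/* Hypothetical example (user code, not part of this file): an update that no
   fetch-op builtin covers, such as

     #pragma omp atomic
       x = y / x;

   is expanded into the loop above using the __sync_val_compare_and_swap_N
   builtin selected via INDEX; float or double operands are first
   VIEW_CONVERTed to an integer of the same size so that the compare-and-swap
   and the final equality test are done on integers.  */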
6469
6470static bool
6471expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6472 tree addr, tree loaded_val, tree stored_val,
6473 int index)
6474{
6475 tree loadedi, storedi, initial, new_storedi, old_vali;
6476 tree type, itype, cmpxchg, iaddr;
6477 gimple_stmt_iterator si;
6478 basic_block loop_header = single_succ (load_bb);
6479 gimple *phi, *stmt;
6480 edge e;
6481 enum built_in_function fncode;
6482
6483 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6484 order to use the RELAXED memory model effectively. */
6485 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6486 + index + 1);
6487 cmpxchg = builtin_decl_explicit (fncode);
6488 if (cmpxchg == NULL_TREE)
6489 return false;
6490 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6491 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6492
6493 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6494 || !can_atomic_load_p (TYPE_MODE (itype)))
6495 return false;
6496
6497 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6498 si = gsi_last_bb (load_bb);
6499 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6500
6501 /* For floating-point values, we'll need to view-convert them to integers
6502 so that we can perform the atomic compare and swap. Simplify the
6503 following code by always setting up the "i"ntegral variables. */
6504 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6505 {
6506 tree iaddr_val;
6507
6508 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6509 true));
6510 iaddr_val
6511 = force_gimple_operand_gsi (&si,
6512 fold_convert (TREE_TYPE (iaddr), addr),
6513 false, NULL_TREE, true, GSI_SAME_STMT);
6514 stmt = gimple_build_assign (iaddr, iaddr_val);
6515 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6516 loadedi = create_tmp_var (itype);
6517 if (gimple_in_ssa_p (cfun))
6518 loadedi = make_ssa_name (loadedi);
6519 }
6520 else
6521 {
6522 iaddr = addr;
6523 loadedi = loaded_val;
6524 }
6525
6526 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6527 tree loaddecl = builtin_decl_explicit (fncode);
6528 if (loaddecl)
6529 initial
6530 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6531 build_call_expr (loaddecl, 2, iaddr,
6532 build_int_cst (NULL_TREE,
6533 MEMMODEL_RELAXED)));
6534 else
6535 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6536 build_int_cst (TREE_TYPE (iaddr), 0));
6537
6538 initial
6539 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6540 GSI_SAME_STMT);
6541
6542 /* Move the value to the LOADEDI temporary. */
6543 if (gimple_in_ssa_p (cfun))
6544 {
6545 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6546 phi = create_phi_node (loadedi, loop_header);
6547 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6548 initial);
6549 }
6550 else
6551 gsi_insert_before (&si,
6552 gimple_build_assign (loadedi, initial),
6553 GSI_SAME_STMT);
6554 if (loadedi != loaded_val)
6555 {
6556 gimple_stmt_iterator gsi2;
6557 tree x;
6558
6559 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6560 gsi2 = gsi_start_bb (loop_header);
6561 if (gimple_in_ssa_p (cfun))
6562 {
6563 gassign *stmt;
6564 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6565 true, GSI_SAME_STMT);
6566 stmt = gimple_build_assign (loaded_val, x);
6567 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6568 }
6569 else
6570 {
6571 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6572 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6573 true, GSI_SAME_STMT);
6574 }
6575 }
6576 gsi_remove (&si, true);
6577
6578 si = gsi_last_bb (store_bb);
6579 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6580
6581 if (iaddr == addr)
6582 storedi = stored_val;
6583 else
6584 storedi
6585 = force_gimple_operand_gsi (&si,
6586 build1 (VIEW_CONVERT_EXPR, itype,
6587 stored_val), true, NULL_TREE, true,
6588 GSI_SAME_STMT);
6589
6590 /* Build the compare&swap statement. */
6591 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6592 new_storedi = force_gimple_operand_gsi (&si,
6593 fold_convert (TREE_TYPE (loadedi),
6594 new_storedi),
6595 true, NULL_TREE,
6596 true, GSI_SAME_STMT);
6597
6598 if (gimple_in_ssa_p (cfun))
6599 old_vali = loadedi;
6600 else
6601 {
6602 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6603 stmt = gimple_build_assign (old_vali, loadedi);
6604 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6605
6606 stmt = gimple_build_assign (loadedi, new_storedi);
6607 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6608 }
6609
6610 /* Note that we always perform the comparison as an integer, even for
6611 floating point. This allows the atomic operation to properly
6612 succeed even with NaNs and -0.0. */
6613 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6614 stmt = gimple_build_cond_empty (ne);
6615 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6616
6617 /* Update cfg. */
6618 e = single_succ_edge (store_bb);
6619 e->flags &= ~EDGE_FALLTHRU;
6620 e->flags |= EDGE_FALSE_VALUE;
6621 /* Expect no looping. */
6622 e->probability = profile_probability::guessed_always ();
6623
6624 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6625 e->probability = profile_probability::guessed_never ();
6626
6627 /* Copy the new value to loadedi (we already did that before the condition
6628 if we are not in SSA). */
6629 if (gimple_in_ssa_p (cfun))
6630 {
6631 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6632 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6633 }
6634
6635 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6636 gsi_remove (&si, true);
6637
6638 struct loop *loop = alloc_loop ();
6639 loop->header = loop_header;
6640 loop->latch = store_bb;
6641 add_loop (loop, loop_header->loop_father);
6642
6643 if (gimple_in_ssa_p (cfun))
6644 update_ssa (TODO_update_ssa_no_phi);
6645
6646 return true;
6647}
6648
6649/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6650
6651 GOMP_atomic_start ();
6652 *addr = rhs;
6653 GOMP_atomic_end ();
6654
6655 The result is not globally atomic, but works so long as all parallel
6656 references are within #pragma omp atomic directives. According to
6657 responses received from omp@openmp.org, this appears to be within spec.
6658 Which makes sense, since that's how several other compilers handle
6659 this situation as well.
6660 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6661 expanding. STORED_VAL is the operand of the matching
6662 GIMPLE_OMP_ATOMIC_STORE.
6663
6664 We replace
6665 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6666 loaded_val = *addr;
6667
6668 and replace
6669 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6670 *addr = stored_val;
6671*/
6672
6673static bool
6674expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6675 tree addr, tree loaded_val, tree stored_val)
6676{
6677 gimple_stmt_iterator si;
6678 gassign *stmt;
6679 tree t;
6680
6681 si = gsi_last_bb (load_bb);
6682 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6683
6684 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6685 t = build_call_expr (t, 0);
6686 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6687
6688 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6689 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6690 gsi_remove (&si, true);
6691
6692 si = gsi_last_bb (store_bb);
6693 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6694
6695 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6696 stored_val);
6697 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6698
6699 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6700 t = build_call_expr (t, 0);
6701 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6702 gsi_remove (&si, true);
6703
6704 if (gimple_in_ssa_p (cfun))
6705 update_ssa (TODO_update_ssa_no_phi);
6706 return true;
6707}
6708
 6709/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
 6710 using expand_omp_atomic_fetch_op.  If that fails, we try to
6711 call expand_omp_atomic_pipeline, and if it fails too, the
6712 ultimate fallback is wrapping the operation in a mutex
6713 (expand_omp_atomic_mutex). REGION is the atomic region built
6714 by build_omp_regions_1(). */
6715
6716static void
6717expand_omp_atomic (struct omp_region *region)
6718{
6719 basic_block load_bb = region->entry, store_bb = region->exit;
6720 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6721 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6722 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6723 tree addr = gimple_omp_atomic_load_rhs (load);
6724 tree stored_val = gimple_omp_atomic_store_val (store);
6725 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6726 HOST_WIDE_INT index;
6727
6728 /* Make sure the type is one of the supported sizes. */
6729 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6730 index = exact_log2 (index);
6731 if (index >= 0 && index <= 4)
6732 {
6733 unsigned int align = TYPE_ALIGN_UNIT (type);
6734
6735 /* __sync builtins require strict data alignment. */
6736 if (exact_log2 (align) >= index)
6737 {
6738 /* Atomic load. */
6739 if (loaded_val == stored_val
6740 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6741 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6742 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6743 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6744 return;
6745
6746 /* Atomic store. */
6747 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6748 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6749 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6750 && store_bb == single_succ (load_bb)
6751 && first_stmt (store_bb) == store
6752 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6753 stored_val, index))
6754 return;
6755
6756 /* When possible, use specialized atomic update functions. */
6757 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6758 && store_bb == single_succ (load_bb)
6759 && expand_omp_atomic_fetch_op (load_bb, addr,
6760 loaded_val, stored_val, index))
6761 return;
6762
6763 /* If we don't have specialized __sync builtins, try and implement
6764 as a compare and swap loop. */
6765 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6766 loaded_val, stored_val, index))
6767 return;
6768 }
6769 }
6770
6771 /* The ultimate fallback is wrapping the operation in a mutex. */
6772 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6773}
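
/* To summarize the dispatch above (descriptive only): a type whose size is
   1, 2, 4, 8 or 16 bytes maps to INDEX 0..4 via exact_log2, and for a
   sufficiently aligned operand the expansions are tried in this order:

     1. a direct atomic load or store (expand_omp_atomic_load/_store),
     2. a specialized fetch-and-op builtin (expand_omp_atomic_fetch_op),
     3. a compare-and-swap loop (expand_omp_atomic_pipeline),
     4. the GOMP_atomic_start/GOMP_atomic_end mutex (expand_omp_atomic_mutex),

   the last of which always succeeds.  */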
6774
6775/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6776 at REGION_EXIT. */
6777
6778static void
6779mark_loops_in_oacc_kernels_region (basic_block region_entry,
6780 basic_block region_exit)
6781{
6782 struct loop *outer = region_entry->loop_father;
6783 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6784
6785 /* Don't parallelize the kernels region if it contains more than one outer
6786 loop. */
6787 unsigned int nr_outer_loops = 0;
6788 struct loop *single_outer = NULL;
6789 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6790 {
6791 gcc_assert (loop_outer (loop) == outer);
6792
6793 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6794 continue;
6795
6796 if (region_exit != NULL
6797 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6798 continue;
6799
6800 nr_outer_loops++;
6801 single_outer = loop;
6802 }
6803 if (nr_outer_loops != 1)
6804 return;
6805
6806 for (struct loop *loop = single_outer->inner;
6807 loop != NULL;
6808 loop = loop->inner)
6809 if (loop->next)
6810 return;
6811
6812 /* Mark the loops in the region. */
6813 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6814 loop->in_oacc_kernels_region = true;
6815}
6816
 6817/* Types used to pass grid and workgroup sizes to kernel invocation.  */
6818
6819struct GTY(()) grid_launch_attributes_trees
6820{
6821 tree kernel_dim_array_type;
6822 tree kernel_lattrs_dimnum_decl;
6823 tree kernel_lattrs_grid_decl;
6824 tree kernel_lattrs_group_decl;
6825 tree kernel_launch_attributes_type;
6826};
6827
6828static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6829
6830/* Create types used to pass kernel launch attributes to target. */
6831
6832static void
6833grid_create_kernel_launch_attr_types (void)
6834{
6835 if (grid_attr_trees)
6836 return;
6837 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6838
6839 tree dim_arr_index_type
6840 = build_index_type (build_int_cst (integer_type_node, 2));
6841 grid_attr_trees->kernel_dim_array_type
6842 = build_array_type (uint32_type_node, dim_arr_index_type);
6843
6844 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6845 grid_attr_trees->kernel_lattrs_dimnum_decl
6846 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6847 uint32_type_node);
6848 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6849
6850 grid_attr_trees->kernel_lattrs_grid_decl
6851 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6852 grid_attr_trees->kernel_dim_array_type);
6853 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6854 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6855 grid_attr_trees->kernel_lattrs_group_decl
6856 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6857 grid_attr_trees->kernel_dim_array_type);
6858 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6859 = grid_attr_trees->kernel_lattrs_grid_decl;
6860 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6861 "__gomp_kernel_launch_attributes",
6862 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6863}
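
/* The record type built above corresponds, roughly, to the following C
   declaration (shown only to illustrate the field order; the real type is
   assembled with finish_builtin_struct):

     struct __gomp_kernel_launch_attributes
     {
       unsigned int ndim;
       unsigned int grid_size[3];
       unsigned int group_size[3];
     };  */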
6864
6865/* Insert before the current statement in GSI a store of VALUE to INDEX of
6866 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6867 of type uint32_type_node. */
6868
6869static void
6870grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6871 tree fld_decl, int index, tree value)
6872{
6873 tree ref = build4 (ARRAY_REF, uint32_type_node,
6874 build3 (COMPONENT_REF,
6875 grid_attr_trees->kernel_dim_array_type,
6876 range_var, fld_decl, NULL_TREE),
6877 build_int_cst (integer_type_node, index),
6878 NULL_TREE, NULL_TREE);
6879 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6880}
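
/* In effect, a (hypothetical) call such as

     grid_insert_store_range_dim (gsi, attrs, kernel_lattrs_grid_decl, 0, v);

   emits the store  attrs.grid_size[0] = v;  before the statement at *GSI.  */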
6881
6882/* Return a tree representation of a pointer to a structure with grid and
6883 work-group size information. Statements filling that information will be
 6884 inserted before GSI; TGT_STMT is the target statement which has the
6885 necessary information in it. */
6886
6887static tree
6888grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6889 gomp_target *tgt_stmt)
6890{
6891 grid_create_kernel_launch_attr_types ();
6892 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6893 "__kernel_launch_attrs");
6894
6895 unsigned max_dim = 0;
6896 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6897 clause;
6898 clause = OMP_CLAUSE_CHAIN (clause))
6899 {
6900 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6901 continue;
6902
6903 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6904 max_dim = MAX (dim, max_dim);
6905
6906 grid_insert_store_range_dim (gsi, lattrs,
6907 grid_attr_trees->kernel_lattrs_grid_decl,
6908 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6909 grid_insert_store_range_dim (gsi, lattrs,
6910 grid_attr_trees->kernel_lattrs_group_decl,
6911 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6912 }
6913
6914 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6915 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6916 gcc_checking_assert (max_dim <= 2);
6917 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6918 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6919 GSI_SAME_STMT);
6920 TREE_ADDRESSABLE (lattrs) = 1;
6921 return build_fold_addr_expr (lattrs);
6922}
6923
6924/* Build target argument identifier from the DEVICE identifier, value
6925 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6926
6927static tree
 6928get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6929{
6930 tree t = build_int_cst (integer_type_node, device);
 6931 if (subsequent_param)
6932 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6933 build_int_cst (integer_type_node,
6934 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6935 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6936 build_int_cst (integer_type_node, id));
6937 return t;
6938}
6939
 6940/* Like above but return it in a type that can be directly stored as an element
6941 of the argument array. */
6942
6943static tree
 6944get_target_argument_identifier (int device, bool subsequent_param, int id)
6945{
 6946 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6947 return fold_convert (ptr_type_node, t);
6948}
6949
6950/* Return a target argument consisting of DEVICE identifier, value identifier
6951 ID, and the actual VALUE. */
6952
6953static tree
6954get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6955 tree value)
6956{
6957 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6958 fold_convert (integer_type_node, value),
6959 build_int_cst (unsigned_type_node,
6960 GOMP_TARGET_ARG_VALUE_SHIFT));
6961 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6962 get_target_argument_identifier_1 (device, false, id));
6963 t = fold_convert (ptr_type_node, t);
6964 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6965}
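
/* A rough picture of the encoding used by the helpers above (see
   gomp-constants.h for the authoritative bit layout): DEVICE occupies the
   low bits, GOMP_TARGET_ARG_SUBSEQUENT_PARAM flags identifiers whose value
   is passed as the next array element, the ID constant is OR-ed in above
   that, and an embedded VALUE is shifted left by
   GOMP_TARGET_ARG_VALUE_SHIFT bits.  */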
6966
6967/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
 6968 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it,
6969 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6970 arguments. */
6971
6972static void
6973push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6974 int id, tree value, vec <tree> *args)
6975{
6976 if (tree_fits_shwi_p (value)
6977 && tree_to_shwi (value) > -(1 << 15)
6978 && tree_to_shwi (value) < (1 << 15))
6979 args->quick_push (get_target_argument_value (gsi, device, id, value));
6980 else
6981 {
6982 args->quick_push (get_target_argument_identifier (device, true, id));
6983 value = fold_convert (ptr_type_node, value);
6984 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6985 GSI_SAME_STMT);
6986 args->quick_push (value);
6987 }
6988}
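
/* So, for example, a small constant num_teams expression becomes a single
   encoded array element, whereas a run-time expression becomes two
   elements: an identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set,
   followed by the value itself converted to a pointer.  */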
6989
 6990 /* Create an array of arguments that is then passed to GOMP_target.  */
6991
6992static tree
6993get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6994{
6995 auto_vec <tree, 6> args;
6996 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6997 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6998 if (c)
6999 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7000 else
7001 t = integer_minus_one_node;
7002 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7003 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7004
7005 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7006 if (c)
7007 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7008 else
7009 t = integer_minus_one_node;
7010 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7011 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7012 &args);
7013
7014 /* Add HSA-specific grid sizes, if available. */
7015 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7016 OMP_CLAUSE__GRIDDIM_))
7017 {
7018 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7019 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7020 args.quick_push (t);
7021 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7022 }
7023
7024 /* Produce more, perhaps device specific, arguments here. */
7025
7026 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7027 args.length () + 1),
7028 ".omp_target_args");
7029 for (unsigned i = 0; i < args.length (); i++)
7030 {
7031 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7032 build_int_cst (integer_type_node, i),
7033 NULL_TREE, NULL_TREE);
7034 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7035 GSI_SAME_STMT);
7036 }
7037 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7038 build_int_cst (integer_type_node, args.length ()),
7039 NULL_TREE, NULL_TREE);
7040 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7041 GSI_SAME_STMT);
7042 TREE_ADDRESSABLE (argarray) = 1;
7043 return build_fold_addr_expr (argarray);
7044}
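
/* The array built above therefore has roughly the following shape (a
   sketch; the HSA entries are present only when a _griddim_ clause was
   found):

     .omp_target_args[0] = <encoded num_teams argument>;
     .omp_target_args[1] = <encoded thread_limit argument>;
     .omp_target_args[2] = <HSA kernel attributes identifier>;    (optional)
     .omp_target_args[3] = &__kernel_launch_attrs;                (optional)
     .omp_target_args[last] = NULL;      (terminating null pointer)  */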
7045
7046/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7047
7048static void
7049expand_omp_target (struct omp_region *region)
7050{
7051 basic_block entry_bb, exit_bb, new_bb;
7052 struct function *child_cfun;
7053 tree child_fn, block, t;
7054 gimple_stmt_iterator gsi;
7055 gomp_target *entry_stmt;
7056 gimple *stmt;
7057 edge e;
7058 bool offloaded, data_region;
7059
7060 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7061 new_bb = region->entry;
7062
7063 offloaded = is_gimple_omp_offloaded (entry_stmt);
7064 switch (gimple_omp_target_kind (entry_stmt))
7065 {
7066 case GF_OMP_TARGET_KIND_REGION:
7067 case GF_OMP_TARGET_KIND_UPDATE:
7068 case GF_OMP_TARGET_KIND_ENTER_DATA:
7069 case GF_OMP_TARGET_KIND_EXIT_DATA:
7070 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7071 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7072 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7073 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7074 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7075 data_region = false;
7076 break;
7077 case GF_OMP_TARGET_KIND_DATA:
7078 case GF_OMP_TARGET_KIND_OACC_DATA:
7079 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7080 data_region = true;
7081 break;
7082 default:
7083 gcc_unreachable ();
7084 }
7085
7086 child_fn = NULL_TREE;
7087 child_cfun = NULL;
7088 if (offloaded)
7089 {
7090 child_fn = gimple_omp_target_child_fn (entry_stmt);
7091 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7092 }
7093
7094 /* Supported by expand_omp_taskreg, but not here. */
7095 if (child_cfun != NULL)
7096 gcc_checking_assert (!child_cfun->cfg);
7097 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7098
7099 entry_bb = region->entry;
7100 exit_bb = region->exit;
7101
7102 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7103 {
7104 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7105
7106 /* Further down, both OpenACC kernels and OpenACC parallel constructs
 7107 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7108 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7109 DECL_ATTRIBUTES (child_fn)
7110 = tree_cons (get_identifier ("oacc kernels"),
7111 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7112 }
7113
7114 if (offloaded)
7115 {
7116 unsigned srcidx, dstidx, num;
7117
7118 /* If the offloading region needs data sent from the parent
7119 function, then the very first statement (except possible
7120 tree profile counter updates) of the offloading body
7121 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7122 &.OMP_DATA_O is passed as an argument to the child function,
7123 we need to replace it with the argument as seen by the child
7124 function.
7125
7126 In most cases, this will end up being the identity assignment
7127 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7128 a function call that has been inlined, the original PARM_DECL
7129 .OMP_DATA_I may have been converted into a different local
 7130 variable, in which case we need to keep the assignment.  */
7131 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7132 if (data_arg)
7133 {
7134 basic_block entry_succ_bb = single_succ (entry_bb);
7135 gimple_stmt_iterator gsi;
7136 tree arg;
7137 gimple *tgtcopy_stmt = NULL;
7138 tree sender = TREE_VEC_ELT (data_arg, 0);
7139
7140 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7141 {
7142 gcc_assert (!gsi_end_p (gsi));
7143 stmt = gsi_stmt (gsi);
7144 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7145 continue;
7146
7147 if (gimple_num_ops (stmt) == 2)
7148 {
7149 tree arg = gimple_assign_rhs1 (stmt);
7150
7151 /* We're ignoring the subcode because we're
7152 effectively doing a STRIP_NOPS. */
7153
7154 if (TREE_CODE (arg) == ADDR_EXPR
7155 && TREE_OPERAND (arg, 0) == sender)
7156 {
7157 tgtcopy_stmt = stmt;
7158 break;
7159 }
7160 }
7161 }
7162
7163 gcc_assert (tgtcopy_stmt != NULL);
7164 arg = DECL_ARGUMENTS (child_fn);
7165
7166 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7167 gsi_remove (&gsi, true);
7168 }
7169
7170 /* Declare local variables needed in CHILD_CFUN. */
7171 block = DECL_INITIAL (child_fn);
7172 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7173 /* The gimplifier could record temporaries in the offloading block
7174 rather than in containing function's local_decls chain,
7175 which would mean cgraph missed finalizing them. Do it now. */
7176 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7177 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7178 varpool_node::finalize_decl (t);
7179 DECL_SAVED_TREE (child_fn) = NULL;
7180 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7181 gimple_set_body (child_fn, NULL);
7182 TREE_USED (block) = 1;
7183
7184 /* Reset DECL_CONTEXT on function arguments. */
7185 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7186 DECL_CONTEXT (t) = child_fn;
7187
7188 /* Split ENTRY_BB at GIMPLE_*,
7189 so that it can be moved to the child function. */
7190 gsi = gsi_last_bb (entry_bb);
7191 stmt = gsi_stmt (gsi);
7192 gcc_assert (stmt
7193 && gimple_code (stmt) == gimple_code (entry_stmt));
7194 e = split_block (entry_bb, stmt);
7195 gsi_remove (&gsi, true);
7196 entry_bb = e->dest;
7197 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7198
7199 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7200 if (exit_bb)
7201 {
7202 gsi = gsi_last_bb (exit_bb);
7203 gcc_assert (!gsi_end_p (gsi)
7204 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7205 stmt = gimple_build_return (NULL);
7206 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7207 gsi_remove (&gsi, true);
7208 }
7209
7210 /* Make sure to generate early debug for the function before
7211 outlining anything. */
7212 if (! gimple_in_ssa_p (cfun))
7213 (*debug_hooks->early_global_decl) (cfun->decl);
7214
7215 /* Move the offloading region into CHILD_CFUN. */
7216
7217 block = gimple_block (entry_stmt);
7218
7219 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7220 if (exit_bb)
7221 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7222 /* When the OMP expansion process cannot guarantee an up-to-date
 7223 loop tree, arrange for the child function to fix up loops.  */
7224 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7225 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7226
7227 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7228 num = vec_safe_length (child_cfun->local_decls);
7229 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7230 {
7231 t = (*child_cfun->local_decls)[srcidx];
7232 if (DECL_CONTEXT (t) == cfun->decl)
7233 continue;
7234 if (srcidx != dstidx)
7235 (*child_cfun->local_decls)[dstidx] = t;
7236 dstidx++;
7237 }
7238 if (dstidx != num)
7239 vec_safe_truncate (child_cfun->local_decls, dstidx);
7240
7241 /* Inform the callgraph about the new function. */
7242 child_cfun->curr_properties = cfun->curr_properties;
7243 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7244 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7245 cgraph_node *node = cgraph_node::get_create (child_fn);
7246 node->parallelized_function = 1;
7247 cgraph_node::add_new_function (child_fn, true);
7248
7249 /* Add the new function to the offload table. */
7250 if (ENABLE_OFFLOADING)
7251 vec_safe_push (offload_funcs, child_fn);
7252
7253 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7254 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7255
7256 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7257 fixed in a following pass. */
7258 push_cfun (child_cfun);
7259 if (need_asm)
 7260 assign_assembler_name_if_needed (child_fn);
7261 cgraph_edge::rebuild_edges ();
7262
7263 /* Some EH regions might become dead, see PR34608. If
7264 pass_cleanup_cfg isn't the first pass to happen with the
7265 new child, these dead EH edges might cause problems.
7266 Clean them up now. */
7267 if (flag_exceptions)
7268 {
7269 basic_block bb;
7270 bool changed = false;
7271
7272 FOR_EACH_BB_FN (bb, cfun)
7273 changed |= gimple_purge_dead_eh_edges (bb);
7274 if (changed)
7275 cleanup_tree_cfg ();
7276 }
7277 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7278 verify_loop_structure ();
7279 pop_cfun ();
7280
7281 if (dump_file && !gimple_in_ssa_p (cfun))
7282 {
7283 omp_any_child_fn_dumped = true;
7284 dump_function_header (dump_file, child_fn, dump_flags);
7285 dump_function_to_file (child_fn, dump_file, dump_flags);
7286 }
7287 }
7288
7289 /* Emit a library call to launch the offloading region, or do data
7290 transfers. */
7291 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7292 enum built_in_function start_ix;
7293 location_t clause_loc;
7294 unsigned int flags_i = 0;
7295
7296 switch (gimple_omp_target_kind (entry_stmt))
7297 {
7298 case GF_OMP_TARGET_KIND_REGION:
7299 start_ix = BUILT_IN_GOMP_TARGET;
7300 break;
7301 case GF_OMP_TARGET_KIND_DATA:
7302 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7303 break;
7304 case GF_OMP_TARGET_KIND_UPDATE:
7305 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7306 break;
7307 case GF_OMP_TARGET_KIND_ENTER_DATA:
7308 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7309 break;
7310 case GF_OMP_TARGET_KIND_EXIT_DATA:
7311 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7312 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7313 break;
7314 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7315 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7316 start_ix = BUILT_IN_GOACC_PARALLEL;
7317 break;
7318 case GF_OMP_TARGET_KIND_OACC_DATA:
7319 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7320 start_ix = BUILT_IN_GOACC_DATA_START;
7321 break;
7322 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7323 start_ix = BUILT_IN_GOACC_UPDATE;
7324 break;
7325 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7326 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7327 break;
7328 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7329 start_ix = BUILT_IN_GOACC_DECLARE;
7330 break;
7331 default:
7332 gcc_unreachable ();
7333 }
7334
7335 clauses = gimple_omp_target_clauses (entry_stmt);
7336
7337 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7338 library choose) and there is no conditional. */
7339 cond = NULL_TREE;
7340 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7341
7342 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7343 if (c)
7344 cond = OMP_CLAUSE_IF_EXPR (c);
7345
7346 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7347 if (c)
7348 {
7349 /* Even if we pass it to all library function calls, it is currently only
7350 defined/used for the OpenMP target ones. */
7351 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7352 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7353 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7354 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7355
7356 device = OMP_CLAUSE_DEVICE_ID (c);
7357 clause_loc = OMP_CLAUSE_LOCATION (c);
7358 }
7359 else
7360 clause_loc = gimple_location (entry_stmt);
7361
7362 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7363 if (c)
7364 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7365
7366 /* Ensure 'device' is of the correct type. */
7367 device = fold_convert_loc (clause_loc, integer_type_node, device);
7368
7369 /* If we found the clause 'if (cond)', build
7370 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7371 if (cond)
7372 {
7373 cond = gimple_boolify (cond);
7374
7375 basic_block cond_bb, then_bb, else_bb;
7376 edge e;
7377 tree tmp_var;
7378
7379 tmp_var = create_tmp_var (TREE_TYPE (device));
7380 if (offloaded)
7381 e = split_block_after_labels (new_bb);
7382 else
7383 {
7384 gsi = gsi_last_bb (new_bb);
7385 gsi_prev (&gsi);
7386 e = split_block (new_bb, gsi_stmt (gsi));
7387 }
7388 cond_bb = e->src;
7389 new_bb = e->dest;
7390 remove_edge (e);
7391
7392 then_bb = create_empty_bb (cond_bb);
7393 else_bb = create_empty_bb (then_bb);
7394 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7395 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7396
7397 stmt = gimple_build_cond_empty (cond);
7398 gsi = gsi_last_bb (cond_bb);
7399 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7400
7401 gsi = gsi_start_bb (then_bb);
7402 stmt = gimple_build_assign (tmp_var, device);
7403 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7404
7405 gsi = gsi_start_bb (else_bb);
7406 stmt = gimple_build_assign (tmp_var,
7407 build_int_cst (integer_type_node,
7408 GOMP_DEVICE_HOST_FALLBACK));
7409 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7410
7411 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7412 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7413 add_bb_to_loop (then_bb, cond_bb->loop_father);
7414 add_bb_to_loop (else_bb, cond_bb->loop_father);
7415 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7416 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7417
7418 device = tmp_var;
7419 gsi = gsi_last_bb (new_bb);
7420 }
7421 else
7422 {
7423 gsi = gsi_last_bb (new_bb);
7424 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7425 true, GSI_SAME_STMT);
7426 }
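
  /* At this point DEVICE holds, in effect,

       cond ? <device clause value> : GOMP_DEVICE_HOST_FALLBACK

     when an if() clause was present, and otherwise the gimplified device
     clause value or the GOMP_DEVICE_ICV default.  */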
7427
7428 t = gimple_omp_target_data_arg (entry_stmt);
7429 if (t == NULL)
7430 {
7431 t1 = size_zero_node;
7432 t2 = build_zero_cst (ptr_type_node);
7433 t3 = t2;
7434 t4 = t2;
7435 }
7436 else
7437 {
7438 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7439 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7440 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7441 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7442 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7443 }
7444
7445 gimple *g;
7446 bool tagging = false;
 7447 /* The maximum number of arguments used by any start_ix, without varargs.  */
7448 auto_vec<tree, 11> args;
7449 args.quick_push (device);
7450 if (offloaded)
7451 args.quick_push (build_fold_addr_expr (child_fn));
7452 args.quick_push (t1);
7453 args.quick_push (t2);
7454 args.quick_push (t3);
7455 args.quick_push (t4);
7456 switch (start_ix)
7457 {
7458 case BUILT_IN_GOACC_DATA_START:
7459 case BUILT_IN_GOACC_DECLARE:
7460 case BUILT_IN_GOMP_TARGET_DATA:
7461 break;
7462 case BUILT_IN_GOMP_TARGET:
7463 case BUILT_IN_GOMP_TARGET_UPDATE:
7464 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7465 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7466 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7467 if (c)
7468 depend = OMP_CLAUSE_DECL (c);
7469 else
7470 depend = build_int_cst (ptr_type_node, 0);
7471 args.quick_push (depend);
7472 if (start_ix == BUILT_IN_GOMP_TARGET)
7473 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7474 break;
7475 case BUILT_IN_GOACC_PARALLEL:
7476 oacc_set_fn_attrib (child_fn, clauses, &args);
7477 tagging = true;
7478 /* FALLTHRU */
7479 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7480 case BUILT_IN_GOACC_UPDATE:
7481 {
7482 tree t_async = NULL_TREE;
7483
7484 /* If present, use the value specified by the respective
7485 clause, making sure that is of the correct type. */
7486 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7487 if (c)
7488 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7489 integer_type_node,
7490 OMP_CLAUSE_ASYNC_EXPR (c));
7491 else if (!tagging)
7492 /* Default values for t_async. */
7493 t_async = fold_convert_loc (gimple_location (entry_stmt),
7494 integer_type_node,
7495 build_int_cst (integer_type_node,
7496 GOMP_ASYNC_SYNC));
7497 if (tagging && t_async)
7498 {
7499 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7500
7501 if (TREE_CODE (t_async) == INTEGER_CST)
7502 {
7503 /* See if we can pack the async arg in to the tag's
7504 operand. */
7505 i_async = TREE_INT_CST_LOW (t_async);
7506 if (i_async < GOMP_LAUNCH_OP_MAX)
7507 t_async = NULL_TREE;
7508 else
7509 i_async = GOMP_LAUNCH_OP_MAX;
7510 }
7511 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7512 i_async));
7513 }
7514 if (t_async)
7515 args.safe_push (t_async);
7516
7517 /* Save the argument index, and ... */
7518 unsigned t_wait_idx = args.length ();
7519 unsigned num_waits = 0;
7520 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7521 if (!tagging || c)
7522 /* ... push a placeholder. */
7523 args.safe_push (integer_zero_node);
7524
7525 for (; c; c = OMP_CLAUSE_CHAIN (c))
7526 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7527 {
7528 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7529 integer_type_node,
7530 OMP_CLAUSE_WAIT_EXPR (c)));
7531 num_waits++;
7532 }
7533
7534 if (!tagging || num_waits)
7535 {
7536 tree len;
7537
7538 /* Now that we know the number, update the placeholder. */
7539 if (tagging)
7540 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7541 else
7542 len = build_int_cst (integer_type_node, num_waits);
7543 len = fold_convert_loc (gimple_location (entry_stmt),
7544 unsigned_type_node, len);
7545 args[t_wait_idx] = len;
7546 }
7547 }
7548 break;
7549 default:
7550 gcc_unreachable ();
7551 }
7552 if (tagging)
7553 /* Push terminal marker - zero. */
7554 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7555
7556 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7557 gimple_set_location (g, gimple_location (entry_stmt));
7558 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7559 if (!offloaded)
7560 {
7561 g = gsi_stmt (gsi);
7562 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7563 gsi_remove (&gsi, true);
7564 }
7565 if (data_region && region->exit)
7566 {
7567 gsi = gsi_last_bb (region->exit);
7568 g = gsi_stmt (gsi);
7569 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7570 gsi_remove (&gsi, true);
7571 }
7572}
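
/* For a plain OpenMP target region, the call emitted above ends up looking
   roughly like the following (a sketch; the exact libgomp entry point and
   trailing arguments depend on START_IX and the clauses present):

     GOMP_target_ext (device, child_fn, <number of map entries>,
                      &.omp_data_arr, &.omp_data_sizes, &.omp_data_kinds,
                      flags, depend, .omp_target_args);  */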
7573
 7574/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7575 iteration variable derived from the thread number. INTRA_GROUP means this
7576 is an expansion of a loop iterating over work-items within a separate
 7577 iteration over groups.  */
7578
7579static void
7580grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7581{
7582 gimple_stmt_iterator gsi;
7583 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7584 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7585 == GF_OMP_FOR_KIND_GRID_LOOP);
7586 size_t collapse = gimple_omp_for_collapse (for_stmt);
7587 struct omp_for_data_loop *loops
7588 = XALLOCAVEC (struct omp_for_data_loop,
 7589 gimple_omp_for_collapse (for_stmt));
7590 struct omp_for_data fd;
7591
7592 remove_edge (BRANCH_EDGE (kfor->entry));
7593 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7594
7595 gcc_assert (kfor->cont);
7596 omp_extract_for_data (for_stmt, &fd, loops);
7597
7598 gsi = gsi_start_bb (body_bb);
7599
7600 for (size_t dim = 0; dim < collapse; dim++)
7601 {
7602 tree type, itype;
7603 itype = type = TREE_TYPE (fd.loops[dim].v);
7604 if (POINTER_TYPE_P (type))
7605 itype = signed_type_for (type);
7606
7607 tree n1 = fd.loops[dim].n1;
7608 tree step = fd.loops[dim].step;
7609 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7610 true, NULL_TREE, true, GSI_SAME_STMT);
7611 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7612 true, NULL_TREE, true, GSI_SAME_STMT);
7613 tree threadid;
7614 if (gimple_omp_for_grid_group_iter (for_stmt))
7615 {
7616 gcc_checking_assert (!intra_group);
7617 threadid = build_call_expr (builtin_decl_explicit
7618 (BUILT_IN_HSA_WORKGROUPID), 1,
7619 build_int_cstu (unsigned_type_node, dim));
7620 }
7621 else if (intra_group)
7622 threadid = build_call_expr (builtin_decl_explicit
7623 (BUILT_IN_HSA_WORKITEMID), 1,
7624 build_int_cstu (unsigned_type_node, dim));
7625 else
7626 threadid = build_call_expr (builtin_decl_explicit
7627 (BUILT_IN_HSA_WORKITEMABSID), 1,
7628 build_int_cstu (unsigned_type_node, dim));
7629 threadid = fold_convert (itype, threadid);
7630 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7631 true, GSI_SAME_STMT);
7632
7633 tree startvar = fd.loops[dim].v;
7634 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7635 if (POINTER_TYPE_P (type))
7636 t = fold_build_pointer_plus (n1, t);
7637 else
7638 t = fold_build2 (PLUS_EXPR, type, t, n1);
7639 t = fold_convert (type, t);
7640 t = force_gimple_operand_gsi (&gsi, t,
7641 DECL_P (startvar)
7642 && TREE_ADDRESSABLE (startvar),
7643 NULL_TREE, true, GSI_SAME_STMT);
7644 gassign *assign_stmt = gimple_build_assign (startvar, t);
7645 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7646 }
 7647 /* Remove the omp for statement.  */
7648 gsi = gsi_last_bb (kfor->entry);
7649 gsi_remove (&gsi, true);
7650
7651 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7652 gsi = gsi_last_bb (kfor->cont);
7653 gcc_assert (!gsi_end_p (gsi)
7654 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7655 gsi_remove (&gsi, true);
7656
7657 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7658 gsi = gsi_last_bb (kfor->exit);
7659 gcc_assert (!gsi_end_p (gsi)
7660 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7661 if (intra_group)
7662 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7663 gsi_remove (&gsi, true);
7664
7665 /* Fixup the much simpler CFG. */
7666 remove_edge (find_edge (kfor->cont, body_bb));
7667
7668 if (kfor->cont != body_bb)
7669 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7670 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7671}
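
/* In other words, for each collapsed dimension the code above replaces the
   sequential induction variable with a value computed directly from the
   HSA work-item or work-group id, roughly

     v = (TYPE) (n1 + <hsa id for dim> * step);

   so that the loop body executes exactly once per work-item (descriptive
   sketch only).  */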
7672
7673/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7674 argument_decls. */
7675
7676struct grid_arg_decl_map
7677{
7678 tree old_arg;
7679 tree new_arg;
7680};
7681
7682/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7683 pertaining to kernel function. */
7684
7685static tree
7686grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7687{
7688 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7689 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7690 tree t = *tp;
7691
7692 if (t == adm->old_arg)
7693 *tp = adm->new_arg;
7694 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7695 return NULL_TREE;
7696}
7697
7698/* If TARGET region contains a kernel body for loop, remove its region from the
 7699 TARGET and expand it in HSA gridified kernel fashion.  */
7700
7701static void
7702grid_expand_target_grid_body (struct omp_region *target)
7703{
7704 if (!hsa_gen_requested_p ())
7705 return;
7706
7707 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7708 struct omp_region **pp;
7709
7710 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7711 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7712 break;
7713
7714 struct omp_region *gpukernel = *pp;
7715
7716 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7717 if (!gpukernel)
7718 {
7719 /* HSA cannot handle OACC stuff. */
7720 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7721 return;
7722 gcc_checking_assert (orig_child_fndecl);
7723 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7724 OMP_CLAUSE__GRIDDIM_));
7725 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7726
7727 hsa_register_kernel (n);
7728 return;
7729 }
7730
7731 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7732 OMP_CLAUSE__GRIDDIM_));
7733 tree inside_block
7734 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7735 *pp = gpukernel->next;
7736 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7737 if ((*pp)->type == GIMPLE_OMP_FOR)
7738 break;
7739
7740 struct omp_region *kfor = *pp;
7741 gcc_assert (kfor);
7742 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7743 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7744 *pp = kfor->next;
7745 if (kfor->inner)
7746 {
7747 if (gimple_omp_for_grid_group_iter (for_stmt))
7748 {
7749 struct omp_region **next_pp;
7750 for (pp = &kfor->inner; *pp; pp = next_pp)
7751 {
7752 next_pp = &(*pp)->next;
7753 if ((*pp)->type != GIMPLE_OMP_FOR)
7754 continue;
7755 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7756 gcc_assert (gimple_omp_for_kind (inner)
7757 == GF_OMP_FOR_KIND_GRID_LOOP);
7758 grid_expand_omp_for_loop (*pp, true);
7759 *pp = (*pp)->next;
7760 next_pp = pp;
7761 }
7762 }
7763 expand_omp (kfor->inner);
7764 }
7765 if (gpukernel->inner)
7766 expand_omp (gpukernel->inner);
7767
7768 tree kern_fndecl = copy_node (orig_child_fndecl);
7769 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7770 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7771 tree tgtblock = gimple_block (tgt_stmt);
7772 tree fniniblock = make_node (BLOCK);
7773 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7774 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7775 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7776 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7777 DECL_INITIAL (kern_fndecl) = fniniblock;
7778 push_struct_function (kern_fndecl);
7779 cfun->function_end_locus = gimple_location (tgt_stmt);
7780 init_tree_ssa (cfun);
7781 pop_cfun ();
7782
7783 /* Make sure to generate early debug for the function before
7784 outlining anything. */
7785 if (! gimple_in_ssa_p (cfun))
7786 (*debug_hooks->early_global_decl) (cfun->decl);
7787
7788 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7789 gcc_assert (!DECL_CHAIN (old_parm_decl));
7790 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7791 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7792 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7793 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7794 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7795 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7796 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7797 kern_cfun->curr_properties = cfun->curr_properties;
7798
7799 grid_expand_omp_for_loop (kfor, false);
7800
 7801 /* Remove the omp for statement.  */
7802 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7803 gsi_remove (&gsi, true);
7804 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7805 return. */
7806 gsi = gsi_last_bb (gpukernel->exit);
7807 gcc_assert (!gsi_end_p (gsi)
7808 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7809 gimple *ret_stmt = gimple_build_return (NULL);
7810 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7811 gsi_remove (&gsi, true);
7812
7813 /* Statements in the first BB in the target construct have been produced by
7814 target lowering and must be copied inside the GPUKERNEL, with the two
7815 exceptions of the first OMP statement and the OMP_DATA assignment
7816 statement. */
7817 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7818 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7819 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7820 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7821 !gsi_end_p (tsi); gsi_next (&tsi))
7822 {
7823 gimple *stmt = gsi_stmt (tsi);
7824 if (is_gimple_omp (stmt))
7825 break;
7826 if (sender
7827 && is_gimple_assign (stmt)
7828 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7829 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7830 continue;
7831 gimple *copy = gimple_copy (stmt);
7832 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7833 gimple_set_block (copy, fniniblock);
7834 }
7835
7836 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7837 gpukernel->exit, inside_block);
7838
7839 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7840 kcn->mark_force_output ();
7841 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7842
7843 hsa_register_kernel (kcn, orig_child);
7844
7845 cgraph_node::add_new_function (kern_fndecl, true);
7846 push_cfun (kern_cfun);
7847 cgraph_edge::rebuild_edges ();
7848
7849 /* Re-map any mention of the PARM_DECL of the original function to the
7850 PARM_DECL of the new one.
7851
7852 TODO: It would be great if lowering produced references into the GPU
7853 kernel decl straight away and we did not have to do this. */
7854 struct grid_arg_decl_map adm;
7855 adm.old_arg = old_parm_decl;
7856 adm.new_arg = new_parm_decl;
7857 basic_block bb;
7858 FOR_EACH_BB_FN (bb, kern_cfun)
7859 {
7860 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7861 {
7862 gimple *stmt = gsi_stmt (gsi);
7863 struct walk_stmt_info wi;
7864 memset (&wi, 0, sizeof (wi));
7865 wi.info = &adm;
7866 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7867 }
7868 }
7869 pop_cfun ();
7870
7871 return;
7872}
7873
7874/* Expand the parallel region tree rooted at REGION. Expansion
7875 proceeds in depth-first order. Innermost regions are expanded
7876 first. This way, parallel regions that require a new function to
7877 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7878 internal dependencies in their body. */
7879
7880static void
7881expand_omp (struct omp_region *region)
7882{
7883 omp_any_child_fn_dumped = false;
7884 while (region)
7885 {
7886 location_t saved_location;
7887 gimple *inner_stmt = NULL;
7888
7889 /* First, determine whether this is a combined parallel+workshare
 7890 region.  */
7891 if (region->type == GIMPLE_OMP_PARALLEL)
7892 determine_parallel_type (region);
7893 else if (region->type == GIMPLE_OMP_TARGET)
7894 grid_expand_target_grid_body (region);
7895
7896 if (region->type == GIMPLE_OMP_FOR
7897 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7898 inner_stmt = last_stmt (region->inner->entry);
7899
7900 if (region->inner)
7901 expand_omp (region->inner);
7902
7903 saved_location = input_location;
7904 if (gimple_has_location (last_stmt (region->entry)))
7905 input_location = gimple_location (last_stmt (region->entry));
7906
7907 switch (region->type)
7908 {
7909 case GIMPLE_OMP_PARALLEL:
7910 case GIMPLE_OMP_TASK:
7911 expand_omp_taskreg (region);
7912 break;
7913
7914 case GIMPLE_OMP_FOR:
7915 expand_omp_for (region, inner_stmt);
7916 break;
7917
7918 case GIMPLE_OMP_SECTIONS:
7919 expand_omp_sections (region);
7920 break;
7921
7922 case GIMPLE_OMP_SECTION:
7923 /* Individual omp sections are handled together with their
7924 parent GIMPLE_OMP_SECTIONS region. */
7925 break;
7926
7927 case GIMPLE_OMP_SINGLE:
7928 expand_omp_single (region);
7929 break;
7930
7931 case GIMPLE_OMP_ORDERED:
7932 {
7933 gomp_ordered *ord_stmt
7934 = as_a <gomp_ordered *> (last_stmt (region->entry));
7935 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7936 OMP_CLAUSE_DEPEND))
7937 {
7938 /* We'll expand these when expanding corresponding
7939 worksharing region with ordered(n) clause. */
7940 gcc_assert (region->outer
7941 && region->outer->type == GIMPLE_OMP_FOR);
7942 region->ord_stmt = ord_stmt;
7943 break;
7944 }
7945 }
7946 /* FALLTHRU */
7947 case GIMPLE_OMP_MASTER:
7948 case GIMPLE_OMP_TASKGROUP:
7949 case GIMPLE_OMP_CRITICAL:
7950 case GIMPLE_OMP_TEAMS:
7951 expand_omp_synch (region);
7952 break;
7953
7954 case GIMPLE_OMP_ATOMIC_LOAD:
7955 expand_omp_atomic (region);
7956 break;
7957
7958 case GIMPLE_OMP_TARGET:
7959 expand_omp_target (region);
7960 break;
7961
7962 default:
7963 gcc_unreachable ();
7964 }
7965
7966 input_location = saved_location;
7967 region = region->next;
7968 }
7969 if (omp_any_child_fn_dumped)
7970 {
7971 if (dump_file)
7972 dump_function_header (dump_file, current_function_decl, dump_flags);
7973 omp_any_child_fn_dumped = false;
7974 }
7975}
7976
7977/* Helper for build_omp_regions. Scan the dominator tree starting at
7978 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7979 true, the function ends once a single tree is built (otherwise, whole
7980 forest of OMP constructs may be built). */
7981
7982static void
7983build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7984 bool single_tree)
7985{
7986 gimple_stmt_iterator gsi;
7987 gimple *stmt;
7988 basic_block son;
7989
7990 gsi = gsi_last_bb (bb);
7991 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7992 {
7993 struct omp_region *region;
7994 enum gimple_code code;
7995
7996 stmt = gsi_stmt (gsi);
7997 code = gimple_code (stmt);
7998 if (code == GIMPLE_OMP_RETURN)
7999 {
8000 /* STMT is the return point out of region PARENT. Mark it
8001 as the exit point and make PARENT the immediately
8002 enclosing region. */
8003 gcc_assert (parent);
8004 region = parent;
8005 region->exit = bb;
8006 parent = parent->outer;
8007 }
8008 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8009 {
 8010 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8011 GIMPLE_OMP_RETURN, but matches with
8012 GIMPLE_OMP_ATOMIC_LOAD. */
8013 gcc_assert (parent);
8014 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8015 region = parent;
8016 region->exit = bb;
8017 parent = parent->outer;
8018 }
8019 else if (code == GIMPLE_OMP_CONTINUE)
8020 {
8021 gcc_assert (parent);
8022 parent->cont = bb;
8023 }
8024 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8025 {
8026 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8027 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8028 }
8029 else
8030 {
8031 region = new_omp_region (bb, code, parent);
8032 /* Otherwise... */
8033 if (code == GIMPLE_OMP_TARGET)
8034 {
8035 switch (gimple_omp_target_kind (stmt))
8036 {
8037 case GF_OMP_TARGET_KIND_REGION:
8038 case GF_OMP_TARGET_KIND_DATA:
8039 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8040 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8041 case GF_OMP_TARGET_KIND_OACC_DATA:
8042 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8043 break;
8044 case GF_OMP_TARGET_KIND_UPDATE:
8045 case GF_OMP_TARGET_KIND_ENTER_DATA:
8046 case GF_OMP_TARGET_KIND_EXIT_DATA:
8047 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8048 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8049 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8050 /* ..., other than for those stand-alone directives... */
8051 region = NULL;
8052 break;
8053 default:
8054 gcc_unreachable ();
8055 }
8056 }
8057 else if (code == GIMPLE_OMP_ORDERED
8058 && omp_find_clause (gimple_omp_ordered_clauses
8059 (as_a <gomp_ordered *> (stmt)),
8060 OMP_CLAUSE_DEPEND))
8061 /* #pragma omp ordered depend is also just a stand-alone
8062 directive. */
8063 region = NULL;
8064 /* ..., this directive becomes the parent for a new region. */
8065 if (region)
8066 parent = region;
8067 }
8068 }
8069
8070 if (single_tree && !parent)
8071 return;
8072
8073 for (son = first_dom_son (CDI_DOMINATORS, bb);
8074 son;
8075 son = next_dom_son (CDI_DOMINATORS, son))
8076 build_omp_regions_1 (son, parent, single_tree);
8077}
8078
8079/* Builds the tree of OMP regions rooted at ROOT, storing it to
8080 root_omp_region. */
8081
8082static void
8083build_omp_regions_root (basic_block root)
8084{
8085 gcc_assert (root_omp_region == NULL);
8086 build_omp_regions_1 (root, NULL, true);
8087 gcc_assert (root_omp_region != NULL);
8088}
8089
8090/* Expands omp construct (and its subconstructs) starting in HEAD. */
8091
8092void
8093omp_expand_local (basic_block head)
8094{
8095 build_omp_regions_root (head);
8096 if (dump_file && (dump_flags & TDF_DETAILS))
8097 {
8098 fprintf (dump_file, "\nOMP region tree\n\n");
8099 dump_omp_region (dump_file, root_omp_region, 0);
8100 fprintf (dump_file, "\n");
8101 }
8102
8103 remove_exit_barriers (root_omp_region);
8104 expand_omp (root_omp_region);
8105
8106 omp_free_regions ();
8107}
8108
8109/* Scan the CFG and build a tree of OMP regions. Return the root of
8110 the OMP region tree. */
8111
8112static void
8113build_omp_regions (void)
8114{
8115 gcc_assert (root_omp_region == NULL);
8116 calculate_dominance_info (CDI_DOMINATORS);
8117 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8118}
8119
8120/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8121
8122static unsigned int
8123execute_expand_omp (void)
8124{
8125 build_omp_regions ();
8126
8127 if (!root_omp_region)
8128 return 0;
8129
8130 if (dump_file)
8131 {
8132 fprintf (dump_file, "\nOMP region tree\n\n");
8133 dump_omp_region (dump_file, root_omp_region, 0);
8134 fprintf (dump_file, "\n");
8135 }
8136
8137 remove_exit_barriers (root_omp_region);
8138
8139 expand_omp (root_omp_region);
8140
8141 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8142 verify_loop_structure ();
8143 cleanup_tree_cfg ();
8144
8145 omp_free_regions ();
8146
8147 return 0;
8148}
8149
8150/* OMP expansion -- the default pass, run before creation of SSA form. */
8151
8152namespace {
8153
8154const pass_data pass_data_expand_omp =
8155{
8156 GIMPLE_PASS, /* type */
8157 "ompexp", /* name */
 8158 OPTGROUP_OMP, /* optinfo_flags */
8159 TV_NONE, /* tv_id */
8160 PROP_gimple_any, /* properties_required */
8161 PROP_gimple_eomp, /* properties_provided */
8162 0, /* properties_destroyed */
8163 0, /* todo_flags_start */
8164 0, /* todo_flags_finish */
8165};
8166
8167class pass_expand_omp : public gimple_opt_pass
8168{
8169public:
8170 pass_expand_omp (gcc::context *ctxt)
8171 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8172 {}
8173
8174 /* opt_pass methods: */
8175 virtual unsigned int execute (function *)
8176 {
8177 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8178 || flag_openmp_simd != 0)
8179 && !seen_error ());
8180
8181 /* This pass always runs, to provide PROP_gimple_eomp.
8182 But often, there is nothing to do. */
8183 if (!gate)
8184 return 0;
8185
8186 return execute_expand_omp ();
8187 }
8188
8189}; // class pass_expand_omp
8190
8191} // anon namespace
8192
8193gimple_opt_pass *
8194make_pass_expand_omp (gcc::context *ctxt)
8195{
8196 return new pass_expand_omp (ctxt);
8197}
8198
8199namespace {
8200
8201const pass_data pass_data_expand_omp_ssa =
8202{
8203 GIMPLE_PASS, /* type */
8204 "ompexpssa", /* name */
 8205 OPTGROUP_OMP, /* optinfo_flags */
8206 TV_NONE, /* tv_id */
8207 PROP_cfg | PROP_ssa, /* properties_required */
8208 PROP_gimple_eomp, /* properties_provided */
8209 0, /* properties_destroyed */
8210 0, /* todo_flags_start */
8211 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8212};
8213
8214class pass_expand_omp_ssa : public gimple_opt_pass
8215{
8216public:
8217 pass_expand_omp_ssa (gcc::context *ctxt)
8218 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8219 {}
8220
8221 /* opt_pass methods: */
8222 virtual bool gate (function *fun)
8223 {
8224 return !(fun->curr_properties & PROP_gimple_eomp);
8225 }
8226 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8227 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8228
8229}; // class pass_expand_omp_ssa
8230
8231} // anon namespace
8232
8233gimple_opt_pass *
8234make_pass_expand_omp_ssa (gcc::context *ctxt)
8235{
8236 return new pass_expand_omp_ssa (ctxt);
8237}
8238
8239/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8240 GIMPLE_* codes. */
8241
8242bool
8243omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8244 int *region_idx)
8245{
8246 gimple *last = last_stmt (bb);
8247 enum gimple_code code = gimple_code (last);
8248 struct omp_region *cur_region = *region;
8249 bool fallthru = false;
8250
8251 switch (code)
8252 {
8253 case GIMPLE_OMP_PARALLEL:
8254 case GIMPLE_OMP_TASK:
8255 case GIMPLE_OMP_FOR:
8256 case GIMPLE_OMP_SINGLE:
8257 case GIMPLE_OMP_TEAMS:
8258 case GIMPLE_OMP_MASTER:
8259 case GIMPLE_OMP_TASKGROUP:
8260 case GIMPLE_OMP_CRITICAL:
8261 case GIMPLE_OMP_SECTION:
8262 case GIMPLE_OMP_GRID_BODY:
8263 cur_region = new_omp_region (bb, code, cur_region);
8264 fallthru = true;
8265 break;
8266
8267 case GIMPLE_OMP_ORDERED:
8268 cur_region = new_omp_region (bb, code, cur_region);
8269 fallthru = true;
8270 if (omp_find_clause (gimple_omp_ordered_clauses
8271 (as_a <gomp_ordered *> (last)),
8272 OMP_CLAUSE_DEPEND))
8273 cur_region = cur_region->outer;
8274 break;
8275
8276 case GIMPLE_OMP_TARGET:
8277 cur_region = new_omp_region (bb, code, cur_region);
8278 fallthru = true;
8279 switch (gimple_omp_target_kind (last))
8280 {
8281 case GF_OMP_TARGET_KIND_REGION:
8282 case GF_OMP_TARGET_KIND_DATA:
8283 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8284 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8285 case GF_OMP_TARGET_KIND_OACC_DATA:
8286 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8287 break;
8288 case GF_OMP_TARGET_KIND_UPDATE:
8289 case GF_OMP_TARGET_KIND_ENTER_DATA:
8290 case GF_OMP_TARGET_KIND_EXIT_DATA:
8291 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8292 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8293 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8294 cur_region = cur_region->outer;
8295 break;
8296 default:
8297 gcc_unreachable ();
8298 }
8299 break;
8300
8301 case GIMPLE_OMP_SECTIONS:
8302 cur_region = new_omp_region (bb, code, cur_region);
8303 fallthru = true;
8304 break;
8305
8306 case GIMPLE_OMP_SECTIONS_SWITCH:
8307 fallthru = false;
8308 break;
8309
8310 case GIMPLE_OMP_ATOMIC_LOAD:
8311 case GIMPLE_OMP_ATOMIC_STORE:
8312 fallthru = true;
8313 break;
8314
8315 case GIMPLE_OMP_RETURN:
8316 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8317 somewhere other than the next block. This will be
8318 created later. */
8319 cur_region->exit = bb;
8320 if (cur_region->type == GIMPLE_OMP_TASK)
8321 /* Add an edge corresponding to not scheduling the task
8322 immediately. */
8323 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8324 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8325 cur_region = cur_region->outer;
8326 break;
8327
8328 case GIMPLE_OMP_CONTINUE:
8329 cur_region->cont = bb;
8330 switch (cur_region->type)
8331 {
8332 case GIMPLE_OMP_FOR:
8333 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8334 succs edges as abnormal to prevent splitting
8335 them. */
8336 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8337 /* Make the loopback edge. */
8338 make_edge (bb, single_succ (cur_region->entry),
8339 EDGE_ABNORMAL);
8340
8341 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8342 corresponds to the case that the body of the loop
8343 is not executed at all. */
8344 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8345 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8346 fallthru = false;
8347 break;
8348
8349 case GIMPLE_OMP_SECTIONS:
8350 /* Wire up the edges into and out of the nested sections. */
8351 {
8352 basic_block switch_bb = single_succ (cur_region->entry);
8353
8354 struct omp_region *i;
8355 for (i = cur_region->inner; i ; i = i->next)
8356 {
8357 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8358 make_edge (switch_bb, i->entry, 0);
8359 make_edge (i->exit, bb, EDGE_FALLTHRU);
8360 }
8361
8362 /* Make the loopback edge to the block with
8363 GIMPLE_OMP_SECTIONS_SWITCH. */
8364 make_edge (bb, switch_bb, 0);
8365
8366 /* Make the edge from the switch to exit. */
8367 make_edge (switch_bb, bb->next_bb, 0);
8368 fallthru = false;
8369 }
8370 break;
8371
8372 case GIMPLE_OMP_TASK:
8373 fallthru = true;
8374 break;
8375
8376 default:
8377 gcc_unreachable ();
8378 }
8379 break;
8380
8381 default:
8382 gcc_unreachable ();
8383 }
8384
8385 if (*region != cur_region)
8386 {
8387 *region = cur_region;
8388 if (cur_region)
8389 *region_idx = cur_region->entry->index;
8390 else
8391 *region_idx = 0;
8392 }
8393
8394 return fallthru;
8395}
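
/* As an illustration of the edge construction above: for a worksharing
   GIMPLE_OMP_FOR region, the GIMPLE_OMP_CONTINUE block gets an abnormal
   loopback edge to the block following the GIMPLE_OMP_FOR, and the
   GIMPLE_OMP_FOR block gets an abnormal edge to the block after the
   GIMPLE_OMP_CONTINUE, covering the case where the loop body does not
   execute at all (this merely restates what the code above does).  */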
8396
8397#include "gt-omp-expand.h"