This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp4.1] Taskloop support
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Richard Henderson <rth at redhat dot com>
- Date: Thu, 21 May 2015 11:20:57 +0200
- Subject: [gomp4.1] Taskloop support
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch finishes the C #pragma omp taskloop support on the gomp 4.1
branch, including library support.
2015-05-21 Jakub Jelinek <jakub@redhat.com>
* tree.h (OMP_STANDALONE_CLAUSES): Adjust to cover
OMP_TARGET_{ENTER,EXIT}_DATA.
(OMP_CLAUSE_SHARED_FIRSTPRIVATE): Define.
* gimplify.c (gimplify_scan_omp_clauses): Add lastprivate
clause to outer taskloop if needed.
(gimplify_omp_for): Fix a typo. Fixup OMP_TASKLOOP
gimplification.
* omp-low.c (omp_copy_decl_2): If var is TREE_ADDRESSABLE
listed in task_shared_vars, clear TREE_ADDRESSABLE on the
copy.
(build_outer_var_ref): Add lastprivate argument, pass it through
recursively. Handle lastprivate on taskloop construct.
(install_var_field): Allow multiple fields for a single
decl - one for firstprivate, another for shared clauses
on task.
(scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
(add_taskreg_looptemp_clauses): Add one more _looptemp_ clause
for taskloop GIMPLE_OMP_TASK, if it is collapse > 1 with
non-constant iteration count and there is lastprivate clause
on the inner GIMPLE_OMP_FOR.
(finish_taskreg_scan): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
(lower_rec_input_clauses): Likewise. Ignore all
OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE clauses on taskloop construct.
(lower_lastprivate_clauses): For OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE
on taskloop lookup decl in outer context. Pass true
to build_outer_var_ref lastprivate argument.
(lower_send_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
(lower_send_shared_vars): Ignore fields with NULL or
FIELD_DECL abstract origin.
(expand_task_call): Use GOMP_TASK_* defines instead of
hardcoded integers.
(expand_omp_simd): Handle addressable fd->loop.v.
(expand_omp_taskloop_for_outer): Initialize the last
_looptemp_ with total iteration count if needed.
(expand_omp_taskloop_for_inner): Handle bias and broken_loop.
(lower_omp_for_lastprivate): Use last _looptemp_ clause
on taskloop for comparison.
(create_task_copyfn): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
gcc/c-family/
* c-omp.c (c_finish_omp_for): Clear DECL_INITIAL.
gcc/testsuite/
* gcc.dg/gomp/taskloop-1.c: New test.
include/
* gomp-constants.h (GOMP_TASK_FLAG_UNTIED, GOMP_TASK_FLAG_FINAL,
GOMP_TASK_FLAG_MERGEABLE, GOMP_TASK_FLAG_DEPEND, GOMP_TASK_FLAG_UP,
GOMP_TASK_FLAG_GRAINSIZE, GOMP_TASK_FLAG_IF, GOMP_TASK_FLAG_NOGROUP):
Define.
libgomp/
* libgomp.map (GOMP_4.1): Export GOMP_taskloop and GOMP_taskloop_ull.
* task.c: Include gomp-constants.h. Include taskloop.c twice
with appropriate macros.
(GOMP_task): Use GOMP_TASK_FLAG_* defines instead of hardcoded
constants.
* taskloop.c: New file.
* testsuite/libgomp.c/for-4.c: New test.
* testsuite/libgomp.c/taskloop-1.c: New test.
* testsuite/libgomp.c/taskloop-2.c: New test.
* testsuite/libgomp.c/taskloop-3.c: New test.
--- gcc/tree.h.jj 2015-05-19 18:56:50.982256719 +0200
+++ gcc/tree.h 2015-05-19 19:04:52.496759752 +0200
@@ -1206,7 +1206,7 @@ extern void protected_set_expr_location
/* Generic accessors for OMP nodes that keep clauses as operand 0. */
#define OMP_STANDALONE_CLAUSES(NODE) \
- TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_UPDATE), 0)
+ TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_EXIT_DATA), 0)
#define OACC_PARALLEL_BODY(NODE) \
TREE_OPERAND (OACC_PARALLEL_CHECK (NODE), 0)
@@ -1366,6 +1366,12 @@ extern void protected_set_expr_location
#define OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ(NODE) \
(OMP_CLAUSE_CHECK (NODE))->omp_clause.gimple_reduction_init
+/* True on a SHARED clause if a FIRSTPRIVATE clause for the same
+ decl is present in the chain (this can happen only for taskloop
+ with FIRSTPRIVATE/LASTPRIVATE on it originally). */
+#define OMP_CLAUSE_SHARED_FIRSTPRIVATE(NODE) \
+ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SHARED)->base.public_flag)
+
#define OMP_CLAUSE_FINAL_EXPR(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_FINAL), 0)
#define OMP_CLAUSE_IF_EXPR(NODE) \
--- gcc/gimplify.c.jj 2015-05-19 19:02:52.230632257 +0200
+++ gcc/gimplify.c 2015-05-20 19:07:01.317440243 +0200
@@ -6167,6 +6167,12 @@ gimplify_scan_omp_clauses (tree *list_p,
(splay_tree_key) decl) == NULL)
omp_add_variable (outer_ctx, decl, GOVD_SHARED | GOVD_SEEN);
else if (outer_ctx
+ && (outer_ctx->region_type & ORT_TASK) != 0
+ && outer_ctx->combined_loop
+ && splay_tree_lookup (outer_ctx->variables,
+ (splay_tree_key) decl) == NULL)
+ omp_add_variable (outer_ctx, decl, GOVD_LASTPRIVATE | GOVD_SEEN);
+ else if (outer_ctx
&& outer_ctx->region_type == ORT_WORKSHARE
&& outer_ctx->combined_loop
&& splay_tree_lookup (outer_ctx->variables,
@@ -6227,6 +6233,10 @@ gimplify_scan_omp_clauses (tree *list_p,
else if (omp_check_private (octx, decl, false))
break;
}
+ else if (octx
+ && (octx->region_type & ORT_TASK) != 0
+ && octx->combined_loop)
+ ;
else
break;
gcc_checking_assert (splay_tree_lookup (octx->variables,
@@ -7061,7 +7071,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
/* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
clause for the IV. */
- if (org == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
+ if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
{
t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), 0);
gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
@@ -7075,7 +7085,8 @@ gimplify_omp_for (tree *expr_p, gimple_s
}
}
- gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort);
+ if (TREE_CODE (for_stmt) != OMP_TASKLOOP)
+ gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort);
if (TREE_CODE (for_stmt) == OMP_DISTRIBUTE)
gimplify_omp_ctxp->distribute = true;
@@ -7113,9 +7124,69 @@ gimplify_omp_for (tree *expr_p, gimple_s
for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt), find_combined_omp_for,
NULL, NULL);
gcc_assert (for_stmt != NULL_TREE);
- gimplify_omp_ctxp->combined_loop = true;
}
+ /* For taskloop, need to gimplify the start, end and step before the
+ taskloop, outside of the taskloop omp context. */
+ if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+ {
+ for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
+ {
+ t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
+ if (!is_gimple_constant (TREE_OPERAND (t, 1)))
+ {
+ TREE_OPERAND (t, 1)
+ = get_initialized_tmp_var (TREE_OPERAND (t, 1),
+ pre_p, NULL);
+ tree c = build_omp_clause (input_location,
+ OMP_CLAUSE_FIRSTPRIVATE);
+ OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1);
+ OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+ OMP_FOR_CLAUSES (orig_for_stmt) = c;
+ }
+
+ /* Handle OMP_FOR_COND. */
+ t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i);
+ if (!is_gimple_constant (TREE_OPERAND (t, 1)))
+ {
+ TREE_OPERAND (t, 1)
+ = get_initialized_tmp_var (TREE_OPERAND (t, 1),
+ pre_p, NULL);
+ tree c = build_omp_clause (input_location,
+ OMP_CLAUSE_FIRSTPRIVATE);
+ OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1);
+ OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+ OMP_FOR_CLAUSES (orig_for_stmt) = c;
+ }
+
+ /* Handle OMP_FOR_INCR. */
+ t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
+ if (TREE_CODE (t) == MODIFY_EXPR)
+ {
+ decl = TREE_OPERAND (t, 0);
+ t = TREE_OPERAND (t, 1);
+ tree *tp = &TREE_OPERAND (t, 1);
+ if (TREE_CODE (t) == PLUS_EXPR && *tp == decl)
+ tp = &TREE_OPERAND (t, 0);
+
+ if (!is_gimple_constant (*tp))
+ {
+ *tp = get_initialized_tmp_var (*tp, pre_p, NULL);
+ tree c = build_omp_clause (input_location,
+ OMP_CLAUSE_FIRSTPRIVATE);
+ OMP_CLAUSE_DECL (c) = *tp;
+ OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+ OMP_FOR_CLAUSES (orig_for_stmt) = c;
+ }
+ }
+ }
+
+ gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (orig_for_stmt), pre_p, ort);
+ }
+
+ if (orig_for_stmt != for_stmt)
+ gimplify_omp_ctxp->combined_loop = true;
+
for_body = NULL;
gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
== TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
@@ -7175,6 +7246,11 @@ gimplify_omp_for (tree *expr_p, gimple_s
else if (omp_check_private (outer, decl, false))
outer = NULL;
}
+ else if (((outer->region_type & ORT_TASK) != 0)
+ && outer->combined_loop
+ && !omp_check_private (gimplify_omp_ctxp,
+ decl, false))
+ ;
else if (outer->region_type != ORT_COMBINED_PARALLEL)
outer = NULL;
if (outer)
@@ -7206,6 +7282,11 @@ gimplify_omp_for (tree *expr_p, gimple_s
else if (omp_check_private (outer, decl, false))
outer = NULL;
}
+ else if (((outer->region_type & ORT_TASK) != 0)
+ && outer->combined_loop
+ && !omp_check_private (gimplify_omp_ctxp,
+ decl, false))
+ ;
else if (outer->region_type != ORT_COMBINED_PARALLEL)
outer = NULL;
if (outer)
@@ -7418,14 +7499,39 @@ gimplify_omp_for (tree *expr_p, gimple_s
BITMAP_FREE (has_decl_expr);
- gimplify_and_add (OMP_FOR_BODY (orig_for_stmt), &for_body);
+ if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+ {
+ push_gimplify_context ();
+ if (TREE_CODE (OMP_FOR_BODY (orig_for_stmt)) != BIND_EXPR)
+ {
+ OMP_FOR_BODY (orig_for_stmt)
+ = build3 (BIND_EXPR, void_type_node, NULL,
+ OMP_FOR_BODY (orig_for_stmt), NULL);
+ TREE_SIDE_EFFECTS (OMP_FOR_BODY (orig_for_stmt)) = 1;
+ }
+ }
+
+ gimple g = gimplify_and_return_first (OMP_FOR_BODY (orig_for_stmt),
+ &for_body);
+
+ if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+ {
+ if (gimple_code (g) == GIMPLE_BIND)
+ pop_gimplify_context (g);
+ else
+ pop_gimplify_context (NULL);
+ }
if (orig_for_stmt != for_stmt)
for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
{
t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
decl = TREE_OPERAND (t, 0);
+ struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp;
+ if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+ gimplify_omp_ctxp = ctx->outer_context;
var = create_tmp_var (TREE_TYPE (decl), get_name (decl));
+ gimplify_omp_ctxp = ctx;
omp_add_variable (gimplify_omp_ctxp, var, GOVD_PRIVATE | GOVD_SEEN);
TREE_OPERAND (t, 0) = var;
t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
@@ -7524,14 +7630,18 @@ gimplify_omp_for (tree *expr_p, gimple_s
gforo_clauses_ptr = &OMP_CLAUSE_CHAIN (*gforo_clauses_ptr);
break;
/* For lastprivate, keep the clause on inner taskloop, and add
- a shared clause on task. */
+ a shared clause on task. If the same decl is also firstprivate,
+ add also firstprivate clause on the inner taskloop. */
case OMP_CLAUSE_LASTPRIVATE:
*gfor_clauses_ptr = c;
gfor_clauses_ptr = &OMP_CLAUSE_CHAIN (c);
- *gtask_clauses_ptr = build_omp_clause (OMP_CLAUSE_LOCATION (c),
- OMP_CLAUSE_SHARED);
+ *gtask_clauses_ptr
+ = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_SHARED);
OMP_CLAUSE_DECL (*gtask_clauses_ptr) = OMP_CLAUSE_DECL (c);
- gtask_clauses_ptr = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr);
+ if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
+ OMP_CLAUSE_SHARED_FIRSTPRIVATE (*gtask_clauses_ptr) = 1;
+ gtask_clauses_ptr
+ = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr);
break;
default:
gcc_unreachable ();
@@ -7539,8 +7649,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
*gfor_clauses_ptr = NULL_TREE;
*gtask_clauses_ptr = NULL_TREE;
*gforo_clauses_ptr = NULL_TREE;
- gimple g
- = gimple_build_bind (NULL_TREE, gfor, NULL_TREE);
+ g = gimple_build_bind (NULL_TREE, gfor, NULL_TREE);
g = gimple_build_omp_task (g, task_clauses, NULL_TREE, NULL_TREE,
NULL_TREE, NULL_TREE, NULL_TREE);
gimple_omp_task_set_taskloop_p (g, true);
--- gcc/omp-low.c.jj 2015-05-19 18:56:55.730182802 +0200
+++ gcc/omp-low.c 2015-05-20 19:20:25.828928071 +0200
@@ -1129,6 +1129,14 @@ omp_copy_decl_2 (tree var, tree name, tr
DECL_CONTEXT (copy) = current_function_decl;
DECL_CHAIN (copy) = ctx->block_vars;
+ /* If VAR is listed in task_shared_vars, it means it wasn't
+ originally addressable and became addressable only because the
+ task needs to take its address. But we don't need to take the
+ address of privatizations from that var. */
+ if (TREE_ADDRESSABLE (var)
+ && task_shared_vars
+ && bitmap_bit_p (task_shared_vars, DECL_UID (var)))
+ TREE_ADDRESSABLE (copy) = 0;
ctx->block_vars = copy;
return copy;
@@ -1179,7 +1187,7 @@ build_receiver_ref (tree var, bool by_re
this is some variable. */
static tree
-build_outer_var_ref (tree var, omp_context *ctx)
+build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false)
{
tree x;
@@ -1188,7 +1196,7 @@ build_outer_var_ref (tree var, omp_conte
else if (is_variable_sized (var))
{
x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
- x = build_outer_var_ref (x, ctx);
+ x = build_outer_var_ref (x, ctx, lastprivate);
x = build_simple_mem_ref (x);
}
else if (is_taskreg_ctx (ctx))
@@ -1209,6 +1217,33 @@ build_outer_var_ref (tree var, omp_conte
if (x == NULL_TREE)
x = var;
}
+ else if (lastprivate && is_taskloop_ctx (ctx))
+ {
+ gcc_assert (ctx->outer);
+ splay_tree_node n
+ = splay_tree_lookup (ctx->outer->field_map,
+ (splay_tree_key) &DECL_UID (var));
+ if (n == NULL)
+ {
+ if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
+ x = var;
+ else
+ x = lookup_decl (var, ctx->outer);
+ }
+ else
+ {
+ tree field = (tree) n->value;
+ /* If the receiver record type was remapped in the child function,
+ remap the field into the new record type. */
+ x = maybe_lookup_field (field, ctx->outer);
+ if (x != NULL)
+ field = x;
+
+ x = build_simple_mem_ref (ctx->outer->receiver_decl);
+ x = omp_build_component_ref (x, field);
+ x = build_simple_mem_ref (x);
+ }
+ }
else if (ctx->outer)
x = lookup_decl (var, ctx->outer);
else if (is_reference (var))
@@ -1239,11 +1274,17 @@ static void
install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
{
tree field, type, sfield = NULL_TREE;
+ splay_tree_key key = (splay_tree_key) var;
+ if ((mask & 8) != 0)
+ {
+ key = (splay_tree_key) &DECL_UID (var);
+ gcc_checking_assert (key != (splay_tree_key) var);
+ }
gcc_assert ((mask & 1) == 0
- || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
+ || !splay_tree_lookup (ctx->field_map, key));
gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
- || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var));
+ || !splay_tree_lookup (ctx->sfield_map, key));
gcc_assert ((mask & 3) == 3
|| !is_gimple_omp_oacc (ctx->stmt));
@@ -1298,7 +1339,7 @@ install_var_field (tree var, bool by_ref
ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
{
- sfield = build_decl (DECL_SOURCE_LOCATION (var),
+ sfield = build_decl (DECL_SOURCE_LOCATION (t),
FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
insert_field_into_struct (ctx->srecord_type, sfield);
@@ -1313,11 +1354,9 @@ install_var_field (tree var, bool by_ref
}
if (mask & 1)
- splay_tree_insert (ctx->field_map, (splay_tree_key) var,
- (splay_tree_value) field);
+ splay_tree_insert (ctx->field_map, key, (splay_tree_value) field);
if ((mask & 2) && ctx->sfield_map)
- splay_tree_insert (ctx->sfield_map, (splay_tree_key) var,
- (splay_tree_value) sfield);
+ splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
}
static tree
@@ -1718,6 +1757,11 @@ scan_sharing_clauses (tree clauses, omp_
if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
break;
by_ref = use_pointer_for_field (decl, ctx);
+ if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+ {
+ gcc_assert (by_ref);
+ break;
+ }
if (! TREE_READONLY (decl)
|| TREE_ADDRESSABLE (decl)
|| by_ref
@@ -1998,8 +2042,14 @@ scan_sharing_clauses (tree clauses, omp_
if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
break;
decl = OMP_CLAUSE_DECL (c);
- if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
- fixup_remapped_decl (decl, ctx, false);
+ if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
+ break;
+ if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+ {
+ install_var_field (decl, true, 11, ctx);
+ break;
+ }
+ fixup_remapped_decl (decl, ctx, false);
break;
case OMP_CLAUSE_MAP:
@@ -2336,7 +2386,16 @@ add_taskreg_looptemp_clauses (enum gf_ma
tree type = fd.iter_type;
if (fd.collapse > 1
&& TREE_CODE (fd.loop.n2) != INTEGER_CST)
- count += fd.collapse - 1;
+ {
+ count += fd.collapse - 1;
+ /* For taskloop, if there are lastprivate clauses on the inner
+ GIMPLE_OMP_FOR, add one more temporary for the total number
+ of iterations (product of count1 ... countN-1). */
+ if (msk == GF_OMP_FOR_KIND_TASKLOOP
+ && find_omp_clause (gimple_omp_for_clauses (for_stmt),
+ OMP_CLAUSE_LASTPRIVATE))
+ count++;
+ }
for (i = 0; i < count; i++)
{
tree temp = create_tmp_var (type);
@@ -2480,7 +2539,8 @@ finish_taskreg_scan (omp_context *ctx)
for (c = gimple_omp_taskreg_clauses (ctx->stmt);
c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
+ && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
{
tree decl = OMP_CLAUSE_DECL (c);
@@ -3755,7 +3815,8 @@ lower_rec_input_clauses (tree clauses, g
continue;
if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
{
- gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
+ gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)
+ || is_global_var (OMP_CLAUSE_DECL (c)));
continue;
}
case OMP_CLAUSE_FIRSTPRIVATE:
@@ -3775,7 +3836,7 @@ lower_rec_input_clauses (tree clauses, g
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
{
lastprivate_firstprivate = true;
- if (pass != 0)
+ if (pass != 0 || is_taskloop_ctx (ctx))
continue;
}
/* Even without corresponding firstprivate, if
@@ -3936,6 +3997,11 @@ lower_rec_input_clauses (tree clauses, g
/* Shared global vars are just accessed directly. */
if (is_global_var (new_var))
break;
+ /* For taskloop firstprivate/lastprivate, represented
+ as firstprivate and shared clause on the task, new_var
+ is the firstprivate var. */
+ if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+ break;
/* Set up the DECL_VALUE_EXPR for shared variables now. This
needs to be delayed until after fixup_child_record_type so
that we get the correct type during the dereference. */
@@ -4467,7 +4533,15 @@ lower_lastprivate_clauses (tree clauses,
&& !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
{
var = OMP_CLAUSE_DECL (c);
- new_var = lookup_decl (var, ctx);
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+ && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)
+ && is_taskloop_ctx (ctx))
+ {
+ gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer));
+ new_var = lookup_decl (var, ctx->outer);
+ }
+ else
+ new_var = lookup_decl (var, ctx);
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
{
@@ -4511,7 +4585,7 @@ lower_lastprivate_clauses (tree clauses,
OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL;
}
- x = build_outer_var_ref (var, ctx);
+ x = build_outer_var_ref (var, ctx, true);
if (is_reference (var))
new_var = build_simple_mem_ref_loc (clause_loc, new_var);
x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
@@ -4792,6 +4866,10 @@ lower_send_clauses (tree clauses, gimple
case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_REDUCTION:
break;
+ case OMP_CLAUSE_SHARED:
+ if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+ break;
+ continue;
case OMP_CLAUSE__LOOPTEMP_:
if (ignored_looptemp)
{
@@ -4809,6 +4887,25 @@ lower_send_clauses (tree clauses, gimple
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
&& is_global_var (var))
continue;
+
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+ {
+ /* Handle taskloop firstprivate/lastprivate, where the
+ lastprivate on GIMPLE_OMP_TASK is represented as
+ OMP_CLAUSE_SHARED_FIRSTPRIVATE. */
+ tree f
+ = (tree)
+ splay_tree_lookup (ctx->sfield_map
+ ? ctx->sfield_map : ctx->field_map,
+ (splay_tree_key) &DECL_UID (val))->value;
+ gcc_assert (use_pointer_for_field (val, ctx));
+ x = omp_build_component_ref (ctx->sender_decl, f);
+ var = build_fold_addr_expr (var);
+ gimplify_assign (x, var, ilist);
+ DECL_ABSTRACT_ORIGIN (f) = NULL;
+ continue;
+ }
+
if (is_variable_sized (val))
continue;
by_ref = use_pointer_for_field (val, NULL);
@@ -4879,6 +4976,9 @@ lower_send_shared_vars (gimple_seq *ilis
for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
{
ovar = DECL_ABSTRACT_ORIGIN (f);
+ if (!ovar || TREE_CODE (ovar) == FIELD_DECL)
+ continue;
+
nvar = maybe_lookup_decl (ovar, ctx);
if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
continue;
@@ -5158,7 +5258,9 @@ expand_task_call (struct omp_region *reg
tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
unsigned int iflags
- = (untied ? 1 : 0) | (mergeable ? 4 : 0) | (depend ? 8 : 0);
+ = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
+ | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
+ | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
@@ -5178,7 +5280,7 @@ expand_task_call (struct omp_region *reg
endvar = OMP_CLAUSE_DECL (endvar);
step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
if (fd.loop.cond_code == LT_EXPR)
- iflags |= 256;
+ iflags |= GOMP_TASK_FLAG_UP;
tree tclauses = gimple_omp_for_clauses (g);
num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
if (num_tasks)
@@ -5188,7 +5290,7 @@ expand_task_call (struct omp_region *reg
num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
if (num_tasks)
{
- iflags |= 512;
+ iflags |= GOMP_TASK_FLAG_GRAINSIZE;
num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
}
else
@@ -5196,9 +5298,9 @@ expand_task_call (struct omp_region *reg
}
num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
if (ifc == NULL_TREE)
- iflags |= 1024;
+ iflags |= GOMP_TASK_FLAG_IF;
if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP))
- iflags |= 2048;
+ iflags |= GOMP_TASK_FLAG_NOGROUP;
ull = fd.iter_type == long_long_unsigned_type_node;
}
@@ -5211,7 +5313,8 @@ expand_task_call (struct omp_region *reg
{
tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
- build_int_cst (unsigned_type_node, 1024),
+ build_int_cst (unsigned_type_node,
+ GOMP_TASK_FLAG_IF),
build_int_cst (unsigned_type_node, 0));
flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
flags, t);
@@ -5224,7 +5327,8 @@ expand_task_call (struct omp_region *reg
{
tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
- build_int_cst (unsigned_type_node, 2),
+ build_int_cst (unsigned_type_node,
+ GOMP_TASK_FLAG_FINAL),
build_int_cst (unsigned_type_node, 0));
flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
}
@@ -7925,7 +8029,11 @@ expand_omp_simd (struct omp_region *regi
t = fold_convert (type, n2);
t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
- t = build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t);
+ tree v = fd->loop.v;
+ if (DECL_P (v) && TREE_ADDRESSABLE (v))
+ v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
cond_stmt = gimple_build_cond_empty (t);
gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
@@ -8124,6 +8232,28 @@ expand_omp_taskloop_for_outer (struct om
innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
tree endvar = OMP_CLAUSE_DECL (innerc);
+ if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
+ {
+ gcc_assert (innerc);
+ for (i = 1; i < fd->collapse; i++)
+ {
+ innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ gcc_assert (innerc);
+ }
+ innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ if (innerc)
+ {
+ /* If needed (inner taskloop has lastprivate clause), propagate
+ down the total number of iterations. */
+ tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
+ NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
+ gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+ }
+ }
t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
GSI_CONTINUE_LINKING);
@@ -8167,7 +8297,7 @@ expand_omp_taskloop_for_inner (struct om
struct omp_for_data *fd,
gimple inner_stmt)
{
- tree e, t, type, itype, vmain, vback;
+ tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
basic_block fin_bb;
gimple_stmt_iterator gsi;
@@ -8180,6 +8310,29 @@ expand_omp_taskloop_for_inner (struct om
if (POINTER_TYPE_P (type))
itype = signed_type_for (type);
+ /* See if we need to bias by LLONG_MIN. */
+ if (fd->iter_type == long_long_unsigned_type_node
+ && TREE_CODE (type) == INTEGER_TYPE
+ && !TYPE_UNSIGNED (type))
+ {
+ tree n1, n2;
+
+ if (fd->loop.cond_code == LT_EXPR)
+ {
+ n1 = fd->loop.n1;
+ n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
+ }
+ else
+ {
+ n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
+ n2 = fd->loop.n1;
+ }
+ if (TREE_CODE (n1) != INTEGER_CST
+ || TREE_CODE (n2) != INTEGER_CST
+ || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
+ bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
+ }
+
entry_bb = region->entry;
cont_bb = region->cont;
gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
@@ -8220,6 +8373,11 @@ expand_omp_taskloop_for_inner (struct om
innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
n2 = OMP_CLAUSE_DECL (innerc);
+ if (bias)
+ {
+ n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
+ n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
+ }
n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
true, NULL_TREE, true, GSI_SAME_STMT);
n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
@@ -8310,7 +8468,13 @@ expand_omp_taskloop_for_inner (struct om
gsi_remove (&gsi, true);
FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
- remove_edge (BRANCH_EDGE (entry_bb));
+ if (!broken_loop)
+ remove_edge (BRANCH_EDGE (entry_bb));
+ else
+ {
+ remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
+ region->outer->cont = NULL;
+ }
/* Connect all the blocks. */
if (!broken_loop)
@@ -8334,8 +8498,9 @@ expand_omp_taskloop_for_inner (struct om
set_immediate_dominator (CDI_DOMINATORS, body_bb,
recompute_dominator (CDI_DOMINATORS, body_bb));
- set_immediate_dominator (CDI_DOMINATORS, fin_bb,
- recompute_dominator (CDI_DOMINATORS, fin_bb));
+ if (!broken_loop)
+ set_immediate_dominator (CDI_DOMINATORS, fin_bb,
+ recompute_dominator (CDI_DOMINATORS, fin_bb));
if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
{
@@ -11054,15 +11219,42 @@ lower_omp_for_lastprivate (struct omp_fo
tree n2 = fd->loop.n2;
if (fd->collapse > 1
&& TREE_CODE (n2) != INTEGER_CST
- && gimple_omp_for_combined_into_p (fd->for_stmt)
- && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
+ && gimple_omp_for_combined_into_p (fd->for_stmt))
{
- gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
- if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR)
+ struct omp_context *task_ctx = NULL;
+ if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
{
- struct omp_for_data outer_fd;
- extract_omp_for_data (gfor, &outer_fd, NULL);
- n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
+ gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
+ if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR)
+ {
+ struct omp_for_data outer_fd;
+ extract_omp_for_data (gfor, &outer_fd, NULL);
+ n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
+ }
+ else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
+ task_ctx = ctx->outer->outer;
+ }
+ else if (is_task_ctx (ctx->outer))
+ task_ctx = ctx->outer;
+ if (task_ctx)
+ {
+ int i;
+ tree innerc
+ = find_omp_clause (gimple_omp_task_clauses (task_ctx->stmt),
+ OMP_CLAUSE__LOOPTEMP_);
+ gcc_assert (innerc);
+ for (i = 0; i < fd->collapse; i++)
+ {
+ innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ gcc_assert (innerc);
+ }
+ innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ if (innerc)
+ n2 = fold_convert (TREE_TYPE (n2),
+ lookup_decl (OMP_CLAUSE_DECL (innerc),
+ task_ctx));
}
}
cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
@@ -11426,6 +11618,13 @@ create_task_copyfn (gomp_task *task_stmt
n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
if (n == NULL)
break;
+ if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+ {
+ decl = (tree) n->value;
+ n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+ if (n == NULL)
+ break;
+ }
f = (tree) n->value;
if (tcctx.cb.decl_map)
f = *tcctx.cb.decl_map->get (f);
--- gcc/c-family/c-omp.c.jj 2015-05-19 18:54:16.202666384 +0200
+++ gcc/c-family/c-omp.c 2015-05-19 19:04:52.500759690 +0200
@@ -491,6 +491,7 @@ c_finish_omp_for (location_t locus, enum
init = integer_zero_node;
fail = true;
}
+ DECL_INITIAL (decl) = NULL_TREE;
init = build_modify_expr (elocus, decl, NULL_TREE, NOP_EXPR,
/* FIXME diagnostics: This should
--- gcc/testsuite/gcc.dg/gomp/taskloop-1.c.jj 2015-05-19 19:04:52.495759768 +0200
+++ gcc/testsuite/gcc.dg/gomp/taskloop-1.c 2015-05-19 19:04:52.495759768 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+int e;
+int bar (int, int);
+void baz (int, int, int, int *, int *, int *);
+
+void
+foo (int a, int b, int c, int d, int f, int g, int h, int j, int k, int l)
+{
+ int i;
+ #pragma omp taskloop if (a) final (b) untied default(none) mergeable \
+ private(c) firstprivate (e) shared (d) num_tasks(f) collapse(1)
+ for (i = bar (g, h) + j; i < k; i += l)
+ baz (i, d, e++, &c, &d, &e);
+}
--- include/gomp-constants.h.jj 2015-05-19 18:54:15.724673826 +0200
+++ include/gomp-constants.h 2015-05-19 19:04:52.494759783 +0200
@@ -113,4 +113,14 @@ enum gomp_map_kind
#define GOMP_DEVICE_ICV -1
#define GOMP_DEVICE_HOST_FALLBACK -2
+/* GOMP_task/GOMP_taskloop* flags argument. */
+#define GOMP_TASK_FLAG_UNTIED (1 << 0)
+#define GOMP_TASK_FLAG_FINAL (1 << 1)
+#define GOMP_TASK_FLAG_MERGEABLE (1 << 2)
+#define GOMP_TASK_FLAG_DEPEND (1 << 3)
+#define GOMP_TASK_FLAG_UP (1 << 8)
+#define GOMP_TASK_FLAG_GRAINSIZE (1 << 9)
+#define GOMP_TASK_FLAG_IF (1 << 10)
+#define GOMP_TASK_FLAG_NOGROUP (1 << 11)
+
#endif
--- libgomp/libgomp.map.jj 2015-05-19 18:54:16.332664361 +0200
+++ libgomp/libgomp.map 2015-05-19 19:04:52.493759799 +0200
@@ -240,6 +240,12 @@ GOMP_4.0.1 {
GOMP_offload_unregister;
} GOMP_4.0;
+GOMP_4.1 {
+ global:
+ GOMP_taskloop;
+ GOMP_taskloop_ull;
+} GOMP_4.0.1;
+
OACC_2.0 {
global:
acc_get_num_devices;
--- libgomp/task.c.jj 2015-05-19 18:54:16.322664516 +0200
+++ libgomp/task.c 2015-05-19 19:04:52.494759783 +0200
@@ -29,6 +29,7 @@
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
+#include "gomp-constants.h"
typedef struct gomp_task_depend_entry *hash_entry_type;
@@ -126,8 +127,7 @@ GOMP_task (void (*fn) (void *), void *da
might be running on different thread than FN. */
if (cpyfn)
if_clause = false;
- if (flags & 1)
- flags &= ~1;
+ flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
@@ -148,12 +148,14 @@ GOMP_task (void (*fn) (void *), void *da
depend clauses for non-deferred tasks other than this, because
the parent task is suspended until the child task finishes and thus
it can't start further child tasks. */
- if ((flags & 8) && thr->task && thr->task->depend_hash)
+ if ((flags & GOMP_TASK_FLAG_DEPEND)
+ && thr->task && thr->task->depend_hash)
gomp_task_maybe_wait_for_dependencies (depend);
gomp_init_task (&task, thr->task, gomp_icv (false));
task.kind = GOMP_TASK_IFFALSE;
- task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
+ task.final_task = (thr->task && thr->task->final_task)
+ || (flags & GOMP_TASK_FLAG_FINAL);
if (thr->task)
{
task.in_tied_task = thr->task->in_tied_task;
@@ -196,7 +198,7 @@ GOMP_task (void (*fn) (void *), void *da
bool do_wake;
size_t depend_size = 0;
- if (flags & 8)
+ if (flags & GOMP_TASK_FLAG_DEPEND)
depend_size = ((uintptr_t) depend[0]
* sizeof (struct gomp_task_depend_entry));
task = gomp_malloc (sizeof (*task) + depend_size
@@ -219,7 +221,7 @@ GOMP_task (void (*fn) (void *), void *da
task->kind = GOMP_TASK_WAITING;
task->fn = fn;
task->fn_data = arg;
- task->final_task = (flags & 2) >> 1;
+ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
gomp_mutex_lock (&team->task_lock);
/* If parallel or taskgroup has been cancelled, don't start new
tasks. */
@@ -412,6 +414,25 @@ GOMP_task (void (*fn) (void *), void *da
}
}
+/* Internal aliases of the taskgroup entry points; taskloop.c below
+   invokes them through ialias_call.  */
+ialias (GOMP_taskgroup_start)
+ialias (GOMP_taskgroup_end)
+
+/* Instantiate taskloop.c twice: once for the signed long variant
+   (GOMP_taskloop) and once for the unsigned long long variant
+   (GOMP_taskloop_ull).  */
+#define TYPE long
+#define UTYPE unsigned long
+#define TYPE_is_long 1
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef TYPE_is_long
+
+#define TYPE unsigned long long
+#define UTYPE TYPE
+#define GOMP_taskloop GOMP_taskloop_ull
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef GOMP_taskloop
+
static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
struct gomp_taskgroup *taskgroup, struct gomp_team *team)
--- libgomp/taskloop.c.jj 2015-05-19 19:04:52.493759799 +0200
+++ libgomp/taskloop.c 2015-05-20 18:37:38.584454280 +0200
@@ -0,0 +1,360 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles the taskloop construct. It is included twice, once
+ for the long and once for unsigned long long variant. */
+
+/* Called when encountering an explicit taskloop directive.  Divides the
+   iteration space [START, END) with increment STEP into chunks and
+   invokes FN once per chunk, either inline (undeferred) or as deferred
+   tasks queued on the team.  NUM_TASKS carries the num_tasks clause
+   value, or the grainsize clause value when GOMP_TASK_FLAG_GRAINSIZE is
+   set in FLAGS.  If GOMP_TASK_FLAG_IF is unset in FLAGS, the chunks
+   must run undeferred.  */
+
+void
+GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+	       long arg_size, long arg_align, unsigned flags,
+	       unsigned long num_tasks,
+	       TYPE start, TYPE end, TYPE step)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
+  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
+     tied to one thread all the time.  This means UNTIED tasks must be
+     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
+     might be running on different thread than FN.  */
+  if (cpyfn)
+    flags &= ~GOMP_TASK_FLAG_IF;
+  flags &= ~GOMP_TASK_FLAG_UNTIED;
+#endif
+
+  /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+  if (team && gomp_team_barrier_cancelled (&team->barrier))
+    return;
+
+  /* Compute the iteration count N, returning early for an empty loop.
+     The signed variant folds a step - 1 (resp. step + 1) adjustment
+     into S so the truncating division yields a ceiling; the unsigned
+     variant dispatches on the GOMP_TASK_FLAG_UP direction flag.  */
+#ifdef TYPE_is_long
+  TYPE s = step;
+  if (step > 0)
+    {
+      if (start >= end)
+	return;
+      s--;
+    }
+  else
+    {
+      if (start <= end)
+	return;
+      s++;
+    }
+  UTYPE n = (end - start + s) / step;
+#else
+  UTYPE n;
+  if (flags & GOMP_TASK_FLAG_UP)
+    {
+      if (start >= end)
+	return;
+      n = (end - start + step - 1) / step;
+    }
+  else
+    {
+      if (start <= end)
+	return;
+      n = (start - end - step - 1) / -step;
+    }
+#endif
+
+  /* Derive NUM_TASKS chunks advancing by TASK_STEP each.  When N does
+     not divide evenly, the first NFIRST + 1 chunks cover one extra
+     iteration; the execution loops below shrink TASK_STEP by STEP once
+     the chunk index reaches NFIRST.  */
+  TYPE task_step = step;
+  unsigned long nfirst = n;
+  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
+    {
+      unsigned long grainsize = num_tasks;
+#ifdef TYPE_is_long
+      num_tasks = n / grainsize;
+#else
+      /* N may exceed unsigned long here; saturate the task count.  */
+      UTYPE ndiv = n / grainsize;
+      num_tasks = ndiv;
+      if (num_tasks != ndiv)
+	num_tasks = ~0UL;
+#endif
+      if (num_tasks <= 1)
+	{
+	  /* A single chunk covering the whole range.  */
+	  num_tasks = 1;
+	  task_step = end - start;
+	}
+      else if (num_tasks >= grainsize
+#ifndef TYPE_is_long
+	       && num_tasks != ~0UL
+#endif
+	      )
+	{
+	  UTYPE mul = num_tasks * grainsize;
+	  task_step = (TYPE) grainsize * step;
+	  if (mul != n)
+	    {
+	      /* N - MUL leftover iterations: the first N - MUL chunks
+		 get GRAINSIZE + 1 iterations each.  */
+	      task_step += step;
+	      nfirst = n - mul - 1;
+	    }
+	}
+      else
+	{
+	  UTYPE div = n / num_tasks;
+	  UTYPE mod = n % num_tasks;
+	  task_step = (TYPE) div * step;
+	  if (mod)
+	    {
+	      task_step += step;
+	      nfirst = mod - 1;
+	    }
+	}
+    }
+  else
+    {
+      /* num_tasks clause, defaulting to one task per team thread.  */
+      if (num_tasks == 0)
+	num_tasks = team ? team->nthreads : 1;
+      if (num_tasks >= n)
+	num_tasks = n;
+      else
+	{
+	  UTYPE div = n / num_tasks;
+	  UTYPE mod = n % num_tasks;
+	  task_step = (TYPE) div * step;
+	  if (mod)
+	    {
+	      task_step += step;
+	      nfirst = mod - 1;
+	    }
+	}
+    }
+
+  if (flags & GOMP_TASK_FLAG_NOGROUP)
+    {
+      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+	return;
+    }
+  else
+    /* Wrap the generated tasks in an implicit taskgroup, ended at the
+       bottom of this function.  */
+    ialias_call (GOMP_taskgroup_start) ();
+
+  /* Run undeferred when the if clause is false, there is no team, the
+     generating task is final, or the team's task queue is already
+     oversubscribed.  */
+  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
+      || (thr->task && thr->task->final_task)
+      || team->task_count + num_tasks > 64 * team->nthreads)
+    {
+      unsigned long i;
+      if (__builtin_expect (cpyfn != NULL, 0))
+	{
+	  /* Construct every chunk's argument block with CPYFN before
+	     any FN runs, then execute the chunks in order.  */
+	  struct gomp_task task[num_tasks];
+	  struct gomp_task *parent = thr->task;
+	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
+	  char buf[num_tasks * arg_size + arg_align - 1];
+	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
+				& ~(uintptr_t) (arg_align - 1));
+	  char *orig_arg = arg;
+	  for (i = 0; i < num_tasks; i++)
+	    {
+	      gomp_init_task (&task[i], parent, gomp_icv (false));
+	      task[i].kind = GOMP_TASK_IFFALSE;
+	      task[i].final_task = (thr->task && thr->task->final_task)
+				   || (flags & GOMP_TASK_FLAG_FINAL);
+	      if (thr->task)
+		{
+		  task[i].in_tied_task = thr->task->in_tied_task;
+		  task[i].taskgroup = thr->task->taskgroup;
+		}
+	      thr->task = &task[i];
+	      cpyfn (arg, data);
+	      arg += arg_size;
+	    }
+	  arg = orig_arg;
+	  for (i = 0; i < num_tasks; i++)
+	    {
+	      thr->task = &task[i];
+	      /* The chunk bounds are passed in the first two TYPE
+		 slots of the argument block.  */
+	      ((TYPE *)arg)[0] = start;
+	      start += task_step;
+	      ((TYPE *)arg)[1] = start;
+	      if (i == nfirst)
+		task_step -= step;
+	      fn (arg);
+	      arg += arg_size;
+	      if (task[i].children != NULL)
+		{
+		  gomp_mutex_lock (&team->task_lock);
+		  gomp_clear_parent (task[i].children);
+		  gomp_mutex_unlock (&team->task_lock);
+		}
+	      gomp_end_task ();
+	    }
+	}
+      else
+	for (i = 0; i < num_tasks; i++)
+	  {
+	    /* No copy constructor: DATA is reused for every chunk.  */
+	    struct gomp_task task;
+
+	    gomp_init_task (&task, thr->task, gomp_icv (false));
+	    task.kind = GOMP_TASK_IFFALSE;
+	    task.final_task = (thr->task && thr->task->final_task)
+			      || (flags & GOMP_TASK_FLAG_FINAL);
+	    if (thr->task)
+	      {
+		task.in_tied_task = thr->task->in_tied_task;
+		task.taskgroup = thr->task->taskgroup;
+	      }
+	    thr->task = &task;
+	    ((TYPE *)data)[0] = start;
+	    start += task_step;
+	    ((TYPE *)data)[1] = start;
+	    if (i == nfirst)
+	      task_step -= step;
+	    fn (data);
+	    if (task.children != NULL)
+	      {
+		gomp_mutex_lock (&team->task_lock);
+		gomp_clear_parent (task.children);
+		gomp_mutex_unlock (&team->task_lock);
+	      }
+	    gomp_end_task ();
+	  }
+    }
+  else
+    {
+      /* Deferred path: allocate and initialize all chunk tasks first,
+	 then publish them under a single task_lock acquisition.  */
+      struct gomp_task *tasks[num_tasks];
+      struct gomp_task *parent = thr->task;
+      struct gomp_taskgroup *taskgroup = parent->taskgroup;
+      char *arg;
+      int do_wake;
+      unsigned long i;
+
+      for (i = 0; i < num_tasks; i++)
+	{
+	  struct gomp_task *task
+	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
+	  tasks[i] = task;
+	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+			  & ~(uintptr_t) (arg_align - 1));
+	  gomp_init_task (task, parent, gomp_icv (false));
+	  task->kind = GOMP_TASK_IFFALSE;
+	  task->in_tied_task = parent->in_tied_task;
+	  task->taskgroup = taskgroup;
+	  thr->task = task;
+	  if (cpyfn)
+	    {
+	      cpyfn (arg, data);
+	      task->copy_ctors_done = true;
+	    }
+	  else
+	    memcpy (arg, data, arg_size);
+	  /* Chunk bounds live in the first two TYPE slots.  */
+	  ((TYPE *)arg)[0] = start;
+	  start += task_step;
+	  ((TYPE *)arg)[1] = start;
+	  if (i == nfirst)
+	    task_step -= step;
+	  thr->task = parent;
+	  task->kind = GOMP_TASK_WAITING;
+	  task->fn = fn;
+	  task->fn_data = arg;
+	  /* NOTE(review): relies on GOMP_TASK_FLAG_FINAL being bit 1 so
+	     the shift normalizes the flag to 0 or 1 -- confirm against
+	     gomp-constants.h.  */
+	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
+	}
+      gomp_mutex_lock (&team->task_lock);
+      /* If parallel or taskgroup has been cancelled, don't start new
+	 tasks.  */
+      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+			     || (taskgroup && taskgroup->cancelled))
+			    && cpyfn == NULL, 0))
+	{
+	  gomp_mutex_unlock (&team->task_lock);
+	  for (i = 0; i < num_tasks; i++)
+	    {
+	      gomp_finish_task (tasks[i]);
+	      free (tasks[i]);
+	    }
+	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+	    ialias_call (GOMP_taskgroup_end) ();
+	  return;
+	}
+      if (taskgroup)
+	taskgroup->num_children += num_tasks;
+      for (i = 0; i < num_tasks; i++)
+	{
+	  struct gomp_task *task = tasks[i];
+	  /* Insert into the parent's circular doubly-linked child
+	     list.  */
+	  if (parent->children)
+	    {
+	      task->next_child = parent->children;
+	      task->prev_child = parent->children->prev_child;
+	      task->next_child->prev_child = task;
+	      task->prev_child->next_child = task;
+	    }
+	  else
+	    {
+	      task->next_child = task;
+	      task->prev_child = task;
+	    }
+	  parent->children = task;
+	  if (taskgroup)
+	    {
+	      /* Likewise for the taskgroup's circular list.  */
+	      if (taskgroup->children)
+		{
+		  task->next_taskgroup = taskgroup->children;
+		  task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+		  task->next_taskgroup->prev_taskgroup = task;
+		  task->prev_taskgroup->next_taskgroup = task;
+		}
+	      else
+		{
+		  task->next_taskgroup = task;
+		  task->prev_taskgroup = task;
+		}
+	      taskgroup->children = task;
+	    }
+	  /* And into the team-wide ready queue.  */
+	  if (team->task_queue)
+	    {
+	      task->next_queue = team->task_queue;
+	      task->prev_queue = team->task_queue->prev_queue;
+	      task->next_queue->prev_queue = task;
+	      task->prev_queue->next_queue = task;
+	    }
+	  else
+	    {
+	      task->next_queue = task;
+	      task->prev_queue = task;
+	      team->task_queue = task;
+	    }
+	  ++team->task_count;
+	  ++team->task_queued_count;
+	}
+      gomp_team_barrier_set_task_pending (&team->barrier);
+      /* Wake up to NUM_TASKS idle threads to pick up the new work.  */
+      if (team->task_running_count + !parent->in_tied_task
+	  < team->nthreads)
+	{
+	  do_wake = team->nthreads - team->task_running_count
+		    - !parent->in_tied_task;
+	  if ((unsigned long) do_wake > num_tasks)
+	    do_wake = num_tasks;
+	}
+      else
+	do_wake = 0;
+      gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	gomp_team_barrier_wake (&team->barrier, do_wake);
+    }
+  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+    ialias_call (GOMP_taskgroup_end) ();
+}
--- libgomp/testsuite/libgomp.c/for-4.c.jj 2015-05-19 19:04:52.491759830 +0200
+++ libgomp/testsuite/libgomp.c/for-4.c 2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,42 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort (void);
+
+/* Token-pasting helpers: N(x) names each generated test function
+   x_<G>_normal, so for-2.h can be included more than once.  */
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+/* Instantiate the shared loop tests for the taskloop construct.  */
+#define F taskloop
+#define G taskloop
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+/* And again for the combined taskloop simd construct.  */
+#define F taskloop simd
+#define G taskloop_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+int
+main ()
+{
+  int err = 0;
+  /* Run both instantiations once, from a single thread of a parallel
+     region; any failure sets ERR.  */
+  #pragma omp parallel reduction(|:err)
+  #pragma omp single
+  {
+    if (test_taskloop_normal ()
+	|| test_taskloop_simd_normal ())
+      err = 1;
+  }
+  if (err)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-1.c.jj 2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-1.c 2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+/* Tests for the lastprivate clause on taskloop, with and without
+   nogroup.  */
+
+int q, r, e;
+
+/* lastprivate on a global with nogroup; for foo (2, 7) the iterator
+   takes 2, 4, 6, so Q must end up 6.  E records any unexpected D.  */
+__attribute__((noinline, noclone)) void
+foo (long a, long b)
+{
+  #pragma omp taskloop lastprivate (q) nogroup
+  for (long d = a; d < b; d += 2)
+    {
+      q = d;
+      if (d < 2 || d > 6 || (d & 1))
+	#pragma omp atomic
+	e |= 1;
+    }
+}
+
+/* lastprivate on a local; with the implicit taskgroup the final value
+   is available immediately on return.  */
+__attribute__((noinline, noclone)) int
+bar (int a, int b)
+{
+  int q = 7;
+  #pragma omp taskloop lastprivate (q)
+  for (int d = a; d < b; d++)
+    {
+      if (d < 12 || d > 17)
+	#pragma omp atomic
+	e |= 1;
+      q = d;
+    }
+  return q;
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    foo (2, 7);
+    r = bar (12, 18);
+  }
+  /* Q is checked only after the parallel region, by which point foo's
+     nogroup tasks have completed.  */
+  if (q != 6 || r != 17 || e)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-2.c.jj 2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-2.c 2015-05-20 18:37:38.582454311 +0200
@@ -0,0 +1,147 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=c99" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Tests for the combined taskloop simd construct: shared, linear,
+   lastprivate and collapse clauses.  */
+
+int u[1024], v[1024], w[1024], m;
+
+/* Plain taskloop simd with nogroup.  */
+__attribute__((noinline, noclone)) void
+f1 (long a, long b)
+{
+  #pragma omp taskloop simd default(none) shared(u, v, w) nogroup
+  for (long d = a; d < b; d++)
+    u[d] = v[d] + w[d];
+}
+
+/* linear clauses on the iterator and on C, lastprivate on E.  */
+__attribute__((noinline, noclone)) int
+f2 (long a, long b, long c)
+{
+  int d, e;
+  #pragma omp taskloop simd default(none) shared(u, v, w) linear(d:1) linear(c:5) lastprivate(e)
+  for (d = a; d < b; d++)
+    {
+      u[d] = v[d] + w[d];
+      c = c + 5;
+      e = c + 9;
+    }
+  return d + c + e;
+}
+
+/* Iterator made addressable inside the loop body.  */
+__attribute__((noinline, noclone)) int
+f3 (long a, long b)
+{
+  int d;
+  #pragma omp taskloop simd default(none) shared(u, v, w)
+  for (d = a; d < b; d++)
+    {
+      int *p = &d;
+      u[d] = v[d] + w[d];
+    }
+  return d;
+}
+
+/* collapse(2) with lastprivate and addressable iterators.  */
+__attribute__((noinline, noclone)) int
+f4 (long a, long b, long c, long d)
+{
+  int e, f, g;
+  #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) lastprivate(g)
+  for (e = a; e < b; e++)
+    for (f = c; f < d; f++)
+      {
+	int *p = &e;
+	int *q = &f;
+	int r = 32 * e + f;
+	u[r] = v[r] + w[r];
+	g = r;
+      }
+  return e + f + g;
+}
+
+/* collapse(2) without lastprivate.  */
+__attribute__((noinline, noclone)) int
+f5 (long a, long b, long c, long d)
+{
+  int e, f;
+  #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2)
+  for (e = a; e < b; e++)
+    for (f = c; f < d; f++)
+      {
+	int r = 32 * e + f;
+	u[r] = v[r] + w[r];
+      }
+  return e + f;
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    {
+      v[i] = i;
+      w[i] = i + 1;
+    }
+  #pragma omp parallel
+  #pragma omp single
+  f1 (0, 1024);
+  /* Verify the result of each call, reinitializing the inputs for the
+     next call in the same pass over the arrays.  */
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+	v[i] = 1024 - i;
+	w[i] = 512 - i;
+      }
+  #pragma omp parallel
+  #pragma omp single
+  m = f2 (2, 1022, 17);
+  for (i = 0; i < 1024; i++)
+    if ((i < 2 || i >= 1022) ? u[i] != 2 * i + 1 : u[i] != 1536 - 2 * i)
+      __builtin_abort ();
+    else
+      {
+	v[i] = i;
+	w[i] = i + 1;
+      }
+  if (m != 1022 + 2 * (1020 * 5 + 17) + 9)
+    __builtin_abort ();
+  #pragma omp parallel
+  #pragma omp single
+  m = f3 (0, 1024);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+	v[i] = 1024 - i;
+	w[i] = 512 - i;
+      }
+  if (m != 1024)
+    __builtin_abort ();
+  #pragma omp parallel
+  #pragma omp single
+  m = f4 (0, 32, 0, 32);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 1536 - 2 * i)
+      __builtin_abort ();
+    else
+      {
+	v[i] = i;
+	w[i] = i + 1;
+      }
+  if (m != 32 + 32 + 1023)
+    __builtin_abort ();
+  #pragma omp parallel
+  #pragma omp single
+  m = f5 (0, 32, 0, 32);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+	v[i] = 1024 - i;
+	w[i] = 512 - i;
+      }
+  if (m != 32 + 32)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-3.c.jj 2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-3.c 2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+/* Tests for firstprivate and lastprivate clauses on taskloop.  */
+
+int g;
+int a[1024];
+
+/* firstprivate + lastprivate on the same variable: every chunk starts
+   from the original value 74; the logically last iteration's write is
+   copied back out.  */
+__attribute__((noinline, noclone)) int
+f1 (int x)
+{
+  #pragma omp taskloop firstprivate (x) lastprivate (x)
+  for (int i = 0; i < 64; i++)
+    {
+      if (x != 74)
+	__builtin_abort ();
+      if (i == 63)
+	x = i + 4;
+    }
+  return x;
+}
+
+/* Same on a global, with nogroup; the caller must taskwait before
+   reading G.  */
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  #pragma omp taskloop firstprivate (g) lastprivate (g) nogroup
+  for (int i = 0; i < 64; i++)
+    {
+      if (g != 77)
+	__builtin_abort ();
+      if (i == 63)
+	g = i + 9;
+    }
+}
+
+/* lastprivate of the iterator together with a lastprivate local, with
+   long long bounds and a runtime step.  */
+__attribute__((noinline, noclone)) long long
+f3 (long long a, long long b, long long c)
+{
+  long long i;
+  int l;
+  #pragma omp taskloop default (none) lastprivate (i, l)
+  for (i = a; i < b; i += c)
+    l = i;
+  return l * 7 + i;
+}
+
+/* collapse(2) combining firstprivate and lastprivate, including both
+   clauses on K.  */
+__attribute__((noinline, noclone)) long long
+f4 (long long a, long long b, long long c, long long d,
+    long long e, long long f, int k)
+{
+  long long i, j;
+  int l;
+  #pragma omp taskloop default (none) collapse(2) \
+		       firstprivate (k) lastprivate (i, j, k, l)
+  for (i = a; i < b; i += e)
+    for (j = c; j < d; j += f)
+      {
+	if (k != 73)
+	  __builtin_abort ();
+	if (i == 31 && j == 46)
+	  k = i;
+	l = j;
+      }
+  return i + 5 * j + 11 * k + 17 * l;
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    if (f1 (74) != 63 + 4)
+      __builtin_abort ();
+    g = 77;
+    f2 ();
+    /* f2 used nogroup, so wait for its tasks before checking G.  */
+    #pragma omp taskwait
+    if (g != 63 + 9)
+      __builtin_abort ();
+    if (f3 (7, 12, 2) != 11 * 7 + 13)
+      __builtin_abort ();
+    if (f4 (0, 32, 16, 48, 1, 2, 73) != 32 + 5 * 48 + 11 * 31 + 17 * 46)
+      __builtin_abort ();
+  }
+  return 0;
+}
Jakub