This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp3] Task sharing fixes
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>, Diego Novillo <dnovillo at google dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 11 Mar 2008 16:41:41 -0400
- Subject: [gomp3] Task sharing fixes
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch fixes two problems with tasks (as soon as they aren't just
stubbed - forcefully if (0)ed in libgomp):
1) if any firstprivate clauses (or implicit firstprivate) needs to copy data
from the creating task, we can't defer starting the task immediately,
as if the task is started on a different thread or later, the variables
that need to be copied might have changed or be even unavailable.
The exception is if firstprivate doesn't use pointer for any of the fields, but
the value itself is stored into the .omp_data_o structure.
This patch changes GOMP_task's last argument into a flags bitmask from bool.
If all data that needs copying is put directly into .omp_data_o
structure, GOMP_task will be called with second bit cleared and can
be deferred or started on different thread, otherwise if task creation
needs to copy the data (e.g. run copy constructors etc.), GOMP_task
will be called with second bit in flags set and will call the taskfn
immediately once the new task is created. When the
copying/initialization is done, the task will call GOMP_task_start,
at which point the new task can be suspended, moved to other thread
even if not untied, etc.
2) for shared variables if they aren't aggregate etc., parallel uses
copy-in/out if the variable isn't addressable. Unfortunately this
can't work for tasks, because the task can be deferred and by the
time we would do the copy-out var = .omp_data_o.var; after GOMP_task
call, the task might not be even started, or it might be running on
some other thread. Therefore, all shared vars that aren't global
in outer context must be passed as references. This leads to
problems, as the .omp_data_o.var = &var; addition during omp lowering
might lead to previously is_gimple_reg variables getting
TREE_ADDRESSABLE. To fix this we need to regimplify anything that uses
them.
Regtested on x86_64-linux, will commit tomorrow unless I hear objections.
2008-03-11 Jakub Jelinek <jakub@redhat.com>
* tree.h (OMP_TASK_EXPLICIT_START): Define.
* omp-low.c (task_shared_vars): New variable.
(scan_sharing_clauses): In OMP_TASK disallow copy-in/out
sharing.
(lower_send_shared_vars): Likewise.
(lower_rec_input_clauses): Likewise. Set OMP_TASK_EXPLICIT_START
if firstprivate or allocatable private needs to copy data from
outer task. Emit GOMP_task_wait call if so.
(expand_task_call): Change last GOMP_task argument to bitmask.
* builtin-types.def (BT_FN_VOID_OMPFN_PTR_BOOL_BOOL): Remove.
(BT_FN_VOID_OMPFN_PTR_BOOL_UINT): New.
* omp-builtins.def (BUILT_IN_GOMP_TASK_START): New.
(BUILT_IN_GOMP_TASK): Change type of last argument.
* types.def (BT_FN_VOID_OMPFN_PTR_BOOL_BOOL): Remove.
(BT_FN_VOID_OMPFN_PTR_BOOL_UINT): New.
* libgomp_g.h (GOMP_task_flag_untied,
GOMP_task_flag_explicit_start): Define.
* libgomp.map (GOMP_task_start): Export @@GOMP_2.0.
* task.c (GOMP_task): Change last argument.
(GOMP_task_start): New function.
--- libgomp/libgomp_g.h (revision 132481)
+++ libgomp/libgomp_g.h (working copy)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU OpenMP Library (libgomp).
@@ -95,7 +95,10 @@ extern void GOMP_parallel_end (void);
/* team.c */
-extern void GOMP_task (void (*) (void *), void *, bool, bool);
+#define GOMP_task_flag_untied 1 /* UNTIED clause present. */
+#define GOMP_task_flag_explicit_start 2 /* Explicit GOMP_task_start needed. */
+extern void GOMP_task (void (*) (void *), void *, bool, unsigned);
+extern void GOMP_task_start (void);
extern void GOMP_taskwait (void);
/* sections.c */
--- libgomp/libgomp.map (revision 133073)
+++ libgomp/libgomp.map (working copy)
@@ -154,5 +154,6 @@ GOMP_1.0 {
GOMP_2.0 {
global:
GOMP_task;
+ GOMP_task_start;
GOMP_taskwait;
} GOMP_1.0;
--- libgomp/task.c (revision 132481)
+++ libgomp/task.c (working copy)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2007, 2008 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU OpenMP Library (libgomp).
@@ -63,7 +63,7 @@ gomp_end_task (void)
void
GOMP_task (void (*fn) (void *), void *data,
bool if_clause __attribute__((unused)),
- bool untied __attribute__((unused)))
+ unsigned flags __attribute__((unused)))
{
struct gomp_thread *thr = gomp_thread ();
thr->task = gomp_new_task (thr->task, gomp_icv ());
@@ -75,6 +75,17 @@ GOMP_task (void (*fn) (void *), void *da
gomp_end_task ();
}
+/* Called after a task has been initialized. Only should be called if
+ GOMP_task was called with GOMP_task_flag_explicit_start bit set,
+ after all firstprivate etc. copying is done. The copying will
+ happen immediately, in the thread that created the task, afterwards
+ it can be suspended and/or moved to another thread, even if not untied. */
+
+void
+GOMP_task_start (void)
+{
+}
+
/* Called when encountering a taskwait directive. */
void
--- gcc/tree.h (revision 132748)
+++ gcc/tree.h (working copy)
@@ -501,6 +501,8 @@ struct gimple_stmt GTY(())
OMP_SECTION
OMP_PARALLEL_COMBINED in
OMP_PARALLEL
+ OMP_TASK_EXPLICIT_START in
+ OMP_TASK
OMP_CLAUSE_PRIVATE_OUTER_REF in
OMP_CLAUSE_PRIVATE
@@ -1809,6 +1811,11 @@ struct tree_constructor GTY(())
#define OMP_PARALLEL_COMBINED(NODE) \
TREE_PRIVATE (OMP_PARALLEL_CHECK (NODE))
+/* True on an OMP_TASK statement if explicit GOMP_task_start call
+ is needed after privatized variable initialization. */
+#define OMP_TASK_EXPLICIT_START(NODE) \
+ TREE_PRIVATE (OMP_TASK_CHECK (NODE))
+
/* True on a PRIVATE clause if its decl is kept around for debugging
information only and its DECL_VALUE_EXPR is supposed to point
to what it has been remapped to. */
--- gcc/omp-low.c (revision 133114)
+++ gcc/omp-low.c (working copy)
@@ -118,6 +118,7 @@ struct omp_for_data
static splay_tree all_contexts;
static int taskreg_nesting_level;
struct omp_region *root_omp_region;
+static bitmap task_shared_vars;
static void scan_omp (tree *, omp_context *);
static void lower_omp (tree *, omp_context *);
@@ -1056,6 +1057,7 @@ scan_sharing_clauses (tree clauses, omp_
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
bool by_ref;
+ tree outer;
switch (OMP_CLAUSE_CODE (c))
{
@@ -1074,13 +1076,29 @@ scan_sharing_clauses (tree clauses, omp_
by_ref = use_pointer_for_field (decl, true);
/* Global variables don't need to be copied,
the receiver side will use them directly. */
- if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
+ outer = maybe_lookup_decl_in_outer_ctx (decl, ctx);
+ if (is_global_var (outer))
break;
if (! TREE_READONLY (decl)
|| TREE_ADDRESSABLE (decl)
|| by_ref
|| is_reference (decl))
{
+ /* For tasks copy-out is not possible, so force by_ref. */
+ if (TREE_CODE (ctx->stmt) == OMP_TASK)
+ {
+ by_ref = true;
+ if (is_gimple_reg (outer))
+ {
+ /* Taking address of this OUTER in
+ lower_send_shared_vars might need regimplification
+ of everything that uses the variable. */
+ if (!task_shared_vars)
+ task_shared_vars = BITMAP_ALLOC (NULL);
+ bitmap_set_bit (task_shared_vars, DECL_UID (outer));
+ TREE_ADDRESSABLE (outer) = 1;
+ }
+ }
install_var_field (decl, by_ref, ctx);
install_var_local (decl, ctx);
break;
@@ -1938,6 +1956,9 @@ lower_rec_input_clauses (tree clauses, t
needs to be delayed until after fixup_child_record_type so
that we get the correct type during the dereference. */
by_ref = use_pointer_for_field (var, true);
+ /* For tasks copy-out is not possible, so force by_ref. */
+ if (TREE_CODE (ctx->stmt) == OMP_TASK)
+ by_ref = true;
x = build_receiver_ref (var, by_ref, ctx);
SET_DECL_VALUE_EXPR (new_var, x);
DECL_HAS_VALUE_EXPR_P (new_var) = 1;
@@ -1959,7 +1980,12 @@ lower_rec_input_clauses (tree clauses, t
case OMP_CLAUSE_PRIVATE:
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE
|| OMP_CLAUSE_PRIVATE_OUTER_REF (c))
- x = build_outer_var_ref (var, ctx);
+ {
+ x = build_outer_var_ref (var, ctx);
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
+ && TREE_CODE (ctx->stmt) == OMP_TASK)
+ OMP_TASK_EXPLICIT_START (ctx->stmt) = 1;
+ }
else
x = NULL;
x = lang_hooks.decls.omp_clause_default_ctor (c, new_var, x);
@@ -1981,6 +2007,14 @@ lower_rec_input_clauses (tree clauses, t
x = build_outer_var_ref (var, ctx);
x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
gimplify_and_add (x, ilist);
+ if (TREE_CODE (ctx->stmt) == OMP_TASK)
+ {
+ if (is_global_var (maybe_lookup_decl_in_outer_ctx (var,
+ ctx))
+ || is_variable_sized (var)
+ || use_pointer_for_field (var, false))
+ OMP_TASK_EXPLICIT_START (ctx->stmt) = 1;
+ }
goto do_dtor;
break;
@@ -2040,6 +2074,14 @@ lower_rec_input_clauses (tree clauses, t
happens after firstprivate copying in all threads. */
if (copyin_by_ref || lastprivate_firstprivate)
gimplify_and_add (build_omp_barrier (), ilist);
+
+ if (TREE_CODE (ctx->stmt) == OMP_TASK
+ && OMP_TASK_EXPLICIT_START (ctx->stmt))
+ {
+ x = built_in_decls[BUILT_IN_GOMP_TASK_START];
+ x = build_call_expr (x, 0);
+ gimplify_and_add (x, ilist);
+ }
}
@@ -2360,7 +2402,9 @@ lower_send_shared_vars (tree *ilist, tre
mapping for OVAR. */
var = lookup_decl_in_outer_ctx (ovar, ctx);
- if (use_pointer_for_field (ovar, true))
+ if (use_pointer_for_field (ovar, true)
+ /* For tasks copy-out is not possible, so force use of pointer. */
+ || TREE_CODE (ctx->stmt) == OMP_TASK)
{
x = build_sender_ref (ovar, ctx);
var = build_fold_addr_expr (var);
@@ -2555,7 +2599,7 @@ expand_parallel_call (struct omp_region
static void
expand_task_call (basic_block bb, tree entry_stmt)
{
- tree t, t1, t2, untied, cond, c, clauses;
+ tree t, t1, t2, flags, cond, c, clauses;
block_stmt_iterator si;
clauses = OMP_TASK_CLAUSES (entry_stmt);
@@ -2567,7 +2611,9 @@ expand_task_call (basic_block bb, tree e
cond = boolean_true_node;
c = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
- untied = c ? boolean_true_node : boolean_false_node;
+ flags = build_int_cst (unsigned_type_node,
+ (c ? 1 : 0)
+ | (OMP_TASK_EXPLICIT_START (entry_stmt) ? 2 : 0));
si = bsi_last (bb);
t = OMP_TASK_DATA_ARG (entry_stmt);
@@ -2578,7 +2624,7 @@ expand_task_call (basic_block bb, tree e
t2 = build_fold_addr_expr (OMP_TASK_FN (entry_stmt));
t = build_call_expr (built_in_decls[BUILT_IN_GOMP_TASK], 4, t2, t1,
- cond, untied);
+ cond, flags);
force_gimple_operand_bsi (&si, t, true, NULL_TREE,
false, BSI_CONTINUE_LINKING);
@@ -5463,7 +5509,9 @@ lower_omp_1 (tree *tp, int *walk_subtree
break;
case VAR_DECL:
- if (ctx && DECL_HAS_VALUE_EXPR_P (t))
+ if ((ctx && DECL_HAS_VALUE_EXPR_P (t))
+ || (task_shared_vars
+ && bitmap_bit_p (task_shared_vars, DECL_UID (t))))
{
lower_regimplify (&t, wi);
if (wi->val_only)
@@ -5478,7 +5526,7 @@ lower_omp_1 (tree *tp, int *walk_subtree
break;
case ADDR_EXPR:
- if (ctx)
+ if (ctx || task_shared_vars)
lower_regimplify (tp, wi);
break;
@@ -5488,12 +5536,12 @@ lower_omp_1 (tree *tp, int *walk_subtree
case IMAGPART_EXPR:
case COMPONENT_REF:
case VIEW_CONVERT_EXPR:
- if (ctx)
+ if (ctx || task_shared_vars)
lower_regimplify (tp, wi);
break;
case INDIRECT_REF:
- if (ctx)
+ if (ctx || task_shared_vars)
{
wi->is_lhs = false;
wi->val_only = true;
@@ -5536,13 +5584,20 @@ execute_lower_omp (void)
gcc_assert (taskreg_nesting_level == 0);
if (all_contexts->root)
- lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
+ {
+ if (task_shared_vars)
+ push_gimplify_context ();
+ lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
+ if (task_shared_vars)
+ pop_gimplify_context (NULL);
+ }
if (all_contexts)
{
splay_tree_delete (all_contexts);
all_contexts = NULL;
}
+ BITMAP_FREE (task_shared_vars);
return 0;
}
--- gcc/builtin-types.def (revision 132481)
+++ gcc/builtin-types.def (working copy)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+/* Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
Free Software Foundation, Inc.
This file is part of GCC.
@@ -393,8 +393,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PT
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
-DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_BOOL_BOOL, BT_VOID, BT_PTR_FN_VOID_PTR,
- BT_PTR, BT_BOOL, BT_BOOL)
+DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_BOOL_UINT, BT_VOID, BT_PTR_FN_VOID_PTR,
+ BT_PTR, BT_BOOL, BT_UINT)
DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
--- gcc/fortran/types.def (revision 132481)
+++ gcc/fortran/types.def (working copy)
@@ -117,8 +117,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PT
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
-DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_BOOL_BOOL, BT_VOID,
- BT_PTR_FN_VOID_PTR, BT_PTR, BT_BOOL, BT_BOOL)
+DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_BOOL_UINT, BT_VOID,
+ BT_PTR_FN_VOID_PTR, BT_PTR, BT_BOOL, BT_UINT)
DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG,
--- gcc/omp-builtins.def (revision 132481)
+++ gcc/omp-builtins.def (working copy)
@@ -1,6 +1,6 @@
/* This file contains the definitions and documentation for the
OpenMP builtins used in the GNU compiler.
- Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
This file is part of GCC.
@@ -37,6 +37,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BARRIER,
BT_FN_VOID, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKWAIT, "GOMP_taskwait",
BT_FN_VOID, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK_START, "GOMP_task_start",
+ BT_FN_VOID, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_CRITICAL_START, "GOMP_critical_start",
BT_FN_VOID, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_CRITICAL_END, "GOMP_critical_end",
@@ -151,7 +153,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_END, "GOMP_parallel_end",
BT_FN_VOID, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK, "GOMP_task",
- BT_FN_VOID_OMPFN_PTR_BOOL_BOOL, ATTR_NOTHROW_LIST)
+ BT_FN_VOID_OMPFN_PTR_BOOL_UINT, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_START, "GOMP_sections_start",
BT_FN_UINT_UINT, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_NEXT, "GOMP_sections_next",
Jakub