[gomp4 06/14] omp-low: copy omp_data_o to shared memory on NVPTX
Alexander Monakov
amonakov@ispras.ru
Tue Nov 3 14:25:00 GMT 2015
Hello,
Here's an alternative patch that does not depend on exposure of shared-memory
address space, and does not try to use pass_late_lower_omp. It's based on
Bernd's suggestion to transform
(use .omp_data_o)
GOMP_parallel (fn, &omp_data_o, ...);
.omp_data_o = {CLOBBER};
to
.omp_data_o_ptr = __internal_omp_alloc_shared (&.omp_data_o, sizeof ...);
(use (*.omp_data_o_ptr) instead of .omp_data_o)
GOMP_parallel (fn, .omp_data_o_ptr, ...);
__internal_omp_free_shared (.omp_data_o_ptr);
.omp_data_o = {CLOBBER};
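Every target except nvptx can lower free_shared to nothing and alloc_shared to
just returning its first argument, while nvptx can select storage in shared
memory or global memory. (In the patch these are the internal functions
GOMP_ALLOC_SHARED and GOMP_FREE_SHARED.) For now the nvptx expansion simply
uses malloc/free.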
Sanity-checked by running the libgomp testsuite. I realize the #ifdef in
internal-fn.c is not appropriate; it's there only to keep the patch smaller, and
I'll replace it with a target hook if this approach is otherwise OK.
Thanks.
Alexander
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index bf0f23e..3145a8d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -175,6 +175,38 @@ expand_GOMP_SIMD_LAST_LANE (gcall *)
gcc_unreachable ();
}
+static void
+expand_GOMP_ALLOC_SHARED (gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+ /* XXX PoC only, needs to be a target hook. */
+#ifdef GCC_NVPTX_H
+ tree fndecl = builtin_decl_explicit (BUILT_IN_MALLOC);
+ tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 1));
+
+ expand_call (t, target, 0);
+#else
+ tree rhs = gimple_call_arg (stmt, 0);
+
+ rtx src = expand_normal (rhs);
+
+ emit_move_insn (target, src);
+#endif
+}
+
+static void
+expand_GOMP_FREE_SHARED (gcall *stmt)
+{
+#ifdef GCC_NVPTX_H
+ tree fndecl = builtin_decl_explicit (BUILT_IN_FREE);
+ tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 0));
+
+ expand_call (t, NULL_RTX, 1);
+#endif
+}
+
/* This should get expanded in the sanopt pass. */
static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 0db03f1..0c8e76a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -44,6 +44,8 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_ALLOC_SHARED, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_FREE_SHARED, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (MASK_LOAD, ECF_PURE | ECF_LEAF, NULL)
DEF_INTERNAL_FN (MASK_STORE, ECF_LEAF, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 696889d..225bf20 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5870,7 +5870,8 @@ expand_omp_taskreg (struct omp_region *region)
a function call that has been inlined, the original PARM_DECL
.OMP_DATA_I may have been converted into a different local
variable. In which case, we need to keep the assignment. */
- if (gimple_omp_taskreg_data_arg (entry_stmt))
+ tree data_arg = gimple_omp_taskreg_data_arg (entry_stmt);
+ if (data_arg)
{
basic_block entry_succ_bb
= single_succ_p (entry_bb) ? single_succ (entry_bb)
@@ -5894,9 +5895,10 @@ expand_omp_taskreg (struct omp_region *region)
/* We're ignore the subcode because we're
effectively doing a STRIP_NOPS. */
- if (TREE_CODE (arg) == ADDR_EXPR
- && TREE_OPERAND (arg, 0)
- == gimple_omp_taskreg_data_arg (entry_stmt))
+ if ((TREE_CODE (arg) == ADDR_EXPR
+ && TREE_OPERAND (arg, 0) == data_arg)
+ || (TREE_CODE (data_arg) == INDIRECT_REF
+ && TREE_OPERAND (data_arg, 0) == arg))
{
parcopy_stmt = stmt;
break;
@@ -11835,27 +11837,44 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
record_vars_into (ctx->block_vars, child_fn);
record_vars_into (gimple_bind_vars (par_bind), child_fn);
+ ilist = NULL;
+ tree sender_decl = NULL_TREE;
+
if (ctx->record_type)
{
- ctx->sender_decl
+ sender_decl
= create_tmp_var (ctx->srecord_type ? ctx->srecord_type
: ctx->record_type, ".omp_data_o");
- DECL_NAMELESS (ctx->sender_decl) = 1;
- TREE_ADDRESSABLE (ctx->sender_decl) = 1;
+ DECL_NAMELESS (sender_decl) = 1;
+ TREE_ADDRESSABLE (sender_decl) = 1;
+
+ /* Instead of using the automatic variable .omp_data_o directly, build
+ .omp_data_o_ptr = GOMP_ALLOC_SHARED (&.omp_data_o, sizeof .omp_data_o)
+ ... and replace SENDER_DECL with indirect ref *.omp_data_o_ptr. */
+ tree ae = build_fold_addr_expr (sender_decl);
+ tree sz = TYPE_SIZE_UNIT (TREE_TYPE (sender_decl));
+ gimple g = gimple_build_call_internal (IFN_GOMP_ALLOC_SHARED, 2, ae, sz);
+ gimple_seq_add_stmt (&ilist, g);
+ tree result = create_tmp_var (TREE_TYPE (ae), ".omp_data_o_ptr");
+ gimple_call_set_lhs (g, result);
+ ctx->sender_decl = build_fold_indirect_ref (result);
gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
}
olist = NULL;
- ilist = NULL;
lower_send_clauses (clauses, &ilist, &olist, ctx);
lower_send_shared_vars (&ilist, &olist, ctx);
if (ctx->record_type)
{
- tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
+ /* GOMP_FREE_SHARED (.omp_data_o_ptr). */
+ tree ae = build_fold_addr_expr (ctx->sender_decl);
+ gimple g = gimple_build_call_internal (IFN_GOMP_FREE_SHARED, 1, ae);
+ gimple_seq_add_stmt (&olist, g);
+ /* Clobber the original stack variable. */
+ tree clobber = build_constructor (TREE_TYPE (sender_decl), NULL);
TREE_THIS_VOLATILE (clobber) = 1;
- gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
- clobber));
+ gimple_seq_add_stmt (&olist, gimple_build_assign (sender_decl, clobber));
}
/* Once all the expansions are done, sequence all the different
More information about the Gcc-patches
mailing list