[gomp4 06/14] omp-low: copy omp_data_o to shared memory on NVPTX

Alexander Monakov amonakov@ispras.ru
Tue Nov 3 14:25:00 GMT 2015


Hello,

Here's an alternative patch that does not depend on exposure of shared-memory
address space, and does not try to use pass_late_lower_omp.  It's based on
Bernd's suggestion to transform

  (use .omp_data_o)
  GOMP_parallel (fn, &omp_data_o, ...);
  .omp_data_o = {CLOBBER};

to

  .omp_data_o_ptr = __internal_omp_alloc_shared (&.omp_data_o, sizeof ...);
  (use (*.omp_data_o_ptr) instead of .omp_data_o)
  GOMP_parallel (fn, .omp_data_o_ptr, ...);
  __internal_omp_free_shared (.omp_data_o_ptr);
  .omp_data_o = {CLOBBER};

Every target except nvptx can lower free_shared to nothing and alloc_shared to
just returning its first argument, while nvptx can select storage in shared
memory or global memory.  For now the nvptx path simply uses malloc/free.

Sanity-checked by running the libgomp testsuite.  I realize the #ifdef in
internal-fn.c is not appropriate: it's there to make the patch smaller.  I'll
replace it with a target hook if this approach is otherwise ok.

Thanks.
Alexander

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index bf0f23e..3145a8d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -175,6 +175,38 @@ expand_GOMP_SIMD_LAST_LANE (gcall *)
   gcc_unreachable ();
 }
 
+/* Expand IFN_GOMP_ALLOC_SHARED (ADDR, SIZE): with GCC_NVPTX_H defined, call
+   malloc (SIZE); otherwise copy ADDR to the lhs.  PoC: needs a target hook.  */
+static void
+expand_GOMP_ALLOC_SHARED (gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+#ifdef GCC_NVPTX_H
+  tree fndecl = builtin_decl_explicit (BUILT_IN_MALLOC);
+  tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 1));
+  /* TARGET is only a suggestion to expand_call; copy the value if it was
+     returned somewhere else.  */
+  rtx src = expand_call (t, target, 0);
+  if (src != target)
+    emit_move_insn (target, src);
+#else
+  /* No separate storage: the result is just the first argument.  */
+  emit_move_insn (target, expand_normal (gimple_call_arg (stmt, 0)));
+#endif
+}
+
+/* Expand GOMP_FREE_SHARED (PTR): free (PTR) if GCC_NVPTX_H, else a no-op.  */
+static void
+expand_GOMP_FREE_SHARED (gcall *stmt ATTRIBUTE_UNUSED)
+{
+#ifdef GCC_NVPTX_H
+  tree fndecl = builtin_decl_explicit (BUILT_IN_FREE);
+  tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 0));
+  expand_call (t, NULL_RTX, 1);
+#endif
+}
+
 /* This should get expanded in the sanopt pass.  */
 
 static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 0db03f1..0c8e76a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -44,6 +44,8 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_ALLOC_SHARED, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_FREE_SHARED, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (MASK_LOAD, ECF_PURE | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (MASK_STORE, ECF_LEAF, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 696889d..225bf20 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5870,7 +5870,8 @@ expand_omp_taskreg (struct omp_region *region)
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable.  In which case, we need to keep the assignment.  */
-      if (gimple_omp_taskreg_data_arg (entry_stmt))
+      tree data_arg = gimple_omp_taskreg_data_arg (entry_stmt);
+      if (data_arg)
        {
          basic_block entry_succ_bb
            = single_succ_p (entry_bb) ? single_succ (entry_bb)
@@ -5894,9 +5895,10 @@ expand_omp_taskreg (struct omp_region *region)
                  /* We're ignore the subcode because we're
                     effectively doing a STRIP_NOPS.  */
 
-                 if (TREE_CODE (arg) == ADDR_EXPR
-                     && TREE_OPERAND (arg, 0)
-                       == gimple_omp_taskreg_data_arg (entry_stmt))
+                 if ((TREE_CODE (arg) == ADDR_EXPR
+                      && TREE_OPERAND (arg, 0) == data_arg)
+                     || (TREE_CODE (data_arg) == INDIRECT_REF
+                         && TREE_OPERAND (data_arg, 0) == arg))
                    {
                      parcopy_stmt = stmt;
                      break;
@@ -11835,27 +11837,44 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   record_vars_into (ctx->block_vars, child_fn);
   record_vars_into (gimple_bind_vars (par_bind), child_fn);
 
+  ilist = NULL;
+  tree sender_decl = NULL_TREE;
+
   if (ctx->record_type)
     {
-      ctx->sender_decl
+      sender_decl
        = create_tmp_var (ctx->srecord_type ? ctx->srecord_type
                          : ctx->record_type, ".omp_data_o");
-      DECL_NAMELESS (ctx->sender_decl) = 1;
-      TREE_ADDRESSABLE (ctx->sender_decl) = 1;
+      DECL_NAMELESS (sender_decl) = 1;
+      TREE_ADDRESSABLE (sender_decl) = 1;
+
+      /* Instead of using the automatic variable .omp_data_o directly, build
+         .omp_data_o_ptr = GOMP_ALLOC_SHARED (&.omp_data_o, sizeof .omp_data_o)
+         ... and replace SENDER_DECL with indirect ref *.omp_data_o_ptr.  */
+      tree ae = build_fold_addr_expr (sender_decl);
+      tree sz = TYPE_SIZE_UNIT (TREE_TYPE (sender_decl));
+      gimple g = gimple_build_call_internal (IFN_GOMP_ALLOC_SHARED, 2, ae, sz);
+      gimple_seq_add_stmt (&ilist, g);
+      tree result = create_tmp_var (TREE_TYPE (ae), ".omp_data_o_ptr");
+      gimple_call_set_lhs (g, result);
+      ctx->sender_decl = build_fold_indirect_ref (result);
       gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
     }
 
   olist = NULL;
-  ilist = NULL;
   lower_send_clauses (clauses, &ilist, &olist, ctx);
   lower_send_shared_vars (&ilist, &olist, ctx);
 
   if (ctx->record_type)
     {
-      tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
+      /* GOMP_FREE_SHARED (.omp_data_o_ptr).  */
+      tree ae = build_fold_addr_expr (ctx->sender_decl);
+      gimple g = gimple_build_call_internal (IFN_GOMP_FREE_SHARED, 1, ae);
+      gimple_seq_add_stmt (&olist, g);
+      /* Clobber the original stack variable.  */
+      tree clobber = build_constructor (TREE_TYPE (sender_decl), NULL);
       TREE_THIS_VOLATILE (clobber) = 1;
-      gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
-                                                       clobber));
+      gimple_seq_add_stmt (&olist, gimple_build_assign (sender_decl, clobber));
     }
 
   /* Once all the expansions are done, sequence all the different



More information about the Gcc-patches mailing list