This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 5/5] omp-low: implement SIMT privatization


This patch adjusts privatization in OpenMP SIMD loops lowered for SIMT targets.
Addressable private variables become fields of a new '.omp_simt' structure that
is allocated by a call to GOMP_SIMT_ENTER ().  This function is similar to
__builtin_alloca_with_align, except that it obtains per-SIMT-lane storage and
implicitly performs target-specific actions; on NVPTX that means a transition
to per-lane softstacks and inverting the uniform-simt mask.


	* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
	(expand_GOMP_SIMT_EXIT): New.
	* internal-fn.def (GOMP_SIMT_ENTER): New internal function.
	(GOMP_SIMT_EXIT): Ditto.
	* target-insns.def (omp_simt_enter): New insn.
	(omp_simt_exit): Ditto.
	* omp-low.c (struct omplow_simd_context): New fields simtrec,
	simt_ilist.
	(lower_rec_simd_input_clauses): Implement SIMT privatization.
	(lower_rec_input_clauses): Likewise.
	(lower_lastprivate_clauses): Handle SIMT privatization.

---
 gcc/internal-fn.c    |  34 +++++++++++++
 gcc/internal-fn.def  |   2 +
 gcc/omp-low.c        | 136 ++++++++++++++++++++++++++++++++++++++++-----------
 gcc/target-insns.def |   2 +
 4 files changed, 145 insertions(+), 29 deletions(-)

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index b1dbc98..bc94a3d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -166,6 +166,40 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
   gcc_unreachable ();
 }
 
+/* Allocate per-lane storage and begin non-uniform execution region.  */
+
+static void
+expand_GOMP_SIMT_ENTER (internal_fn, gcall *stmt)
+{
+  rtx target;
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs)
+    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  else
+    target = gen_reg_rtx (Pmode);
+  rtx size = expand_normal (gimple_call_arg (stmt, 0));
+  rtx align = expand_normal (gimple_call_arg (stmt, 1));
+  struct expand_operand ops[3];
+  create_output_operand (&ops[0], target, Pmode);
+  create_input_operand (&ops[1], size, Pmode);
+  create_input_operand (&ops[2], align, Pmode);
+  gcc_assert (targetm.have_omp_simt_enter ());
+  expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
+}
+
+/* Deallocate per-lane storage and leave non-uniform execution region.  */
+
+static void
+expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
+{
+  gcc_checking_assert (!gimple_call_lhs (stmt));
+  rtx arg = expand_normal (gimple_call_arg (stmt, 0));
+  struct expand_operand ops[1];
+  create_input_operand (&ops[0], arg, Pmode);
+  gcc_assert (targetm.have_omp_simt_exit ());
+  expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
+}
+
 /* Lane index on SIMT targets: thread index in the warp on NVPTX.  On targets
    without SIMT execution this should be expanded in omp_device_lower pass.  */
 
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 9a03e17..c3dbb02 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -142,6 +142,8 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
 DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
 
 DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a5f8bf65..499afce 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3452,6 +3452,8 @@ omp_clause_aligned_alignment (tree clause)
 struct omplow_simd_context {
   tree idx;
   tree lane;
+  tree simtrec;
+  gimple_seq simt_ilist;
   int max_vf;
   bool is_simt;
 };
@@ -3488,18 +3490,48 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
   if (max_vf == 1)
     return false;
 
-  tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
-  tree avar = create_tmp_var_raw (atype);
-  if (TREE_ADDRESSABLE (new_var))
-    TREE_ADDRESSABLE (avar) = 1;
-  DECL_ATTRIBUTES (avar)
-    = tree_cons (get_identifier ("omp simd array"), NULL,
-		 DECL_ATTRIBUTES (avar));
-  gimple_add_tmp_var (avar);
-  ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
-		 NULL_TREE, NULL_TREE);
-  lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
-		 NULL_TREE, NULL_TREE);
+  if (sctx->is_simt)
+    {
+      if (is_gimple_reg (new_var))
+	{
+	  ivar = lvar = new_var;
+	  return true;
+	}
+      tree field = build_decl (DECL_SOURCE_LOCATION (new_var), FIELD_DECL,
+			       DECL_NAME (new_var), TREE_TYPE (new_var));
+      SET_DECL_ALIGN (field, DECL_ALIGN (new_var));
+      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (new_var);
+      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (new_var);
+      tree rectype = TREE_TYPE (TREE_TYPE (sctx->simtrec));
+      insert_field_into_struct (rectype, field);
+
+      tree ptr = create_tmp_var (build_pointer_type (TREE_TYPE (new_var)));
+      DECL_ATTRIBUTES (ptr)
+	= tree_cons (get_identifier ("omp simt ref"), NULL,
+		     DECL_ATTRIBUTES (ptr));
+      ivar = lvar = build1 (INDIRECT_REF, TREE_TYPE (new_var), ptr);
+
+      tree t = build1 (INDIRECT_REF, rectype, sctx->simtrec);
+      t = omp_build_component_ref (t, field);
+      t = build1 (ADDR_EXPR, TREE_TYPE (ptr), t);
+      gimple *g = gimple_build_assign (ptr, t);
+      gimple_seq_add_stmt (&sctx->simt_ilist, g);
+    }
+  else
+    {
+      tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
+      tree avar = create_tmp_var_raw (atype);
+      if (TREE_ADDRESSABLE (new_var))
+	TREE_ADDRESSABLE (avar) = 1;
+      DECL_ATTRIBUTES (avar)
+	= tree_cons (get_identifier ("omp simd array"), NULL,
+		     DECL_ATTRIBUTES (avar));
+      gimple_add_tmp_var (avar);
+      ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+		     NULL_TREE, NULL_TREE);
+      lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
+		     NULL_TREE, NULL_TREE);
+    }
   if (DECL_P (new_var))
     {
       SET_DECL_VALUE_EXPR (new_var, lvar);
@@ -3577,6 +3609,16 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	  continue;
 	}
 
+  /* Prepare the structure used for SIMT privatization.  */
+  if (sctx.is_simt && sctx.max_vf != 1)
+    {
+      tree type = lang_hooks.types.make_type (RECORD_TYPE);
+      TYPE_ARTIFICIAL (type) = TYPE_NAMELESS (type) = 1;
+      TREE_ADDRESSABLE (type) = 1;
+      type = build_pointer_type (type);
+      sctx.simtrec = create_tmp_var (type, ".omp_simt");
+    }
+
   /* Do all the fixed sized types in the first pass, and the variable sized
      types in the second pass.  This makes sure that the scalar arguments to
      the variable sized types are processed before we use them in the
@@ -4464,6 +4506,26 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	}
     }
 
+  /* Emit GOMP_SIMT_ENTER () to enter non-uniform execution and allocate
+     privatized data.  Initialize pointers to privatized instances.  */
+  if (sctx.is_simt && sctx.max_vf != 1)
+    {
+      tree rectype = TREE_TYPE (TREE_TYPE (sctx.simtrec));
+      layout_type (rectype);
+      tree size = TYPE_SIZE_UNIT (rectype);
+      tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
+      gimple *g
+	= gimple_build_call_internal (IFN_GOMP_SIMT_ENTER, 2, size, align);
+      gimple_call_set_lhs (g, sctx.simtrec);
+      gimple_seq seq = NULL;
+      gimple_seq_add_stmt (&seq, g);
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start (sctx.simt_ilist); !gsi_end_p (gsi); gsi_next (&gsi))
+	gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
+      gimple_seq_add_seq (&seq, sctx.simt_ilist);
+      gimple_seq_add_seq (&seq, *ilist);
+      *ilist = seq;
+    }
   if (sctx.lane)
     {
       tree uid = create_tmp_var (ptr_type_node, "simduid");
@@ -4548,6 +4610,17 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	    gimple_seq_add_stmt (seq, gimple_build_label (end));
 	  }
     }
+  if (sctx.is_simt && sctx.max_vf != 1)
+    {
+      tree rectype = TREE_TYPE (TREE_TYPE (sctx.simtrec));
+      tree clobber = build_constructor (rectype, NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      gimplify_assign (build1 (INDIRECT_REF, rectype, sctx.simtrec), clobber,
+		       dlist);
+      gimple *g
+	= gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, sctx.simtrec);
+      gimple_seq_add_stmt (dlist, g);
+    }
 
   /* The copyin sequence is not to be executed by the main thread, since
      that would result in self-copies.  Perhaps not visible to scalars,
@@ -4718,7 +4791,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
 	    {
 	      tree val = DECL_VALUE_EXPR (new_var);
-	      if (TREE_CODE (val) == ARRAY_REF
+	      if (!maybe_simt
+		  && TREE_CODE (val) == ARRAY_REF
 		  && VAR_P (TREE_OPERAND (val, 0))
 		  && lookup_attribute ("omp simd array",
 				       DECL_ATTRIBUTES (TREE_OPERAND (val,
@@ -4737,24 +4811,28 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 		  new_var = build4 (ARRAY_REF, TREE_TYPE (val),
 				    TREE_OPERAND (val, 0), lastlane,
 				    NULL_TREE, NULL_TREE);
-		  if (maybe_simt)
+		}
+	      else if (maybe_simt
+		       && TREE_CODE (val) == INDIRECT_REF
+		       && VAR_P (TREE_OPERAND (val, 0))
+		       && lookup_attribute ("omp simt ref",
+					    DECL_ATTRIBUTES (TREE_OPERAND (val,
+									   0))))
+		{
+		  if (simtlast == NULL)
 		    {
-		      gcall *g;
-		      if (simtlast == NULL)
-			{
-			  simtlast = create_tmp_var (unsigned_type_node);
-			  g = gimple_build_call_internal
-			    (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
-			  gimple_call_set_lhs (g, simtlast);
-			  gimple_seq_add_stmt (stmt_list, g);
-			}
-		      x = build_call_expr_internal_loc
-			(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
-			 TREE_TYPE (new_var), 2, new_var, simtlast);
-		      new_var = unshare_expr (new_var);
-		      gimplify_assign (new_var, x, stmt_list);
-		      new_var = unshare_expr (new_var);
+		      simtlast = create_tmp_var (unsigned_type_node);
+		      gcall *g = gimple_build_call_internal
+			(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
+		      gimple_call_set_lhs (g, simtlast);
+		      gimple_seq_add_stmt (stmt_list, g);
 		    }
+		  x = build_call_expr_internal_loc
+		    (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
+		     TREE_TYPE (val), 2, val, simtlast);
+		  new_var = unshare_expr (new_var);
+		  gimplify_assign (new_var, x, stmt_list);
+		  new_var = unshare_expr (new_var);
 		}
 	    }
 
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index e011a5a..d4d361d 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
 DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
 DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
 DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))
-- 
1.8.3.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]