This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [gomp-nvptx 9/9] adjust SIMD loop lowering for SIMT targets


Apologies -- last-minute attempt to cleanup and enhance broke this patch;
fixed version below.  The main difference is checking whether we're
transforming a loop that might be executed on the target: checking
decl->offloadable isn't enough, because target region outlining might not have
happened yet; in that case, we need to walk the region tree upwards to check
if any containing region is a target region.

Alexander

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index a3c4a90..3189e96 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -142,6 +142,28 @@ expand_ANNOTATE (gcall *)
   gcc_unreachable ();
 }
 
+/* Lane index on SIMT targets: thread index in the warp on NVPTX.  On targets
+   without SIMT execution this should be expanded in omp_device_lower pass.  */
+
+static void
+expand_GOMP_SIMT_LANE (gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  /* FIXME: use a separate pattern for OpenMP?  */
+  gcc_assert (targetm.have_oacc_dim_pos ());
+  emit_insn (targetm.gen_oacc_dim_pos (target, const2_rtx));
+}
+
+/* This should get expanded in omp_device_lower pass.  */
+
+static void
+expand_GOMP_SIMT_VF (gcall *)
+{
+  gcc_unreachable ();
+}
+
 /* This should get expanded in adjust_simduid_builtins.  */
 
 static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1cb14a8..66c7422 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
 
 DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index cc0435e..0478b2a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -10173,7 +10173,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
 				  OMP_CLAUSE_SAFELEN);
   tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
 				  OMP_CLAUSE__SIMDUID_);
-  tree n1, n2;
+  tree n1, n2, step, simt_lane;
 
   type = TREE_TYPE (fd->loop.v);
   entry_bb = region->entry;
@@ -10218,12 +10218,36 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
 
   n1 = fd->loop.n1;
   n2 = fd->loop.n2;
+  step = fd->loop.step;
+  bool offloaded = cgraph_node::get (current_function_decl)->offloadable;
+  for (struct omp_region *reg = region; !offloaded && reg; reg = reg->outer)
+    offloaded = reg->type == GIMPLE_OMP_TARGET;
+  bool do_simt_transform
+    = offloaded && !broken_loop && !safelen && !simduid && !(fd->collapse > 1);
+  if (do_simt_transform)
+    {
+      simt_lane
+	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_LANE,
+					integer_type_node, 0);
+      simt_lane = fold_convert (TREE_TYPE (step), simt_lane);
+      simt_lane = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, simt_lane);
+      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
+    }
+
   if (gimple_omp_for_combined_into_p (fd->for_stmt))
     {
       tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
 				     OMP_CLAUSE__LOOPTEMP_);
       gcc_assert (innerc);
       n1 = OMP_CLAUSE_DECL (innerc);
+      if (do_simt_transform)
+	{
+	  n1 = fold_convert (type, n1);
+	  if (POINTER_TYPE_P (type))
+	    n1 = fold_build_pointer_plus (n1, simt_lane);
+	  else
+	    n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, simt_lane));
+	}
       innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
 				OMP_CLAUSE__LOOPTEMP_);
       gcc_assert (innerc);
@@ -10239,8 +10263,15 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
     }
   else
     {
-      expand_omp_build_assign (&gsi, fd->loop.v,
-			       fold_convert (type, fd->loop.n1));
+      if (do_simt_transform)
+	{
+	  n1 = fold_convert (type, n1);
+	  if (POINTER_TYPE_P (type))
+	    n1 = fold_build_pointer_plus (n1, simt_lane);
+	  else
+	    n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, simt_lane));
+	}
+      expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
       if (fd->collapse > 1)
 	for (i = 0; i < fd->collapse; i++)
 	  {
@@ -10262,10 +10293,18 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
       stmt = gsi_stmt (gsi);
       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
 
+      if (do_simt_transform)
+	{
+	  tree simt_vf
+	    = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
+					    integer_type_node, 0);
+	  simt_vf = fold_convert (TREE_TYPE (step), simt_vf);
+	  step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, simt_vf);
+	}
       if (POINTER_TYPE_P (type))
-	t = fold_build_pointer_plus (fd->loop.v, fd->loop.step);
+	t = fold_build_pointer_plus (fd->loop.v, step);
       else
-	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step);
+	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
       expand_omp_build_assign (&gsi, fd->loop.v, t);
 
       if (fd->collapse > 1)
@@ -12960,7 +12999,6 @@ expand_omp (struct omp_region *region)
     }
 }
 
-
 /* Helper for build_omp_regions.  Scan the dominator tree starting at
    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
    true, the function ends once a single tree is built (otherwise, whole
@@ -16235,7 +16273,7 @@ const pass_data pass_data_lower_omp =
   OPTGROUP_NONE, /* optinfo_flags */
   TV_NONE, /* tv_id */
   PROP_gimple_any, /* properties_required */
-  PROP_gimple_lomp, /* properties_provided */
+  PROP_gimple_lomp | PROP_gimple_lomp_dev, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
   0, /* todo_flags_finish */
@@ -19470,5 +19508,90 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
 {
   return new pass_oacc_device_lower (ctxt);
 }
+
+
+/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
+   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
+   LANE is kept to be expanded to RTL later on.  */
+
+static unsigned int
+execute_omp_device_lower ()
+{
+  int vf = 1;
+  if (targetm.simt.vf)
+    vf = targetm.simt.vf ();
+  tree vf_tree = build_int_cst (integer_type_node, vf);
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+  FOR_EACH_BB_FN (bb, cfun)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+	gimple *stmt = gsi_stmt (gsi);
+	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
+	  continue;
+	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
+	switch (gimple_call_internal_fn (stmt))
+	  {
+	  case IFN_GOMP_SIMT_LANE:
+	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
+	    break;
+	  case IFN_GOMP_SIMT_VF:
+	    rhs = vf_tree;
+	    break;
+	  default:
+	    break;
+	  }
+	if (!rhs)
+	  continue;
+	stmt = gimple_build_assign (lhs, rhs);
+	gsi_replace (&gsi, stmt, false);
+      }
+  if (vf != 1)
+    cfun->has_force_vectorize_loops = false;
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_omp_device_lower =
+{
+  GIMPLE_PASS, /* type */
+  "ompdevlow", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  PROP_gimple_lomp_dev, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_omp_device_lower : public gimple_opt_pass
+{
+public:
+  pass_omp_device_lower (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *fun)
+    {
+      /* FIXME: inlining does not propagate the lomp_dev property.  */
+      return 1 || !(fun->curr_properties & PROP_gimple_lomp_dev);
+    }
+  virtual unsigned int execute (function *)
+    {
+      return execute_omp_device_lower ();
+    }
+
+}; // class pass_expand_omp_ssa
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_omp_device_lower (gcc::context *ctxt)
+{
+  return new pass_omp_device_lower (ctxt);
+}
 
 #include "gt-omp-low.h"
diff --git a/gcc/passes.def b/gcc/passes.def
index c0ab6b9..ec049f8 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -151,6 +151,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_fixup_cfg);
   NEXT_PASS (pass_lower_eh_dispatch);
   NEXT_PASS (pass_oacc_device_lower);
+  NEXT_PASS (pass_omp_device_lower);
   NEXT_PASS (pass_all_optimizations);
   PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
       NEXT_PASS (pass_remove_cgraph_callee_edges);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 49e22a9..71b2561 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -226,6 +226,7 @@ protected:
 						   of math functions; the
 						   current choices have
 						   been optimized.  */
+#define PROP_gimple_lomp_dev	(1 << 16)	/* done omp_device_lower */
 
 #define PROP_trees \
   (PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
@@ -414,6 +415,7 @@ extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_omp_device_lower (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]