This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [gomp-nvptx 9/9] adjust SIMD loop lowering for SIMT targets
- From: Alexander Monakov <amonakov at ispras dot ru>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Jakub Jelinek <jakub at redhat dot com>, Bernd Schmidt <bschmidt at redhat dot com>, Dmitry Melnik <dm at ispras dot ru>
- Date: Wed, 2 Dec 2015 01:40:23 +0300 (MSK)
- Subject: Re: [gomp-nvptx 9/9] adjust SIMD loop lowering for SIMT targets
- Authentication-results: sourceware.org; auth=none
- References: <1448983707-18854-1-git-send-email-amonakov at ispras dot ru> <1448983707-18854-10-git-send-email-amonakov at ispras dot ru>
Apologies -- a last-minute attempt to clean up and enhance this patch broke it;
the fixed version is below. The main difference is checking whether we're
transforming a loop that might be executed on the target: checking
decl->offloadable isn't enough, because target region outlining might not have
happened yet; in that case, we need to walk the region tree upwards to check
if any containing region is a target region.
Alexander
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index a3c4a90..3189e96 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -142,6 +142,28 @@ expand_ANNOTATE (gcall *)
gcc_unreachable ();
}
+/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
+ without SIMT execution this should be expanded in omp_device_lower pass. */
+
+static void
+expand_GOMP_SIMT_LANE (gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+
+ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ /* FIXME: use a separate pattern for OpenMP? */
+ gcc_assert (targetm.have_oacc_dim_pos ());
+ emit_insn (targetm.gen_oacc_dim_pos (target, const2_rtx));
+}
+
+/* This should get expanded in omp_device_lower pass. */
+
+static void
+expand_GOMP_SIMT_VF (gcall *)
+{
+ gcc_unreachable ();
+}
+
/* This should get expanded in adjust_simduid_builtins. */
static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1cb14a8..66c7422 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. If not see
DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF, NULL)
DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index cc0435e..0478b2a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -10173,7 +10173,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
OMP_CLAUSE_SAFELEN);
tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
OMP_CLAUSE__SIMDUID_);
- tree n1, n2;
+ tree n1, n2, step, simt_lane;
type = TREE_TYPE (fd->loop.v);
entry_bb = region->entry;
@@ -10218,12 +10218,36 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
n1 = fd->loop.n1;
n2 = fd->loop.n2;
+ step = fd->loop.step;
+ bool offloaded = cgraph_node::get (current_function_decl)->offloadable;
+ for (struct omp_region *reg = region; !offloaded && reg; reg = reg->outer)
+ offloaded = reg->type == GIMPLE_OMP_TARGET;
+ bool do_simt_transform
+ = offloaded && !broken_loop && !safelen && !simduid && !(fd->collapse > 1);
+ if (do_simt_transform)
+ {
+ simt_lane
+ = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_LANE,
+ integer_type_node, 0);
+ simt_lane = fold_convert (TREE_TYPE (step), simt_lane);
+ simt_lane = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, simt_lane);
+ cfun->curr_properties &= ~PROP_gimple_lomp_dev;
+ }
+
if (gimple_omp_for_combined_into_p (fd->for_stmt))
{
tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
n1 = OMP_CLAUSE_DECL (innerc);
+ if (do_simt_transform)
+ {
+ n1 = fold_convert (type, n1);
+ if (POINTER_TYPE_P (type))
+ n1 = fold_build_pointer_plus (n1, simt_lane);
+ else
+ n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, simt_lane));
+ }
innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
@@ -10239,8 +10263,15 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
}
else
{
- expand_omp_build_assign (&gsi, fd->loop.v,
- fold_convert (type, fd->loop.n1));
+ if (do_simt_transform)
+ {
+ n1 = fold_convert (type, n1);
+ if (POINTER_TYPE_P (type))
+ n1 = fold_build_pointer_plus (n1, simt_lane);
+ else
+ n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, simt_lane));
+ }
+ expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
if (fd->collapse > 1)
for (i = 0; i < fd->collapse; i++)
{
@@ -10262,10 +10293,18 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
stmt = gsi_stmt (gsi);
gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+ if (do_simt_transform)
+ {
+ tree simt_vf
+ = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
+ integer_type_node, 0);
+ simt_vf = fold_convert (TREE_TYPE (step), simt_vf);
+ step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, simt_vf);
+ }
if (POINTER_TYPE_P (type))
- t = fold_build_pointer_plus (fd->loop.v, fd->loop.step);
+ t = fold_build_pointer_plus (fd->loop.v, step);
else
- t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step);
+ t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
expand_omp_build_assign (&gsi, fd->loop.v, t);
if (fd->collapse > 1)
@@ -12960,7 +12999,6 @@ expand_omp (struct omp_region *region)
}
}
-
/* Helper for build_omp_regions. Scan the dominator tree starting at
block BB. PARENT is the region that contains BB. If SINGLE_TREE is
true, the function ends once a single tree is built (otherwise, whole
@@ -16235,7 +16273,7 @@ const pass_data pass_data_lower_omp =
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_gimple_any, /* properties_required */
- PROP_gimple_lomp, /* properties_provided */
+ PROP_gimple_lomp | PROP_gimple_lomp_dev, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
@@ -19470,5 +19508,90 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
{
return new pass_oacc_device_lower (ctxt);
}
+
+
+/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
+ VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
+ LANE is kept to be expanded to RTL later on. */
+
+static unsigned int
+execute_omp_device_lower ()
+{
+ int vf = 1;
+ if (targetm.simt.vf)
+ vf = targetm.simt.vf ();
+ tree vf_tree = build_int_cst (integer_type_node, vf);
+ basic_block bb;
+ gimple_stmt_iterator gsi;
+ FOR_EACH_BB_FN (bb, cfun)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
+ continue;
+ tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
+ switch (gimple_call_internal_fn (stmt))
+ {
+ case IFN_GOMP_SIMT_LANE:
+ rhs = vf == 1 ? integer_zero_node : NULL_TREE;
+ break;
+ case IFN_GOMP_SIMT_VF:
+ rhs = vf_tree;
+ break;
+ default:
+ break;
+ }
+ if (!rhs)
+ continue;
+ stmt = gimple_build_assign (lhs, rhs);
+ gsi_replace (&gsi, stmt, false);
+ }
+ if (vf != 1)
+ cfun->has_force_vectorize_loops = false;
+ return 0;
+}
+
+namespace {
+
+const pass_data pass_data_omp_device_lower =
+{
+ GIMPLE_PASS, /* type */
+ "ompdevlow", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_cfg, /* properties_required */
+ PROP_gimple_lomp_dev, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_omp_device_lower : public gimple_opt_pass
+{
+public:
+ pass_omp_device_lower (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *fun)
+ {
+ /* FIXME: inlining does not propagate the lomp_dev property. */
+ return 1 || !(fun->curr_properties & PROP_gimple_lomp_dev);
+ }
+ virtual unsigned int execute (function *)
+ {
+ return execute_omp_device_lower ();
+ }
+
+}; // class pass_omp_device_lower
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_omp_device_lower (gcc::context *ctxt)
+{
+ return new pass_omp_device_lower (ctxt);
+}
#include "gt-omp-low.h"
diff --git a/gcc/passes.def b/gcc/passes.def
index c0ab6b9..ec049f8 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -151,6 +151,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_fixup_cfg);
NEXT_PASS (pass_lower_eh_dispatch);
NEXT_PASS (pass_oacc_device_lower);
+ NEXT_PASS (pass_omp_device_lower);
NEXT_PASS (pass_all_optimizations);
PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
NEXT_PASS (pass_remove_cgraph_callee_edges);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 49e22a9..71b2561 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -226,6 +226,7 @@ protected:
of math functions; the
current choices have
been optimized. */
+#define PROP_gimple_lomp_dev (1 << 16) /* done omp_device_lower */
#define PROP_trees \
(PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
@@ -414,6 +415,7 @@ extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_omp_device_lower (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);