This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, 6/16] Add pass_oacc_kernels
- From: Tom de Vries <Tom_deVries at mentor dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: "gcc-patches at gnu dot org" <gcc-patches at gnu dot org>, Jakub Jelinek <jakub at redhat dot com>
- Date: Tue, 24 Nov 2015 13:14:53 +0100
- Subject: Re: [PATCH, 6/16] Add pass_oacc_kernels
- Authentication-results: sourceware.org; auth=none
- References: <5640BD31 dot 2060602 at mentor dot com> <5640DA47 dot 2010508 at mentor dot com> <alpine dot LSU dot 2 dot 11 dot 1511111156500 dot 4884 at t29 dot fhfr dot qr> <564DD3C1 dot 8040005 at mentor dot com>
On 19/11/15 14:50, Tom de Vries wrote:
On 11/11/15 11:58, Richard Biener wrote:
On Mon, 9 Nov 2015, Tom de Vries wrote:
On 09/11/15 16:35, Tom de Vries wrote:
Hi,
this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.
The patch series contains these patches:
1 Insert new exit block only when needed in
transform_to_exit_first_loop_alt
2 Make create_parallel_loop return void
3 Ignore reduction clause on kernels directive
4 Implement -foffload-alias
5 Add in_oacc_kernels_region in struct loop
6 Add pass_oacc_kernels
7 Add pass_dominator_oacc_kernels
8 Add pass_ch_oacc_kernels
9 Add pass_parallelize_loops_oacc_kernels
10 Add pass_oacc_kernels pass group in passes.def
11 Update testcases after adding kernels pass group
12 Handle acc loop directive
13 Add c-c++-common/goacc/kernels-*.c
14 Add gfortran.dg/goacc/kernels-*.f95
15 Add libgomp.oacc-c-c++-common/kernels-*.c
16 Add libgomp.oacc-fortran/kernels-*.f95
The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.
Bootstrapped and reg-tested on x86_64.
Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).
I'll post the individual patches in reply to this message.
This patch adds a pass group pass_oacc_kernels (which will be added
to the
pass list as a whole in patch 10).
Just to understand (while also skimming the HSA patches).
You are basically relying on autopar for what the HSA patches call
"gridification"? That is, OMP lowering produces loopy kernels
and autopar then will basically strip the outermost loop?
Short answer: no. In more detail...
<SNIP>
Reposting patch, after splitting the pass group into two.
Thanks,
- TOm
Add pass_oacc_kernels
2015-11-09 Tom de Vries <tom@codesourcery.com>
* tree-pass.h (make_pass_oacc_kernels, make_pass_oacc_kernels2):
Declare.
* tree-ssa-loop.c (gate_oacc_kernels): New static function.
(pass_data_oacc_kernels, pass_data_oacc_kernels2): New pass_data.
(class pass_oacc_kernels, class pass_oacc_kernels2): New pass.
(make_pass_oacc_kernels, make_pass_oacc_kernels2): New function.
---
gcc/tree-pass.h | 2 +
gcc/tree-ssa-loop.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 112 insertions(+)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index dcd2d5e..9704918 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -465,6 +465,8 @@ extern gimple_opt_pass *make_pass_strength_reduction (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_vtable_verify (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ubsan (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_sanopt (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_oacc_kernels (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_oacc_kernels2 (gcc::context *ctxt);
/* IPA Passes */
extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index afdef12..cf7d94e 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
+#include "omp-low.h"
/* A pass making sure loops are fixed up. */
@@ -141,6 +142,115 @@ make_pass_tree_loop (gcc::context *ctxt)
return new pass_tree_loop (ctxt);
}
+/* Gate for oacc kernels pass group.  Return true iff the group runs on FN.  */
+
+static bool
+gate_oacc_kernels (function *fn)
+{
+ if (flag_tree_parallelize_loops <= 1) /* A flag value of 0 or 1 disables the group.  */
+ return false;
+
+ tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
+ if (oacc_function_attr == NULL_TREE) /* FN carries no oacc function attribute.  */
+ return false;
+
+ tree val = TREE_VALUE (oacc_function_attr);
+ while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE) /* Skip entries whose value is NULL.  */
+ val = TREE_CHAIN (val);
+
+ if (val != NULL_TREE) /* An entry with a non-NULL value was found; NOTE(review): presumably this means FN is not a kernels region -- confirm against omp-low.  */
+ return false;
+
+ struct loop *loop;
+ FOR_EACH_LOOP (loop, 0) /* Accept only if at least one loop is marked as in a kernels region.  */
+ if (loop->in_oacc_kernels_region)
+ return true;
+
+ return false;
+}
+
+/* The oacc kernels superpass: it supplies a gate only, no execute ().  */
+
+namespace {
+
+const pass_data pass_data_oacc_kernels =
+{
+ GIMPLE_PASS, /* type */
+ "oacc_kernels", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_TREE_LOOP, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_oacc_kernels : public gimple_opt_pass
+{
+public:
+ pass_oacc_kernels (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_oacc_kernels, ctxt)
+ {}
+
+ /* opt_pass methods: only the gate is overridden; it defers to gate_oacc_kernels.  */
+ virtual bool gate (function *fn) { return gate_oacc_kernels (fn); }
+
+}; // class pass_oacc_kernels
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_oacc_kernels (gcc::context *ctxt) /* Factory; declared in tree-pass.h.  */
+{
+ return new pass_oacc_kernels (ctxt); /* Fresh pass object constructed with CTXT.  */
+}
+
+namespace { /* Second oacc kernels pass (oacc_kernels2); uses the same gate as pass_oacc_kernels.  */
+
+const pass_data pass_data_oacc_kernels2 =
+{
+ GIMPLE_PASS, /* type */
+ "oacc_kernels2", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_TREE_LOOP, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_oacc_kernels2 : public gimple_opt_pass
+{
+public:
+ pass_oacc_kernels2 (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_oacc_kernels2, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *fn) { return gate_oacc_kernels (fn); }
+ virtual unsigned int execute (function *fn)
+ {
+ /* Rather than having a copy of the previous dump, get some use out of
+ this dump, and try to minimize differences with the following pass
+ (pass_lim), which will initialize the loop optimizer with
+ LOOPS_NORMAL. */
+ loop_optimizer_init (LOOPS_NORMAL); /* Init and finalize back to back; nothing else is done in between.  */
+ loop_optimizer_finalize (fn);
+ return 0;
+ }
+
+}; // class pass_oacc_kernels2
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_oacc_kernels2 (gcc::context *ctxt) /* Factory; declared in tree-pass.h.  */
+{
+ return new pass_oacc_kernels2 (ctxt); /* Fresh pass object constructed with CTXT.  */
+}
+
/* The no-loop superpass. */
namespace {