This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [nvptx] vector length patch series
- From: Tom de Vries <tdevries at suse dot de>
- To: "Schwinge, Thomas" <Thomas_Schwinge at mentor dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Sat, 22 Dec 2018 03:13:23 +0100
- Subject: Re: [nvptx] vector length patch series
- References: <d101a77b-fc5f-0396-b8d1-c13f34fd1c38@codesourcery.com> <2ece5d7b-3675-84ab-f255-3c56a2ffd7dc@suse.de> <91b927af-d854-2865-7cbd-9a9a835ab5cc@codesourcery.com> <1394d89c-896e-f6a3-5f9a-78e98b16e85c@suse.de>
On 14-12-18 20:58, Tom de Vries wrote:
> 0003-openacc-Add-target-hook-TARGET_GOACC_ADJUST_PARALLEL.patch
> 0017-nvptx-Enable-large-vectors.patch
1.
If I stub out nvptx_adjust_parallelism like this:
...
static unsigned
nvptx_adjust_parallelism (unsigned inner_mask, unsigned outer_mask)
{
return default_goacc_adjust_parallelism (inner_mask, outer_mask);
}
...
I don't run into any failing tests. From what I can tell, the only
test-case that the proposed implementation of the hook has an effect on,
is the worker vector loop in vred2d-128.c, but that one is passing.
Can you confirm that this hook is in fact needed? Does this test fail on
a specific card? Or is there another test-case that exercises this?
2.
If you have a test-case where this is indeed failing without the
proposed hook implementation, then please try to remove the hardcoding
of vector_length > 32 from the test-source and instead set it using
-fopenacc-dim. AFAIU, the proposed hook does not handle that case, so
you should be able to make it fail.
If so, can you test whether the attached implementation fixes it?
Thanks,
- Tom
[nvptx] Add nvptx_adjust_parallelism
2018-12-17 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx.c (nvptx_adjust_parallelism): New function.
(TARGET_GOACC_ADJUST_PARALLELISM): Define.
---
gcc/config/nvptx/nvptx.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
gcc/omp-offload.c | 7 ++++++
gcc/omp-offload.h | 1 +
3 files changed, 63 insertions(+)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index f4095ff5f55..90bbc5b251e 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5314,6 +5314,58 @@ nvptx_dim_limit (int axis)
return 0;
}
+/* Copy of oacc_validate_dims from omp-offload.c that does not update the function
+ attributes: DIMS is read from ATTRS, validated by the target, then defaulted. */
+
+static void
+oacc_validate_dims_no_update (tree fn, tree attrs, int *dims, int level,
+ unsigned used)
+{
+ tree purpose[GOMP_DIM_MAX]; /* NOTE(review): only written below, never read. */
+ unsigned ix;
+ tree pos = TREE_VALUE (attrs);
+
+ gcc_assert (pos);
+
+ for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+ {
+ purpose[ix] = TREE_PURPOSE (pos);
+ tree val = TREE_VALUE (pos);
+ dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; /* -1: no value in ATTRS. */
+ pos = TREE_CHAIN (pos);
+ }
+
+ targetm.goacc.validate_dims (fn, dims, level);
+
+ for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+ if (dims[ix] < 0) /* Negative after validation: pick by whether IX is in USED. */
+ dims[ix] = (used & GOMP_DIM_MASK (ix)
+ ? oacc_get_default_dim (ix) : oacc_get_min_dim (ix));
+}
+
+/* Adjust the parallelism available to a loop given the vector_length
+ associated with the offloaded function. */
+
+static unsigned
+nvptx_adjust_parallelism (unsigned inner_mask, unsigned outer_mask)
+{
+ bool wv = ((inner_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
+ && (inner_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
+ if (!wv) /* INNER_MASK lacks the worker or the vector bit: use the default. */
+ return default_goacc_adjust_parallelism (inner_mask, outer_mask);
+
+ int dims[GOMP_DIM_MAX];
+ tree attrs = oacc_get_fn_attrib (current_function_decl);
+ int fn_level = oacc_fn_attrib_level (attrs);
+ oacc_validate_dims_no_update (current_function_decl, attrs, dims, fn_level,
+ inner_mask); /* INNER_MASK is passed as the USED argument. */
+
+ if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE)
+ inner_mask &= ~GOMP_DIM_MASK (GOMP_DIM_WORKER); /* Clear the worker bit. */
+
+ return default_goacc_adjust_parallelism (inner_mask, outer_mask);
+}
+
/* Determine whether fork & joins are needed. */
static bool
@@ -6109,6 +6161,9 @@ nvptx_set_current_function (tree fndecl)
#undef TARGET_GOACC_DIM_LIMIT
#define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
+#undef TARGET_GOACC_ADJUST_PARALLELISM
+#define TARGET_GOACC_ADJUST_PARALLELISM nvptx_adjust_parallelism
+
#undef TARGET_GOACC_FORK_JOIN
#define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 3338e0633a1..80ecda82d24 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -580,6 +580,13 @@ oacc_get_default_dim (int dim)
return oacc_default_dims[dim];
}
+int
+oacc_get_min_dim (int dim) /* Return the oacc_min_dims entry for DIM. */
+{
+ gcc_assert (0 <= dim && dim < GOMP_DIM_MAX); /* Reject DIM outside [0, GOMP_DIM_MAX). */
+ return oacc_min_dims[dim];
+}
+
/* Parse the default dimension parameter. This is a set of
:-separated optional compute dimensions. Each specified dimension
is a positive integer. When device type support is added, it is
diff --git a/gcc/omp-offload.h b/gcc/omp-offload.h
index 176c4da7e88..08e994abdb9 100644
--- a/gcc/omp-offload.h
+++ b/gcc/omp-offload.h
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
#define GCC_OMP_DEVICE_H
extern int oacc_get_default_dim (int dim);
+extern int oacc_get_min_dim (int dim);
extern int oacc_fn_attrib_level (tree attr);
extern GTY(()) vec<tree, va_gc> *offload_funcs;