[gcc/devel/omp/gcc-14] openmp: fix max_vf setting for amdgcn offloading
Paul-Antoine Arras
parras@gcc.gnu.org
Fri Jun 28 09:50:55 GMT 2024
https://gcc.gnu.org/g:2481350075a8a562a43d9a7c13a622489c2fd435
commit 2481350075a8a562a43d9a7c13a622489c2fd435
Author: Andrew Stubbs <ams@codesourcery.com>
Date: Fri Jul 8 11:58:46 2022 +0100
openmp: fix max_vf setting for amdgcn offloading
Ensure that the "max_vf" figure used for the "safelen" attribute is large
enough for the largest configured offload device.
This change gives ~10x speed improvement on the Bablestream "dot" benchmark for
AMD GCN.
gcc/ChangeLog:
* gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
comment.
* omp-general.cc (omp_max_simd_vf): New function.
* omp-general.h (omp_max_simd_vf): New prototype.
* omp-low.cc (lower_rec_simd_input_clauses): Select largest from
omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp
(check_effective_target_amdgcn_offloading_enabled): New.
(check_effective_target_nvptx_offloading_enabled): New.
* gcc.dg/gomp/target-vf.c: New test.
Diff:
---
gcc/ChangeLog.omp | 9 +++++++++
gcc/gimple-loop-versioning.cc | 5 ++++-
gcc/omp-general.cc | 18 ++++++++++++++++++
gcc/omp-general.h | 1 +
gcc/omp-low.cc | 9 ++++++++-
gcc/testsuite/ChangeLog.omp | 7 +++++++
gcc/testsuite/gcc.dg/gomp/target-vf.c | 21 +++++++++++++++++++++
gcc/testsuite/lib/target-supports.exp | 10 ++++++++++
8 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index f4b52d9e3ec..d7256902331 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,12 @@
+2022-07-12 Andrew Stubbs <ams@codesourcery.com>
+
+ * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
+ comment.
+ * omp-general.cc (omp_max_simd_vf): New function.
+ * omp-general.h (omp_max_simd_vf): New prototype.
+ * omp-low.cc (lower_rec_simd_input_clauses): Select largest from
+ omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
+
2023-08-23 Andrew Stubbs <ams@codesourcery.com>
* omp-builtins.def (BUILT_IN_GOMP_ENABLE_PINNED_MODE): New.
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 17877f06921..c22c24bd958 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn)
unvectorizable code, since it is the largest size that can be
handled efficiently by scalar code. omp_max_vf calculates the
maximum number of bytes in a vector, when such a value is relevant
- to loop optimization. */
+ to loop optimization.
+ FIXME: this probably needs to use omp_max_simd_vf when in a target
+ region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that
+ it doesn't actually matter.) */
m_maximum_scale = estimated_poly_value (omp_max_vf ());
m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
}
diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 9a125a28afa..faa248ebd17 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1039,6 +1039,24 @@ omp_max_simt_vf (void)
return 0;
}
+/* Return maximum SIMD width if offloading may target SIMD hardware. */
+
+int
+omp_max_simd_vf (void)
+{
+ if (!optimize)
+ return 0;
+ if (ENABLE_OFFLOADING)
+ for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
+ {
+ if (startswith (c, "amdgcn"))
+ return 64;
+ else if ((c = strchr (c, ':')))
+ c++;
+ }
+ return 0;
+}
+
/* Store the construct selectors as tree codes from last to first.
CTX is a list of trait selectors, nconstructs must be equal to its
length, and the array CONSTRUCTS holds the output. */
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index 15e092f1286..e478d9bdeab 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -164,6 +164,7 @@ extern gimple *omp_build_barrier (tree lhs);
extern tree find_combined_omp_for (tree *, int *, void *);
extern poly_uint64 omp_max_vf (void);
extern int omp_max_simt_vf (void);
+extern int omp_max_simd_vf (void);
extern const char *omp_context_name_list_prop (tree);
extern void omp_construct_traits_to_codes (tree, int, enum tree_code *);
extern tree omp_check_context_selector (location_t loc, tree ctx);
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 94de26b6013..dc0a6906c67 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -4822,7 +4822,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
{
if (known_eq (sctx->max_vf, 0U))
{
- sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
+ /* If we are compiling for multiple devices choose the largest VF. */
+ sctx->max_vf = omp_max_vf ();
+ if (omp_maybe_offloaded_ctx (ctx))
+ {
+ if (sctx->is_simt)
+ sctx->max_vf = ordered_max (sctx->max_vf, (unsigned) omp_max_simt_vf ());
+ sctx->max_vf = ordered_max (sctx->max_vf, (unsigned) omp_max_simd_vf ());
+ }
if (maybe_gt (sctx->max_vf, 1U))
{
tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 7756bc0bb92..5bf09420432 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2022-07-12 Andrew Stubbs <ams@codesourcery.com>
+
+ * lib/target-supports.exp
+ (check_effective_target_amdgcn_offloading_enabled): New.
+ (check_effective_target_nvptx_offloading_enabled): New.
+ * gcc.dg/gomp/target-vf.c: New test.
+
2022-06-27 Tobias Burnus <tobias@codesourcery.com>
* gfortran.dg/gomp/num-teams-2.f90: Use dg-error not dg-warning.
diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c
new file mode 100644
index 00000000000..14cea45e53c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */
+
+/* Ensure that the omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf are working
+ properly to set the OpenMP vectorization factor for the offload target, and
+ not just for the host. */
+
+float
+foo (float * __restrict x, float * __restrict y)
+{
+ float sum = 0.0;
+
+#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
+ for (int i=0; i<1024; i++)
+ sum += x[i] * y[i];
+
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */
+/* { dg-final { scan-tree-dump "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3a55b2a4159..ca75911a0d6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1120,6 +1120,16 @@ proc check_effective_target_offloading_enabled {} {
return [check_configured_with "--enable-offload-targets"]
}
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_amdgcn_offloading_enabled {} {
+ return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}]
+}
+
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_nvptx_offloading_enabled {} {
+ return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}]
+}
+
# Return 1 if compilation with -fopenacc is error-free for trivial
# code, 0 otherwise.
More information about the Gcc-cvs
mailing list