[gcc/devel/omp/gcc-14] openmp: fix max_vf setting for amdgcn offloading

Paul-Antoine Arras parras@gcc.gnu.org
Fri Jun 28 09:50:55 GMT 2024


https://gcc.gnu.org/g:2481350075a8a562a43d9a7c13a622489c2fd435

commit 2481350075a8a562a43d9a7c13a622489c2fd435
Author: Andrew Stubbs <ams@codesourcery.com>
Date:   Fri Jul 8 11:58:46 2022 +0100

    openmp: fix max_vf setting for amdgcn offloading
    
    Ensure that the "max_vf" figure used for the "safelen" attribute is large
    enough for the largest configured offload device.
    
    This change gives ~10x speed improvement on the Bablestream "dot" benchmark for
    AMD GCN.
    
    gcc/ChangeLog:
    
            * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
            comment.
            * omp-general.cc (omp_max_simd_vf): New function.
            * omp-general.h (omp_max_simd_vf): New prototype.
            * omp-low.cc (lower_rec_simd_input_clauses): Select largest from
              omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
    
    gcc/testsuite/ChangeLog:
    
            * lib/target-supports.exp
            (check_effective_target_amdgcn_offloading_enabled): New.
            (check_effective_target_nvptx_offloading_enabled): New.
            * gcc.dg/gomp/target-vf.c: New test.

Diff:
---
 gcc/ChangeLog.omp                     |  9 +++++++++
 gcc/gimple-loop-versioning.cc         |  5 ++++-
 gcc/omp-general.cc                    | 18 ++++++++++++++++++
 gcc/omp-general.h                     |  1 +
 gcc/omp-low.cc                        |  9 ++++++++-
 gcc/testsuite/ChangeLog.omp           |  7 +++++++
 gcc/testsuite/gcc.dg/gomp/target-vf.c | 21 +++++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp | 10 ++++++++++
 8 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index f4b52d9e3ec..d7256902331 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,12 @@
+2022-07-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	* gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
+	comment.
+	* omp-general.cc (omp_max_simd_vf): New function.
+	* omp-general.h (omp_max_simd_vf): New prototype.
+	* omp-low.cc (lower_rec_simd_input_clauses): Select largest from
+	  omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
+
 2023-08-23  Andrew Stubbs  <ams@codesourcery.com>
 
 	* omp-builtins.def (BUILT_IN_GOMP_ENABLE_PINNED_MODE): New.
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 17877f06921..c22c24bd958 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn)
      unvectorizable code, since it is the largest size that can be
      handled efficiently by scalar code.  omp_max_vf calculates the
      maximum number of bytes in a vector, when such a value is relevant
-     to loop optimization.  */
+     to loop optimization.
+     FIXME: this probably needs to use omp_max_simd_vf when in a target
+     region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that
+     it doesn't actually matter.)  */
   m_maximum_scale = estimated_poly_value (omp_max_vf ());
   m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
 }
diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 9a125a28afa..faa248ebd17 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1039,6 +1039,24 @@ omp_max_simt_vf (void)
   return 0;
 }
 
+/* Return maximum SIMD width if offloading may target SIMD hardware.  */
+
+int
+omp_max_simd_vf (void)
+{
+  if (!optimize)
+    return 0;
+  if (ENABLE_OFFLOADING)
+    for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
+      {
+	if (startswith (c, "amdgcn"))
+	  return 64;
+	else if ((c = strchr (c, ':')))
+	  c++;
+      }
+  return 0;
+}
+
 /* Store the construct selectors as tree codes from last to first.
    CTX is a list of trait selectors, nconstructs must be equal to its
    length, and the array CONSTRUCTS holds the output.  */
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index 15e092f1286..e478d9bdeab 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -164,6 +164,7 @@ extern gimple *omp_build_barrier (tree lhs);
 extern tree find_combined_omp_for (tree *, int *, void *);
 extern poly_uint64 omp_max_vf (void);
 extern int omp_max_simt_vf (void);
+extern int omp_max_simd_vf (void);
 extern const char *omp_context_name_list_prop (tree);
 extern void omp_construct_traits_to_codes (tree, int, enum tree_code *);
 extern tree omp_check_context_selector (location_t loc, tree ctx);
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 94de26b6013..dc0a6906c67 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -4822,7 +4822,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
 {
   if (known_eq (sctx->max_vf, 0U))
     {
-      sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
+      /* If we are compiling for multiple devices choose the largest VF.  */
+      sctx->max_vf = omp_max_vf ();
+      if (omp_maybe_offloaded_ctx (ctx))
+	{
+	  if (sctx->is_simt)
+	    sctx->max_vf = ordered_max (sctx->max_vf, (unsigned) omp_max_simt_vf ());
+	  sctx->max_vf = ordered_max (sctx->max_vf, (unsigned) omp_max_simd_vf ());
+	}
       if (maybe_gt (sctx->max_vf, 1U))
 	{
 	  tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 7756bc0bb92..5bf09420432 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2022-07-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	* lib/target-supports.exp
+	(check_effective_target_amdgcn_offloading_enabled): New.
+	(check_effective_target_nvptx_offloading_enabled): New.
+	* gcc.dg/gomp/target-vf.c: New test.
+
 2022-06-27  Tobias Burnus  <tobias@codesourcery.com>
 
 	* gfortran.dg/gomp/num-teams-2.f90: Use dg-error not dg-warning.
diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c
new file mode 100644
index 00000000000..14cea45e53c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */ 
+
+/* Ensure that the omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf are working
+   properly to set the OpenMP vectorization factor for the offload target, and
+   not just for the host.  */
+
+float
+foo (float * __restrict x, float * __restrict y)
+{
+  float sum = 0.0;
+
+#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
+  for (int i=0; i<1024; i++)
+    sum += x[i] * y[i];
+
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump  "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */
+/* { dg-final { scan-tree-dump  "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3a55b2a4159..ca75911a0d6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1120,6 +1120,16 @@ proc check_effective_target_offloading_enabled {} {
     return [check_configured_with "--enable-offload-targets"]
 }
 
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_amdgcn_offloading_enabled {} {
+    return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}]
+}
+
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_nvptx_offloading_enabled {} {
+    return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}]
+}
+
 # Return 1 if compilation with -fopenacc is error-free for trivial
 # code, 0 otherwise.


More information about the Gcc-cvs mailing list