[gcc/devel/omp/gcc-14] nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable [PR97384, PR105274]

Paul-Antoine Arras parras@gcc.gnu.org
Fri Jun 28 09:52:13 GMT 2024


https://gcc.gnu.org/g:0c917e4821cf4b3dc46e703e7f1fcbf9dd7f0f13

commit 0c917e4821cf4b3dc46e703e7f1fcbf9dd7f0f13
Author: Thomas Schwinge <tschwinge@baylibre.com>
Date:   Fri May 31 17:04:39 2024 +0200

    nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable [PR97384, PR105274]
    
    ... as a means to manually set the "native" GPU thread stack size.
    
            PR libgomp/97384
            PR libgomp/105274
            libgomp/
            * plugin/cuda-lib.def (cuCtxSetLimit): Add.
            * plugin/plugin-nvptx.c (nvptx_open_device): Handle
            'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable.
    
    (cherry picked from commit 0d25989d60d15866ef4737d66e02432f50717255)

Diff:
---
 libgomp/ChangeLog.omp         |  6 ++++++
 libgomp/plugin/cuda-lib.def   |  1 +
 libgomp/plugin/plugin-nvptx.c | 45 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index b1c3ec684fd..4021dc46fab 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,9 @@
+2024-05-31  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* plugin/cuda-lib.def (cuCtxSetLimit): Add.
+	* plugin/plugin-nvptx.c (nvptx_open_device): Handle
+	'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable.
+
 2024-06-05  Thomas Schwinge  <tschwinge@baylibre.com>
 
 	* plugin/plugin-nvptx.c (nvptx_do_global_cdtors): New.
diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index bd25375c26a..f3aa3fb3639 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -4,6 +4,7 @@ CUDA_ONE_CALL (cuCtxGetCurrent)
 CUDA_ONE_CALL (cuCtxGetDevice)
 CUDA_ONE_CALL (cuCtxPopCurrent)
 CUDA_ONE_CALL (cuCtxPushCurrent)
+CUDA_ONE_CALL (cuCtxSetLimit)
 CUDA_ONE_CALL (cuCtxSynchronize)
 CUDA_ONE_CALL (cuDeviceGet)
 CUDA_ONE_CALL (cuDeviceGetAttribute)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 16c69bb4511..84bac0beafd 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -150,6 +150,8 @@ init_cuda_lib (void)
 
 #include "secure_getenv.h"
 
+static void notify_var (const char *, const char *);
+
 #undef MIN
 #undef MAX
 #define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
@@ -341,6 +343,9 @@ struct ptx_device
 
 static struct ptx_device **ptx_devices;
 
+/* "Native" GPU thread stack size.  */
+static unsigned native_gpu_thread_stack_size = 0;
+
 /* OpenMP kernels reserve a small amount of ".shared" space for use by
    omp_alloc.  The size is configured using GOMP_NVPTX_LOWLAT_POOL, but the
    default is set here.  */
@@ -555,6 +560,46 @@ nvptx_open_device (int n)
   ptx_dev->free_blocks = NULL;
   pthread_mutex_init (&ptx_dev->free_blocks_lock, NULL);
 
+  /* "Native" GPU thread stack size.  */
+  {
+    /* This is intentionally undocumented, until we work out a proper, common
+       scheme (as much as makes sense) between all offload plugins as well
+       as between nvptx offloading use of "native" stacks for OpenACC vs.
+       OpenMP "soft stacks" vs. OpenMP '-msoft-stack-reserve-local=[...]'.
+
+       GCN offloading has a 'GCN_STACK_SIZE' environment variable (without
+       'GOMP_' prefix): documented; presumably used for all things OpenACC and
+       OpenMP?  Based on GCN command-line option '-mstack-size=[...]' (marked
+       "obsolete"), that one may be set via a GCN 'mkoffload'-synthesized
+       'constructor' function.  */
+    const char *var_name = "GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE";
+    const char *env_var = secure_getenv (var_name);
+    notify_var (var_name, env_var);
+
+    if (env_var != NULL)
+      {
+	char *endptr;
+	unsigned long val = strtoul (env_var, &endptr, 10);
+	if (endptr == NULL || *endptr != '\0'
+	    || errno == ERANGE || errno == EINVAL
+	    || val > UINT_MAX)
+	  GOMP_PLUGIN_error ("Error parsing %s", var_name);
+	else
+	  native_gpu_thread_stack_size = val;
+      }
+  }
+  if (native_gpu_thread_stack_size == 0)
+    ; /* Zero means use default.  */
+  else
+    {
+      GOMP_PLUGIN_debug (0, "Setting \"native\" GPU thread stack size"
+			 " ('CU_LIMIT_STACK_SIZE') to %u bytes\n",
+			 native_gpu_thread_stack_size);
+      CUDA_CALL (cuCtxSetLimit,
+		 CU_LIMIT_STACK_SIZE, (size_t) native_gpu_thread_stack_size);
+    }
+
+  /* OpenMP "soft stacks".  */
   ptx_dev->omp_stacks.ptr = 0;
   ptx_dev->omp_stacks.size = 0;
   pthread_mutex_init (&ptx_dev->omp_stacks.lock, NULL);


More information about the Gcc-cvs mailing list