This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, 5/4] Handle GOMP_OPENACC_NVPTX_PTXRW in libgomp nvptx plugin
- From: Tom de Vries <Tom_deVries at mentor dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Thomas Schwinge <thomas at codesourcery dot com>
- Date: Tue, 27 Jun 2017 11:16:46 +0200
- Subject: [PATCH, 5/4] Handle GOMP_OPENACC_NVPTX_PTXRW in libgomp nvptx plugin
- Authentication-results: sourceware.org; auth=none
- References: <2413b0f6-9cb2-243f-d805-08323a9c9a0a@mentor.com>
[ was: Re: [PATCH, 0/4] Handle
GOMP_OPENACC_NVPTX_{DISASM,SAVE_TEMPS,JIT} in libgomp nvptx plugin ]
On 06/26/2017 01:24 PM, Tom de Vries wrote:
Hi,
I've written a patch series to facilitate debugging libgomp openacc
testcase failures on the nvptx accelerator.
When running an openacc test-case on an nvptx accelerator, the following
happens:
- the plugin obtains the ptx assembly for the acceleration kernels
- it calls the cuda jit to compile and link the ptx into a module
- it loads the module
- it starts an acceleration kernel
A typical scenario when developing the compiler is:
- run gcc test.c -save-temps
- run a.out
- edit test.s to fix bug or make code faster or smaller
- run gcc test.s
- run a.out
- edit compiler sources to make the compiler do the same as the .s edit
With openacc test-cases, this scenario is currently not available for
ptx assembly. Using -save-temps -foffload=-save-temps we can get a .s
containing ptx. But to insert the edited .s back into the compilation
flow is difficult.
This patch facilitates such a scenario in the nvptx plugin.
- we define GOMP_OPENACC_NVPTX_PTXRW == 'w', and the plugin writes the
ptx assembly into a series of files
- we edit one of those files
- we define GOMP_OPENACC_NVPTX_PTXRW == 'r', and the plugin reads the
ptx assembly back from those files, and uses that instead of the ptx
in the executable.
I've tested this patch series on top of gomp-4_0-branch, by running an
openacc testcase from the command line and going through the
write-edit-read scenario with an observable ptx edit.
OK for trunk if bootstrap and reg-test on x86_64 with nvidia accelerator
succeed?
Thanks,
- Tom
Handle GOMP_OPENACC_NVPTX_PTXRW in libgomp nvptx plugin
2017-06-27 Tom de Vries <tom@codesourcery.com>
* plugin/plugin-nvptx.c (post_process_ptx): New function.
(link_ptx): Call post_process_ptx.
---
libgomp/plugin/plugin-nvptx.c | 129 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 127 insertions(+), 2 deletions(-)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index db42292..26e453f 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1020,6 +1020,128 @@ process_GOMP_OPENACC_NVPTX_JIT (intptr_t *gomp_openacc_nvptx_o,
}
}
+/* If environment variable GOMP_OPENACC_NVPTX_PTXRW=[Ww], write *RES_CODE to
+   file plugin-nvptx.<NUM>.ptx.  If it is [Rr], read *RES_CODE from that file
+   instead, and set *RES_SIZE to the bytes read plus the terminating '\0'.
+   The variable is parsed on the first call only.  File errors are reported
+   with GOMP_PLUGIN_debug and leave *RES_CODE and *RES_SIZE untouched.  */
+
+static void
+post_process_ptx (unsigned num, const char **res_code, size_t *res_size)
+{
+  static int gomp_openacc_nvptx_ptxrw = -1;
+
+  if (gomp_openacc_nvptx_ptxrw == -1)
+    {
+      const char *var_name = "GOMP_OPENACC_NVPTX_PTXRW";
+      const char *env_var = secure_getenv (var_name);
+      notify_var (var_name, env_var);
+
+      gomp_openacc_nvptx_ptxrw = 0;
+      if (env_var == NULL)
+        ;
+      else if ((env_var[0] == 'w' || env_var[0] == 'W')
+               && env_var[1] == '\0')
+        gomp_openacc_nvptx_ptxrw = 1;
+      else if ((env_var[0] == 'r' || env_var[0] == 'R')
+               && env_var[1] == '\0')
+        gomp_openacc_nvptx_ptxrw = 2;
+      else
+        GOMP_PLUGIN_error ("Error parsing %s", var_name);
+    }
+
+  if (gomp_openacc_nvptx_ptxrw == 0)
+    return;
+
+  const char *prefix = "plugin-nvptx.";
+  const char *postfix = ".ptx";
+  /* 10 digits are enough for %u as long as unsigned is at most 32 bits.  */
+  const int len = strlen (prefix) + 10 + strlen (postfix) + 1 /* '\0'.  */;
+  char file_name[len];
+  int res = snprintf (file_name, len, "%s%u%s", prefix, num, postfix);
+  assert (res < len); /* Assert there's no truncation.  */
+
+  GOMP_PLUGIN_debug (0, "%s %s \n",
+                     (gomp_openacc_nvptx_ptxrw == 1 ? "Writing" : "Reading"),
+                     file_name);
+
+  if (gomp_openacc_nvptx_ptxrw == 1)
+    {
+      FILE *ptx_file = fopen (file_name, "w");
+      if (ptx_file == NULL)
+        {
+          GOMP_PLUGIN_debug (0, "Opening %s failed\n", file_name);
+          return;
+        }
+
+      /* *RES_SIZE is expected to count the '\0', so a full write is one less.  */
+      size_t expected = *res_size - 1;
+      res = fprintf (ptx_file, "%s", *res_code);
+      if (res < 0 || (size_t) res != expected)
+        GOMP_PLUGIN_debug (0,
+                           "Writing %s failed: written %d but expected %zu\n",
+                           file_name, res, expected);
+
+      if (fclose (ptx_file) != 0)
+        GOMP_PLUGIN_debug (0, "Closing %s failed\n", file_name);
+      return;
+    }
+
+  /* Only the read mode (2) is left at this point.  */
+  FILE *ptx_file = fopen (file_name, "r");
+  if (ptx_file == NULL)
+    {
+      GOMP_PLUGIN_debug (0, "Opening %s failed\n", file_name);
+      return;
+    }
+
+  char *new_code = NULL;
+  if (fseek (ptx_file, 0L, SEEK_END) != 0)
+    {
+      GOMP_PLUGIN_debug (0, "Seeking end of %s failed\n", file_name);
+      goto fail;
+    }
+
+  long bufsize = ftell (ptx_file);
+  if (bufsize == -1)
+    {
+      GOMP_PLUGIN_debug (0, "ftell of %s failed\n", file_name);
+      goto fail;
+    }
+
+  if (fseek (ptx_file, 0L, SEEK_SET) != 0)
+    {
+      GOMP_PLUGIN_debug (0, "Seeking start of %s failed\n", file_name);
+      goto fail;
+    }
+
+  new_code = GOMP_PLUGIN_malloc (sizeof (char) * (bufsize + 1));
+  size_t new_size = fread (new_code, sizeof (char), bufsize, ptx_file);
+  if (ferror (ptx_file) != 0)
+    {
+      GOMP_PLUGIN_debug (0, "Reading %s failed\n", file_name);
+      goto fail;
+    }
+
+  assert (new_size <= (size_t) bufsize);
+  new_code[new_size++] = '\0';
+
+  if (fclose (ptx_file) != 0)
+    {
+      GOMP_PLUGIN_debug (0, "Closing %s failed\n", file_name);
+      free (new_code);
+      return;
+    }
+
+  *res_code = new_code;
+  *res_size = new_size;
+  return;
+
+ fail:
+  free (new_code);
+  fclose (ptx_file);
+}
+
static bool
link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
unsigned num_objs)
@@ -1073,11 +1195,14 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
for (; num_objs--; ptx_objs++)
{
+ const char *ptx_code = ptx_objs->code;
+ size_t ptx_size = ptx_objs->size;
+ post_process_ptx (num_objs, &ptx_code, &ptx_size);
+ GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_code);
/* cuLinkAddData's 'data' argument erroneously omits the const
qualifier. */
- GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
- (char *) ptx_objs->code, ptx_objs->size,
+ (char *) ptx_code, ptx_size,
0, 0, 0, 0);
if (r != CUDA_SUCCESS)
{