This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH 3/4, libgomp] Resolve deadlock on plugin exit, HSA plugin parts
- From: Chung-Lin Tang <cltang at codesourcery dot com>
- To: gcc-patches <gcc-patches at gcc dot gnu dot org>, Martin Jambor <mjambor at suse dot cz>
- Date: Mon, 21 Mar 2016 18:22:17 +0800
- Subject: [PATCH 3/4, libgomp] Resolve deadlock on plugin exit, HSA plugin parts
- Authentication-results: sourceware.org; auth=none
Hi Martin, I think you're the one to CC for this,
as I mentioned in the first email, this has been build tested, however I did
not know if I could test this without a Radeon card. If convenient,
could you or anyone familiar with the setup do a make check-target-libgomp
with this patch series?
Thanks,
Chung-Lin
* plugin/plugin-hsa.c (hsa_warn): Adjust 'hsa_error' local variable
to 'hsa_error_msg', for clarity.
(hsa_fatal): Likewise.
(hsa_error): New function.
(init_hsa_context): Change return type to bool, adjust to return
false on error.
(queue_callback): Adjust to call hsa_error.
(GOMP_OFFLOAD_get_num_devices): Adjust to handle init_hsa_context
return value.
(GOMP_OFFLOAD_init_device): Change return type to bool, adjust to
return false on error.
(get_agent_info): Adjust to return NULL on error.
(destroy_hsa_program): Change return type to bool, adjust to
return false on error.
(GOMP_OFFLOAD_load_image): Adjust to return -1 on error.
(destroy_module): Change return type to bool, adjust to
return false on error.
(GOMP_OFFLOAD_unload_image): Likewise.
(GOMP_OFFLOAD_fini_device): Likewise.
(GOMP_OFFLOAD_alloc): Change to return NULL when called.
(GOMP_OFFLOAD_free): Change to return false when called.
(GOMP_OFFLOAD_dev2host): Likewise.
(GOMP_OFFLOAD_host2dev): Likewise.
(GOMP_OFFLOAD_dev2dev): Likewise.
Index: libgomp/plugin/plugin-hsa.c
===================================================================
--- libgomp/plugin/plugin-hsa.c (revision 234358)
+++ libgomp/plugin/plugin-hsa.c (working copy)
@@ -175,10 +175,10 @@ hsa_warn (const char *str, hsa_status_t status)
if (!debug)
return;
- const char *hsa_error;
- hsa_status_string (status, &hsa_error);
+ const char *hsa_error_msg;
+ hsa_status_string (status, &hsa_error_msg);
- fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
+ fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
}
/* Report a fatal error STR together with the HSA error corresponding to STATUS
@@ -187,12 +187,25 @@ hsa_warn (const char *str, hsa_status_t status)
static void
hsa_fatal (const char *str, hsa_status_t status)
{
- const char *hsa_error;
- hsa_status_string (status, &hsa_error);
+ const char *hsa_error_msg;
+ hsa_status_string (status, &hsa_error_msg);
GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
- hsa_error);
+ hsa_error_msg);
}
+/* Like hsa_fatal, except only report error message, and return FALSE
+ for propagating error processing to outside of plugin. */
+
+static bool
+hsa_error (const char *str, hsa_status_t status)
+{
+ const char *hsa_error_msg;
+ hsa_status_string (status, &hsa_error_msg);
+ GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
+ hsa_error_msg);
+ return false;
+}
+
struct hsa_kernel_description
{
const char *name;
@@ -420,22 +433,22 @@ assign_agent_ids (hsa_agent_t agent, void *data)
/* Initialize hsa_context if it has not already been done. */
-static void
+static bool
init_hsa_context (void)
{
hsa_status_t status;
int agent_index = 0;
if (hsa_context.initialized)
- return;
+ return true;
init_enviroment_variables ();
status = hsa_init ();
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Run-time could not be initialized", status);
+ return hsa_error ("Run-time could not be initialized", status);
HSA_DEBUG ("HSA run-time initialized\n");
status = hsa_iterate_agents (count_gpu_agents, NULL);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("HSA GPU devices could not be enumerated", status);
+ return hsa_error ("HSA GPU devices could not be enumerated", status);
HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
hsa_context.agents
@@ -443,8 +456,12 @@ init_hsa_context (void)
* sizeof (struct agent_info));
status = hsa_iterate_agents (assign_agent_ids, &agent_index);
if (agent_index != hsa_context.agent_count)
- GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
+ {
+ GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
+ return false;
+ }
hsa_context.initialized = true;
+ return true;
}
/* Callback of dispatch queues to report errors. */
@@ -454,7 +471,7 @@ queue_callback (hsa_status_t status,
hsa_queue_t *queue __attribute__ ((unused)),
void *data __attribute__ ((unused)))
{
- hsa_fatal ("Asynchronous queue error", status);
+ hsa_error ("Asynchronous queue error", status);
}
/* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
@@ -492,61 +509,77 @@ get_kernarg_memory_region (hsa_region_t region, vo
int
GOMP_OFFLOAD_get_num_devices (void)
{
- init_hsa_context ();
+ if (!init_hsa_context ())
+ return 0;
return hsa_context.agent_count;
}
/* Part of the libgomp plugin interface. Initialize agent number N so that it
can be used for computation. */
-void
+bool
GOMP_OFFLOAD_init_device (int n)
{
- init_hsa_context ();
+ if (!init_hsa_context ())
+ return false;
if (n >= hsa_context.agent_count)
- GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n);
+ {
+ GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
+ return false;
+ }
struct agent_info *agent = &hsa_context.agents[n];
if (agent->initialized)
- return;
+ return true;
if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
- GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
+ return false;
+ }
if (pthread_mutex_init (&agent->prog_mutex, NULL))
- GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
+ return false;
+ }
uint32_t queue_size;
hsa_status_t status;
status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
&queue_size);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
+ return hsa_error ("Error requesting maximum queue size of the HSA agent",
+ status);
status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error querying the ISA of the agent", status);
+ return hsa_error ("Error querying the ISA of the agent", status);
status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
queue_callback, NULL, UINT32_MAX, UINT32_MAX,
&agent->command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error creating command queue", status);
+ return hsa_error ("Error creating command queue", status);
status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
queue_callback, NULL, UINT32_MAX, UINT32_MAX,
&agent->kernel_dispatch_command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error creating kernel dispatch command queue", status);
+ return hsa_error ("Error creating kernel dispatch command queue", status);
agent->kernarg_region.handle = (uint64_t) -1;
status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
&agent->kernarg_region);
if (agent->kernarg_region.handle == (uint64_t) -1)
- GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
- "arguments");
+ {
+ GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
+ "arguments");
+ return false;
+ }
HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
(long long unsigned) agent->command_q->id);
HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
(long long unsigned) agent->kernel_dispatch_command_q->id);
agent->initialized = true;
+ return true;
}
/* Verify that hsa_context has already been initialized and return the
@@ -556,11 +589,20 @@ static struct agent_info *
get_agent_info (int n)
{
if (!hsa_context.initialized)
- GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
+ {
+ GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
+ return NULL;
+ }
if (n >= hsa_context.agent_count)
- GOMP_PLUGIN_fatal ("Request to operate on anon-existing HSA device %i", n);
+ {
+ GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
+ return NULL;
+ }
if (!hsa_context.agents[n].initialized)
- GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
+ {
+ GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
+ return NULL;
+ }
return &hsa_context.agents[n];
}
@@ -592,11 +634,11 @@ remove_module_from_agent (struct agent_info *agent
/* Free the HSA program in agent and everything associated with it and set
agent->prog_finalized and the initialized flags of all kernels to false. */
-static void
+static bool
destroy_hsa_program (struct agent_info *agent)
{
if (!agent->prog_finalized || agent->prog_finalized_error)
- return;
+ return true;
hsa_status_t status;
@@ -604,7 +646,7 @@ destroy_hsa_program (struct agent_info *agent)
status = hsa_executable_destroy (agent->executable);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not destroy HSA executable", status);
+ return hsa_error ("Could not destroy HSA executable", status);
struct module_info *module;
for (module = agent->first_module; module; module = module->next)
@@ -614,6 +656,7 @@ destroy_hsa_program (struct agent_info *agent)
module->kernels[i].initialized = false;
}
agent->prog_finalized = false;
+ return true;
}
/* Part of the libgomp plugin interface. Load BRIG module described by struct
@@ -625,9 +668,12 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version
struct addr_pair **target_table)
{
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ return -1;
+ }
struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
struct agent_info *agent;
@@ -637,10 +683,17 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version
int kernel_count = image_desc->kernel_count;
agent = get_agent_info (ord);
+ if (!agent)
+ return -1;
+
if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
- if (agent->prog_finalized)
- destroy_hsa_program (agent);
+ {
+ GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
+ return -1;
+ }
+ if (agent->prog_finalized
+ && !destroy_hsa_program (agent))
+ return -1;
HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
@@ -668,7 +721,10 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version
kernel->dependencies_count = d->kernel_dependencies_count;
kernel->dependencies = d->kernel_dependencies;
if (pthread_mutex_init (&kernel->init_mutex, NULL))
- GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
+ return -1;
+ }
kernel++;
pair++;
@@ -676,7 +732,10 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version
add_module_to_agent (agent, module);
if (pthread_rwlock_unlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
+ return -1;
+ }
return kernel_count;
}
@@ -1358,32 +1417,44 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn,
/* Deinitialize all information associated with MODULE and kernels within
it. */
-void
+static bool
destroy_module (struct module_info *module)
{
int i;
for (i = 0; i < module->kernel_count; i++)
if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
- GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization "
- "mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
+ "mutex");
+ return false;
+ }
+ return true;
}
/* Part of the libgomp plugin interface. Unload BRIG module described by
struct brig_image_desc in TARGET_DATA from agent number N. */
-void
+bool
GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
{
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ return false;
+ }
struct agent_info *agent;
agent = get_agent_info (n);
+ if (!agent)
+ return false;
+
if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
-
+ {
+ GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
+ return false;
+ }
struct module_info *module = agent->first_module;
while (module)
{
@@ -1392,15 +1463,24 @@ GOMP_OFFLOAD_unload_image (int n, unsigned version
module = module->next;
}
if (!module)
- GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been "
- "loaded before");
+ {
+ GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
+ "loaded before");
+ return false;
+ }
remove_module_from_agent (agent, module);
- destroy_module (module);
+ if (!destroy_module (module))
+ return false;
free (module);
- destroy_hsa_program (agent);
+ if (!destroy_hsa_program (agent))
+ return false;
if (pthread_rwlock_unlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
+ return false;
+ }
+ return true;
}
/* Part of the libgomp plugin interface. Deinitialize all information and
@@ -1409,37 +1489,49 @@ GOMP_OFFLOAD_unload_image (int n, unsigned version
deinitialization of a device that is in any way being used at the same
time. */
-void
+bool
GOMP_OFFLOAD_fini_device (int n)
{
struct agent_info *agent = get_agent_info (n);
+ if (!agent)
+ return false;
+
if (!agent->initialized)
- return;
+ return true;
struct module_info *next_module = agent->first_module;
while (next_module)
{
struct module_info *module = next_module;
next_module = module->next;
- destroy_module (module);
+ if (!destroy_module (module))
+ return false;
free (module);
}
agent->first_module = NULL;
- destroy_hsa_program (agent);
+ if (!destroy_hsa_program (agent))
+ return false;
release_agent_shared_libraries (agent);
hsa_status_t status = hsa_queue_destroy (agent->command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error destroying command queue", status);
+ return hsa_error ("Error destroying command queue", status);
status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error destroying kernel dispatch command queue", status);
+ return hsa_error ("Error destroying kernel dispatch command queue", status);
if (pthread_mutex_destroy (&agent->prog_mutex))
- GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
+ return false;
+ }
if (pthread_rwlock_destroy (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
+ return false;
+ }
agent->initialized = false;
+ return true;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
@@ -1448,46 +1540,51 @@ GOMP_OFFLOAD_fini_device (int n)
void *
GOMP_OFFLOAD_alloc (int ord, size_t size)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
"it should never be called");
+ return NULL;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void
+bool
GOMP_OFFLOAD_free (int ord, void *ptr)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
"it should never be called");
+ return false;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void *
+bool
GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
"it should never be called");
+ return false;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void *
+bool
GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
"it should never be called");
+ return false;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void *
+bool
GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
"it should never be called");
+ return false;
}