This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 1/5] OpenACC 2.0 support for libgomp - OpenACC runtime, NVidia PTX/CUDA plugin (repost)


Hi!

On Tue, 11 Nov 2014 13:53:23 +0000, Julian Brown <julian@codesourcery.com> wrote:
> On Tue, 23 Sep 2014 19:19:31 +0100
> Julian Brown <julian@codesourcery.com> wrote:
> 
> > This patch contains the bulk of the OpenACC 2.0 runtime support,
> > building around, or on top of, the OpenMP 4.0 support (as previously
> > posted or already extant upstream) where we could. [...]
> 
> Here is a new version of the OpenACC support patch for libgomp, [...]

> --- a/libgomp/target.c
> +++ b/libgomp/target.c

> @@ -673,7 +753,12 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
>  	     unsigned char *kinds)
>  {
>    struct gomp_device_descr *devicep = resolve_device (device);
> -  if (devicep == NULL)
> +  struct gomp_memory_mapping *mm = &devicep->mem_map;
> +
> +  if (devicep != NULL && !devicep->is_initialized)
> +    gomp_init_dev_tables (devicep);
> +
> +  if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
>      {
>        /* Host fallback.  */
>        struct gomp_thread old_thr, *thr = gomp_thread ();
> @@ -690,20 +775,30 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
>        return;
>      }
>  
> -  gomp_mutex_lock (&devicep->dev_env_lock);
> -  if (!devicep->is_initialized)
> -    gomp_init_device (devicep);
> [...]
> -  gomp_mutex_unlock (&devicep->dev_env_lock);

Here, gomp_init_device has (correctly) been replaced with
gomp_init_dev_tables, but also locking of dev_env_lock removed.

Then, shortly after that:

> [...]
> +      gomp_mutex_lock (&mm->lock);
> +      if (!devicep->is_initialized)
> +	gomp_init_dev_tables (devicep);
> [...]
> +      gomp_mutex_unlock (&mm->lock);
> [...]

Again checking the device's is_initialized flag (should instead be the
memory mappings'?), and again calling gomp_init_dev_tables?  What I think
this meant to do is more fine-grained locking and initialization,
distinguishing between the device itself and its memory mapping tables?

Similar for GOMP_target_data and GOMP_target_update.

Removing locking from device initialization is certainly not a good idea,
for example:

    $ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/ -Bbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/ -Bbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/.libs -Ibuild-gcc/x86_64-unknown-linux-gnu/./libgomp -Isource-gcc/libgomp/testsuite/../../include -Isource-gcc/libgomp/testsuite/.. -Binstall/offload-x86_64-intelmicemul-linux-gnu/libexec/gcc/x86_64-unknown-linux-gnu/5.0.0 -Binstall/offload-x86_64-intelmicemul-linux-gnu/bin -fopenmp -Bbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/../libquadmath/.libs/   -O0   -Bbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/../libgfortran/.libs -fintrinsic-modules-path=build-gcc/x86_64-unknown-linux-gnu/./libgomp   -Lbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/.libs -Lbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/../libquadmath/.libs/ -Lbuild-gcc/x86_64-unknown-linux-gnu/./libgomp/../libgfortran/.libs -lgfortran -lm -g source-gcc/libgomp/testsuite/libgomp.fortran/target7.f90 
    $ LD_LIBRARY_PATH=.:build-gcc/x86_64-unknown-linux-gnu/./libgomp/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../liboffloadmic/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../liboffloadmic/plugin/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../libstdc++-v3/src/.libs:install/offload-x86_64-intelmicemul-linux-gnu/lib64:install/offload-x86_64-intelmicemul-linux-gnu/lib:build-gcc/gcc:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../libgfortran/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../libquadmath/.libs:.:build-gcc/x86_64-unknown-linux-gnu/./libgomp/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../liboffloadmic/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../liboffloadmic/plugin/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../libstdc++-v3/src/.libs:install/offload-x86_64-intelmicemul-linux-gnu/lib64:install/offload-x86_64-intelmicemul-linux-gnu/lib:build-gcc/gcc:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../libgfortran/.libs:build-gcc/x86_64-unknown-linux-gnu/./libgomp/../libquadmath/.libs: ./a.out
    
    libgomp: 
    libgomp: Duplicate node
    Duplicate node
    libgomp: 
    COI ERROR - TARGET: Cannot read from pipe.
    Success
    
    offload error: wait for process shutdown failed on device 0 (error code 1)

Smells like a concurrency issue, and indeed, with OMP_NUM_THREADS=1
everything is fine.  Confirmed (by looking at the source code, or) with
GDB:

    #0  gomp_fatal (fmt=fmt@entry=0x7ffff75ac69b "Duplicate node")
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/error.c:85
    #1  0x00007ffff75a857b in splay_tree_insert (sp=sp@entry=0x7fffe40058e8, node=node@entry=0x608570)
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/splay-tree.c:156
    #2  0x00007ffff75a5a06 in gomp_init_tables (devicep=devicep@entry=0x7fffe40057c0, mm=mm@entry=0x7fffe40058e8)
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/target.c:785
    #3  0x00007ffff75a5a66 in gomp_init_dev_tables (devicep=devicep@entry=0x7fffe40057c0)
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/target.c:796
    #4  0x00007ffff75a5b8b in GOMP_target (device=-1, fn=0x400b9c <MAIN__._omp_fn.2>, offload_table=0x6014c0, mapnum=3, hostaddrs=0x7fffffffbe60, 
        sizes=0x7fffffffbe90, kinds=0x6014a0 <.omp_data_kinds.14> "\003\034\023")
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/target.c:845
    #5  0x0000000000400dc4 in MAIN__._omp_fn.1 () at source-gcc/libgomp/testsuite/libgomp.fortran/target7.f90:21
    #6  0x00007ffff759deef in gomp_barrier_handle_tasks (state=state@entry=0)
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/task.c:733
    #7  0x00007ffff75a24cc in gomp_team_barrier_wait_end (bar=0x607190, state=0)
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/config/linux/bar.c:116
    #8  0x0000000000400dee in MAIN__._omp_fn.0 () at source-gcc/libgomp/testsuite/libgomp.fortran/target7.f90:16
    #9  0x00007ffff759bd3f in GOMP_parallel (fn=0x400dd3 <MAIN__._omp_fn.0>, data=0x7fffffffd050, num_threads=8, flags=0)
        at /home/thomas/tmp/source/gcc/openacc/gomp-4_0-branch-work_/source-gcc/libgomp/parallel.c:168
    #10 0x0000000000400b03 in MAIN__ () at source-gcc/libgomp/testsuite/libgomp.fortran/target7.f90:16

GOMP_target inside GOMP_parallel.

From a quick look, on the OpenACC side things also don't look too good.
This all will need careful review for concurrency issues, but I'm not
addresseing all of that in this patch, primarily just restoring OpenMP
device initialization locking.  Committed to gomp-4_0-branch in r219036:

commit c20a14aa9800eafcdbe9775cae0e47c2eb4a9769
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Dec 22 22:53:55 2014 +0000

    libgomp: Fix locking in OpenMP GOMP_target* functions.
    
    	libgomp/
    	* libgomp.c (struct gomp_device_descr): Add lock member.
    	* oacc-host.c (goacc_host_init): Initialize it.
    	* target.c (gomp_target_init): Likewise.
    	(gomp_init_dev_tables): Remove function.
    	(GOMP_target, GOMP_target_data, GOMP_target_update): Instead of
    	calling gomp_init_dev_tables, separate device and memory mapping
    	initilization, guarded by appropriate locking.  Check (immutable)
    	device capabilities early.
    
    With Intel MIC offloading (emulation), this fixes:
    
        FAIL: libgomp.fortran/target7.f90   -O0  execution test
        FAIL: libgomp.fortran/target7.f90   -O1  execution test
        FAIL: libgomp.fortran/target7.f90   -O2  execution test
        FAIL: libgomp.fortran/target7.f90   -O3 -fomit-frame-pointer  execution test
        FAIL: libgomp.fortran/target7.f90   -O3 -fomit-frame-pointer -funroll-loops  execution test
        FAIL: libgomp.fortran/target7.f90   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  execution test
        FAIL: libgomp.fortran/target7.f90   -O3 -g  execution test
        FAIL: libgomp.fortran/target7.f90   -Os  execution test
        FAIL: libgomp.fortran/target8.f90   -O0  execution test
        FAIL: libgomp.fortran/target8.f90   -O1  execution test
        FAIL: libgomp.fortran/target8.f90   -O2  execution test
        FAIL: libgomp.fortran/target8.f90   -O3 -fomit-frame-pointer  execution test
        FAIL: libgomp.fortran/target8.f90   -O3 -fomit-frame-pointer -funroll-loops  execution test
        FAIL: libgomp.fortran/target8.f90   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  execution test
        FAIL: libgomp.fortran/target8.f90   -O3 -g  execution test
        FAIL: libgomp.fortran/target8.f90   -Os  execution test
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@219036 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog.gomp | 12 ++++++++
 libgomp/libgomp.h      | 11 ++++++++
 libgomp/oacc-host.c    |  1 +
 libgomp/oacc-init.c    | 12 ++++++--
 libgomp/target.c       | 77 +++++++++++++++++++++++++++++---------------------
 5 files changed, 78 insertions(+), 35 deletions(-)

diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index b9bd024..2f5b4ae 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,5 +1,17 @@
 2014-12-22  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* libgomp.c (struct gomp_device_descr): Add lock member.
+	* oacc-host.c (goacc_host_init): Initialize it.
+	* target.c (gomp_target_init): Likewise.
+	(gomp_init_dev_tables): Remove function.
+	(GOMP_target, GOMP_target_data, GOMP_target_update): Instead of
+	calling gomp_init_dev_tables, separate device and memory mapping
+	initilization, guarded by appropriate locking.  Check (immutable)
+	device capabilities early.
+
+	* oacc-init.c (lazy_open, acc_shutdown_1): Lock mem_map when in
+	use.
+
 	* target.c (num_devices_openmp): New variable.
 	(gomp_get_num_devices): Use it.
 	(gomp_target_init): Initialize it, and sort the devices array
diff --git libgomp/libgomp.h libgomp/libgomp.h
index b6d216b..edcd7bc 100644
--- libgomp/libgomp.h
+++ libgomp/libgomp.h
@@ -682,9 +682,11 @@ typedef struct acc_dispatch_t
      acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas
      (TODO).  Unlike mapped_data in the goacc_thread struct, unmapping can
      happen out-of-order with respect to mapping.  */
+  /* This is guarded by the lock in the "outer" struct gomp_device_descr.  */
   struct target_mem_desc *data_environ;
 
   /* Extra information required for a device instance by a given target.  */
+  /* This is guarded by the lock in the "outer" struct gomp_device_descr.  */
   void *target_data;
 
   /* Open or close a device instance.  */
@@ -730,6 +732,9 @@ typedef struct acc_dispatch_t
    mapped memory.  */
 struct gomp_device_descr
 {
+  /* Immutable data, which is only set during initialization, and which is not
+     guarded by the lock.  */
+
   /* The name of the device.  */
   const char *name;
 
@@ -764,10 +769,16 @@ struct gomp_device_descr
   void (*run_func) (int, void *, void *);
 
   /* OpenACC-specific functions.  */
+  /* This is mutable because of its mutable data_environ and target_data
+     members.  */
   acc_dispatch_t openacc;
 
   /* Memory-mapping info for this device instance.  */
+  /* Uses a separate lock.  */
   struct gomp_memory_mapping mem_map;
+
+  /* Mutex for the mutable data.  */
+  gomp_mutex_t lock;
 };
 
 extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
diff --git libgomp/oacc-host.c libgomp/oacc-host.c
index 2a82517..c375463 100644
--- libgomp/oacc-host.c
+++ libgomp/oacc-host.c
@@ -96,5 +96,6 @@ static __attribute__ ((constructor))
 void goacc_host_init (void)
 {
   gomp_mutex_init (&host_dispatch.mem_map.lock);
+  gomp_mutex_init (&host_dispatch.lock);
   goacc_register (&host_dispatch);
 }
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index d10b974..6acf61b 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -287,8 +287,11 @@ lazy_open (int ord)
 
   acc_dev->openacc.async_set_async_func (acc_async_sync);
 
-  if (!acc_dev->mem_map.is_initialized)
-    gomp_init_tables (acc_dev, &acc_dev->mem_map);
+  struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
+  gomp_mutex_lock (&mem_map->lock);
+  if (!mem_map->is_initialized)
+    gomp_init_tables (acc_dev, mem_map);
+  gomp_mutex_unlock (&mem_map->lock);
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
@@ -350,7 +353,10 @@ acc_shutdown_1 (acc_device_t d)
 
 	  walk->dev->openacc.target_data = target_data = NULL;
 
-	  gomp_free_memmap (&walk->dev->mem_map);
+	  struct gomp_memory_mapping *mem_map = &walk->dev->mem_map;
+	  gomp_mutex_lock (&mem_map->lock);
+	  gomp_free_memmap (mem_map);
+	  gomp_mutex_unlock (&mem_map->lock);
 
 	  walk->dev = NULL;
 	}
diff --git libgomp/target.c libgomp/target.c
index bf6edd2..53e0c7f 100644
--- libgomp/target.c
+++ libgomp/target.c
@@ -44,6 +44,7 @@
 
 static void gomp_target_init (void);
 
+/* The whole initialization code for offloading plugins is only run one.  */
 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
 
 /* This structure describes an offload image.
@@ -169,6 +170,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       tgt_size = mapnum * sizeof (void *);
     }
   gomp_mutex_lock (&mm->lock);
+
   for (i = 0; i < mapnum; i++)
     {
       int kind = get_kind (is_openacc, kinds, i);
@@ -552,8 +554,9 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
       return;
     }
 
-  size_t i;
   gomp_mutex_lock (&mm->lock);
+
+  size_t i;
   for (i = 0; i < tgt->list_count; i++)
     if (tgt->list[i] == NULL)
       ;
@@ -580,6 +583,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
     tgt->refcount--;
   else
     gomp_unmap_tgt (tgt);
+
   gomp_mutex_unlock (&mm->lock);
 }
 
@@ -669,17 +673,19 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type,
   num_offload_images++;
 }
 
-/* This function initializes the target device, specified by DEVICEP.  */
+/* This function initializes the target device, specified by DEVICEP.  DEVICEP
+   must be locked on entry, and remains locked on return.  */
 
 attribute_hidden void
 gomp_init_device (struct gomp_device_descr *devicep)
 {
-  /* Initialize the target device.  */
   devicep->init_device_func (devicep->target_id);
-
   devicep->is_initialized = true;
 }
 
+/* Initialize address mapping tables.  MM must be locked on entry, and remains
+   locked on return.  */
+
 attribute_hidden void
 gomp_init_tables (struct gomp_device_descr *devicep,
 		  struct gomp_memory_mapping *mm)
@@ -716,13 +722,8 @@ gomp_init_tables (struct gomp_device_descr *devicep,
   mm->is_initialized = true;
 }
 
-static void
-gomp_init_dev_tables (struct gomp_device_descr *devicep)
-{
-  gomp_init_device (devicep);
-  gomp_init_tables (devicep, &devicep->mem_map);
-}
-
+/* Free address mapping tables.  MM must be locked on entry, and remains locked
+   on return.  */
 
 attribute_hidden void
 gomp_free_memmap (struct gomp_memory_mapping *mm)
@@ -739,6 +740,9 @@ gomp_free_memmap (struct gomp_memory_mapping *mm)
   mm->is_initialized = false;
 }
 
+/* This function de-initializes the target device, specified by DEVICEP.
+   DEVICEP must be locked on entry, and remains locked on return.  */
+
 attribute_hidden void
 gomp_fini_device (struct gomp_device_descr *devicep)
 {
@@ -766,9 +770,6 @@ GOMP_target (int device, void (*fn) (void *), const void *offload_table,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  if (devicep != NULL && !devicep->is_initialized)
-    gomp_init_dev_tables (devicep);
-
   if (devicep == NULL
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     {
@@ -787,6 +788,11 @@ GOMP_target (int device, void (*fn) (void *), const void *offload_table,
       return;
     }
 
+  gomp_mutex_lock (&devicep->lock);
+  if (!devicep->is_initialized)
+    gomp_init_device (devicep);
+  gomp_mutex_unlock (&devicep->lock);
+
   void *fn_addr;
 
   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
@@ -795,15 +801,17 @@ GOMP_target (int device, void (*fn) (void *), const void *offload_table,
     {
       struct gomp_memory_mapping *mm = &devicep->mem_map;
       gomp_mutex_lock (&mm->lock);
-      if (!devicep->is_initialized)
-	gomp_init_dev_tables (devicep);
+
+      if (!mm->is_initialized)
+	gomp_init_tables (devicep, mm);
+
       struct splay_tree_key_s k;
       k.host_start = (uintptr_t) fn;
       k.host_end = k.host_start + 1;
-      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map.splay_tree,
-						 &k);
+      splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
       if (tgt_fn == NULL)
 	gomp_fatal ("Target function wasn't mapped");
+
       gomp_mutex_unlock (&mm->lock);
 
       fn_addr = (void *) tgt_fn->tgt->tgt_start;
@@ -832,9 +840,6 @@ GOMP_target_data (int device, const void *offload_table, size_t mapnum,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  if (devicep != NULL && !devicep->is_initialized)
-    gomp_init_dev_tables (devicep);
-
   if (devicep == NULL
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     {
@@ -854,10 +859,15 @@ GOMP_target_data (int device, const void *offload_table, size_t mapnum,
       return;
     }
 
+  gomp_mutex_lock (&devicep->lock);
+  if (!devicep->is_initialized)
+    gomp_init_device (devicep);
+  gomp_mutex_unlock (&devicep->lock);
+
   struct gomp_memory_mapping *mm = &devicep->mem_map;
   gomp_mutex_lock (&mm->lock);
-  if (!devicep->is_initialized)
-    gomp_init_dev_tables (devicep);
+  if (!mm->is_initialized)
+    gomp_init_tables (devicep, mm);
   gomp_mutex_unlock (&mm->lock);
 
   struct target_mem_desc *tgt
@@ -886,20 +896,22 @@ GOMP_target_update (int device, const void *offload_table, size_t mapnum,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  if (devicep == NULL)
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     return;
 
+  gomp_mutex_lock (&devicep->lock);
+  if (!devicep->is_initialized)
+    gomp_init_device (devicep);
+  gomp_mutex_unlock (&devicep->lock);
+
   struct gomp_memory_mapping *mm = &devicep->mem_map;
   gomp_mutex_lock (&mm->lock);
-  if (!devicep->is_initialized)
-    gomp_init_dev_tables (devicep);
+  if (!mm->is_initialized)
+    gomp_init_tables (devicep, mm);
   gomp_mutex_unlock (&mm->lock);
 
-  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
-    return;
-
-  gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds,
-	       false);
+  gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
 }
 
 void
@@ -1035,7 +1047,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
 }
 
 /* This function adds a compatible offload image IMAGE to an accelerator device
-   DEVICE.  */
+   DEVICE.  DEVICE must be locked on entry, and remains locked on return.  */
 
 static void
 gomp_register_image_for_device (struct gomp_device_descr *device,
@@ -1118,6 +1130,7 @@ gomp_target_init (void)
 		    current_device.target_id = i;
 		    devices[num_devices] = current_device;
 		    gomp_mutex_init (&devices[num_devices].mem_map.lock);
+		    gomp_mutex_init (&devices[num_devices].lock);
 		    num_devices++;
 		  }
 	      }


GrÃÃe,
 Thomas

Attachment: signature.asc
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]