This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, OpenACC 2.5, libgomp] Add *_async versions of runtime library API functions
- From: Cesar Philippidis <cesar at codesourcery dot com>
- To: <cltang at codesourcery dot com>, <gcc-patches at gcc dot gnu dot org>, Jakub Jelinek <jakub at redhat dot com>
- Date: Mon, 10 Sep 2018 10:22:17 -0700
- Subject: Re: [PATCH, OpenACC 2.5, libgomp] Add *_async versions of runtime library API functions
- References: <4f2750a1-9935-6629-b7fd-ce6280f902c0@mentor.com>
On 09/10/2018 08:04 AM, Chung-Lin Tang wrote:
> GOACC_2.0 {
> Index: libgomp/oacc-mem.c
> ===================================================================
> --- libgomp/oacc-mem.c (revision 264192)
> +++ libgomp/oacc-mem.c (working copy)
> @@ -153,8 +153,9 @@ acc_free (void *d)
> gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
> }
>
> -void
> -acc_memcpy_to_device (void *d, void *h, size_t s)
> +static void
> +memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
> + const char *libfnname)
This showed up oddly in the diff, but memcpy_tofrom_device is a new
internal function that's not part of the public API. It's nice that you
were able to merge the to/from functions together. I think this is safe
in terms of backwards compatibility.
> {
> /* No need to call lazy open here, as the device pointer must have
> been obtained from a routine that did that. */
> @@ -164,31 +165,49 @@ acc_free (void *d)
>
> if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
> {
> - memmove (d, h, s);
> + if (from)
> + memmove (h, d, s);
> + else
> + memmove (d, h, s);
> return;
> }
>
> - if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
> - gomp_fatal ("error in %s", __FUNCTION__);
> + if (async > acc_async_sync)
> + thr->dev->openacc.async_set_async_func (async);
> +
> + bool ret = (from
> + ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
> + : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
> +
> + if (async > acc_async_sync)
> + thr->dev->openacc.async_set_async_func (acc_async_sync);
> +
> + if (!ret)
> + gomp_fatal ("error in %s", libfnname);
> }
>
> void
> -acc_memcpy_from_device (void *h, void *d, size_t s)
> +acc_memcpy_to_device (void *d, void *h, size_t s)
> {
> - /* No need to call lazy open here, as the device pointer must have
> - been obtained from a routine that did that. */
> - struct goacc_thread *thr = goacc_thread ();
> + memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
> +}
>
> - assert (thr && thr->dev);
> +void
> +acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
> +{
> + memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
> +}
>
> - if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
> - {
> - memmove (h, d, s);
> - return;
> - }
> +void
> +acc_memcpy_from_device (void *h, void *d, size_t s)
> +{
> + memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
> +}
>
> - if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
> - gomp_fatal ("error in %s", __FUNCTION__);
> +void
> +acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
> +{
> + memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
> }
>
> /* Return the device pointer that corresponds to host data H. Or NULL
> @@ -428,7 +447,7 @@ acc_unmap_data (void *h)
> #define FLAG_COPY (1 << 2)
>
> static void *
> -present_create_copy (unsigned f, void *h, size_t s)
> +present_create_copy (unsigned f, void *h, size_t s, int async)
Likewise, this is another internal function, so it shouldn't break anything.
> {
> void *d;
> splay_tree_key n;
> @@ -490,11 +509,17 @@ static void *
>
> gomp_mutex_unlock (&acc_dev->lock);
>
> + if (async > acc_async_sync)
> + acc_dev->openacc.async_set_async_func (async);
> +
> tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
> GOMP_MAP_VARS_OPENACC);
> /* Initialize dynamic refcount. */
> tgt->list[0].key->dynamic_refcount = 1;
>
> + if (async > acc_async_sync)
> + acc_dev->openacc.async_set_async_func (acc_async_sync);
> +
> gomp_mutex_lock (&acc_dev->lock);
>
> d = tgt->to_free;
> @@ -510,19 +535,32 @@ static void *
> void *
> acc_create (void *h, size_t s)
> {
> - return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
> + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
> }
>
> +void
> +acc_create_async (void *h, size_t s, int async)
> +{
> + present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
> +}
> +
> void *
> acc_copyin (void *h, size_t s)
> {
> - return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
> + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
> + acc_async_sync);
> }
>
> +void
> +acc_copyin_async (void *h, size_t s, int async)
> +{
> + present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
> +}
> +
> void *
> acc_present_or_create (void *h, size_t s)
> {
> - return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
> + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
> }
>
> /* acc_pcreate is acc_present_or_create by a different name. */
> @@ -539,7 +577,8 @@ acc_pcreate (void *h, size_t s)
> void *
> acc_present_or_copyin (void *h, size_t s)
> {
> - return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
> + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
> + acc_async_sync);
> }
>
> /* acc_pcopyin is acc_present_or_copyin by a different name. */
> @@ -557,7 +596,7 @@ acc_pcopyin (void *h, size_t s)
> #define FLAG_FINALIZE (1 << 1)
>
> static void
> -delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
> +delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
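Ditto; delete_copyout is also a static internal function, so changing its signature shouldn't break anything.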
> {
> size_t host_size;
> splay_tree_key n;
> @@ -633,7 +672,13 @@ static void
> }
>
> if (f & FLAG_COPYOUT)
> - acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
> + {
> + if (async > acc_async_sync)
> + acc_dev->openacc.async_set_async_func (async);
> + acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
> + if (async > acc_async_sync)
> + acc_dev->openacc.async_set_async_func (acc_async_sync);
> + }
>
> gomp_remove_var (acc_dev, n);
> }
> @@ -644,41 +689,54 @@ static void
> void
> acc_delete (void *h , size_t s)
> {
> - delete_copyout (0, h, s, __FUNCTION__);
> + delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
> }
>
> void
> +acc_delete_async (void *h , size_t s, int async)
> +{
> + delete_copyout (0, h, s, async, __FUNCTION__);
> +}
> +
> +void
> acc_delete_finalize (void *h , size_t s)
> {
> - delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__);
> + delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
> }
>
> void
> acc_delete_finalize_async (void *h , size_t s, int async)
> {
> - delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__);
> + delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
> }
>
> void
> acc_copyout (void *h, size_t s)
> {
> - delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
> + delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
> }
>
> void
> +acc_copyout_async (void *h, size_t s, int async)
> +{
> + delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
> +}
> +
> +void
> acc_copyout_finalize (void *h, size_t s)
> {
> - delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__);
> + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
> + __FUNCTION__);
> }
>
> void
> acc_copyout_finalize_async (void *h, size_t s, int async)
> {
> - delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__);
> + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
> }
>
> static void
> -update_dev_host (int is_dev, void *h, size_t s)
> +update_dev_host (int is_dev, void *h, size_t s, int async)
> {
> splay_tree_key n;
> void *d;
> @@ -704,11 +762,17 @@ static void
> d = (void *) (n->tgt->tgt_start + n->tgt_offset
> + (uintptr_t) h - n->host_start);
>
> + if (async > acc_async_sync)
> + acc_dev->openacc.async_set_async_func (async);
> +
> if (is_dev)
> acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
> else
> acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
>
> + if (async > acc_async_sync)
> + acc_dev->openacc.async_set_async_func (acc_async_sync);
> +
> gomp_mutex_unlock (&acc_dev->lock);
> }
>
> @@ -715,16 +779,28 @@ static void
> void
> acc_update_device (void *h, size_t s)
> {
> - update_dev_host (1, h, s);
> + update_dev_host (1, h, s, acc_async_sync);
> }
>
> void
> +acc_update_device_async (void *h, size_t s, int async)
> +{
> + update_dev_host (1, h, s, async);
> +}
> +
> +void
> acc_update_self (void *h, size_t s)
> {
> - update_dev_host (0, h, s);
> + update_dev_host (0, h, s, acc_async_sync);
> }
>
> void
> +acc_update_self_async (void *h, size_t s, int async)
> +{
> + update_dev_host (0, h, s, async);
> +}
> +
> +void
> gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
> void *kinds)
> {
> Index: libgomp/openacc.f90
Given that this includes Fortran changes, you should have copied the
Fortran mailing list. Those changes look fairly mechanical and
reasonable, though.
> ===================================================================
> --- libgomp/openacc.f90 (revision 264192)
> +++ libgomp/openacc.f90 (working copy)
> @@ -332,6 +332,150 @@ module openacc_internal
> logical acc_is_present_array_h
> type (*), dimension (..), contiguous :: a
> end function
> +
> + subroutine acc_copyin_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyin_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyin_async_array_h (a, async)
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_create_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_create_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_create_async_array_h (a, async)
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyout_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyout_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyout_async_array_h (a, async)
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_delete_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_delete_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_delete_async_array_h (a, async)
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_device_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_device_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_device_async_array_h (a, async)
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_self_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_self_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_self_async_array_h (a, async)
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + end subroutine
> end interface
>
> interface
> @@ -510,6 +654,60 @@ module openacc_internal
> type (*), dimension (*) :: a
> integer (c_size_t), value :: len
> end function
> +
> + subroutine acc_copyin_async_l (a, len, async) &
> + bind (C, name = "acc_copyin_async")
> + use iso_c_binding, only: c_size_t, c_int
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_size_t), value :: len
> + integer (c_int), value :: async
> + end subroutine
> +
> + subroutine acc_create_async_l (a, len, async) &
> + bind (C, name = "acc_create_async")
> + use iso_c_binding, only: c_size_t, c_int
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_size_t), value :: len
> + integer (c_int), value :: async
> + end subroutine
> +
> + subroutine acc_copyout_async_l (a, len, async) &
> + bind (C, name = "acc_copyout_async")
> + use iso_c_binding, only: c_size_t, c_int
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_size_t), value :: len
> + integer (c_int), value :: async
> + end subroutine
> +
> + subroutine acc_delete_async_l (a, len, async) &
> + bind (C, name = "acc_delete_async")
> + use iso_c_binding, only: c_size_t, c_int
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_size_t), value :: len
> + integer (c_int), value :: async
> + end subroutine
> +
> + subroutine acc_update_device_async_l (a, len, async) &
> + bind (C, name = "acc_update_device_async")
> + use iso_c_binding, only: c_size_t, c_int
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_size_t), value :: len
> + integer (c_int), value :: async
> + end subroutine
> +
> + subroutine acc_update_self_async_l (a, len, async) &
> + bind (C, name = "acc_update_self_async")
> + use iso_c_binding, only: c_size_t, c_int
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_size_t), value :: len
> + integer (c_int), value :: async
> + end subroutine
> end interface
> end module
>
> @@ -529,6 +727,8 @@ module openacc
> public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create
> public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete
> public :: acc_update_device, acc_update_self, acc_is_present
> + public :: acc_copyin_async, acc_create_async, acc_copyout_async
> + public :: acc_delete_async, acc_update_device_async, acc_update_self_async
>
> integer, parameter :: openacc_version = 201306
>
> @@ -694,6 +894,42 @@ module openacc
> ! acc_memcpy_to_device: Only available in C/C++
> ! acc_memcpy_from_device: Only available in C/C++
>
> + interface acc_copyin_async
> + procedure :: acc_copyin_async_32_h
> + procedure :: acc_copyin_async_64_h
> + procedure :: acc_copyin_async_array_h
> + end interface
> +
> + interface acc_create_async
> + procedure :: acc_create_async_32_h
> + procedure :: acc_create_async_64_h
> + procedure :: acc_create_async_array_h
> + end interface
> +
> + interface acc_copyout_async
> + procedure :: acc_copyout_async_32_h
> + procedure :: acc_copyout_async_64_h
> + procedure :: acc_copyout_async_array_h
> + end interface
> +
> + interface acc_delete_async
> + procedure :: acc_delete_async_32_h
> + procedure :: acc_delete_async_64_h
> + procedure :: acc_delete_async_array_h
> + end interface
> +
> + interface acc_update_device_async
> + procedure :: acc_update_device_async_32_h
> + procedure :: acc_update_device_async_64_h
> + procedure :: acc_update_device_async_array_h
> + end interface
> +
> + interface acc_update_self_async
> + procedure :: acc_update_self_async_32_h
> + procedure :: acc_update_self_async_64_h
> + procedure :: acc_update_self_async_array_h
> + end interface
> +
> end module
>
> function acc_get_num_devices_h (d)
> @@ -1078,3 +1314,189 @@ function acc_is_present_array_h (a)
> type (*), dimension (..), contiguous :: a
> acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1
> end function
> +
> +subroutine acc_copyin_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t, c_size_t, c_int
> + use openacc_internal, only: acc_copyin_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_copyin_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t, c_size_t, c_int
> + use openacc_internal, only: acc_copyin_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_copyin_async_array_h (a, async)
> + use iso_c_binding, only: c_int
> + use openacc_internal, only: acc_copyin_async_l
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + call acc_copyin_async_l (a, sizeof (a), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_create_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t, c_size_t, c_int
> + use openacc_internal, only: acc_create_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_create_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t, c_size_t, c_int
> + use openacc_internal, only: acc_create_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_create_async_array_h (a, async)
> + use iso_c_binding, only: c_int
> + use openacc_internal, only: acc_create_async_l
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + call acc_create_async_l (a, sizeof (a), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_copyout_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t, c_size_t, c_int
> + use openacc_internal, only: acc_copyout_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_copyout_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t, c_size_t, c_int
> + use openacc_internal, only: acc_copyout_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_copyout_async_array_h (a, async)
> + use iso_c_binding, only: c_int
> + use openacc_internal, only: acc_copyout_async_l
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + call acc_copyout_async_l (a, sizeof (a), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_delete_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t, c_size_t, c_int
> + use openacc_internal, only: acc_delete_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_delete_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t, c_size_t, c_int
> + use openacc_internal, only: acc_delete_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_delete_async_array_h (a, async)
> + use iso_c_binding, only: c_int
> + use openacc_internal, only: acc_delete_async_l
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_update_device_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t, c_size_t, c_int
> + use openacc_internal, only: acc_update_device_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_update_device_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t, c_size_t, c_int
> + use openacc_internal, only: acc_update_device_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_update_device_async_array_h (a, async)
> + use iso_c_binding, only: c_int
> + use openacc_internal, only: acc_update_device_async_l
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + call acc_update_device_async_l (a, sizeof (a), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_update_self_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t, c_size_t, c_int
> + use openacc_internal, only: acc_update_self_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_update_self_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t, c_size_t, c_int
> + use openacc_internal, only: acc_update_self_async_l
> + use openacc_kinds, only: acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
> +end subroutine
> +
> +subroutine acc_update_self_async_array_h (a, async)
> + use iso_c_binding, only: c_int
> + use openacc_internal, only: acc_update_self_async_l
> + use openacc_kinds, only: acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async
> + call acc_update_self_async_l (a, sizeof (a), int (async, kind = c_int))
> +end subroutine
> Index: libgomp/openacc.h
> ===================================================================
> --- libgomp/openacc.h (revision 264192)
> +++ libgomp/openacc.h (working copy)
> @@ -115,6 +115,16 @@ void acc_copyout_finalize_async (void *, size_t, i
> void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
> void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
>
> +/* Async functions, specified in OpenACC 2.5. */
> +void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
> +void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
> +void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
> +void acc_delete_async (void *, size_t, int) __GOACC_NOTHROW;
> +void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
> +void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
> +void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
> +void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
> +
> /* CUDA-specific routines. */
> void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
> void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
> Index: libgomp/openacc_lib.h
I don't see a test case for this. I believe that openacc_lib.h is used
by fixed-form Fortran programs (those whose file names end in .f). Can
you add a fixed-form version of lib-16.f90?
> ===================================================================
> --- libgomp/openacc_lib.h (revision 264192)
> +++ libgomp/openacc_lib.h (working copy)
> @@ -403,3 +403,159 @@
>
> ! acc_memcpy_to_device: Only available in C/C++
> ! acc_memcpy_from_device: Only available in C/C++
> +
> + interface acc_copyin_async
> + subroutine acc_copyin_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyin_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyin_async_array_h (a, async_)
> + import acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async_
> + end subroutine
> + end interface
> +
> + interface acc_create_async
> + subroutine acc_create_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_create_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_create_async_array_h (a, async_)
> + import acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async_
> + end subroutine
> + end interface
> +
> + interface acc_copyout_async
> + subroutine acc_copyout_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyout_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_copyout_async_array_h (a, async_)
> + import acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async_
> + end subroutine
> + end interface
> +
> + interface acc_delete_async
> + subroutine acc_delete_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_delete_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_delete_async_array_h (a, async_)
> + import acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async_
> + end subroutine
> + end interface
> +
> + interface acc_update_device_async
> + subroutine acc_update_device_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_device_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_device_async_array_h (a, async_)
> + import acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async_
> + end subroutine
> + end interface
> +
> + interface acc_update_self_async
> + subroutine acc_update_self_async_32_h (a, len, async)
> + use iso_c_binding, only: c_int32_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int32_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_self_async_64_h (a, len, async)
> + use iso_c_binding, only: c_int64_t
> + import acc_handle_kind
> + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
> + type (*), dimension (*) :: a
> + integer (c_int64_t) len
> + integer (acc_handle_kind) async
> + end subroutine
> +
> + subroutine acc_update_self_async_array_h (a, async_)
> + import acc_handle_kind
> + type (*), dimension (..), contiguous :: a
> + integer (acc_handle_kind) async_
> + end subroutine
> + end interface
> Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c
> ===================================================================
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (nonexistent)
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (working copy)
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
> +
> +#include <string.h>
> +#include <stdlib.h>
> +#include <openacc.h>
> +
> +int
> +main (int argc, char **argv)
> +{
> + const int N = 256;
> + int i;
> + int async = 8;
> + unsigned char *h;
> +
> + h = (unsigned char *) malloc (N);
> +
> + for (i = 0; i < N; i++)
> + {
> + h[i] = i;
> + }
> +
> + acc_copyin_async (h, N, async);
> +
> + memset (h, 0, N);
> +
> + acc_wait (async);
> +
> + acc_copyout_async (h, N, async + 1);
> +
> + acc_wait (async + 1);
> +
> + for (i = 0; i < N; i++)
> + {
> + if (h[i] != i)
> + abort ();
> + }
> +
> + free (h);
> +
> + return 0;
> +}
> Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c
> ===================================================================
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (nonexistent)
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (working copy)
> @@ -0,0 +1,45 @@
> +/* { dg-do run } */
> +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
> +
> +#include <string.h>
> +#include <stdlib.h>
> +#include <openacc.h>
> +
> +int
> +main (int argc, char **argv)
> +{
> + const int N = 256;
> + int i, q = 5;
> + unsigned char *h, *g;
> + void *d;
> +
> + h = (unsigned char *) malloc (N);
> + g = (unsigned char *) malloc (N);
> + for (i = 0; i < N; i++)
> + {
> + g[i] = i;
> + }
> +
> + acc_create_async (h, N, q);
> +
> + acc_memcpy_to_device_async (acc_deviceptr (h), g, N, q);
> + memset (&h[0], 0, N);
> +
> + acc_wait (q);
> +
> + acc_update_self_async (h, N, q + 1);
> + acc_delete_async (h, N, q + 1);
> +
> + acc_wait (q + 1);
> +
> + for (i = 0; i < N; i++)
> + {
> + if (h[i] != i)
> + abort ();
> + }
> +
> + free (h);
> + free (g);
> +
> + return 0;
> +}
> Index: libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90
> ===================================================================
> --- libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (nonexistent)
> +++ libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (working copy)
> @@ -0,0 +1,57 @@
> +! { dg-do run }
> +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
> +
> +program main
> + use openacc
> + implicit none
> +
> + integer, parameter :: N = 256
> + integer, allocatable :: h(:)
> + integer :: i
> + integer :: async = 5
> +
> + allocate (h(N))
> +
> + do i = 1, N
> + h(i) = i
> + end do
> +
> + call acc_copyin (h)
> +
> + do i = 1, N
> + h(i) = i + i
> + end do
> +
> + call acc_update_device_async (h, sizeof (h), async)
> +
> + if (acc_is_present (h) .neqv. .TRUE.) call abort
> +
> + h(:) = 0
> +
> + call acc_copyout_async (h, sizeof (h), async)
> +
> + call acc_wait (async)
> +
> + do i = 1, N
> + if (h(i) /= i + i) call abort
> + end do
> +
> + call acc_copyin (h, sizeof (h))
> +
> + h(:) = 0
> +
> + call acc_update_self_async (h, sizeof (h), async)
> +
> + if (acc_is_present (h) .neqv. .TRUE.) call abort
> +
> + do i = 1, N
> + if (h(i) /= i + i) call abort
> + end do
> +
> + call acc_delete_async (h, async)
> +
> + call acc_wait (async)
> +
> + if (acc_is_present (h) .neqv. .FALSE.) call abort
> +
> +end program
>
While I can't approve this patch, it seems reasonable to me. I like how
you cleaned things up from OG8 (e.g., replacing 'return (n ? 1 : 0)' with
'return n != NULL'). Are there any other OG8 async patches in your queue?
Thanks,
Cesar