[PATCH] OpenACC reference count overhaul

Thomas Schwinge thomas@codesourcery.com
Mon Dec 9 14:44:00 GMT 2019


Hi Julian!

On 2019-10-03T09:35:04-0700, Julian Brown <julian@codesourcery.com> wrote:
> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c

> @@ -715,48 +684,34 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)

>        if (f & FLAG_COPYOUT)
> [...]
>  	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
>  	}
> -      gomp_remove_var (acc_dev, n);
> +      gomp_remove_var_async (acc_dev, n, aq);

Conceptually, I understand correctly that we need to use this (new)
'gomp_remove_var_async' to make sure that we don't
'gomp_free_device_memory' while the 'gomp_copy_dev2host' cited above is
still in process?

I'm curious why this isn't causing any problems for nvptx offloading
already, any thoughts on that?  Or, is this just missing test coverage?
(Always difficult for 'async' stuff, of course.)  By chance, is this
right now already causing problems with AMD GCN offloading?  (I really
need to set up AMD GCN offloading testing...)


I'm citing below the changes introducing 'gomp_remove_var_async',
modelled similar to the existing 'gomp_unmap_vars_async'.


Also for both these, do I understand correctly, that it's actually not
the 'gomp_unref_tgt' that needs to be "delayed" via 'goacc_asyncqueue',
but rather really only the 'gomp_free_device_memory', called via
'gomp_unmap_tgt', called via 'gomp_unref_tgt'?  In other words: why do we
need to keep the 'struct target_mem_desc' alive?  Per my understanding,
that one is one component of the mapping table, and not relevant anymore
(thus can be 'free'd) as soon as it has been determined that
'tgt->refcount == 0'?  Am I missing something there?

It will be OK to clean that up later, but I'd like to understand this
now.  Well, or, stating that you just blindly copied that from the
existing 'gomp_unmap_vars_async' is fine, too!  ;-P


Grüße
 Thomas


> --- a/libgomp/target.c
> +++ b/libgomp/target.c

> @@ -1092,32 +1106,66 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
>    free (tgt);
>  }
>  
> -attribute_hidden bool
> -gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
> +static bool
> +gomp_unref_tgt (void *ptr)
>  {
>    bool is_tgt_unmapped = false;
> -  splay_tree_remove (&devicep->mem_map, k);
> -  if (k->link_key)
> -    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
> -  if (k->tgt->refcount > 1)
> -    k->tgt->refcount--;
> +
> +  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
> +
> +  if (tgt->refcount > 1)
> +    tgt->refcount--;
>    else
>      {
> +      gomp_unmap_tgt (tgt);
>        is_tgt_unmapped = true;
> -      gomp_unmap_tgt (k->tgt);
>      }
> +
>    return is_tgt_unmapped;
>  }
>  
>  static void
> -gomp_unref_tgt (void *ptr)
> +gomp_unref_tgt_void (void *ptr)
>  {
> -  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
> +  (void) gomp_unref_tgt (ptr);
> +}
>  
> -  if (tgt->refcount > 1)
> -    tgt->refcount--;
> +static inline __attribute__((always_inline)) bool
> +gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
> +			  struct goacc_asyncqueue *aq)
> +{
> +  bool is_tgt_unmapped = false;
> +  splay_tree_remove (&devicep->mem_map, k);
> +  if (k->virtual_refcount == VREFCOUNT_LINK_KEY)
> +    {
> +      if (k->u.link_key)
> +	splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->u.link_key);
> +    }
> +  if (aq)
> +    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
> +						(void *) k->tgt);
>    else
> -    gomp_unmap_tgt (tgt);
> +    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
> +  return is_tgt_unmapped;
> +}
> +
> +attribute_hidden bool
> +gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
> +{
> +  return gomp_remove_var_internal (devicep, k, NULL);
> +}
> +
> +/* Remove a variable asynchronously.  This actually removes the variable
> +   mapping immediately, but retains the linked target_mem_desc until the
> +   asynchronous operation has completed (as it may still refer to target
> +   memory).  The device lock must be held before entry, and remains locked on
> +   exit.  */
> +
> +attribute_hidden void
> +gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
> +		       struct goacc_asyncqueue *aq)
> +{
> +  (void) gomp_remove_var_internal (devicep, k, aq);
>  }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 658 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20191209/eb6be058/attachment.sig>


More information about the Gcc-patches mailing list