[PATCH 2/7] [OpenACC] Adjust dynamic reference count semantics

Thomas Schwinge thomas@codesourcery.com
Wed Jun 3 15:19:47 GMT 2020


Hi Julian!

On 2020-06-03T14:36:14+0200, I wrote:
> On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com> wrote:
>> This patch adjusts the semantics of dynamic reference counts, as described
>> in the parent email.
>
> Thanks!
>
> A few questions, but no need to send an updated patch.
>
>> --- a/libgomp/oacc-mem.c
>> +++ b/libgomp/oacc-mem.c
>
>> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>>  {
>>    for (size_t i = 0; i < mapnum; i++)
>>      {
>> -      int group_last = find_group_last (i, mapnum, sizes, kinds);
>> +      splay_tree_key n;
>> +      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
>> +      bool struct_p = false;
>> +      size_t size, groupnum = (group_last - i) + 1;
>>
>> -      gomp_map_vars_async (acc_dev, aq,
>> -                       (group_last - i) + 1,
>> -                       &hostaddrs[i], NULL,
>> -                       &sizes[i], &kinds[i], true,
>> -                       GOMP_MAP_VARS_OPENACC_ENTER_DATA);
>> +      switch (kinds[i] & 0xff)
>> +    {
>> +    case GOMP_MAP_STRUCT:
>> +      {
>> +        int last = i + sizes[i];
>
> The 'last' calculated here must always equal the 'group_last' calculated
> above.  ;-) (... so we might just use 'group_last' instead of 'last' in
> the following.)
>
>> +        size = (uintptr_t) hostaddrs[last] + sizes[last]
>> +               - (uintptr_t) hostaddrs[i];
>> +        struct_p = true;
>> +      }
>> +      break;
>> +
>> +    case GOMP_MAP_ATTACH:
>> +      size = sizeof (void *);
>> +      break;
>> +
>> +    default:
>> +      size = sizes[i];
>> +    }
>> +
>> +      n = lookup_host (acc_dev, hostaddrs[i], size);
>> +
>
>> +      if (n && struct_p)
>> +    {
>> +      if (n->refcount != REFCOUNT_INFINITY)
>> +        n->refcount += groupnum - 1;
>> +      n->dynamic_refcount += groupnum - 1;
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +    }
>
> Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that
> just an optimization of the 'n && groupnum > 1' case below?

Eh, OK, I think I see where this is going; the 'n && groupnum > 1' case
below might not necessarily take care of the 'groupnum - 1' refcounts
that we're filing here?

>> +      else if (n && groupnum == 1)
>> +    {
>> +      void *h = hostaddrs[i];
>> +      size_t s = sizes[i];
>> +
>> +      /* A standalone attach clause.  */
>> +      if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
>> +        gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
>> +                             (uintptr_t) h, s, NULL);
>> +      else if (h + s > (void *) n->host_end)
>> +        {
>> +          gomp_mutex_unlock (&acc_dev->lock);
>> +          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
>> +        }
>> +
>> +      assert (n->refcount != REFCOUNT_LINK);
>> +      if (n->refcount != REFCOUNT_INFINITY)
>> +        n->refcount++;
>> +      n->dynamic_refcount++;
>> +
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +    }
>
>> +      else if (n && groupnum > 1)
>> +    {
>> +      assert (n->refcount != REFCOUNT_INFINITY
>> +              && n->refcount != REFCOUNT_LINK);
>> +
>> +      bool processed = false;
>> +
>> +      struct target_mem_desc *tgt = n->tgt;
>> +      for (size_t j = 0; j < tgt->list_count; j++)
>> +        if (tgt->list[j].key == n)
>> +          {
>> +            for (size_t k = 0; k < groupnum; k++)
>> +              if (j + k < tgt->list_count && tgt->list[j + k].key)
>> +                {
>> +                  tgt->list[j + k].key->refcount++;
>> +                  tgt->list[j + k].key->dynamic_refcount++;
>> +                }
>> +            processed = true;
>> +          }
>> +
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +      if (!processed)
>> +        gomp_fatal ("dynamic refcount incrementing failed for "
>> +                    "pointer/pset");
>> +    }
>
> Please add some text to explain the nested 'j', 'k' loops and their 'if'
> conditionals, and the 'groupnum' usage in the 'k' loop boundary.  Should
> the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)'
> (..., or is 'groupnum' relevant?), and in the loop body then use 'k'
> instead of 'j + k'?  (Maybe I've now confused myself, staring at this for
> a while...)

Audacious as I am sometimes, I did put a '__builtin_abort' right after
'tgt->list[j].key == n' -- and it doesn't trigger one single time for the
current libgomp test cases, meaning this is all dead code?  I'm confused.

>> +      else if (hostaddrs[i])
>> +    {
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +
>> +      struct target_mem_desc *tgt
>> +        = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
>> +                               &sizes[i], &kinds[i], true,
>> +                               GOMP_MAP_VARS_ENTER_DATA);
>> +      assert (tgt);
>> +      for (size_t j = 0; j < tgt->list_count; j++)
>> +        {
>> +          n = tgt->list[j].key;
>> +          if (n)
>> +            n->dynamic_refcount++;
>> +        }
>> +    }
>
> ... else nothing.  This latter "nothing" case (not present, and no
> 'hostaddrs[i]') is exercised by
> 'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only).  Is that
> alright?
>
>>
>>        i = group_last;
>>      }
>
>
>> @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>
> (Diff slightly edited.)
>
>>          if (n->refcount == 0)
>> -          gomp_remove_var_async (acc_dev, n, aq);
>
>> +          {
>> +            if (aq)
>> +              {
>> +                /* TODO The way the following code is currently
>> +                   implemented, we need the 'is_tgt_unmapped' return
>> +                   value from 'gomp_remove_var', so can't use
>> +                   'gomp_remove_var_async' here -- see the
>> +                   'gomp_unref_tgt' comment in
>> +                   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
>> +                   PR92881 -- so have to synchronize here.  */
>> +                if (!acc_dev->openacc.async.synchronize_func (aq))
>> +                  {
>> +                    gomp_mutex_unlock (&acc_dev->lock);
>> +                    gomp_fatal ("synchronize failed");
>> +                  }
>> +              }
>
> As far as I understand, it's no longer true that "The way the following
> code is [...] implemented, we need the 'is_tgt_unmapped' return value
> from 'gomp_remove_var'".  In particular, we now can/should "use
> 'gomp_remove_var_async' here", and no longer "have to synchronize here"?
>
> Indeed I'm happy to see that the logic below no longer depends on
> 'is_tgt_unmapped' for its loop exit condition.  Instead of the above,
> this now can use the standard pattern:
>
>     if (aq)
>       /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
>          'gomp_unref_tgt' comment in
>          <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
>          PR92881.  */
>       gomp_remove_var_async (acc_dev, n, aq);
>     else
>       { [as follows] }
>
>> +            int num_mappings = 0;
>> +            /* If the target_mem_desc represents a single data mapping, we
>> +               can check that it is freed when this splay tree key's
>> +               refcount reaches zero.  Otherwise (e.g. for a struct
>> +               mapping with multiple members), fall back to skipping the
>> +               test.  */
>> +            for (int j = 0; j < n->tgt->list_count; j++)
>> +              if (n->tgt->list[j].key)
>> +                num_mappings++;
>> +            bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
>> +            assert (num_mappings > 1 || is_tgt_unmapped);
>> +          }
>>        }
>>        break;
>
> For reference, the old logic (mandating what was described in the comment
> above) was:
>
>     bool is_tgt_unmapped = false;
>     for (size_t i = 0; i < t->list_count; i++)
>      {
>        is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
>        if (is_tgt_unmapped)
>          break;
>      }
>     assert (is_tgt_unmapped);


Grüße
 Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter


More information about the Gcc-patches mailing list