1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
45 #include "plugin-suffix.h"
/* Entries of the refcount-tracking hash table are pointers to the refcount
   words themselves (see gomp_increment_refcount).  These typedef/hooks
   instantiate libgomp's generic hashtab implementation.  */
typedef uintptr_t *hash_entry_type;

/* Allocation hook for hashtab.h: defer to libgomp's checked allocator.  */
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
/* Deallocation hook for hashtab.h.  */
static inline void htab_free (void *ptr) { free (ptr); }
53 static inline hashval_t
54 htab_hash (hash_entry_type element
)
56 return hash_pointer ((void *) element
);
60 htab_eq (hash_entry_type x
, hash_entry_type y
)
65 #define FIELD_TGT_EMPTY (~(size_t) 0)
67 static void gomp_target_init (void);
69 /* The whole initialization code for offloading plugins is only run once.  */
70 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
72 /* Mutex for offload image registration. */
73 static gomp_mutex_t register_lock
;
75 /* This structure describes an offload image.
76 It contains type of the target device, pointer to host table descriptor, and
77 pointer to target data. */
78 struct offload_image_descr
{
80 enum offload_target_type type
;
81 const void *host_table
;
82 const void *target_data
;
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of requires directive clause values, summarized from .gnu.gomp.requires
   section.  Offload plugins are queried with this mask to see if all required
   features are supported.  */
static unsigned int gomp_requires_mask;

/* Start/end of .gnu.gomp.requires section of program, defined in
   crtoffloadbegin/end.o.  Declared weak so that programs without the section
   still link.  */
__attribute__((weak))
extern const unsigned int __requires_mask_table[];
__attribute__((weak))
extern const unsigned int __requires_mask_table_end[];
112 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
115 gomp_realloc_unlock (void *old
, size_t size
)
117 void *ret
= realloc (old
, size
);
120 gomp_mutex_unlock (®ister_lock
);
121 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
126 attribute_hidden
void
127 gomp_init_targets_once (void)
129 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
133 gomp_get_num_devices (void)
135 gomp_init_targets_once ();
136 return num_devices_openmp
;
139 static struct gomp_device_descr
*
140 resolve_device (int device_id
)
142 if (device_id
== GOMP_DEVICE_ICV
)
144 struct gomp_task_icv
*icv
= gomp_icv (false);
145 device_id
= icv
->default_device_var
;
148 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
150 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
151 && device_id
!= GOMP_DEVICE_HOST_FALLBACK
152 && device_id
!= num_devices_openmp
)
153 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
154 "but device not found");
159 gomp_mutex_lock (&devices
[device_id
].lock
);
160 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
161 gomp_init_device (&devices
[device_id
]);
162 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
164 gomp_mutex_unlock (&devices
[device_id
].lock
);
166 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
167 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
168 "but device is finalized");
172 gomp_mutex_unlock (&devices
[device_id
].lock
);
174 return &devices
[device_id
];
178 static inline splay_tree_key
179 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
181 if (key
->host_start
!= key
->host_end
)
182 return splay_tree_lookup (mem_map
, key
);
185 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
190 n
= splay_tree_lookup (mem_map
, key
);
194 return splay_tree_lookup (mem_map
, key
);
197 static inline splay_tree_key
198 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
200 if (key
->host_start
!= key
->host_end
)
201 return splay_tree_lookup (mem_map
, key
);
204 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
210 gomp_device_copy (struct gomp_device_descr
*devicep
,
211 bool (*copy_func
) (int, void *, const void *, size_t),
212 const char *dst
, void *dstaddr
,
213 const char *src
, const void *srcaddr
,
216 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
218 gomp_mutex_unlock (&devicep
->lock
);
219 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
220 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
225 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
226 bool (*copy_func
) (int, void *, const void *, size_t,
227 struct goacc_asyncqueue
*),
228 const char *dst
, void *dstaddr
,
229 const char *src
, const void *srcaddr
,
230 const void *srcaddr_orig
,
231 size_t size
, struct goacc_asyncqueue
*aq
)
233 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
235 gomp_mutex_unlock (&devicep
->lock
);
236 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
237 gomp_fatal ("Copying of %s object [%p..%p)"
238 " via buffer %s object [%p..%p)"
239 " to %s object [%p..%p) failed",
240 src
, srcaddr_orig
, srcaddr_orig
+ size
,
241 src
, srcaddr
, srcaddr
+ size
,
242 dst
, dstaddr
, dstaddr
+ size
);
244 gomp_fatal ("Copying of %s object [%p..%p)"
245 " to %s object [%p..%p) failed",
246 src
, srcaddr
, srcaddr
+ size
,
247 dst
, dstaddr
, dstaddr
+ size
);
251 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
252 host to device memory transfers. */
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
281 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
283 /* Maximum size of a gap in between regions to consider them being copied
284 within the same chunk. All the device offsets considered are within
285 newly allocated device memory, so it isn't fatal if we copy some padding
286 in between from host to device. The gaps come either from alignment
287 padding or from memory regions which are not supposed to be copied from
288 host to device (e.g. map(alloc:), map(from:) etc.). */
289 #define MAX_COALESCE_BUF_GAP (4 * 1024)
291 /* Add region with device tgt_start relative offset and length to CBUF.
293 This must not be used for asynchronous copies, because the host data might
294 not be computed yet (by an earlier asynchronous compute region, for
296 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
297 is it more performant to use libgomp CBUF buffering or individual device
298 asynchronous copying?)  */
301 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
303 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
307 if (cbuf
->chunk_cnt
< 0)
309 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
311 cbuf
->chunk_cnt
= -1;
314 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
316 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
320 /* If the last chunk is only used by one mapping, discard it,
321 as it will be one host to device copy anyway and
322 memcpying it around will only waste cycles. */
323 if (cbuf
->use_cnt
== 1)
326 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
327 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
332 /* Return true for mapping kinds which need to copy data from the
333 host to device for regions that weren't previously mapped. */
336 gomp_to_device_kind_p (int kind
)
342 case GOMP_MAP_FORCE_ALLOC
:
343 case GOMP_MAP_FORCE_FROM
:
344 case GOMP_MAP_ALWAYS_FROM
:
351 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
352 non-NULL), when the source data is stack or may otherwise be deallocated
353 before the asynchronous copy takes place, EPHEMERAL must be passed as
356 attribute_hidden
void
357 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
358 struct goacc_asyncqueue
*aq
,
359 void *d
, const void *h
, size_t sz
,
360 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
362 if (__builtin_expect (aq
!= NULL
, 0))
364 /* See 'gomp_coalesce_buf_add'. */
367 void *h_buf
= (void *) h
;
370 /* We're queueing up an asynchronous copy from data that may
371 disappear before the transfer takes place (i.e. because it is a
372 stack local in a function that is no longer executing). Make a
373 copy of the data into a temporary buffer in those cases. */
374 h_buf
= gomp_malloc (sz
);
375 memcpy (h_buf
, h
, sz
);
377 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
378 "dev", d
, "host", h_buf
, h
, sz
, aq
);
380 /* Free temporary buffer once the transfer has completed. */
381 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
388 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
389 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
392 long last
= cbuf
->chunk_cnt
- 1;
393 while (first
<= last
)
395 long middle
= (first
+ last
) >> 1;
396 if (cbuf
->chunks
[middle
].end
<= doff
)
398 else if (cbuf
->chunks
[middle
].start
<= doff
)
400 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
402 gomp_mutex_unlock (&devicep
->lock
);
403 gomp_fatal ("internal libgomp cbuf error");
405 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
415 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
418 attribute_hidden
void
419 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
420 struct goacc_asyncqueue
*aq
,
421 void *h
, const void *d
, size_t sz
)
423 if (__builtin_expect (aq
!= NULL
, 0))
424 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
425 "host", h
, "dev", d
, NULL
, sz
, aq
);
427 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
431 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
433 if (!devicep
->free_func (devicep
->target_id
, devptr
))
435 gomp_mutex_unlock (&devicep
->lock
);
436 gomp_fatal ("error in freeing device memory block at %p", devptr
);
440 /* Increment reference count of a splay_tree_key region K by 1.
441 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
442 increment the value if refcount is not yet contained in the set (used for
443 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
444 once for each construct). */
447 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
449 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
452 uintptr_t *refcount_ptr
= &k
->refcount
;
454 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
455 refcount_ptr
= &k
->structelem_refcount
;
456 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
457 refcount_ptr
= k
->structelem_refcount_ptr
;
461 if (htab_find (*refcount_set
, refcount_ptr
))
463 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
464 *slot
= refcount_ptr
;
471 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
472 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
473 track already seen refcounts, and only adjust the value if refcount is not
474 yet contained in the set (like gomp_increment_refcount).
476 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
477 it is already zero and we know we decremented it earlier. This signals that
478 associated maps should be copied back to host.
480 *DO_REMOVE is set to true when this is the first handling of this refcount
481 and we are setting it to zero. This signals a removal of this key from the
484 Copy and removal are separated due to cases like handling of structure
485 elements, e.g. each map of a structure element representing a possible copy
486 out of a structure field has to be handled individually, but we only signal
487 removal for one (the first encountered) sibling map.  */
490 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
491 bool *do_copy
, bool *do_remove
)
493 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
495 *do_copy
= *do_remove
= false;
499 uintptr_t *refcount_ptr
= &k
->refcount
;
501 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
502 refcount_ptr
= &k
->structelem_refcount
;
503 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
504 refcount_ptr
= k
->structelem_refcount_ptr
;
506 bool new_encountered_refcount
;
507 bool set_to_zero
= false;
508 bool is_zero
= false;
510 uintptr_t orig_refcount
= *refcount_ptr
;
514 if (htab_find (*refcount_set
, refcount_ptr
))
516 new_encountered_refcount
= false;
520 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
521 *slot
= refcount_ptr
;
522 new_encountered_refcount
= true;
525 /* If no refcount_set being used, assume all keys are being decremented
526 for the first time. */
527 new_encountered_refcount
= true;
531 else if (*refcount_ptr
> 0)
535 if (*refcount_ptr
== 0)
537 if (orig_refcount
> 0)
543 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
544 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
547 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
548 gomp_map_0len_lookup found oldn for newn.
549 Helper function of gomp_map_vars. */
552 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
553 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
554 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
555 unsigned char kind
, bool always_to_flag
, bool implicit
,
556 struct gomp_coalesce_buf
*cbuf
,
557 htab_t
*refcount_set
)
559 assert (kind
!= GOMP_MAP_ATTACH
560 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
563 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
564 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
565 tgt_var
->is_attach
= false;
566 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
568 /* For implicit maps, old contained in new is valid. */
569 bool implicit_subset
= (implicit
570 && newn
->host_start
<= oldn
->host_start
571 && oldn
->host_end
<= newn
->host_end
);
573 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
575 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
577 if ((kind
& GOMP_MAP_FLAG_FORCE
)
578 /* For implicit maps, old contained in new is valid. */
580 /* Otherwise, new contained inside old is considered valid. */
581 || (oldn
->host_start
<= newn
->host_start
582 && newn
->host_end
<= oldn
->host_end
)))
584 gomp_mutex_unlock (&devicep
->lock
);
585 gomp_fatal ("Trying to map into device [%p..%p) object when "
586 "[%p..%p) is already mapped",
587 (void *) newn
->host_start
, (void *) newn
->host_end
,
588 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
591 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
593 /* Implicit + always should not happen. If this does occur, below
594 address/length adjustment is a TODO. */
595 assert (!implicit_subset
);
597 if (oldn
->aux
&& oldn
->aux
->attach_count
)
599 /* We have to be careful not to overwrite still attached pointers
600 during the copyback to host. */
601 uintptr_t addr
= newn
->host_start
;
602 while (addr
< newn
->host_end
)
604 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
605 if (oldn
->aux
->attach_count
[i
] == 0)
606 gomp_copy_host2dev (devicep
, aq
,
607 (void *) (oldn
->tgt
->tgt_start
609 + addr
- oldn
->host_start
),
611 sizeof (void *), false, cbuf
);
612 addr
+= sizeof (void *);
616 gomp_copy_host2dev (devicep
, aq
,
617 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
618 + newn
->host_start
- oldn
->host_start
),
619 (void *) newn
->host_start
,
620 newn
->host_end
- newn
->host_start
, false, cbuf
);
623 gomp_increment_refcount (oldn
, refcount_set
);
627 get_kind (bool short_mapkind
, void *kinds
, int idx
)
630 return ((unsigned char *) kinds
)[idx
];
632 int val
= ((unsigned short *) kinds
)[idx
];
633 if (GOMP_MAP_IMPLICIT_P (val
))
634 val
&= ~GOMP_MAP_IMPLICIT
;
/* Return whether entry IDX in KINDS carries the implicit-map bit; only
   short (16-bit) map kinds can encode it.  */
static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
650 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
651 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
652 struct gomp_coalesce_buf
*cbuf
,
653 bool allow_zero_length_array_sections
)
655 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
656 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
657 struct splay_tree_key_s cur_node
;
659 cur_node
.host_start
= host_ptr
;
660 if (cur_node
.host_start
== (uintptr_t) NULL
)
662 cur_node
.tgt_offset
= (uintptr_t) NULL
;
663 gomp_copy_host2dev (devicep
, aq
,
664 (void *) (tgt
->tgt_start
+ target_offset
),
665 (void *) &cur_node
.tgt_offset
, sizeof (void *),
669 /* Add bias to the pointer value. */
670 cur_node
.host_start
+= bias
;
671 cur_node
.host_end
= cur_node
.host_start
;
672 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
675 if (allow_zero_length_array_sections
)
676 cur_node
.tgt_offset
= 0;
679 gomp_mutex_unlock (&devicep
->lock
);
680 gomp_fatal ("Pointer target of array section wasn't mapped");
685 cur_node
.host_start
-= n
->host_start
;
687 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
688 /* At this point tgt_offset is target address of the
689 array section. Now subtract bias to get what we want
690 to initialize the pointer with. */
691 cur_node
.tgt_offset
-= bias
;
693 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
694 (void *) &cur_node
.tgt_offset
, sizeof (void *),
699 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
700 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
701 size_t first
, size_t i
, void **hostaddrs
,
702 size_t *sizes
, void *kinds
,
703 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
705 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
706 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
707 struct splay_tree_key_s cur_node
;
710 const bool short_mapkind
= true;
711 const int typemask
= short_mapkind
? 0xff : 0x7;
713 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
714 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
715 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
716 kind
= get_kind (short_mapkind
, kinds
, i
);
717 implicit
= get_implicit (short_mapkind
, kinds
, i
);
720 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
722 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
723 kind
& typemask
, false, implicit
, cbuf
,
729 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
731 cur_node
.host_start
--;
732 n2
= splay_tree_lookup (mem_map
, &cur_node
);
733 cur_node
.host_start
++;
736 && n2
->host_start
- n
->host_start
737 == n2
->tgt_offset
- n
->tgt_offset
)
739 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
740 kind
& typemask
, false, implicit
, cbuf
,
746 n2
= splay_tree_lookup (mem_map
, &cur_node
);
750 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
752 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
753 kind
& typemask
, false, implicit
, cbuf
,
758 gomp_mutex_unlock (&devicep
->lock
);
759 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
760 "other mapped elements from the same structure weren't mapped "
761 "together with it", (void *) cur_node
.host_start
,
762 (void *) cur_node
.host_end
);
765 attribute_hidden
void
766 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
767 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
768 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
769 struct gomp_coalesce_buf
*cbufp
,
770 bool allow_zero_length_array_sections
)
772 struct splay_tree_key_s s
;
777 gomp_mutex_unlock (&devicep
->lock
);
778 gomp_fatal ("enclosing struct not mapped for attach");
781 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
782 /* We might have a pointer in a packed struct: however we cannot have more
783 than one such pointer in each pointer-sized portion of the struct, so
785 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
788 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
790 if (!n
->aux
->attach_count
)
792 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
794 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
795 n
->aux
->attach_count
[idx
]++;
798 gomp_mutex_unlock (&devicep
->lock
);
799 gomp_fatal ("attach count overflow");
802 if (n
->aux
->attach_count
[idx
] == 1)
804 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
806 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
810 if ((void *) target
== NULL
)
812 gomp_mutex_unlock (&devicep
->lock
);
813 gomp_fatal ("attempt to attach null pointer");
816 s
.host_start
= target
+ bias
;
817 s
.host_end
= s
.host_start
+ 1;
818 tn
= splay_tree_lookup (mem_map
, &s
);
822 if (allow_zero_length_array_sections
)
823 /* When allowing attachment to zero-length array sections, we
824 allow attaching to NULL pointers when the target region is not
829 gomp_mutex_unlock (&devicep
->lock
);
830 gomp_fatal ("pointer target not mapped for attach");
834 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
837 "%s: attaching host %p, target %p (struct base %p) to %p\n",
838 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
839 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
841 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
842 sizeof (void *), true, cbufp
);
845 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
846 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
849 attribute_hidden
void
850 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
851 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
852 uintptr_t detach_from
, bool finalize
,
853 struct gomp_coalesce_buf
*cbufp
)
859 gomp_mutex_unlock (&devicep
->lock
);
860 gomp_fatal ("enclosing struct not mapped for detach");
863 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
865 if (!n
->aux
|| !n
->aux
->attach_count
)
867 gomp_mutex_unlock (&devicep
->lock
);
868 gomp_fatal ("no attachment counters for struct");
872 n
->aux
->attach_count
[idx
] = 1;
874 if (n
->aux
->attach_count
[idx
] == 0)
876 gomp_mutex_unlock (&devicep
->lock
);
877 gomp_fatal ("attach count underflow");
880 n
->aux
->attach_count
[idx
]--;
882 if (n
->aux
->attach_count
[idx
] == 0)
884 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
886 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
889 "%s: detaching host %p, target %p (struct base %p) to %p\n",
890 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
891 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
894 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
895 sizeof (void *), true, cbufp
);
898 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
899 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
902 attribute_hidden
uintptr_t
903 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
905 if (tgt
->list
[i
].key
!= NULL
)
906 return tgt
->list
[i
].key
->tgt
->tgt_start
907 + tgt
->list
[i
].key
->tgt_offset
908 + tgt
->list
[i
].offset
;
910 switch (tgt
->list
[i
].offset
)
913 return (uintptr_t) hostaddrs
[i
];
919 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
920 + tgt
->list
[i
+ 1].key
->tgt_offset
921 + tgt
->list
[i
+ 1].offset
922 + (uintptr_t) hostaddrs
[i
]
923 - (uintptr_t) hostaddrs
[i
+ 1];
926 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
930 static inline __attribute__((always_inline
)) struct target_mem_desc
*
931 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
932 struct goacc_asyncqueue
*aq
, size_t mapnum
,
933 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
934 void *kinds
, struct goacc_ncarray_info
*nca_info
,
935 bool short_mapkind
, htab_t
*refcount_set
,
936 enum gomp_map_vars_kind pragma_kind
)
938 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
939 size_t nca_data_row_num
= (nca_info
? nca_info
->num_data_rows
: 0);
940 bool has_firstprivate
= false;
941 bool has_always_ptrset
= false;
942 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
943 const int rshift
= short_mapkind
? 8 : 3;
944 const int typemask
= short_mapkind
? 0xff : 0x7;
945 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
946 struct splay_tree_key_s cur_node
;
947 struct target_mem_desc
*tgt
948 = gomp_malloc (sizeof (*tgt
)
949 + sizeof (tgt
->list
[0]) * (mapnum
+ nca_data_row_num
));
950 tgt
->list_count
= mapnum
+ nca_data_row_num
;
951 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
952 tgt
->device_descr
= devicep
;
954 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
963 tgt_align
= sizeof (void *);
969 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
971 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
972 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
975 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
977 size_t align
= 4 * sizeof (void *);
979 tgt_size
= mapnum
* sizeof (void *);
981 cbuf
.use_cnt
= 1 + (mapnum
> 1);
982 cbuf
.chunks
[0].start
= 0;
983 cbuf
.chunks
[0].end
= tgt_size
;
986 gomp_mutex_lock (&devicep
->lock
);
987 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
989 gomp_mutex_unlock (&devicep
->lock
);
994 for (i
= 0; i
< mapnum
; i
++)
996 int kind
= get_kind (short_mapkind
, kinds
, i
);
997 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
998 if (hostaddrs
[i
] == NULL
999 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1001 tgt
->list
[i
].key
= NULL
;
1002 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1005 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1006 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1008 tgt
->list
[i
].key
= NULL
;
1011 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1012 on a separate construct prior to using use_device_{addr,ptr}.
1013 In OpenMP 5.0, map directives need to be ordered by the
1014 middle-end before the use_device_* clauses. If
1015 !not_found_cnt, all mappings requested (if any) are already
1016 mapped, so use_device_{addr,ptr} can be resolved right away.
1017 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1018 now but would succeed after performing the mappings in the
1019 following loop. We can't defer this always to the second
1020 loop, because it is not even invoked when !not_found_cnt
1021 after the first loop. */
1022 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1023 cur_node
.host_end
= cur_node
.host_start
;
1024 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1027 cur_node
.host_start
-= n
->host_start
;
1029 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1030 + cur_node
.host_start
);
1032 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1034 gomp_mutex_unlock (&devicep
->lock
);
1035 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1037 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1038 /* If not present, continue using the host address. */
1041 __builtin_unreachable ();
1042 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1045 tgt
->list
[i
].offset
= 0;
1048 else if (devicep
->is_usm_ptr_func
1049 && devicep
->is_usm_ptr_func (hostaddrs
[i
]))
1051 /* The memory is visible from both host and target
1052 so nothing needs to be moved. */
1053 tgt
->list
[i
].key
= NULL
;
1054 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1057 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1059 size_t first
= i
+ 1;
1060 size_t last
= i
+ sizes
[i
];
1061 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1062 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1064 tgt
->list
[i
].key
= NULL
;
1065 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1066 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1069 size_t align
= (size_t) 1 << (kind
>> rshift
);
1070 if (tgt_align
< align
)
1072 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1073 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1074 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1075 not_found_cnt
+= last
- i
;
1076 for (i
= first
; i
<= last
; i
++)
1078 tgt
->list
[i
].key
= NULL
;
1080 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1082 gomp_coalesce_buf_add (&cbuf
,
1083 tgt_size
- cur_node
.host_end
1084 + (uintptr_t) hostaddrs
[i
],
1090 for (i
= first
; i
<= last
; i
++)
1091 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1092 sizes
, kinds
, NULL
, refcount_set
);
1096 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1098 tgt
->list
[i
].key
= NULL
;
1099 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1100 has_firstprivate
= true;
1103 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1104 || ((kind
& typemask
)
1105 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1107 tgt
->list
[i
].key
= NULL
;
1108 has_firstprivate
= true;
1111 else if (GOMP_MAP_NONCONTIG_ARRAY_P (kind
& typemask
))
1113 /* Ignore non-contiguous arrays for now, we process them together
1115 tgt
->list
[i
].key
= NULL
;
1116 tgt
->list
[i
].offset
= 0;
1119 /* The map for the non-contiguous array itself is never copied from
1120 during unmapping, its the data rows that count. Set copy-from
1121 flags to false here. */
1122 tgt
->list
[i
].copy_from
= false;
1123 tgt
->list
[i
].always_copy_from
= false;
1124 tgt
->list
[i
].is_attach
= false;
1126 size_t align
= (size_t) 1 << (kind
>> rshift
);
1127 if (tgt_align
< align
)
1133 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1134 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1135 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1137 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1138 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1140 tgt
->list
[i
].key
= NULL
;
1142 size_t align
= (size_t) 1 << (kind
>> rshift
);
1143 if (tgt_align
< align
)
1145 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1147 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1148 cur_node
.host_end
- cur_node
.host_start
);
1149 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1150 has_firstprivate
= true;
1154 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1156 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1159 tgt
->list
[i
].key
= NULL
;
1160 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1165 n
= splay_tree_lookup (mem_map
, &cur_node
);
1166 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1168 int always_to_cnt
= 0;
1169 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1171 bool has_nullptr
= false;
1173 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1174 if (n
->tgt
->list
[j
].key
== n
)
1176 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1179 if (n
->tgt
->list_count
== 0)
1181 /* 'declare target'; assume has_nullptr; it could also be
1182 statically assigned pointer, but then it should point to
1183 the equivalent variable on the host. */
1184 assert (n
->refcount
== REFCOUNT_INFINITY
);
1188 assert (j
< n
->tgt
->list_count
);
1189 /* Re-map the data if there is an 'always' modifier or if a
1190 null pointer was there and now a nonnull has been found; that
1191 permits transparent re-mapping for Fortran array descriptors
1192 which were previously mapped unallocated. */
1193 for (j
= i
+ 1; j
< mapnum
; j
++)
1195 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1196 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1198 || !GOMP_MAP_POINTER_P (ptr_kind
)
1199 || *(void **) hostaddrs
[j
] == NULL
))
1201 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1202 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1203 > cur_node
.host_end
))
1207 has_always_ptrset
= true;
1212 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1213 kind
& typemask
, always_to_cnt
> 0, implicit
,
1214 NULL
, refcount_set
);
1219 tgt
->list
[i
].key
= NULL
;
1221 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1223 /* Not present, hence, skip entry - including its MAP_POINTER,
1225 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1227 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1228 == GOMP_MAP_POINTER
))
1231 tgt
->list
[i
].key
= NULL
;
1232 tgt
->list
[i
].offset
= 0;
1236 size_t align
= (size_t) 1 << (kind
>> rshift
);
1238 if (tgt_align
< align
)
1240 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1242 && gomp_to_device_kind_p (kind
& typemask
))
1243 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1244 cur_node
.host_end
- cur_node
.host_start
);
1245 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1246 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1250 for (j
= i
+ 1; j
< mapnum
; j
++)
1251 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1252 kinds
, j
)) & typemask
))
1253 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1255 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1256 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1257 > cur_node
.host_end
))
1261 tgt
->list
[j
].key
= NULL
;
1268 /* For non-contiguous arrays. Each data row is one target item, separated
1269 from the normal map clause items, hence we order them after mapnum. */
1272 struct target_var_desc
*next_var_desc
= &tgt
->list
[mapnum
];
1273 for (i
= 0; i
< nca_info
->num_ncarray
; i
++)
1275 struct goacc_ncarray
*nca
= &nca_info
->ncarray
[i
];
1276 int kind
= get_kind (short_mapkind
, kinds
, nca
->map_index
);
1277 size_t align
= (size_t) 1 << (kind
>> rshift
);
1278 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1279 tgt_size
+= nca
->ptrblock_size
;
1281 for (size_t j
= 0; j
< nca
->data_row_num
; j
++)
1283 struct target_var_desc
*row_desc
= next_var_desc
++;
1284 void *row
= nca
->data_rows
[j
];
1285 cur_node
.host_start
= (uintptr_t) row
;
1286 cur_node
.host_end
= cur_node
.host_start
+ nca
->data_row_size
;
1287 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1290 assert (n
->refcount
!= REFCOUNT_LINK
);
1291 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, row_desc
,
1292 kind
& typemask
, false, false,
1293 /* TODO: cbuf? */ NULL
,
1298 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1299 tgt_size
+= nca
->data_row_size
;
1304 assert (next_var_desc
== &tgt
->list
[mapnum
+ nca_info
->num_data_rows
]);
1311 gomp_mutex_unlock (&devicep
->lock
);
1312 gomp_fatal ("unexpected aggregation");
1314 tgt
->to_free
= devaddrs
[0];
1315 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1316 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1318 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
1320 /* Allocate tgt_align aligned tgt_size block of memory. */
1321 /* FIXME: Perhaps change interface to allocate properly aligned
1323 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1324 tgt_size
+ tgt_align
- 1);
1327 gomp_mutex_unlock (&devicep
->lock
);
1328 gomp_fatal ("device memory allocation fail");
1331 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1332 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1333 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1335 if (cbuf
.use_cnt
== 1)
1337 if (cbuf
.chunk_cnt
> 0)
1340 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1350 tgt
->to_free
= NULL
;
1356 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1357 tgt_size
= mapnum
* sizeof (void *);
1360 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1363 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1364 splay_tree_node array
= tgt
->array
;
1365 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1366 uintptr_t field_tgt_base
= 0;
1367 splay_tree_key field_tgt_structelem_first
= NULL
;
1369 for (i
= 0; i
< mapnum
; i
++)
1370 if (has_always_ptrset
1372 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1373 == GOMP_MAP_TO_PSET
)
1375 splay_tree_key k
= tgt
->list
[i
].key
;
1376 bool has_nullptr
= false;
1378 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1379 if (k
->tgt
->list
[j
].key
== k
)
1381 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1384 if (k
->tgt
->list_count
== 0)
1387 assert (j
< k
->tgt
->list_count
);
1389 tgt
->list
[i
].has_null_ptr_assoc
= false;
1390 for (j
= i
+ 1; j
< mapnum
; j
++)
1392 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1393 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1395 || !GOMP_MAP_POINTER_P (ptr_kind
)
1396 || *(void **) hostaddrs
[j
] == NULL
))
1398 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1399 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1404 if (*(void **) hostaddrs
[j
] == NULL
)
1405 tgt
->list
[i
].has_null_ptr_assoc
= true;
1406 tgt
->list
[j
].key
= k
;
1407 tgt
->list
[j
].copy_from
= false;
1408 tgt
->list
[j
].always_copy_from
= false;
1409 tgt
->list
[j
].is_attach
= false;
1410 gomp_increment_refcount (k
, refcount_set
);
1411 gomp_map_pointer (k
->tgt
, aq
,
1412 (uintptr_t) *(void **) hostaddrs
[j
],
1413 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1415 sizes
[j
], cbufp
, false);
1420 else if (tgt
->list
[i
].key
== NULL
)
1422 int kind
= get_kind (short_mapkind
, kinds
, i
);
1423 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1424 if (hostaddrs
[i
] == NULL
)
1426 switch (kind
& typemask
)
1428 size_t align
, len
, first
, last
;
1430 case GOMP_MAP_FIRSTPRIVATE
:
1431 align
= (size_t) 1 << (kind
>> rshift
);
1432 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1433 tgt
->list
[i
].offset
= tgt_size
;
1435 gomp_copy_host2dev (devicep
, aq
,
1436 (void *) (tgt
->tgt_start
+ tgt_size
),
1437 (void *) hostaddrs
[i
], len
, false, cbufp
);
1438 /* Save device address in hostaddr to permit later availability
1439 when doing a deep-firstprivate with pointer attach. */
1440 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1443 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1444 firstprivate to hostaddrs[i+1], which is assumed to contain a
1448 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1450 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1451 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1452 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1453 sizeof (void *), false, cbufp
);
1457 case GOMP_MAP_FIRSTPRIVATE_INT
:
1458 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1460 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1461 /* The OpenACC 'host_data' construct only allows 'use_device'
1462 "mapping" clauses, so in the first loop, 'not_found_cnt'
1463 must always have been zero, so all OpenACC 'use_device'
1464 clauses have already been handled. (We can only easily test
1465 'use_device' with 'if_present' clause here.) */
1466 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1467 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1468 code conceptually simple, similar to the first loop. */
1469 case GOMP_MAP_USE_DEVICE_PTR
:
1470 if (tgt
->list
[i
].offset
== 0)
1472 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1473 cur_node
.host_end
= cur_node
.host_start
;
1474 n
= gomp_map_lookup (mem_map
, &cur_node
);
1477 cur_node
.host_start
-= n
->host_start
;
1479 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1480 + cur_node
.host_start
);
1482 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1484 gomp_mutex_unlock (&devicep
->lock
);
1485 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1487 else if ((kind
& typemask
)
1488 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1489 /* If not present, continue using the host address. */
1492 __builtin_unreachable ();
1493 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1496 case GOMP_MAP_STRUCT
:
1498 last
= i
+ sizes
[i
];
1499 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1500 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1502 if (tgt
->list
[first
].key
!= NULL
)
1504 n
= splay_tree_lookup (mem_map
, &cur_node
);
1507 size_t align
= (size_t) 1 << (kind
>> rshift
);
1508 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1509 - (uintptr_t) hostaddrs
[i
];
1510 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1511 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1512 - (uintptr_t) hostaddrs
[i
];
1513 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1514 field_tgt_offset
= tgt_size
;
1515 field_tgt_clear
= last
;
1516 field_tgt_structelem_first
= NULL
;
1517 tgt_size
+= cur_node
.host_end
1518 - (uintptr_t) hostaddrs
[first
];
1521 for (i
= first
; i
<= last
; i
++)
1522 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1523 sizes
, kinds
, cbufp
, refcount_set
);
1526 case GOMP_MAP_ALWAYS_POINTER
:
1527 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1528 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1529 n
= splay_tree_lookup (mem_map
, &cur_node
);
1531 || n
->host_start
> cur_node
.host_start
1532 || n
->host_end
< cur_node
.host_end
)
1534 gomp_mutex_unlock (&devicep
->lock
);
1535 gomp_fatal ("always pointer not mapped");
1537 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1538 != GOMP_MAP_ALWAYS_POINTER
)
1539 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1540 if (cur_node
.tgt_offset
)
1541 cur_node
.tgt_offset
-= sizes
[i
];
1542 gomp_copy_host2dev (devicep
, aq
,
1543 (void *) (n
->tgt
->tgt_start
1545 + cur_node
.host_start
1547 (void *) &cur_node
.tgt_offset
,
1548 sizeof (void *), true, cbufp
);
1549 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1550 + cur_node
.host_start
- n
->host_start
;
1552 case GOMP_MAP_IF_PRESENT
:
1553 /* Not present - otherwise handled above. Skip over its
1554 MAP_POINTER as well. */
1556 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1557 == GOMP_MAP_POINTER
))
1560 case GOMP_MAP_ATTACH
:
1561 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1563 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1564 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1565 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1568 tgt
->list
[i
].key
= n
;
1569 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1570 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1571 tgt
->list
[i
].copy_from
= false;
1572 tgt
->list
[i
].always_copy_from
= false;
1573 tgt
->list
[i
].is_attach
= true;
1574 /* OpenACC 'attach'/'detach' doesn't affect
1575 structured/dynamic reference counts ('n->refcount',
1576 'n->dynamic_refcount'). */
1579 = ((kind
& typemask
)
1580 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1581 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1582 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1585 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1587 gomp_mutex_unlock (&devicep
->lock
);
1588 gomp_fatal ("outer struct not mapped for attach");
1593 if (tgt
->list
[i
].offset
== OFFSET_INLINED
1599 if (GOMP_MAP_NONCONTIG_ARRAY_P (kind
& typemask
))
1601 tgt
->list
[i
].key
= &array
->key
;
1602 tgt
->list
[i
].key
->tgt
= tgt
;
1607 splay_tree_key k
= &array
->key
;
1608 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1609 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1610 k
->host_end
= k
->host_start
+ sizes
[i
];
1612 k
->host_end
= k
->host_start
+ sizeof (void *);
1613 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1614 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1615 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1616 kind
& typemask
, false, implicit
, cbufp
,
1621 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1623 /* Replace target address of the pointer with target address
1624 of mapped object in the splay tree. */
1625 splay_tree_remove (mem_map
, n
);
1627 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1628 k
->aux
->link_key
= n
;
1630 size_t align
= (size_t) 1 << (kind
>> rshift
);
1631 tgt
->list
[i
].key
= k
;
1634 k
->dynamic_refcount
= 0;
1635 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1637 k
->tgt_offset
= k
->host_start
- field_tgt_base
1641 k
->refcount
= REFCOUNT_STRUCTELEM
;
1642 if (field_tgt_structelem_first
== NULL
)
1644 /* Set to first structure element of sequence. */
1645 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1646 field_tgt_structelem_first
= k
;
1649 /* Point to refcount of leading element, but do not
1651 k
->structelem_refcount_ptr
1652 = &field_tgt_structelem_first
->structelem_refcount
;
1654 if (i
== field_tgt_clear
)
1656 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1657 field_tgt_structelem_first
= NULL
;
1660 if (i
== field_tgt_clear
)
1661 field_tgt_clear
= FIELD_TGT_EMPTY
;
1665 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1666 k
->tgt_offset
= tgt_size
;
1667 tgt_size
+= k
->host_end
- k
->host_start
;
1669 /* First increment, from 0 to 1. gomp_increment_refcount
1670 encapsulates the different increment cases, so use this
1671 instead of directly setting 1 during initialization. */
1672 gomp_increment_refcount (k
, refcount_set
);
1674 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1675 tgt
->list
[i
].always_copy_from
1676 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1677 tgt
->list
[i
].is_attach
= false;
1678 tgt
->list
[i
].offset
= 0;
1679 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1682 array
->right
= NULL
;
1683 splay_tree_insert (mem_map
, array
);
1684 switch (kind
& typemask
)
1686 case GOMP_MAP_ALLOC
:
1688 case GOMP_MAP_FORCE_ALLOC
:
1689 case GOMP_MAP_FORCE_FROM
:
1690 case GOMP_MAP_ALWAYS_FROM
:
1693 case GOMP_MAP_TOFROM
:
1694 case GOMP_MAP_FORCE_TO
:
1695 case GOMP_MAP_FORCE_TOFROM
:
1696 case GOMP_MAP_ALWAYS_TO
:
1697 case GOMP_MAP_ALWAYS_TOFROM
:
1698 gomp_copy_host2dev (devicep
, aq
,
1699 (void *) (tgt
->tgt_start
1701 (void *) k
->host_start
,
1702 k
->host_end
- k
->host_start
,
1705 case GOMP_MAP_POINTER
:
1706 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1708 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1709 k
->tgt_offset
, sizes
[i
], cbufp
,
1711 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1713 case GOMP_MAP_TO_PSET
:
1714 gomp_copy_host2dev (devicep
, aq
,
1715 (void *) (tgt
->tgt_start
1717 (void *) k
->host_start
,
1718 k
->host_end
- k
->host_start
,
1720 tgt
->list
[i
].has_null_ptr_assoc
= false;
1722 for (j
= i
+ 1; j
< mapnum
; j
++)
1724 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1726 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1727 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1729 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1730 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1735 tgt
->list
[j
].key
= k
;
1736 tgt
->list
[j
].copy_from
= false;
1737 tgt
->list
[j
].always_copy_from
= false;
1738 tgt
->list
[j
].is_attach
= false;
1739 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1740 /* For OpenMP, the use of refcount_sets causes
1741 errors if we set k->refcount = 1 above but also
1742 increment it again here, for decrementing will
1743 not properly match, since we decrement only once
1744 for each key's refcount. Therefore avoid this
1745 increment for OpenMP constructs. */
1747 gomp_increment_refcount (k
, refcount_set
);
1748 gomp_map_pointer (tgt
, aq
,
1749 (uintptr_t) *(void **) hostaddrs
[j
],
1751 + ((uintptr_t) hostaddrs
[j
]
1753 sizes
[j
], cbufp
, false);
1758 case GOMP_MAP_FORCE_PRESENT
:
1760 /* We already looked up the memory region above and it
1762 size_t size
= k
->host_end
- k
->host_start
;
1763 gomp_mutex_unlock (&devicep
->lock
);
1764 #ifdef HAVE_INTTYPES_H
1765 gomp_fatal ("present clause: !acc_is_present (%p, "
1766 "%"PRIu64
" (0x%"PRIx64
"))",
1767 (void *) k
->host_start
,
1768 (uint64_t) size
, (uint64_t) size
);
1770 gomp_fatal ("present clause: !acc_is_present (%p, "
1771 "%lu (0x%lx))", (void *) k
->host_start
,
1772 (unsigned long) size
, (unsigned long) size
);
1776 case GOMP_MAP_FORCE_DEVICEPTR
:
1777 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1778 gomp_copy_host2dev (devicep
, aq
,
1779 (void *) (tgt
->tgt_start
1781 (void *) k
->host_start
,
1782 sizeof (void *), false, cbufp
);
1785 gomp_mutex_unlock (&devicep
->lock
);
1786 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1790 if (k
->aux
&& k
->aux
->link_key
)
1792 /* Set link pointer on target to the device address of the
1794 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1795 /* We intentionally do not use coalescing here, as it's not
1796 data allocated by the current call to this function. */
1797 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1798 &tgt_addr
, sizeof (void *), true, NULL
);
1804 /* Processing of non-contiguous array rows. */
1807 struct target_var_desc
*next_var_desc
= &tgt
->list
[mapnum
];
1808 for (i
= 0; i
< nca_info
->num_ncarray
; i
++)
1810 struct goacc_ncarray
*nca
= &nca_info
->ncarray
[i
];
1811 int kind
= get_kind (short_mapkind
, kinds
, nca
->map_index
);
1812 size_t align
= (size_t) 1 << (kind
>> rshift
);
1813 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1815 assert (nca
->ptr
== hostaddrs
[nca
->map_index
]);
1817 /* For the map of the non-contiguous array itself, adjust so that
1818 the passed device address points to the beginning of the
1819 ptrblock. Remember to adjust the first-dimension's bias here. */
1820 tgt
->list
[nca
->map_index
].key
->tgt_offset
1821 = tgt_size
- nca
->descr
->dims
[0].base
;
1823 void *target_ptrblock
= (void*) tgt
->tgt_start
+ tgt_size
;
1824 tgt_size
+= nca
->ptrblock_size
;
1826 /* Add splay key for each data row in current non-contiguous
1828 for (size_t j
= 0; j
< nca
->data_row_num
; j
++)
1830 struct target_var_desc
*row_desc
= next_var_desc
++;
1831 void *row
= nca
->data_rows
[j
];
1832 cur_node
.host_start
= (uintptr_t) row
;
1833 cur_node
.host_end
= cur_node
.host_start
+ nca
->data_row_size
;
1834 splay_tree_key k
= splay_tree_lookup (mem_map
, &cur_node
);
1837 assert (k
->refcount
!= REFCOUNT_LINK
);
1838 gomp_map_vars_existing (devicep
, aq
, k
, &cur_node
, row_desc
,
1839 kind
& typemask
, false, false,
1840 cbufp
, refcount_set
);
1845 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1848 k
->host_start
= (uintptr_t) row
;
1849 k
->host_end
= k
->host_start
+ nca
->data_row_size
;
1853 k
->dynamic_refcount
= 0;
1855 k
->tgt_offset
= tgt_size
;
1857 tgt_size
+= nca
->data_row_size
;
1861 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1862 row_desc
->always_copy_from
1863 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1864 row_desc
->is_attach
= false;
1865 row_desc
->offset
= 0;
1866 row_desc
->length
= nca
->data_row_size
;
1869 array
->right
= NULL
;
1870 splay_tree_insert (mem_map
, array
);
1872 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1873 gomp_copy_host2dev (devicep
, aq
,
1874 (void *) tgt
->tgt_start
+ k
->tgt_offset
,
1875 (void *) k
->host_start
,
1876 nca
->data_row_size
, false,
1880 nca
->tgt_data_rows
[j
]
1881 = (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
);
1884 /* Now we have the target memory allocated, and target offsets of all
1885 row blocks assigned and calculated, we can construct the
1886 accelerator side ptrblock and copy it in. */
1887 if (nca
->ptrblock_size
)
1889 void *ptrblock
= goacc_noncontig_array_create_ptrblock
1890 (nca
, target_ptrblock
);
1891 gomp_copy_host2dev (devicep
, aq
, target_ptrblock
, ptrblock
,
1892 nca
->ptrblock_size
, false, cbufp
);
1899 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1901 for (i
= 0; i
< mapnum
; i
++)
1903 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1904 gomp_copy_host2dev (devicep
, aq
,
1905 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1906 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1913 /* See 'gomp_coalesce_buf_add'. */
1917 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1918 gomp_copy_host2dev (devicep
, aq
,
1919 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1920 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1921 - cbuf
.chunks
[0].start
),
1922 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1929 /* If the variable from "omp target enter data" map-list was already mapped,
1930 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1932 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1938 gomp_mutex_unlock (&devicep
->lock
);
1942 attribute_hidden
struct target_mem_desc
*
1943 gomp_map_vars_openacc (struct gomp_device_descr
*devicep
,
1944 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1945 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1948 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, NULL
,
1949 sizes
, (void *) kinds
,
1950 (struct goacc_ncarray_info
*) nca_info
,
1951 true, NULL
, GOMP_MAP_VARS_OPENACC
);
1954 static struct target_mem_desc
*
1955 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1956 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1957 bool short_mapkind
, htab_t
*refcount_set
,
1958 enum gomp_map_vars_kind pragma_kind
)
1960 /* This management of a local refcount_set is for convenience of callers
1961 who do not share a refcount_set over multiple map/unmap uses. */
1962 htab_t local_refcount_set
= NULL
;
1963 if (refcount_set
== NULL
)
1965 local_refcount_set
= htab_create (mapnum
);
1966 refcount_set
= &local_refcount_set
;
1969 struct target_mem_desc
*tgt
;
1970 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1971 sizes
, kinds
, NULL
, short_mapkind
,
1972 refcount_set
, pragma_kind
);
1973 if (local_refcount_set
)
1974 htab_free (local_refcount_set
);
1979 attribute_hidden
struct target_mem_desc
*
1980 goacc_map_vars (struct gomp_device_descr
*devicep
,
1981 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1982 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1983 void *kinds
, bool short_mapkind
,
1984 enum gomp_map_vars_kind pragma_kind
)
1986 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1987 sizes
, kinds
, NULL
, short_mapkind
, NULL
,
1988 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1992 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1994 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1996 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
2003 gomp_unref_tgt (void *ptr
)
2005 bool is_tgt_unmapped
= false;
2007 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
2009 if (tgt
->refcount
> 1)
2013 gomp_unmap_tgt (tgt
);
2014 is_tgt_unmapped
= true;
2017 return is_tgt_unmapped
;
2021 gomp_unref_tgt_void (void *ptr
)
2023 (void) gomp_unref_tgt (ptr
);
2027 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
2029 splay_tree_remove (sp
, k
);
2032 if (k
->aux
->link_key
)
2033 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
2034 if (k
->aux
->attach_count
)
2035 free (k
->aux
->attach_count
);
2041 static inline __attribute__((always_inline
)) bool
2042 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2043 struct goacc_asyncqueue
*aq
)
2045 bool is_tgt_unmapped
= false;
2047 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
2049 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
2050 /* Infer the splay_tree_key of the first structelem key using the
2051 pointer to the first structleme_refcount. */
2052 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
2053 - offsetof (struct splay_tree_key_s
,
2054 structelem_refcount
));
2055 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
2057 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
2058 with the splay_tree_keys embedded inside. */
2059 splay_tree_node node
=
2060 (splay_tree_node
) ((char *) k
2061 - offsetof (struct splay_tree_node_s
, key
));
2064 /* Starting from the _FIRST key, and continue for all following
2066 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
2067 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
2074 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
2077 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2080 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
2081 return is_tgt_unmapped
;
2084 attribute_hidden
bool
2085 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
2087 return gomp_remove_var_internal (devicep
, k
, NULL
);
2090 /* Remove a variable asynchronously. This actually removes the variable
2091 mapping immediately, but retains the linked target_mem_desc until the
2092 asynchronous operation has completed (as it may still refer to target
2093 memory). The device lock must be held before entry, and remains locked on exit.  */
2096 attribute_hidden
void
2097 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2098 struct goacc_asyncqueue
*aq
)
2100 (void) gomp_remove_var_internal (devicep
, k
, aq
);
2103 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2104 variables back from device to host: if it is false, it is assumed that this
2105 has been done already. */
2107 static inline __attribute__((always_inline
)) void
2108 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2109 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
2111 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
2113 if (tgt
->list_count
== 0)
2119 gomp_mutex_lock (&devicep
->lock
);
2120 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2122 gomp_mutex_unlock (&devicep
->lock
);
2130 /* We must perform detachments before any copies back to the host. */
2131 for (i
= 0; i
< tgt
->list_count
; i
++)
2133 splay_tree_key k
= tgt
->list
[i
].key
;
2135 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
2136 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
2137 + tgt
->list
[i
].offset
,
2141 for (i
= 0; i
< tgt
->list_count
; i
++)
2143 splay_tree_key k
= tgt
->list
[i
].key
;
2147 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2148 counts ('n->refcount', 'n->dynamic_refcount'). */
2149 if (tgt
->list
[i
].is_attach
)
2152 bool do_copy
, do_remove
;
2153 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
2155 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
2156 || tgt
->list
[i
].always_copy_from
)
2157 gomp_copy_dev2host (devicep
, aq
,
2158 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
2159 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2160 + tgt
->list
[i
].offset
),
2161 tgt
->list
[i
].length
);
2164 struct target_mem_desc
*k_tgt
= k
->tgt
;
2165 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2166 /* It would be bad if TGT got unmapped while we're still iterating
2167 over its LIST_COUNT, and also expect to use it in the following
2169 assert (!is_tgt_unmapped
2175 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2178 gomp_unref_tgt ((void *) tgt
);
2180 gomp_mutex_unlock (&devicep
->lock
);
2184 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2185 htab_t
*refcount_set
)
2187 /* This management of a local refcount_set is for convenience of callers
2188 who do not share a refcount_set over multiple map/unmap uses. */
2189 htab_t local_refcount_set
= NULL
;
2190 if (refcount_set
== NULL
)
2192 local_refcount_set
= htab_create (tgt
->list_count
);
2193 refcount_set
= &local_refcount_set
;
2196 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2198 if (local_refcount_set
)
2199 htab_free (local_refcount_set
);
2202 attribute_hidden
void
2203 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2204 struct goacc_asyncqueue
*aq
)
2206 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
2210 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2211 size_t *sizes
, void *kinds
, bool short_mapkind
)
2214 struct splay_tree_key_s cur_node
;
2215 const int typemask
= short_mapkind
? 0xff : 0x7;
2223 gomp_mutex_lock (&devicep
->lock
);
2224 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2226 gomp_mutex_unlock (&devicep
->lock
);
2230 for (i
= 0; i
< mapnum
; i
++)
2233 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2234 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2235 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2238 int kind
= get_kind (short_mapkind
, kinds
, i
);
2239 if (n
->host_start
> cur_node
.host_start
2240 || n
->host_end
< cur_node
.host_end
)
2242 gomp_mutex_unlock (&devicep
->lock
);
2243 gomp_fatal ("Trying to update [%p..%p) object when "
2244 "only [%p..%p) is mapped",
2245 (void *) cur_node
.host_start
,
2246 (void *) cur_node
.host_end
,
2247 (void *) n
->host_start
,
2248 (void *) n
->host_end
);
2251 if (n
->aux
&& n
->aux
->attach_count
)
2253 uintptr_t addr
= cur_node
.host_start
;
2254 while (addr
< cur_node
.host_end
)
2256 /* We have to be careful not to overwrite still attached
2257 pointers during host<->device updates. */
2258 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2259 if (n
->aux
->attach_count
[i
] == 0)
2261 void *devaddr
= (void *) (n
->tgt
->tgt_start
2263 + addr
- n
->host_start
);
2264 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2265 gomp_copy_host2dev (devicep
, NULL
,
2266 devaddr
, (void *) addr
,
2267 sizeof (void *), false, NULL
);
2268 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2269 gomp_copy_dev2host (devicep
, NULL
,
2270 (void *) addr
, devaddr
,
2273 addr
+= sizeof (void *);
2278 void *hostaddr
= (void *) cur_node
.host_start
;
2279 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2280 + cur_node
.host_start
2282 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2284 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2285 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2287 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2288 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2292 gomp_mutex_unlock (&devicep
->lock
);
2295 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2296 And insert to splay tree the mapping between addresses from HOST_TABLE and
2297 from loaded target image. We rely in the host and device compiler
2298 emitting variable and functions in the same order. */
2301 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2302 const void *host_table
, const void *target_data
,
2303 bool is_register_lock
)
2305 void **host_func_table
= ((void ***) host_table
)[0];
2306 void **host_funcs_end
= ((void ***) host_table
)[1];
2307 void **host_var_table
= ((void ***) host_table
)[2];
2308 void **host_vars_end
= ((void ***) host_table
)[3];
2310 /* The func table contains only addresses, the var table contains addresses
2311 and corresponding sizes. */
2312 int num_funcs
= host_funcs_end
- host_func_table
;
2313 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2315 /* Others currently is only 'device_num' */
2318 /* Load image to device and get target addresses for the image. */
2319 struct addr_pair
*target_table
= NULL
;
2320 int i
, num_target_entries
;
2323 = devicep
->load_image_func (devicep
->target_id
, version
,
2324 target_data
, &target_table
);
2326 if (num_target_entries
!= num_funcs
+ num_vars
2327 /* Others (device_num) are included as trailing entries in pair list. */
2328 && num_target_entries
!= num_funcs
+ num_vars
+ num_others
)
2330 gomp_mutex_unlock (&devicep
->lock
);
2331 if (is_register_lock
)
2332 gomp_mutex_unlock (®ister_lock
);
2333 gomp_fatal ("Cannot map target functions or variables"
2334 " (expected %u, have %u)", num_funcs
+ num_vars
,
2335 num_target_entries
);
2338 /* Insert host-target address mapping into splay tree. */
2339 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2340 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
2341 tgt
->refcount
= REFCOUNT_INFINITY
;
2344 tgt
->to_free
= NULL
;
2346 tgt
->list_count
= 0;
2347 tgt
->device_descr
= devicep
;
2348 splay_tree_node array
= tgt
->array
;
2350 for (i
= 0; i
< num_funcs
; i
++)
2352 splay_tree_key k
= &array
->key
;
2353 k
->host_start
= (uintptr_t) host_func_table
[i
];
2354 k
->host_end
= k
->host_start
+ 1;
2356 k
->tgt_offset
= target_table
[i
].start
;
2357 k
->refcount
= REFCOUNT_INFINITY
;
2358 k
->dynamic_refcount
= 0;
2361 array
->right
= NULL
;
2362 splay_tree_insert (&devicep
->mem_map
, array
);
2366 /* Most significant bit of the size in host and target tables marks
2367 "omp declare target link" variables. */
2368 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2369 const uintptr_t size_mask
= ~link_bit
;
2371 for (i
= 0; i
< num_vars
; i
++)
2373 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2374 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2375 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2377 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2379 gomp_mutex_unlock (&devicep
->lock
);
2380 if (is_register_lock
)
2381 gomp_mutex_unlock (®ister_lock
);
2382 gomp_fatal ("Cannot map target variables (size mismatch)");
2385 splay_tree_key k
= &array
->key
;
2386 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2388 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2390 k
->tgt_offset
= target_var
->start
;
2391 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2392 k
->dynamic_refcount
= 0;
2395 array
->right
= NULL
;
2396 splay_tree_insert (&devicep
->mem_map
, array
);
2400 /* Last entry is for the on-device 'device_num' variable. Tolerate case
2401 where plugin does not return this entry. */
2402 if (num_funcs
+ num_vars
< num_target_entries
)
2404 struct addr_pair
*device_num_var
= &target_table
[num_funcs
+ num_vars
];
2405 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2406 was found in this image. */
2407 if (device_num_var
->start
!= 0)
2409 /* The index of the devicep within devices[] is regarded as its
2410 'device number', which is different from the per-device type
2411 devicep->target_id. */
2412 int device_num_val
= (int) (devicep
- &devices
[0]);
2413 if (device_num_var
->end
- device_num_var
->start
!= sizeof (int))
2415 gomp_mutex_unlock (&devicep
->lock
);
2416 if (is_register_lock
)
2417 gomp_mutex_unlock (®ister_lock
);
2418 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2422 /* Copy device_num value to place on device memory, hereby actually
2423 designating its device number into effect. */
2424 gomp_copy_host2dev (devicep
, NULL
, (void *) device_num_var
->start
,
2425 &device_num_val
, sizeof (int), false, NULL
);
2429 free (target_table
);
2432 /* Unload the mappings described by target_data from device DEVICE_P.
2433 The device must be locked. */
2436 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2438 const void *host_table
, const void *target_data
)
2440 void **host_func_table
= ((void ***) host_table
)[0];
2441 void **host_funcs_end
= ((void ***) host_table
)[1];
2442 void **host_var_table
= ((void ***) host_table
)[2];
2443 void **host_vars_end
= ((void ***) host_table
)[3];
2445 /* The func table contains only addresses, the var table contains addresses
2446 and corresponding sizes. */
2447 int num_funcs
= host_funcs_end
- host_func_table
;
2448 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2450 struct splay_tree_key_s k
;
2451 splay_tree_key node
= NULL
;
2453 /* Find mapping at start of node array */
2454 if (num_funcs
|| num_vars
)
2456 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2457 : (uintptr_t) host_var_table
[0]);
2458 k
.host_end
= k
.host_start
+ 1;
2459 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2462 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2464 gomp_mutex_unlock (&devicep
->lock
);
2465 gomp_fatal ("image unload fail");
2468 /* Remove mappings from splay tree. */
2470 for (i
= 0; i
< num_funcs
; i
++)
2472 k
.host_start
= (uintptr_t) host_func_table
[i
];
2473 k
.host_end
= k
.host_start
+ 1;
2474 splay_tree_remove (&devicep
->mem_map
, &k
);
2477 /* Most significant bit of the size in host and target tables marks
2478 "omp declare target link" variables. */
2479 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2480 const uintptr_t size_mask
= ~link_bit
;
2481 bool is_tgt_unmapped
= false;
2483 for (i
= 0; i
< num_vars
; i
++)
2485 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2487 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2489 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2490 splay_tree_remove (&devicep
->mem_map
, &k
);
2493 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2494 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2498 if (node
&& !is_tgt_unmapped
)
2505 /* This function should be called from every offload image while loading.
2506 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2507 the target, and TARGET_DATA needed by target plugin. */
2510 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2511 int target_type
, const void *target_data
)
2515 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2516 gomp_fatal ("Library too old for offload (version %u < %u)",
2517 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2519 gomp_mutex_lock (®ister_lock
);
2521 /* Load image to all initialized devices. */
2522 for (i
= 0; i
< num_devices
; i
++)
2524 struct gomp_device_descr
*devicep
= &devices
[i
];
2525 gomp_mutex_lock (&devicep
->lock
);
2526 if (devicep
->type
== target_type
2527 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2528 gomp_load_image_to_device (devicep
, version
,
2529 host_table
, target_data
, true);
2530 gomp_mutex_unlock (&devicep
->lock
);
2533 /* Insert image to array of pending images. */
2535 = gomp_realloc_unlock (offload_images
,
2536 (num_offload_images
+ 1)
2537 * sizeof (struct offload_image_descr
));
2538 offload_images
[num_offload_images
].version
= version
;
2539 offload_images
[num_offload_images
].type
= target_type
;
2540 offload_images
[num_offload_images
].host_table
= host_table
;
2541 offload_images
[num_offload_images
].target_data
= target_data
;
2543 num_offload_images
++;
2544 gomp_mutex_unlock (®ister_lock
);
/* Legacy (version-0) entry point; forwards to the versioned variant.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2554 /* This function should be called from every offload image while unloading.
2555 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2556 the target, and TARGET_DATA needed by target plugin. */
2559 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2560 int target_type
, const void *target_data
)
2564 gomp_mutex_lock (®ister_lock
);
2566 /* Unload image from all initialized devices. */
2567 for (i
= 0; i
< num_devices
; i
++)
2569 struct gomp_device_descr
*devicep
= &devices
[i
];
2570 gomp_mutex_lock (&devicep
->lock
);
2571 if (devicep
->type
== target_type
2572 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2573 gomp_unload_image_from_device (devicep
, version
,
2574 host_table
, target_data
);
2575 gomp_mutex_unlock (&devicep
->lock
);
2578 /* Remove image from array of pending images. */
2579 for (i
= 0; i
< num_offload_images
; i
++)
2580 if (offload_images
[i
].target_data
== target_data
)
2582 offload_images
[i
] = offload_images
[--num_offload_images
];
2586 gomp_mutex_unlock (®ister_lock
);
/* Legacy (version-0) entry point; forwards to the versioned variant.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2596 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2597 must be locked on entry, and remains locked on return. */
2599 attribute_hidden
void
2600 gomp_init_device (struct gomp_device_descr
*devicep
)
2603 if (!devicep
->init_device_func (devicep
->target_id
))
2605 gomp_mutex_unlock (&devicep
->lock
);
2606 gomp_fatal ("device initialization failed");
2609 unsigned int features
= gomp_requires_mask
;
2610 if (!devicep
->supported_features_func (&features
))
2612 char buf
[64], *end
= buf
+ sizeof (buf
), *p
= buf
;
2613 if (features
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2614 p
+= snprintf (p
, end
- p
, "unified_address");
2615 if (features
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2616 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2617 (p
== buf
? "" : ", "));
2618 if (features
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2619 p
+= snprintf (p
, end
- p
, "%sreverse_offload", (p
== buf
? "" : ", "));
2620 gomp_error ("device does not support required features: %s", buf
);
2623 /* Load to device all images registered by the moment. */
2624 for (i
= 0; i
< num_offload_images
; i
++)
2626 struct offload_image_descr
*image
= &offload_images
[i
];
2627 if (image
->type
== devicep
->type
)
2628 gomp_load_image_to_device (devicep
, image
->version
,
2629 image
->host_table
, image
->target_data
,
2633 /* Initialize OpenACC asynchronous queues. */
2634 goacc_init_asyncqueues (devicep
);
2636 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2639 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2640 must be locked on entry, and remains locked on return. */
2642 attribute_hidden
bool
2643 gomp_fini_device (struct gomp_device_descr
*devicep
)
2645 bool ret
= goacc_fini_asyncqueues (devicep
);
2646 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2647 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2651 attribute_hidden
void
2652 gomp_unload_device (struct gomp_device_descr
*devicep
)
2654 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2658 /* Unload from device all images registered at the moment. */
2659 for (i
= 0; i
< num_offload_images
; i
++)
2661 struct offload_image_descr
*image
= &offload_images
[i
];
2662 if (image
->type
== devicep
->type
)
2663 gomp_unload_image_from_device (devicep
, image
->version
,
2665 image
->target_data
);
2670 /* Host fallback for GOMP_target{,_ext} routines. */
2673 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2674 struct gomp_device_descr
*devicep
, void **args
)
2676 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2678 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2680 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2681 "be used for offloading");
2684 memset (thr
, '\0', sizeof (*thr
));
2685 if (gomp_places_list
)
2687 thr
->place
= old_thr
.place
;
2688 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2693 intptr_t id
= (intptr_t) *args
++, val
;
2694 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2695 val
= (intptr_t) *args
++;
2697 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2698 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2700 id
&= GOMP_TARGET_ARG_ID_MASK
;
2701 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2703 val
= val
> INT_MAX
? INT_MAX
: val
;
2705 gomp_icv (true)->thread_limit_var
= val
;
2710 gomp_free_thread (thr
);
2714 /* Calculate alignment and size requirements of a private copy of data shared
2715 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2718 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2719 unsigned short *kinds
, size_t *tgt_align
,
2723 for (i
= 0; i
< mapnum
; i
++)
2724 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2726 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2727 if (*tgt_align
< align
)
2729 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2730 *tgt_size
+= sizes
[i
];
2734 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2737 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2738 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2741 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2743 tgt
+= tgt_align
- al
;
2746 for (i
= 0; i
< mapnum
; i
++)
2747 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2749 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2750 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2751 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2752 hostaddrs
[i
] = tgt
+ tgt_size
;
2753 tgt_size
= tgt_size
+ sizes
[i
];
2754 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2756 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2762 /* Helper function of GOMP_target{,_ext} routines. */
2765 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2766 void (*host_fn
) (void *))
2768 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2769 return (void *) host_fn
;
2772 gomp_mutex_lock (&devicep
->lock
);
2773 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2775 gomp_mutex_unlock (&devicep
->lock
);
2779 struct splay_tree_key_s k
;
2780 k
.host_start
= (uintptr_t) host_fn
;
2781 k
.host_end
= k
.host_start
+ 1;
2782 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2783 gomp_mutex_unlock (&devicep
->lock
);
2787 return (void *) tgt_fn
->tgt_offset
;
2791 /* Called when encountering a target directive. If DEVICE
2792 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2793 GOMP_DEVICE_HOST_FALLBACK (or any value
2794 larger than last available hw device), use host fallback.
2795 FN is address of host code, UNUSED is part of the current ABI, but
2796 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2797 with MAPNUM entries, with addresses of the host objects,
2798 sizes of the host objects (resp. for pointer kind pointer bias
2799 and assumed sizeof (void *) size) and kinds. */
2802 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2803 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2804 unsigned char *kinds
)
2806 struct gomp_device_descr
*devicep
= resolve_device (device
);
2810 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2811 /* All shared memory devices should use the GOMP_target_ext function. */
2812 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2813 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2814 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
2816 htab_t refcount_set
= htab_create (mapnum
);
2817 struct target_mem_desc
*tgt_vars
2818 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2819 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2820 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2822 htab_clear (refcount_set
);
2823 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2824 htab_free (refcount_set
);
2827 static inline unsigned int
2828 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2830 /* If we cannot run asynchronously, simply ignore nowait. */
2831 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2832 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2837 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2838 and several arguments have been added:
2839 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2840 DEPEND is array of dependencies, see GOMP_task for details.
2842 ARGS is a pointer to an array consisting of a variable number of both
2843 device-independent and device-specific arguments, which can take one two
2844 elements where the first specifies for which device it is intended, the type
2845 and optionally also the value. If the value is not present in the first
2846 one, the whole second element the actual value. The last element of the
2847 array is a single NULL. Among the device independent can be for example
2848 NUM_TEAMS and THREAD_LIMIT.
2850 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2851 that value, or 1 if teams construct is not present, or 0, if
2852 teams construct does not have num_teams clause and so the choice is
2853 implementation defined, and -1 if it can't be determined on the host
2854 what value will GOMP_teams have on the device.
2855 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2856 body with that value, or 0, if teams construct does not have thread_limit
2857 clause or the teams construct is not present, or -1 if it can't be
2858 determined on the host what value will GOMP_teams have on the device. */
2861 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2862 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2863 unsigned int flags
, void **depend
, void **args
)
2865 struct gomp_device_descr
*devicep
= resolve_device (device
);
2866 size_t tgt_align
= 0, tgt_size
= 0;
2867 bool fpc_done
= false;
2869 flags
= clear_unsupported_flags (devicep
, flags
);
2871 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2873 struct gomp_thread
*thr
= gomp_thread ();
2874 /* Create a team if we don't have any around, as nowait
2875 target tasks make sense to run asynchronously even when
2876 outside of any parallel. */
2877 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2879 struct gomp_team
*team
= gomp_new_team (1);
2880 struct gomp_task
*task
= thr
->task
;
2881 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2882 team
->prev_ts
= thr
->ts
;
2883 thr
->ts
.team
= team
;
2884 thr
->ts
.team_id
= 0;
2885 thr
->ts
.work_share
= &team
->work_shares
[0];
2886 thr
->ts
.last_work_share
= NULL
;
2887 #ifdef HAVE_SYNC_BUILTINS
2888 thr
->ts
.single_count
= 0;
2890 thr
->ts
.static_trip
= 0;
2891 thr
->task
= &team
->implicit_task
[0];
2892 gomp_init_task (thr
->task
, NULL
, icv
);
2898 thr
->task
= &team
->implicit_task
[0];
2901 pthread_setspecific (gomp_thread_destructor
, thr
);
2904 && !thr
->task
->final_task
)
2906 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2907 sizes
, kinds
, flags
, depend
, args
,
2908 GOMP_TARGET_TASK_BEFORE_MAP
);
2913 /* If there are depend clauses, but nowait is not present
2914 (or we are in a final task), block the parent task until the
2915 dependencies are resolved and then just continue with the rest
2916 of the function as if it is a merged task. */
2919 struct gomp_thread
*thr
= gomp_thread ();
2920 if (thr
->task
&& thr
->task
->depend_hash
)
2922 /* If we might need to wait, copy firstprivate now. */
2923 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2924 &tgt_align
, &tgt_size
);
2927 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2928 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2929 tgt_align
, tgt_size
);
2932 gomp_task_maybe_wait_for_dependencies (depend
);
2938 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2939 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2940 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2944 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2945 &tgt_align
, &tgt_size
);
2948 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2949 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2950 tgt_align
, tgt_size
);
2953 gomp_target_fallback (fn
, hostaddrs
, devicep
, args
);
2957 struct target_mem_desc
*tgt_vars
;
2958 htab_t refcount_set
= NULL
;
2960 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2964 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2965 &tgt_align
, &tgt_size
);
2968 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2969 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2970 tgt_align
, tgt_size
);
2977 refcount_set
= htab_create (mapnum
);
2978 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2979 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
2981 devicep
->run_func (devicep
->target_id
, fn_addr
,
2982 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2986 htab_clear (refcount_set
);
2987 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2990 htab_free (refcount_set
);
2993 /* Host fallback for GOMP_target_data{,_ext} routines. */
2996 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
2998 struct gomp_task_icv
*icv
= gomp_icv (false);
3000 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3002 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3003 "be used for offloading");
3005 if (icv
->target_data
)
3007 /* Even when doing a host fallback, if there are any active
3008 #pragma omp target data constructs, need to remember the
3009 new #pragma omp target data, otherwise GOMP_target_end_data
3010 would get out of sync. */
3011 struct target_mem_desc
*tgt
3012 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3013 NULL
, GOMP_MAP_VARS_DATA
);
3014 tgt
->prev
= icv
->target_data
;
3015 icv
->target_data
= tgt
;
3020 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3021 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3023 struct gomp_device_descr
*devicep
= resolve_device (device
);
3026 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3027 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3028 return gomp_target_data_fallback (devicep
);
3030 struct target_mem_desc
*tgt
3031 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3032 NULL
, GOMP_MAP_VARS_DATA
);
3033 struct gomp_task_icv
*icv
= gomp_icv (true);
3034 tgt
->prev
= icv
->target_data
;
3035 icv
->target_data
= tgt
;
3039 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
3040 size_t *sizes
, unsigned short *kinds
)
3042 struct gomp_device_descr
*devicep
= resolve_device (device
);
3045 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3046 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3047 return gomp_target_data_fallback (devicep
);
3049 struct target_mem_desc
*tgt
3050 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
3051 NULL
, GOMP_MAP_VARS_DATA
);
3052 struct gomp_task_icv
*icv
= gomp_icv (true);
3053 tgt
->prev
= icv
->target_data
;
3054 icv
->target_data
= tgt
;
3058 GOMP_target_end_data (void)
3060 struct gomp_task_icv
*icv
= gomp_icv (false);
3061 if (icv
->target_data
)
3063 struct target_mem_desc
*tgt
= icv
->target_data
;
3064 icv
->target_data
= tgt
->prev
;
3065 gomp_unmap_vars (tgt
, true, NULL
);
3070 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
3071 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3073 struct gomp_device_descr
*devicep
= resolve_device (device
);
3076 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3077 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3080 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
3084 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
3085 size_t *sizes
, unsigned short *kinds
,
3086 unsigned int flags
, void **depend
)
3088 struct gomp_device_descr
*devicep
= resolve_device (device
);
3090 /* If there are depend clauses, but nowait is not present,
3091 block the parent task until the dependencies are resolved
3092 and then just continue with the rest of the function as if it
3093 is a merged task. Until we are able to schedule task during
3094 variable mapping or unmapping, ignore nowait if depend clauses
3098 struct gomp_thread
*thr
= gomp_thread ();
3099 if (thr
->task
&& thr
->task
->depend_hash
)
3101 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3103 && !thr
->task
->final_task
)
3105 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3106 mapnum
, hostaddrs
, sizes
, kinds
,
3107 flags
| GOMP_TARGET_FLAG_UPDATE
,
3108 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
3113 struct gomp_team
*team
= thr
->ts
.team
;
3114 /* If parallel or taskgroup has been cancelled, don't start new
3116 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3118 if (gomp_team_barrier_cancelled (&team
->barrier
))
3120 if (thr
->task
->taskgroup
)
3122 if (thr
->task
->taskgroup
->cancelled
)
3124 if (thr
->task
->taskgroup
->workshare
3125 && thr
->task
->taskgroup
->prev
3126 && thr
->task
->taskgroup
->prev
->cancelled
)
3131 gomp_task_maybe_wait_for_dependencies (depend
);
3137 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3138 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3141 struct gomp_thread
*thr
= gomp_thread ();
3142 struct gomp_team
*team
= thr
->ts
.team
;
3143 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3144 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3146 if (gomp_team_barrier_cancelled (&team
->barrier
))
3148 if (thr
->task
->taskgroup
)
3150 if (thr
->task
->taskgroup
->cancelled
)
3152 if (thr
->task
->taskgroup
->workshare
3153 && thr
->task
->taskgroup
->prev
3154 && thr
->task
->taskgroup
->prev
->cancelled
)
3159 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
3163 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
3164 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3165 htab_t
*refcount_set
)
3167 const int typemask
= 0xff;
3169 gomp_mutex_lock (&devicep
->lock
);
3170 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3172 gomp_mutex_unlock (&devicep
->lock
);
3176 for (i
= 0; i
< mapnum
; i
++)
3177 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
3179 struct splay_tree_key_s cur_node
;
3180 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3181 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
3182 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3185 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
3190 splay_tree_key remove_vars
[mapnum
];
3192 for (i
= 0; i
< mapnum
; i
++)
3194 struct splay_tree_key_s cur_node
;
3195 unsigned char kind
= kinds
[i
] & typemask
;
3199 case GOMP_MAP_ALWAYS_FROM
:
3200 case GOMP_MAP_DELETE
:
3201 case GOMP_MAP_RELEASE
:
3202 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3203 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3204 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3205 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
3206 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3207 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
3208 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
3209 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3213 bool delete_p
= (kind
== GOMP_MAP_DELETE
3214 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
3215 bool do_copy
, do_remove
;
3216 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
3219 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
3220 || kind
== GOMP_MAP_ALWAYS_FROM
)
3222 if (k
->aux
&& k
->aux
->attach_count
)
3224 /* We have to be careful not to overwrite still attached
3225 pointers during the copyback to host. */
3226 uintptr_t addr
= k
->host_start
;
3227 while (addr
< k
->host_end
)
3229 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
3230 if (k
->aux
->attach_count
[i
] == 0)
3231 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
3232 (void *) (k
->tgt
->tgt_start
3234 + addr
- k
->host_start
),
3236 addr
+= sizeof (void *);
3240 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
3241 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
3242 + cur_node
.host_start
3244 cur_node
.host_end
- cur_node
.host_start
);
3247 /* Structure elements lists are removed altogether at once, which
3248 may cause immediate deallocation of the target_mem_desc, causing
3249 errors if we still have following element siblings to copy back.
3250 While we're at it, it also seems more disciplined to simply
3251 queue all removals together for processing below.
3253 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3254 not have this problem, since they maintain an additional
3255 tgt->refcount = 1 reference to the target_mem_desc to start with.
3258 remove_vars
[nrmvars
++] = k
;
3261 case GOMP_MAP_DETACH
:
3264 gomp_mutex_unlock (&devicep
->lock
);
3265 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3270 for (int i
= 0; i
< nrmvars
; i
++)
3271 gomp_remove_var (devicep
, remove_vars
[i
]);
3273 gomp_mutex_unlock (&devicep
->lock
);
3277 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
3278 size_t *sizes
, unsigned short *kinds
,
3279 unsigned int flags
, void **depend
)
3281 struct gomp_device_descr
*devicep
= resolve_device (device
);
3283 /* If there are depend clauses, but nowait is not present,
3284 block the parent task until the dependencies are resolved
3285 and then just continue with the rest of the function as if it
3286 is a merged task. Until we are able to schedule task during
3287 variable mapping or unmapping, ignore nowait if depend clauses
3291 struct gomp_thread
*thr
= gomp_thread ();
3292 if (thr
->task
&& thr
->task
->depend_hash
)
3294 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3296 && !thr
->task
->final_task
)
3298 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3299 mapnum
, hostaddrs
, sizes
, kinds
,
3300 flags
, depend
, NULL
,
3301 GOMP_TARGET_TASK_DATA
))
3306 struct gomp_team
*team
= thr
->ts
.team
;
3307 /* If parallel or taskgroup has been cancelled, don't start new
3309 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3311 if (gomp_team_barrier_cancelled (&team
->barrier
))
3313 if (thr
->task
->taskgroup
)
3315 if (thr
->task
->taskgroup
->cancelled
)
3317 if (thr
->task
->taskgroup
->workshare
3318 && thr
->task
->taskgroup
->prev
3319 && thr
->task
->taskgroup
->prev
->cancelled
)
3324 gomp_task_maybe_wait_for_dependencies (depend
);
3330 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3331 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3334 struct gomp_thread
*thr
= gomp_thread ();
3335 struct gomp_team
*team
= thr
->ts
.team
;
3336 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3337 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3339 if (gomp_team_barrier_cancelled (&team
->barrier
))
3341 if (thr
->task
->taskgroup
)
3343 if (thr
->task
->taskgroup
->cancelled
)
3345 if (thr
->task
->taskgroup
->workshare
3346 && thr
->task
->taskgroup
->prev
3347 && thr
->task
->taskgroup
->prev
->cancelled
)
3352 htab_t refcount_set
= htab_create (mapnum
);
3354 /* The variables are mapped separately such that they can be released
3357 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3358 for (i
= 0; i
< mapnum
; i
++)
3359 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3361 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
3362 &kinds
[i
], true, &refcount_set
,
3363 GOMP_MAP_VARS_ENTER_DATA
);
3366 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
3368 for (j
= i
+ 1; j
< mapnum
; j
++)
3369 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
3370 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
3372 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
3373 &kinds
[i
], true, &refcount_set
,
3374 GOMP_MAP_VARS_ENTER_DATA
);
3377 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
3379 /* An attach operation must be processed together with the mapped
3380 base-pointer list item. */
3381 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
3382 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3386 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
3387 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3389 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
3390 htab_free (refcount_set
);
3394 gomp_target_task_fn (void *data
)
3396 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
3397 struct gomp_device_descr
*devicep
= ttask
->devicep
;
3399 if (ttask
->fn
!= NULL
)
3403 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3404 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
3405 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3407 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
3408 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
3413 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
3416 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
3420 void *actual_arguments
;
3421 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3424 actual_arguments
= ttask
->hostaddrs
;
3428 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
3429 NULL
, ttask
->sizes
, ttask
->kinds
, true,
3430 NULL
, GOMP_MAP_VARS_TARGET
);
3431 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
3433 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
3435 assert (devicep
->async_run_func
);
3436 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
3437 ttask
->args
, (void *) ttask
);
3440 else if (devicep
== NULL
3441 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3442 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3446 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
3447 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3448 ttask
->kinds
, true);
3451 htab_t refcount_set
= htab_create (ttask
->mapnum
);
3452 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3453 for (i
= 0; i
< ttask
->mapnum
; i
++)
3454 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3456 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
3457 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
3458 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3459 i
+= ttask
->sizes
[i
];
3462 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
3463 &ttask
->kinds
[i
], true, &refcount_set
,
3464 GOMP_MAP_VARS_ENTER_DATA
);
3466 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3467 ttask
->kinds
, &refcount_set
);
3468 htab_free (refcount_set
);
3474 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
3478 struct gomp_task_icv
*icv
= gomp_icv (true);
3479 icv
->thread_limit_var
3480 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
3486 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
3487 unsigned int thread_limit
, bool first
)
3489 struct gomp_thread
*thr
= gomp_thread ();
3494 struct gomp_task_icv
*icv
= gomp_icv (true);
3495 icv
->thread_limit_var
3496 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
3498 (void) num_teams_high
;
3499 if (num_teams_low
== 0)
3501 thr
->num_teams
= num_teams_low
- 1;
3504 else if (thr
->team_num
== thr
->num_teams
)
3512 omp_target_alloc (size_t size
, int device_num
)
3514 if (device_num
== gomp_get_num_devices ())
3515 return malloc (size
);
3520 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3521 if (devicep
== NULL
)
3524 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3525 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3526 return malloc (size
);
3528 gomp_mutex_lock (&devicep
->lock
);
3529 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
3530 gomp_mutex_unlock (&devicep
->lock
);
3535 omp_target_free (void *device_ptr
, int device_num
)
3537 if (device_ptr
== NULL
)
3540 if (device_num
== gomp_get_num_devices ())
3549 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3550 if (devicep
== NULL
)
3553 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3554 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3560 gomp_mutex_lock (&devicep
->lock
);
3561 gomp_free_device_memory (devicep
, device_ptr
);
3562 gomp_mutex_unlock (&devicep
->lock
);
3566 gomp_usm_alloc (size_t size
, int device_num
)
3568 if (device_num
== gomp_get_num_devices ())
3569 return malloc (size
);
3571 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3572 if (devicep
== NULL
)
3575 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3576 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3577 return malloc (size
);
3580 gomp_mutex_lock (&devicep
->lock
);
3581 if (devicep
->usm_alloc_func
)
3582 ret
= devicep
->usm_alloc_func (devicep
->target_id
, size
);
3583 gomp_mutex_unlock (&devicep
->lock
);
3588 gomp_usm_free (void *device_ptr
, int device_num
)
3590 if (device_ptr
== NULL
)
3593 if (device_num
== gomp_get_num_devices ())
3599 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3600 if (devicep
== NULL
)
3603 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3604 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3610 gomp_mutex_lock (&devicep
->lock
);
3611 if (devicep
->usm_free_func
3612 && !devicep
->usm_free_func (devicep
->target_id
, device_ptr
))
3614 gomp_mutex_unlock (&devicep
->lock
);
3615 gomp_fatal ("error in freeing device memory block at %p", device_ptr
);
3617 gomp_mutex_unlock (&devicep
->lock
);
3621 omp_target_is_present (const void *ptr
, int device_num
)
3626 if (device_num
== gomp_get_num_devices ())
3632 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3633 if (devicep
== NULL
)
3636 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3637 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3640 gomp_mutex_lock (&devicep
->lock
);
3641 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3642 struct splay_tree_key_s cur_node
;
3644 cur_node
.host_start
= (uintptr_t) ptr
;
3645 cur_node
.host_end
= cur_node
.host_start
;
3646 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
3647 int ret
= n
!= NULL
;
3648 gomp_mutex_unlock (&devicep
->lock
);
3653 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
3654 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
3657 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3660 if (dst_device_num
!= gomp_get_num_devices ())
3662 if (dst_device_num
< 0)
3665 dst_devicep
= resolve_device (dst_device_num
);
3666 if (dst_devicep
== NULL
)
3669 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3670 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3673 if (src_device_num
!= num_devices_openmp
)
3675 if (src_device_num
< 0)
3678 src_devicep
= resolve_device (src_device_num
);
3679 if (src_devicep
== NULL
)
3682 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3683 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3686 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
3688 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
3691 if (src_devicep
== NULL
)
3693 gomp_mutex_lock (&dst_devicep
->lock
);
3694 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3695 (char *) dst
+ dst_offset
,
3696 (char *) src
+ src_offset
, length
);
3697 gomp_mutex_unlock (&dst_devicep
->lock
);
3698 return (ret
? 0 : EINVAL
);
3700 if (dst_devicep
== NULL
)
3702 gomp_mutex_lock (&src_devicep
->lock
);
3703 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3704 (char *) dst
+ dst_offset
,
3705 (char *) src
+ src_offset
, length
);
3706 gomp_mutex_unlock (&src_devicep
->lock
);
3707 return (ret
? 0 : EINVAL
);
3709 if (src_devicep
== dst_devicep
)
3711 gomp_mutex_lock (&src_devicep
->lock
);
3712 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3713 (char *) dst
+ dst_offset
,
3714 (char *) src
+ src_offset
, length
);
3715 gomp_mutex_unlock (&src_devicep
->lock
);
3716 return (ret
? 0 : EINVAL
);
3722 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
3723 int num_dims
, const size_t *volume
,
3724 const size_t *dst_offsets
,
3725 const size_t *src_offsets
,
3726 const size_t *dst_dimensions
,
3727 const size_t *src_dimensions
,
3728 struct gomp_device_descr
*dst_devicep
,
3729 struct gomp_device_descr
*src_devicep
)
3731 size_t dst_slice
= element_size
;
3732 size_t src_slice
= element_size
;
3733 size_t j
, dst_off
, src_off
, length
;
3738 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
3739 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
3740 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
3742 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
3744 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
3748 else if (src_devicep
== NULL
)
3749 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3750 (char *) dst
+ dst_off
,
3751 (const char *) src
+ src_off
,
3753 else if (dst_devicep
== NULL
)
3754 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3755 (char *) dst
+ dst_off
,
3756 (const char *) src
+ src_off
,
3758 else if (src_devicep
== dst_devicep
)
3759 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3760 (char *) dst
+ dst_off
,
3761 (const char *) src
+ src_off
,
3765 return ret
? 0 : EINVAL
;
3768 /* FIXME: it would be nice to have some plugin function to handle
3769 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3770 be handled in the generic recursion below, and for host-host it
3771 should be used even for any num_dims >= 2. */
3773 for (i
= 1; i
< num_dims
; i
++)
3774 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
3775 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
3777 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
3778 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
3780 for (j
= 0; j
< volume
[0]; j
++)
3782 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
3783 (const char *) src
+ src_off
,
3784 element_size
, num_dims
- 1,
3785 volume
+ 1, dst_offsets
+ 1,
3786 src_offsets
+ 1, dst_dimensions
+ 1,
3787 src_dimensions
+ 1, dst_devicep
,
3791 dst_off
+= dst_slice
;
3792 src_off
+= src_slice
;
3798 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
3799 int num_dims
, const size_t *volume
,
3800 const size_t *dst_offsets
,
3801 const size_t *src_offsets
,
3802 const size_t *dst_dimensions
,
3803 const size_t *src_dimensions
,
3804 int dst_device_num
, int src_device_num
)
3806 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3811 if (dst_device_num
!= gomp_get_num_devices ())
3813 if (dst_device_num
< 0)
3816 dst_devicep
= resolve_device (dst_device_num
);
3817 if (dst_devicep
== NULL
)
3820 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3821 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3824 if (src_device_num
!= num_devices_openmp
)
3826 if (src_device_num
< 0)
3829 src_devicep
= resolve_device (src_device_num
);
3830 if (src_devicep
== NULL
)
3833 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3834 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3838 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
3842 gomp_mutex_lock (&src_devicep
->lock
);
3843 else if (dst_devicep
)
3844 gomp_mutex_lock (&dst_devicep
->lock
);
3845 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
3846 volume
, dst_offsets
, src_offsets
,
3847 dst_dimensions
, src_dimensions
,
3848 dst_devicep
, src_devicep
);
3850 gomp_mutex_unlock (&src_devicep
->lock
);
3851 else if (dst_devicep
)
3852 gomp_mutex_unlock (&dst_devicep
->lock
);
3857 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
3858 size_t size
, size_t device_offset
, int device_num
)
3860 if (device_num
== gomp_get_num_devices ())
3866 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3867 if (devicep
== NULL
)
3870 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3871 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3874 gomp_mutex_lock (&devicep
->lock
);
3876 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3877 struct splay_tree_key_s cur_node
;
3880 cur_node
.host_start
= (uintptr_t) host_ptr
;
3881 cur_node
.host_end
= cur_node
.host_start
+ size
;
3882 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3885 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3886 == (uintptr_t) device_ptr
+ device_offset
3887 && n
->host_start
<= cur_node
.host_start
3888 && n
->host_end
>= cur_node
.host_end
)
3893 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3894 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3898 tgt
->to_free
= NULL
;
3900 tgt
->list_count
= 0;
3901 tgt
->device_descr
= devicep
;
3902 splay_tree_node array
= tgt
->array
;
3903 splay_tree_key k
= &array
->key
;
3904 k
->host_start
= cur_node
.host_start
;
3905 k
->host_end
= cur_node
.host_end
;
3907 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3908 k
->refcount
= REFCOUNT_INFINITY
;
3909 k
->dynamic_refcount
= 0;
3912 array
->right
= NULL
;
3913 splay_tree_insert (&devicep
->mem_map
, array
);
3916 gomp_mutex_unlock (&devicep
->lock
);
3921 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3923 if (device_num
== gomp_get_num_devices ())
3929 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3930 if (devicep
== NULL
)
3933 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3936 gomp_mutex_lock (&devicep
->lock
);
3938 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3939 struct splay_tree_key_s cur_node
;
3942 cur_node
.host_start
= (uintptr_t) ptr
;
3943 cur_node
.host_end
= cur_node
.host_start
;
3944 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3946 && n
->host_start
== cur_node
.host_start
3947 && n
->refcount
== REFCOUNT_INFINITY
3948 && n
->tgt
->tgt_start
== 0
3949 && n
->tgt
->to_free
== NULL
3950 && n
->tgt
->refcount
== 1
3951 && n
->tgt
->list_count
== 0)
3953 splay_tree_remove (&devicep
->mem_map
, n
);
3954 gomp_unmap_tgt (n
->tgt
);
3958 gomp_mutex_unlock (&devicep
->lock
);
3963 omp_get_mapped_ptr (const void *ptr
, int device_num
)
3965 if (device_num
< 0 || device_num
> gomp_get_num_devices ())
3968 if (device_num
== omp_get_initial_device ())
3969 return (void *) ptr
;
3971 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3972 if (devicep
== NULL
)
3975 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3976 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3977 return (void *) ptr
;
3979 gomp_mutex_lock (&devicep
->lock
);
3981 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3982 struct splay_tree_key_s cur_node
;
3985 cur_node
.host_start
= (uintptr_t) ptr
;
3986 cur_node
.host_end
= cur_node
.host_start
;
3987 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
3991 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
3992 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
3995 gomp_mutex_unlock (&devicep
->lock
);
4001 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
4003 if (device_num
< 0 || device_num
> gomp_get_num_devices ())
4006 if (device_num
== gomp_get_num_devices ())
4009 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
4010 if (devicep
== NULL
)
4013 /* TODO: Unified shared memory must be handled when available. */
4015 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
4019 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
4022 if (device_num
== gomp_get_num_devices ())
4023 return gomp_pause_host ();
4024 if (device_num
< 0 || device_num
>= num_devices_openmp
)
4026 /* Do nothing for target devices for now. */
4031 omp_pause_resource_all (omp_pause_resource_t kind
)
4034 if (gomp_pause_host ())
4036 /* Do nothing for target devices for now. */
4040 ialias (omp_pause_resource
)
4041 ialias (omp_pause_resource_all
)
4044 GOMP_evaluate_target_device (int device_num
, const char *kind
,
4045 const char *arch
, const char *isa
)
4050 device_num
= omp_get_default_device ();
4052 if (kind
&& strcmp (kind
, "any") == 0)
4055 gomp_debug (1, "%s: device_num = %u, kind=%s, arch=%s, isa=%s",
4056 __FUNCTION__
, device_num
, kind
, arch
, isa
);
4058 if (omp_get_device_num () == device_num
)
4059 result
= GOMP_evaluate_current_device (kind
, arch
, isa
);
4062 if (!omp_is_initial_device ())
4063 /* Accelerators are not expected to know about other devices. */
4067 struct gomp_device_descr
*device
= resolve_device (device_num
);
4070 else if (device
->evaluate_device_func
)
4071 result
= device
->evaluate_device_func (device_num
, kind
, arch
,
4076 gomp_debug (1, " -> %s\n", result
? "true" : "false");
4080 #ifdef PLUGIN_SUPPORT
4082 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
4084 The handles of the found functions are stored in the corresponding fields
4085 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4088 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
4089 const char *plugin_name
)
4091 const char *err
= NULL
, *last_missing
= NULL
;
4093 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
4095 #if OFFLOAD_DEFAULTED
4101 /* Check if all required functions are available in the plugin and store
4102 their handlers. None of the symbols can legitimately be NULL,
4103 so we don't need to check dlerror all the time. */
4105 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4107 /* Similar, but missing functions are not an error. Return false if
4108 failed, true otherwise. */
4109 #define DLSYM_OPT(f, n) \
4110 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4111 || (last_missing = #n, 0))
4114 if (device
->version_func () != GOMP_VERSION
)
4116 err
= "plugin version mismatch";
4123 DLSYM (get_num_devices
);
4124 DLSYM (init_device
);
4125 DLSYM (fini_device
);
4126 DLSYM (supported_features
);
4128 DLSYM (unload_image
);
4131 DLSYM_OPT (usm_alloc
, usm_alloc
);
4132 DLSYM_OPT (usm_free
, usm_free
);
4133 DLSYM_OPT (is_usm_ptr
, is_usm_ptr
);
4136 DLSYM (evaluate_device
);
4137 device
->capabilities
= device
->get_caps_func ();
4138 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4141 DLSYM_OPT (async_run
, async_run
);
4142 DLSYM_OPT (can_run
, can_run
);
4145 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4147 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
4148 || !DLSYM_OPT (openacc
.create_thread_data
,
4149 openacc_create_thread_data
)
4150 || !DLSYM_OPT (openacc
.destroy_thread_data
,
4151 openacc_destroy_thread_data
)
4152 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
4153 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
4154 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
4155 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
4156 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
4157 || !DLSYM_OPT (openacc
.async
.queue_callback
,
4158 openacc_async_queue_callback
)
4159 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
4160 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
4161 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
4162 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
4164 /* Require all the OpenACC handlers if we have
4165 GOMP_OFFLOAD_CAP_OPENACC_200. */
4166 err
= "plugin missing OpenACC handler function";
4171 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
4172 openacc_cuda_get_current_device
);
4173 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
4174 openacc_cuda_get_current_context
);
4175 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
4176 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
4177 if (cuda
&& cuda
!= 4)
4179 /* Make sure all the CUDA functions are there if any of them are. */
4180 err
= "plugin missing OpenACC CUDA handler function";
4192 gomp_error ("while loading %s: %s", plugin_name
, err
);
4194 gomp_error ("missing function was %s", last_missing
);
4196 dlclose (plugin_handle
);
4201 /* This function finalizes all initialized devices. */
4204 gomp_target_fini (void)
4207 for (i
= 0; i
< num_devices
; i
++)
4210 struct gomp_device_descr
*devicep
= &devices
[i
];
4211 gomp_mutex_lock (&devicep
->lock
);
4212 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
4213 ret
= gomp_fini_device (devicep
);
4214 gomp_mutex_unlock (&devicep
->lock
);
4216 gomp_fatal ("device finalization failed");
4220 /* This function initializes the runtime for offloading.
4221 It parses the list of offload plugins, and tries to load these.
4222 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4223 will be set, and the array DEVICES initialized, containing descriptors for
4224 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
4228 gomp_target_init (void)
4230 const char *prefix
="libgomp-plugin-";
4231 const char *suffix
= SONAME_SUFFIX (1);
4232 const char *cur
, *next
;
4234 int i
, new_num_devs
;
4235 int num_devs
= 0, num_devs_openmp
;
4236 struct gomp_device_descr
*devs
= NULL
;
4238 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
4241 gomp_requires_mask
= 0;
4242 const unsigned int *mask_ptr
= __requires_mask_table
;
4243 bool error_emitted
= false;
4244 while (mask_ptr
!= __requires_mask_table_end
)
4246 if (gomp_requires_mask
== 0)
4247 gomp_requires_mask
= *mask_ptr
;
4248 else if (gomp_requires_mask
!= *mask_ptr
)
4252 gomp_error ("requires-directive clause inconsistency between "
4253 "compilation units detected");
4254 error_emitted
= true;
4256 /* This is inconsistent, but still merge to query for all features
4258 gomp_requires_mask
|= *mask_ptr
;
4263 cur
= OFFLOAD_PLUGINS
;
4267 struct gomp_device_descr current_device
;
4268 size_t prefix_len
, suffix_len
, cur_len
;
4270 next
= strchr (cur
, ',');
4272 prefix_len
= strlen (prefix
);
4273 cur_len
= next
? next
- cur
: strlen (cur
);
4274 suffix_len
= strlen (suffix
);
4276 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
4283 memcpy (plugin_name
, prefix
, prefix_len
);
4284 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
4285 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
4287 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
4289 new_num_devs
= current_device
.get_num_devices_func ();
4290 if (new_num_devs
>= 1)
4292 /* Augment DEVICES and NUM_DEVICES. */
4294 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
4295 * sizeof (struct gomp_device_descr
));
4303 current_device
.name
= current_device
.get_name_func ();
4304 /* current_device.capabilities has already been set. */
4305 current_device
.type
= current_device
.get_type_func ();
4306 current_device
.mem_map
.root
= NULL
;
4307 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
4308 for (i
= 0; i
< new_num_devs
; i
++)
4310 current_device
.target_id
= i
;
4311 devs
[num_devs
] = current_device
;
4312 gomp_mutex_init (&devs
[num_devs
].lock
);
4323 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
4324 NUM_DEVICES_OPENMP. */
4325 struct gomp_device_descr
*devs_s
4326 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
4333 num_devs_openmp
= 0;
4334 for (i
= 0; i
< num_devs
; i
++)
4335 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4336 devs_s
[num_devs_openmp
++] = devs
[i
];
4337 int num_devs_after_openmp
= num_devs_openmp
;
4338 for (i
= 0; i
< num_devs
; i
++)
4339 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4340 devs_s
[num_devs_after_openmp
++] = devs
[i
];
4344 for (i
= 0; i
< num_devs
; i
++)
4346 /* The 'devices' array can be moved (by the realloc call) until we have
4347 found all the plugins, so registering with the OpenACC runtime (which
4348 takes a copy of the pointer argument) must be delayed until now. */
4349 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4350 goacc_register (&devs
[i
]);
4353 num_devices
= num_devs
;
4354 num_devices_openmp
= num_devs_openmp
;
4356 if (atexit (gomp_target_fini
) != 0)
4357 gomp_fatal ("atexit failed");
4360 #else /* PLUGIN_SUPPORT */
4361 /* If dlfcn.h is unavailable we always fallback to host execution.
4362 GOMP_target* routines are just stubs for this case. */
4364 gomp_target_init (void)
4367 #endif /* PLUGIN_SUPPORT */