1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type
;
/* hashtab.h allocation callback: defer to libgomp's fatal-on-failure
   allocator, so hash table growth never returns NULL.  */
static inline void *
htab_alloc (size_t nbytes)
{
  void *mem = gomp_malloc (nbytes);
  return mem;
}
/* hashtab.h deallocation callback: release a block previously obtained
   from htab_alloc.  Passing NULL is a harmless no-op, as with free().  */
static inline void
htab_free (void *block)
{
  free (block);
}
59 ialias_redirect (GOMP_task
)
61 static inline hashval_t
62 htab_hash (hash_entry_type element
)
64 return hash_pointer ((void *) element
);
68 htab_eq (hash_entry_type x
, hash_entry_type y
)
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock
;
83 /* This structure describes an offload image.
84 It contains type of the target device, pointer to host table descriptor, and
85 pointer to target data. */
86 struct offload_image_descr
{
88 enum offload_target_type type
;
89 const void *host_table
;
90 const void *target_data
;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr
*offload_images
;
96 /* Total number of offload images. */
97 static int num_offload_images
;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr
*devices
;
102 /* Total number of available devices. */
103 static int num_devices
;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp
;
108 /* OpenMP requires mask. */
109 static int omp_requires_mask
;
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
114 gomp_realloc_unlock (void *old
, size_t size
)
116 void *ret
= realloc (old
, size
);
119 gomp_mutex_unlock (®ister_lock
);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
125 attribute_hidden
void
126 gomp_init_targets_once (void)
128 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
132 gomp_get_num_devices (void)
134 gomp_init_targets_once ();
135 return num_devices_openmp
;
138 static struct gomp_device_descr
*
139 resolve_device (int device_id
, bool remapped
)
141 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
143 struct gomp_task_icv
*icv
= gomp_icv (false);
144 device_id
= icv
->default_device_var
;
150 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device
))
153 if (device_id
== omp_invalid_device
)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
161 else if (device_id
>= gomp_get_num_devices ())
163 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id
!= num_devices_openmp
)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
171 gomp_mutex_lock (&devices
[device_id
].lock
);
172 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
173 gomp_init_device (&devices
[device_id
]);
174 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
176 gomp_mutex_unlock (&devices
[device_id
].lock
);
178 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
184 gomp_mutex_unlock (&devices
[device_id
].lock
);
186 return &devices
[device_id
];
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
193 if (key
->host_start
!= key
->host_end
)
194 return splay_tree_lookup (mem_map
, key
);
197 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
202 n
= splay_tree_lookup (mem_map
, key
);
206 return splay_tree_lookup (mem_map
, key
);
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
212 return reverse_splay_tree_lookup (mem_map_rev
, key
);
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
218 if (key
->host_start
!= key
->host_end
)
219 return splay_tree_lookup (mem_map
, key
);
222 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
228 gomp_device_copy (struct gomp_device_descr
*devicep
,
229 bool (*copy_func
) (int, void *, const void *, size_t),
230 const char *dst
, void *dstaddr
,
231 const char *src
, const void *srcaddr
,
234 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
236 gomp_mutex_unlock (&devicep
->lock
);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
243 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
244 bool (*copy_func
) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue
*),
246 const char *dst
, void *dstaddr
,
247 const char *src
, const void *srcaddr
,
248 const void *srcaddr_orig
,
249 size_t size
, struct goacc_asyncqueue
*aq
)
251 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
253 gomp_mutex_unlock (&devicep
->lock
);
254 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src
, srcaddr_orig
, srcaddr_orig
+ size
,
259 src
, srcaddr
, srcaddr
+ size
,
260 dst
, dstaddr
, dstaddr
+ size
);
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src
, srcaddr
, srcaddr
+ size
,
265 dst
, dstaddr
, dstaddr
+ size
);
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
272 struct gomp_coalesce_chunk
274 /* The starting and ending point of a coalesced chunk of memory. */
278 struct gomp_coalesce_buf
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
283 struct target_mem_desc
*tgt
;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk
*chunks
;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
309 /* Add region with device tgt_start relative offset and length to CBUF.
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
314 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
315 is it more performant to use libgomp CBUF buffering or individual device
316 asynchronous copying?) */
319 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
321 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
325 if (cbuf
->chunk_cnt
< 0)
327 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
329 cbuf
->chunk_cnt
= -1;
332 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
334 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
338 /* If the last chunk is only used by one mapping, discard it,
339 as it will be one host to device copy anyway and
340 memcpying it around will only waste cycles. */
341 if (cbuf
->use_cnt
== 1)
344 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
345 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
350 /* Return true for mapping kinds which need to copy data from the
351 host to device for regions that weren't previously mapped. */
354 gomp_to_device_kind_p (int kind
)
360 case GOMP_MAP_FORCE_ALLOC
:
361 case GOMP_MAP_FORCE_FROM
:
362 case GOMP_MAP_ALWAYS_FROM
:
363 case GOMP_MAP_PRESENT_FROM
:
364 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
371 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
372 non-NULL), when the source data is stack or may otherwise be deallocated
373 before the asynchronous copy takes place, EPHEMERAL must be passed as
376 attribute_hidden
void
377 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
378 struct goacc_asyncqueue
*aq
,
379 void *d
, const void *h
, size_t sz
,
380 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
382 if (__builtin_expect (aq
!= NULL
, 0))
384 /* See 'gomp_coalesce_buf_add'. */
387 void *h_buf
= (void *) h
;
390 /* We're queueing up an asynchronous copy from data that may
391 disappear before the transfer takes place (i.e. because it is a
392 stack local in a function that is no longer executing). Make a
393 copy of the data into a temporary buffer in those cases. */
394 h_buf
= gomp_malloc (sz
);
395 memcpy (h_buf
, h
, sz
);
397 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
398 "dev", d
, "host", h_buf
, h
, sz
, aq
);
400 /* Free temporary buffer once the transfer has completed. */
401 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
408 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
409 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
412 long last
= cbuf
->chunk_cnt
- 1;
413 while (first
<= last
)
415 long middle
= (first
+ last
) >> 1;
416 if (cbuf
->chunks
[middle
].end
<= doff
)
418 else if (cbuf
->chunks
[middle
].start
<= doff
)
420 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
422 gomp_mutex_unlock (&devicep
->lock
);
423 gomp_fatal ("internal libgomp cbuf error");
425 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
435 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
438 attribute_hidden
void
439 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
440 struct goacc_asyncqueue
*aq
,
441 void *h
, const void *d
, size_t sz
)
443 if (__builtin_expect (aq
!= NULL
, 0))
444 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
445 "host", h
, "dev", d
, NULL
, sz
, aq
);
447 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
451 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
453 if (!devicep
->free_func (devicep
->target_id
, devptr
))
455 gomp_mutex_unlock (&devicep
->lock
);
456 gomp_fatal ("error in freeing device memory block at %p", devptr
);
460 /* Increment reference count of a splay_tree_key region K by 1.
461 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
462 increment the value if refcount is not yet contained in the set (used for
463 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
464 once for each construct). */
467 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
469 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
472 uintptr_t *refcount_ptr
= &k
->refcount
;
474 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
475 refcount_ptr
= &k
->structelem_refcount
;
476 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
477 refcount_ptr
= k
->structelem_refcount_ptr
;
481 if (htab_find (*refcount_set
, refcount_ptr
))
483 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
484 *slot
= refcount_ptr
;
491 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
492 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
493 track already seen refcounts, and only adjust the value if refcount is not
494 yet contained in the set (like gomp_increment_refcount).
496 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
497 it is already zero and we know we decremented it earlier. This signals that
498 associated maps should be copied back to host.
500 *DO_REMOVE is set to true when this is the first handling of this refcount
501 and we are setting it to zero. This signals a removal of this key from the
504 Copy and removal are separated due to cases like handling of structure
505 elements, e.g. each map of a structure element representing a possible copy
506 out of a structure field has to be handled individually, but we only signal
507 removal for one (the first encountered) sibling map. */
510 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
511 bool *do_copy
, bool *do_remove
)
513 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
515 *do_copy
= *do_remove
= false;
519 uintptr_t *refcount_ptr
= &k
->refcount
;
521 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
522 refcount_ptr
= &k
->structelem_refcount
;
523 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
524 refcount_ptr
= k
->structelem_refcount_ptr
;
526 bool new_encountered_refcount
;
527 bool set_to_zero
= false;
528 bool is_zero
= false;
530 uintptr_t orig_refcount
= *refcount_ptr
;
534 if (htab_find (*refcount_set
, refcount_ptr
))
536 new_encountered_refcount
= false;
540 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
541 *slot
= refcount_ptr
;
542 new_encountered_refcount
= true;
545 /* If no refcount_set being used, assume all keys are being decremented
546 for the first time. */
547 new_encountered_refcount
= true;
551 else if (*refcount_ptr
> 0)
555 if (*refcount_ptr
== 0)
557 if (orig_refcount
> 0)
563 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
564 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
567 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
568 gomp_map_0len_lookup found oldn for newn.
569 Helper function of gomp_map_vars. */
572 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
573 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
574 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
575 unsigned char kind
, bool always_to_flag
, bool implicit
,
576 struct gomp_coalesce_buf
*cbuf
,
577 htab_t
*refcount_set
)
579 assert (kind
!= GOMP_MAP_ATTACH
580 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
583 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
584 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
585 tgt_var
->is_attach
= false;
586 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
588 /* For implicit maps, old contained in new is valid. */
589 bool implicit_subset
= (implicit
590 && newn
->host_start
<= oldn
->host_start
591 && oldn
->host_end
<= newn
->host_end
);
593 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
595 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
597 if (GOMP_MAP_FORCE_P (kind
)
598 /* For implicit maps, old contained in new is valid. */
600 /* Otherwise, new contained inside old is considered valid. */
601 || (oldn
->host_start
<= newn
->host_start
602 && newn
->host_end
<= oldn
->host_end
)))
604 gomp_mutex_unlock (&devicep
->lock
);
605 gomp_fatal ("Trying to map into device [%p..%p) object when "
606 "[%p..%p) is already mapped",
607 (void *) newn
->host_start
, (void *) newn
->host_end
,
608 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
611 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
613 /* Implicit + always should not happen. If this does occur, below
614 address/length adjustment is a TODO. */
615 assert (!implicit_subset
);
617 if (oldn
->aux
&& oldn
->aux
->attach_count
)
619 /* We have to be careful not to overwrite still attached pointers
620 during the copyback to host. */
621 uintptr_t addr
= newn
->host_start
;
622 while (addr
< newn
->host_end
)
624 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
625 if (oldn
->aux
->attach_count
[i
] == 0)
626 gomp_copy_host2dev (devicep
, aq
,
627 (void *) (oldn
->tgt
->tgt_start
629 + addr
- oldn
->host_start
),
631 sizeof (void *), false, cbuf
);
632 addr
+= sizeof (void *);
636 gomp_copy_host2dev (devicep
, aq
,
637 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
638 + newn
->host_start
- oldn
->host_start
),
639 (void *) newn
->host_start
,
640 newn
->host_end
- newn
->host_start
, false, cbuf
);
643 gomp_increment_refcount (oldn
, refcount_set
);
647 get_kind (bool short_mapkind
, void *kinds
, int idx
)
650 return ((unsigned char *) kinds
)[idx
];
652 int val
= ((unsigned short *) kinds
)[idx
];
653 if (GOMP_MAP_IMPLICIT_P (val
))
654 val
&= ~GOMP_MAP_IMPLICIT
;
660 get_implicit (bool short_mapkind
, void *kinds
, int idx
)
665 int val
= ((unsigned short *) kinds
)[idx
];
666 return GOMP_MAP_IMPLICIT_P (val
);
670 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
671 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
672 struct gomp_coalesce_buf
*cbuf
,
673 bool allow_zero_length_array_sections
)
675 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
676 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
677 struct splay_tree_key_s cur_node
;
679 cur_node
.host_start
= host_ptr
;
680 if (cur_node
.host_start
== (uintptr_t) NULL
)
682 cur_node
.tgt_offset
= (uintptr_t) NULL
;
683 gomp_copy_host2dev (devicep
, aq
,
684 (void *) (tgt
->tgt_start
+ target_offset
),
685 (void *) &cur_node
.tgt_offset
, sizeof (void *),
689 /* Add bias to the pointer value. */
690 cur_node
.host_start
+= bias
;
691 cur_node
.host_end
= cur_node
.host_start
;
692 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
695 if (allow_zero_length_array_sections
)
696 cur_node
.tgt_offset
= 0;
697 else if (devicep
->is_usm_ptr_func
698 && devicep
->is_usm_ptr_func ((void*)cur_node
.host_start
))
699 cur_node
.tgt_offset
= cur_node
.host_start
;
702 gomp_mutex_unlock (&devicep
->lock
);
703 gomp_fatal ("Pointer target of array section wasn't mapped");
708 cur_node
.host_start
-= n
->host_start
;
710 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
711 /* At this point tgt_offset is target address of the
712 array section. Now subtract bias to get what we want
713 to initialize the pointer with. */
714 cur_node
.tgt_offset
-= bias
;
716 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
717 (void *) &cur_node
.tgt_offset
, sizeof (void *),
722 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
723 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
724 size_t first
, size_t i
, void **hostaddrs
,
725 size_t *sizes
, void *kinds
,
726 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
728 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
729 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
730 struct splay_tree_key_s cur_node
;
733 const bool short_mapkind
= true;
734 const int typemask
= short_mapkind
? 0xff : 0x7;
736 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
737 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
738 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
739 kind
= get_kind (short_mapkind
, kinds
, i
);
740 implicit
= get_implicit (short_mapkind
, kinds
, i
);
743 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
745 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
746 kind
& typemask
, false, implicit
, cbuf
,
752 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
754 cur_node
.host_start
--;
755 n2
= splay_tree_lookup (mem_map
, &cur_node
);
756 cur_node
.host_start
++;
759 && n2
->host_start
- n
->host_start
760 == n2
->tgt_offset
- n
->tgt_offset
)
762 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
763 kind
& typemask
, false, implicit
, cbuf
,
769 n2
= splay_tree_lookup (mem_map
, &cur_node
);
773 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
775 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
776 kind
& typemask
, false, implicit
, cbuf
,
781 gomp_mutex_unlock (&devicep
->lock
);
782 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
783 "other mapped elements from the same structure weren't mapped "
784 "together with it", (void *) cur_node
.host_start
,
785 (void *) cur_node
.host_end
);
788 attribute_hidden
void
789 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
790 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
791 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
792 struct gomp_coalesce_buf
*cbufp
,
793 bool allow_zero_length_array_sections
)
795 struct splay_tree_key_s s
;
800 gomp_mutex_unlock (&devicep
->lock
);
801 gomp_fatal ("enclosing struct not mapped for attach");
804 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
805 /* We might have a pointer in a packed struct: however we cannot have more
806 than one such pointer in each pointer-sized portion of the struct, so
808 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
811 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
813 if (!n
->aux
->attach_count
)
815 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
817 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
818 n
->aux
->attach_count
[idx
]++;
821 gomp_mutex_unlock (&devicep
->lock
);
822 gomp_fatal ("attach count overflow");
825 if (n
->aux
->attach_count
[idx
] == 1)
827 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
829 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
833 if ((void *) target
== NULL
)
835 gomp_mutex_unlock (&devicep
->lock
);
836 gomp_fatal ("attempt to attach null pointer");
839 s
.host_start
= target
+ bias
;
840 s
.host_end
= s
.host_start
+ 1;
841 tn
= splay_tree_lookup (mem_map
, &s
);
845 if (allow_zero_length_array_sections
)
846 /* When allowing attachment to zero-length array sections, we
847 allow attaching to NULL pointers when the target region is not
852 gomp_mutex_unlock (&devicep
->lock
);
853 gomp_fatal ("pointer target not mapped for attach");
857 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
860 "%s: attaching host %p, target %p (struct base %p) to %p\n",
861 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
862 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
864 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
865 sizeof (void *), true, cbufp
);
868 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
869 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
872 attribute_hidden
void
873 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
874 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
875 uintptr_t detach_from
, bool finalize
,
876 struct gomp_coalesce_buf
*cbufp
)
882 gomp_mutex_unlock (&devicep
->lock
);
883 gomp_fatal ("enclosing struct not mapped for detach");
886 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
888 if (!n
->aux
|| !n
->aux
->attach_count
)
890 gomp_mutex_unlock (&devicep
->lock
);
891 gomp_fatal ("no attachment counters for struct");
895 n
->aux
->attach_count
[idx
] = 1;
897 if (n
->aux
->attach_count
[idx
] == 0)
899 gomp_mutex_unlock (&devicep
->lock
);
900 gomp_fatal ("attach count underflow");
903 n
->aux
->attach_count
[idx
]--;
905 if (n
->aux
->attach_count
[idx
] == 0)
907 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
909 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
912 "%s: detaching host %p, target %p (struct base %p) to %p\n",
913 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
914 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
917 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
918 sizeof (void *), true, cbufp
);
921 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
922 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
925 attribute_hidden
uintptr_t
926 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
928 if (tgt
->list
[i
].key
!= NULL
)
929 return tgt
->list
[i
].key
->tgt
->tgt_start
930 + tgt
->list
[i
].key
->tgt_offset
931 + tgt
->list
[i
].offset
;
933 switch (tgt
->list
[i
].offset
)
937 return (uintptr_t) hostaddrs
[i
];
943 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
944 + tgt
->list
[i
+ 1].key
->tgt_offset
945 + tgt
->list
[i
+ 1].offset
946 + (uintptr_t) hostaddrs
[i
]
947 - (uintptr_t) hostaddrs
[i
+ 1];
950 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
954 static inline __attribute__((always_inline
)) struct target_mem_desc
*
955 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
956 struct goacc_asyncqueue
*aq
, size_t mapnum
,
957 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
958 void *kinds
, struct goacc_ncarray_info
*nca_info
,
959 bool short_mapkind
, htab_t
*refcount_set
,
960 enum gomp_map_vars_kind pragma_kind
)
962 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
963 size_t nca_data_row_num
= (nca_info
? nca_info
->num_data_rows
: 0);
964 bool has_firstprivate
= false;
965 bool has_always_ptrset
= false;
966 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
967 const int rshift
= short_mapkind
? 8 : 3;
968 const int typemask
= short_mapkind
? 0xff : 0x7;
969 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
970 struct splay_tree_key_s cur_node
;
971 struct target_mem_desc
*tgt
972 = gomp_malloc (sizeof (*tgt
)
973 + sizeof (tgt
->list
[0]) * (mapnum
+ nca_data_row_num
));
974 tgt
->list_count
= mapnum
+ nca_data_row_num
;
975 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
976 tgt
->device_descr
= devicep
;
978 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
987 tgt_align
= sizeof (void *);
993 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
995 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
996 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
999 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1001 size_t align
= 4 * sizeof (void *);
1003 tgt_size
= mapnum
* sizeof (void *);
1005 cbuf
.use_cnt
= 1 + (mapnum
> 1);
1006 cbuf
.chunks
[0].start
= 0;
1007 cbuf
.chunks
[0].end
= tgt_size
;
1010 gomp_mutex_lock (&devicep
->lock
);
1011 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1013 gomp_mutex_unlock (&devicep
->lock
);
1018 for (i
= 0; i
< mapnum
; i
++)
1020 int kind
= get_kind (short_mapkind
, kinds
, i
);
1021 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1022 tgt
->list
[i
].offset
= 0;
1023 if (hostaddrs
[i
] == NULL
1024 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1026 tgt
->list
[i
].key
= NULL
;
1027 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1030 else if (devicep
->is_usm_ptr_func
1031 && devicep
->is_usm_ptr_func (hostaddrs
[i
]))
1033 /* The memory is visible from both host and target
1034 so nothing needs to be moved. */
1035 tgt
->list
[i
].key
= NULL
;
1036 tgt
->list
[i
].offset
= OFFSET_USM
;
1039 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1040 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1042 tgt
->list
[i
].key
= NULL
;
1045 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1046 on a separate construct prior to using use_device_{addr,ptr}.
1047 In OpenMP 5.0, map directives need to be ordered by the
1048 middle-end before the use_device_* clauses. If
1049 !not_found_cnt, all mappings requested (if any) are already
1050 mapped, so use_device_{addr,ptr} can be resolved right away.
1051 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1052 now but would succeed after performing the mappings in the
1053 following loop. We can't defer this always to the second
1054 loop, because it is not even invoked when !not_found_cnt
1055 after the first loop. */
1056 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1057 cur_node
.host_end
= cur_node
.host_start
;
1058 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1061 cur_node
.host_start
-= n
->host_start
;
1063 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1064 + cur_node
.host_start
);
1066 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1068 gomp_mutex_unlock (&devicep
->lock
);
1069 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1071 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1072 /* If not present, continue using the host address. */
1075 __builtin_unreachable ();
1076 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1079 tgt
->list
[i
].offset
= 0;
1082 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1084 size_t first
= i
+ 1;
1085 size_t last
= i
+ sizes
[i
];
1086 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1087 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1089 tgt
->list
[i
].key
= NULL
;
1090 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1091 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1094 size_t align
= (size_t) 1 << (kind
>> rshift
);
1095 if (tgt_align
< align
)
1097 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1098 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1099 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1100 not_found_cnt
+= last
- i
;
1101 for (i
= first
; i
<= last
; i
++)
1103 tgt
->list
[i
].key
= NULL
;
1105 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1107 gomp_coalesce_buf_add (&cbuf
,
1108 tgt_size
- cur_node
.host_end
1109 + (uintptr_t) hostaddrs
[i
],
1115 for (i
= first
; i
<= last
; i
++)
1116 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1117 sizes
, kinds
, NULL
, refcount_set
);
1121 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1123 tgt
->list
[i
].key
= NULL
;
1124 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1125 has_firstprivate
= true;
1128 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1129 || ((kind
& typemask
)
1130 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1132 tgt
->list
[i
].key
= NULL
;
1133 has_firstprivate
= true;
1136 else if (GOMP_MAP_NONCONTIG_ARRAY_P (kind
& typemask
))
1138 /* Ignore non-contiguous arrays for now, we process them together
1140 tgt
->list
[i
].key
= NULL
;
1141 tgt
->list
[i
].offset
= 0;
1144 /* The map for the non-contiguous array itself is never copied from
1145 during unmapping, its the data rows that count. Set copy-from
1146 flags to false here. */
1147 tgt
->list
[i
].copy_from
= false;
1148 tgt
->list
[i
].always_copy_from
= false;
1149 tgt
->list
[i
].is_attach
= false;
1151 size_t align
= (size_t) 1 << (kind
>> rshift
);
1152 if (tgt_align
< align
)
1158 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1159 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1160 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1162 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1163 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1165 tgt
->list
[i
].key
= NULL
;
1167 size_t align
= (size_t) 1 << (kind
>> rshift
);
1168 if (tgt_align
< align
)
1170 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1172 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1173 cur_node
.host_end
- cur_node
.host_start
);
1174 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1175 has_firstprivate
= true;
1179 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1181 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1184 tgt
->list
[i
].key
= NULL
;
1185 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1190 n
= splay_tree_lookup (mem_map
, &cur_node
);
1191 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1193 int always_to_cnt
= 0;
1194 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1196 bool has_nullptr
= false;
1198 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1199 if (n
->tgt
->list
[j
].key
== n
)
1201 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1204 if (n
->tgt
->list_count
== 0)
1206 /* 'declare target'; assume has_nullptr; it could also be
1207 statically assigned pointer, but that it should be to
1208 the equivalent variable on the host. */
1209 assert (n
->refcount
== REFCOUNT_INFINITY
);
1213 assert (j
< n
->tgt
->list_count
);
1214 /* Re-map the data if there is an 'always' modifier or if it a
1215 null pointer was there and non a nonnull has been found; that
1216 permits transparent re-mapping for Fortran array descriptors
1217 which were previously mapped unallocated. */
1218 for (j
= i
+ 1; j
< mapnum
; j
++)
1220 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1221 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1223 || !GOMP_MAP_POINTER_P (ptr_kind
)
1224 || *(void **) hostaddrs
[j
] == NULL
))
1226 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1227 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1228 > cur_node
.host_end
))
1232 has_always_ptrset
= true;
1237 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1238 kind
& typemask
, always_to_cnt
> 0, implicit
,
1239 NULL
, refcount_set
);
1244 tgt
->list
[i
].key
= NULL
;
1246 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1248 /* Not present, hence, skip entry - including its MAP_POINTER,
1250 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1252 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1253 == GOMP_MAP_POINTER
))
1256 tgt
->list
[i
].key
= NULL
;
1257 tgt
->list
[i
].offset
= 0;
1261 size_t align
= (size_t) 1 << (kind
>> rshift
);
1263 if (tgt_align
< align
)
1265 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1267 && gomp_to_device_kind_p (kind
& typemask
))
1268 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1269 cur_node
.host_end
- cur_node
.host_start
);
1270 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1271 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1275 for (j
= i
+ 1; j
< mapnum
; j
++)
1276 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1277 kinds
, j
)) & typemask
))
1278 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1280 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1281 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1282 > cur_node
.host_end
))
1286 tgt
->list
[j
].key
= NULL
;
1293 /* For non-contiguous arrays. Each data row is one target item, separated
1294 from the normal map clause items, hence we order them after mapnum. */
1297 struct target_var_desc
*next_var_desc
= &tgt
->list
[mapnum
];
1298 for (i
= 0; i
< nca_info
->num_ncarray
; i
++)
1300 struct goacc_ncarray
*nca
= &nca_info
->ncarray
[i
];
1301 int kind
= get_kind (short_mapkind
, kinds
, nca
->map_index
);
1302 size_t align
= (size_t) 1 << (kind
>> rshift
);
1303 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1304 tgt_size
+= nca
->ptrblock_size
;
1306 for (size_t j
= 0; j
< nca
->data_row_num
; j
++)
1308 struct target_var_desc
*row_desc
= next_var_desc
++;
1309 void *row
= nca
->data_rows
[j
];
1310 cur_node
.host_start
= (uintptr_t) row
;
1311 cur_node
.host_end
= cur_node
.host_start
+ nca
->data_row_size
;
1312 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1315 assert (n
->refcount
!= REFCOUNT_LINK
);
1316 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, row_desc
,
1317 kind
& typemask
, false, false,
1318 /* TODO: cbuf? */ NULL
,
1323 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1324 tgt_size
+= nca
->data_row_size
;
1329 assert (next_var_desc
== &tgt
->list
[mapnum
+ nca_info
->num_data_rows
]);
1336 gomp_mutex_unlock (&devicep
->lock
);
1337 gomp_fatal ("unexpected aggregation");
1339 tgt
->to_free
= devaddrs
[0];
1340 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1341 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1343 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
1345 /* Allocate tgt_align aligned tgt_size block of memory. */
1346 /* FIXME: Perhaps change interface to allocate properly aligned
1348 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1349 tgt_size
+ tgt_align
- 1);
1352 gomp_mutex_unlock (&devicep
->lock
);
1353 gomp_fatal ("device memory allocation fail");
1356 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1357 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1358 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1360 if (cbuf
.use_cnt
== 1)
1362 if (cbuf
.chunk_cnt
> 0)
1365 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1375 tgt
->to_free
= NULL
;
1381 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1382 tgt_size
= mapnum
* sizeof (void *);
1385 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1388 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1389 splay_tree_node array
= tgt
->array
;
1390 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1391 uintptr_t field_tgt_base
= 0;
1392 splay_tree_key field_tgt_structelem_first
= NULL
;
1394 for (i
= 0; i
< mapnum
; i
++)
1395 if (has_always_ptrset
1397 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1398 == GOMP_MAP_TO_PSET
)
1400 splay_tree_key k
= tgt
->list
[i
].key
;
1401 bool has_nullptr
= false;
1403 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1404 if (k
->tgt
->list
[j
].key
== k
)
1406 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1409 if (k
->tgt
->list_count
== 0)
1412 assert (j
< k
->tgt
->list_count
);
1414 tgt
->list
[i
].has_null_ptr_assoc
= false;
1415 for (j
= i
+ 1; j
< mapnum
; j
++)
1417 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1418 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1420 || !GOMP_MAP_POINTER_P (ptr_kind
)
1421 || *(void **) hostaddrs
[j
] == NULL
))
1423 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1424 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1429 if (*(void **) hostaddrs
[j
] == NULL
)
1430 tgt
->list
[i
].has_null_ptr_assoc
= true;
1431 tgt
->list
[j
].key
= k
;
1432 tgt
->list
[j
].copy_from
= false;
1433 tgt
->list
[j
].always_copy_from
= false;
1434 tgt
->list
[j
].is_attach
= false;
1435 gomp_increment_refcount (k
, refcount_set
);
1436 gomp_map_pointer (k
->tgt
, aq
,
1437 (uintptr_t) *(void **) hostaddrs
[j
],
1438 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1440 sizes
[j
], cbufp
, false);
1445 else if (tgt
->list
[i
].key
== NULL
)
1447 int kind
= get_kind (short_mapkind
, kinds
, i
);
1448 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1449 if (hostaddrs
[i
] == NULL
)
1451 if (tgt
->list
[i
].offset
== OFFSET_USM
)
1453 switch (kind
& typemask
)
1455 size_t align
, len
, first
, last
;
1457 case GOMP_MAP_FIRSTPRIVATE
:
1458 align
= (size_t) 1 << (kind
>> rshift
);
1459 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1460 tgt
->list
[i
].offset
= tgt_size
;
1462 gomp_copy_host2dev (devicep
, aq
,
1463 (void *) (tgt
->tgt_start
+ tgt_size
),
1464 (void *) hostaddrs
[i
], len
, false, cbufp
);
1465 /* Save device address in hostaddr to permit latter availablity
1466 when doing a deep-firstprivate with pointer attach. */
1467 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1470 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1471 firstprivate to hostaddrs[i+1], which is assumed to contain a
1475 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1477 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1478 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1479 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1480 sizeof (void *), false, cbufp
);
1484 case GOMP_MAP_FIRSTPRIVATE_INT
:
1485 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1487 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1488 /* The OpenACC 'host_data' construct only allows 'use_device'
1489 "mapping" clauses, so in the first loop, 'not_found_cnt'
1490 must always have been zero, so all OpenACC 'use_device'
1491 clauses have already been handled. (We can only easily test
1492 'use_device' with 'if_present' clause here.) */
1493 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1494 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1495 code conceptually simple, similar to the first loop. */
1496 case GOMP_MAP_USE_DEVICE_PTR
:
1497 if (tgt
->list
[i
].offset
== 0)
1499 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1500 cur_node
.host_end
= cur_node
.host_start
;
1501 n
= gomp_map_lookup (mem_map
, &cur_node
);
1504 cur_node
.host_start
-= n
->host_start
;
1506 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1507 + cur_node
.host_start
);
1509 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1511 gomp_mutex_unlock (&devicep
->lock
);
1512 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1514 else if ((kind
& typemask
)
1515 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1516 /* If not present, continue using the host address. */
1519 __builtin_unreachable ();
1520 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1523 case GOMP_MAP_STRUCT
:
1525 last
= i
+ sizes
[i
];
1526 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1527 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1529 if (tgt
->list
[first
].key
!= NULL
)
1531 n
= splay_tree_lookup (mem_map
, &cur_node
);
1534 size_t align
= (size_t) 1 << (kind
>> rshift
);
1535 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1536 - (uintptr_t) hostaddrs
[i
];
1537 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1538 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1539 - (uintptr_t) hostaddrs
[i
];
1540 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1541 field_tgt_offset
= tgt_size
;
1542 field_tgt_clear
= last
;
1543 field_tgt_structelem_first
= NULL
;
1544 tgt_size
+= cur_node
.host_end
1545 - (uintptr_t) hostaddrs
[first
];
1548 for (i
= first
; i
<= last
; i
++)
1549 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1550 sizes
, kinds
, cbufp
, refcount_set
);
1553 case GOMP_MAP_ALWAYS_POINTER
:
1554 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1555 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1556 n
= splay_tree_lookup (mem_map
, &cur_node
);
1558 || n
->host_start
> cur_node
.host_start
1559 || n
->host_end
< cur_node
.host_end
)
1561 gomp_mutex_unlock (&devicep
->lock
);
1562 gomp_fatal ("always pointer not mapped");
1564 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1565 != GOMP_MAP_ALWAYS_POINTER
)
1566 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1567 if (cur_node
.tgt_offset
)
1568 cur_node
.tgt_offset
-= sizes
[i
];
1569 gomp_copy_host2dev (devicep
, aq
,
1570 (void *) (n
->tgt
->tgt_start
1572 + cur_node
.host_start
1574 (void *) &cur_node
.tgt_offset
,
1575 sizeof (void *), true, cbufp
);
1576 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1577 + cur_node
.host_start
- n
->host_start
;
1579 case GOMP_MAP_IF_PRESENT
:
1580 /* Not present - otherwise handled above. Skip over its
1581 MAP_POINTER as well. */
1583 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1584 == GOMP_MAP_POINTER
))
1587 case GOMP_MAP_ATTACH
:
1588 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1590 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1591 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1592 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1595 tgt
->list
[i
].key
= n
;
1596 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1597 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1598 tgt
->list
[i
].copy_from
= false;
1599 tgt
->list
[i
].always_copy_from
= false;
1600 tgt
->list
[i
].is_attach
= true;
1601 /* OpenACC 'attach'/'detach' doesn't affect
1602 structured/dynamic reference counts ('n->refcount',
1603 'n->dynamic_refcount'). */
1606 = ((kind
& typemask
)
1607 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1608 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1609 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1612 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1614 gomp_mutex_unlock (&devicep
->lock
);
1615 gomp_fatal ("outer struct not mapped for attach");
1620 if (tgt
->list
[i
].offset
== OFFSET_INLINED
1626 if (GOMP_MAP_NONCONTIG_ARRAY_P (kind
& typemask
))
1628 tgt
->list
[i
].key
= &array
->key
;
1629 tgt
->list
[i
].key
->tgt
= tgt
;
1634 splay_tree_key k
= &array
->key
;
1635 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1636 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1637 k
->host_end
= k
->host_start
+ sizes
[i
];
1639 k
->host_end
= k
->host_start
+ sizeof (void *);
1640 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1641 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1642 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1643 kind
& typemask
, false, implicit
, cbufp
,
1648 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1650 /* Replace target address of the pointer with target address
1651 of mapped object in the splay tree. */
1652 splay_tree_remove (mem_map
, n
);
1654 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1655 k
->aux
->link_key
= n
;
1657 size_t align
= (size_t) 1 << (kind
>> rshift
);
1658 tgt
->list
[i
].key
= k
;
1661 k
->dynamic_refcount
= 0;
1662 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1664 k
->tgt_offset
= k
->host_start
- field_tgt_base
1668 k
->refcount
= REFCOUNT_STRUCTELEM
;
1669 if (field_tgt_structelem_first
== NULL
)
1671 /* Set to first structure element of sequence. */
1672 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1673 field_tgt_structelem_first
= k
;
1676 /* Point to refcount of leading element, but do not
1678 k
->structelem_refcount_ptr
1679 = &field_tgt_structelem_first
->structelem_refcount
;
1681 if (i
== field_tgt_clear
)
1683 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1684 field_tgt_structelem_first
= NULL
;
1687 if (i
== field_tgt_clear
)
1688 field_tgt_clear
= FIELD_TGT_EMPTY
;
1692 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1693 k
->tgt_offset
= tgt_size
;
1694 tgt_size
+= k
->host_end
- k
->host_start
;
1696 /* First increment, from 0 to 1. gomp_increment_refcount
1697 encapsulates the different increment cases, so use this
1698 instead of directly setting 1 during initialization. */
1699 gomp_increment_refcount (k
, refcount_set
);
1701 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1702 tgt
->list
[i
].always_copy_from
1703 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1704 tgt
->list
[i
].is_attach
= false;
1705 tgt
->list
[i
].offset
= 0;
1706 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1709 array
->right
= NULL
;
1710 splay_tree_insert (mem_map
, array
);
1711 switch (kind
& typemask
)
1713 case GOMP_MAP_ALLOC
:
1715 case GOMP_MAP_FORCE_ALLOC
:
1716 case GOMP_MAP_FORCE_FROM
:
1717 case GOMP_MAP_ALWAYS_FROM
:
1720 case GOMP_MAP_TOFROM
:
1721 case GOMP_MAP_FORCE_TO
:
1722 case GOMP_MAP_FORCE_TOFROM
:
1723 case GOMP_MAP_ALWAYS_TO
:
1724 case GOMP_MAP_ALWAYS_TOFROM
:
1725 gomp_copy_host2dev (devicep
, aq
,
1726 (void *) (tgt
->tgt_start
1728 (void *) k
->host_start
,
1729 k
->host_end
- k
->host_start
,
1732 case GOMP_MAP_POINTER
:
1733 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1735 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1736 k
->tgt_offset
, sizes
[i
], cbufp
,
1738 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1740 case GOMP_MAP_TO_PSET
:
1741 gomp_copy_host2dev (devicep
, aq
,
1742 (void *) (tgt
->tgt_start
1744 (void *) k
->host_start
,
1745 k
->host_end
- k
->host_start
,
1747 tgt
->list
[i
].has_null_ptr_assoc
= false;
1749 for (j
= i
+ 1; j
< mapnum
; j
++)
1751 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1753 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1754 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1756 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1757 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1762 tgt
->list
[j
].key
= k
;
1763 tgt
->list
[j
].copy_from
= false;
1764 tgt
->list
[j
].always_copy_from
= false;
1765 tgt
->list
[j
].is_attach
= false;
1766 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1767 /* For OpenMP, the use of refcount_sets causes
1768 errors if we set k->refcount = 1 above but also
1769 increment it again here, for decrementing will
1770 not properly match, since we decrement only once
1771 for each key's refcount. Therefore avoid this
1772 increment for OpenMP constructs. */
1774 gomp_increment_refcount (k
, refcount_set
);
1775 gomp_map_pointer (tgt
, aq
,
1776 (uintptr_t) *(void **) hostaddrs
[j
],
1778 + ((uintptr_t) hostaddrs
[j
]
1780 sizes
[j
], cbufp
, false);
1785 case GOMP_MAP_FORCE_PRESENT
:
1787 /* We already looked up the memory region above and it
1789 size_t size
= k
->host_end
- k
->host_start
;
1790 gomp_mutex_unlock (&devicep
->lock
);
1791 #ifdef HAVE_INTTYPES_H
1792 gomp_fatal ("present clause: !acc_is_present (%p, "
1793 "%"PRIu64
" (0x%"PRIx64
"))",
1794 (void *) k
->host_start
,
1795 (uint64_t) size
, (uint64_t) size
);
1797 gomp_fatal ("present clause: !acc_is_present (%p, "
1798 "%lu (0x%lx))", (void *) k
->host_start
,
1799 (unsigned long) size
, (unsigned long) size
);
1803 case GOMP_MAP_PRESENT_ALLOC
:
1804 case GOMP_MAP_PRESENT_TO
:
1805 case GOMP_MAP_PRESENT_FROM
:
1806 case GOMP_MAP_PRESENT_TOFROM
:
1807 case GOMP_MAP_ALWAYS_PRESENT_TO
:
1808 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
1809 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
1810 /* We already looked up the memory region above and it
1812 gomp_mutex_unlock (&devicep
->lock
);
1813 gomp_fatal ("present clause: !omp_target_is_present "
1815 (void *) k
->host_start
, devicep
->target_id
);
1817 case GOMP_MAP_FORCE_DEVICEPTR
:
1818 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1819 gomp_copy_host2dev (devicep
, aq
,
1820 (void *) (tgt
->tgt_start
1822 (void *) k
->host_start
,
1823 sizeof (void *), false, cbufp
);
1826 gomp_mutex_unlock (&devicep
->lock
);
1827 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1831 if (k
->aux
&& k
->aux
->link_key
)
1833 /* Set link pointer on target to the device address of the
1835 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1836 /* We intentionally do not use coalescing here, as it's not
1837 data allocated by the current call to this function. */
1838 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1839 &tgt_addr
, sizeof (void *), true, NULL
);
1845 /* Processing of non-contiguous array rows. */
1848 struct target_var_desc
*next_var_desc
= &tgt
->list
[mapnum
];
1849 for (i
= 0; i
< nca_info
->num_ncarray
; i
++)
1851 struct goacc_ncarray
*nca
= &nca_info
->ncarray
[i
];
1852 int kind
= get_kind (short_mapkind
, kinds
, nca
->map_index
);
1853 size_t align
= (size_t) 1 << (kind
>> rshift
);
1854 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1856 assert (nca
->ptr
== hostaddrs
[nca
->map_index
]);
1858 /* For the map of the non-contiguous array itself, adjust so that
1859 the passed device address points to the beginning of the
1860 ptrblock. Remember to adjust the first-dimension's bias here. */
1861 tgt
->list
[nca
->map_index
].key
->tgt_offset
1862 = tgt_size
- nca
->descr
->dims
[0].base
;
1864 void *target_ptrblock
= (void*) tgt
->tgt_start
+ tgt_size
;
1865 tgt_size
+= nca
->ptrblock_size
;
1867 /* Add splay key for each data row in current non-contiguous
1869 for (size_t j
= 0; j
< nca
->data_row_num
; j
++)
1871 struct target_var_desc
*row_desc
= next_var_desc
++;
1872 void *row
= nca
->data_rows
[j
];
1873 cur_node
.host_start
= (uintptr_t) row
;
1874 cur_node
.host_end
= cur_node
.host_start
+ nca
->data_row_size
;
1875 splay_tree_key k
= splay_tree_lookup (mem_map
, &cur_node
);
1878 assert (k
->refcount
!= REFCOUNT_LINK
);
1879 gomp_map_vars_existing (devicep
, aq
, k
, &cur_node
, row_desc
,
1880 kind
& typemask
, false, false,
1881 cbufp
, refcount_set
);
1886 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1889 k
->host_start
= (uintptr_t) row
;
1890 k
->host_end
= k
->host_start
+ nca
->data_row_size
;
1894 k
->dynamic_refcount
= 0;
1896 k
->tgt_offset
= tgt_size
;
1898 tgt_size
+= nca
->data_row_size
;
1902 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1903 row_desc
->always_copy_from
1904 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1905 row_desc
->is_attach
= false;
1906 row_desc
->offset
= 0;
1907 row_desc
->length
= nca
->data_row_size
;
1910 array
->right
= NULL
;
1911 splay_tree_insert (mem_map
, array
);
1913 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1914 gomp_copy_host2dev (devicep
, aq
,
1915 (void *) tgt
->tgt_start
+ k
->tgt_offset
,
1916 (void *) k
->host_start
,
1917 nca
->data_row_size
, false,
1921 nca
->tgt_data_rows
[j
]
1922 = (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
);
1925 /* Now we have the target memory allocated, and target offsets of all
1926 row blocks assigned and calculated, we can construct the
1927 accelerator side ptrblock and copy it in. */
1928 if (nca
->ptrblock_size
)
1930 void *ptrblock
= goacc_noncontig_array_create_ptrblock
1931 (nca
, target_ptrblock
);
1932 gomp_copy_host2dev (devicep
, aq
, target_ptrblock
, ptrblock
,
1933 nca
->ptrblock_size
, false, cbufp
);
1940 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1942 for (i
= 0; i
< mapnum
; i
++)
1944 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1945 gomp_copy_host2dev (devicep
, aq
,
1946 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1947 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1954 /* See 'gomp_coalesce_buf_add'. */
1958 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1959 gomp_copy_host2dev (devicep
, aq
,
1960 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1961 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1962 - cbuf
.chunks
[0].start
),
1963 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1970 /* If the variable from "omp target enter data" map-list was already mapped,
1971 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1973 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1979 gomp_mutex_unlock (&devicep
->lock
);
1983 attribute_hidden
struct target_mem_desc
*
1984 gomp_map_vars_openacc (struct gomp_device_descr
*devicep
,
1985 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1986 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1989 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, NULL
,
1990 sizes
, (void *) kinds
,
1991 (struct goacc_ncarray_info
*) nca_info
,
1992 true, NULL
, GOMP_MAP_VARS_OPENACC
);
1995 static struct target_mem_desc
*
1996 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1997 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1998 bool short_mapkind
, htab_t
*refcount_set
,
1999 enum gomp_map_vars_kind pragma_kind
)
2001 /* This management of a local refcount_set is for convenience of callers
2002 who do not share a refcount_set over multiple map/unmap uses. */
2003 htab_t local_refcount_set
= NULL
;
2004 if (refcount_set
== NULL
)
2006 local_refcount_set
= htab_create (mapnum
);
2007 refcount_set
= &local_refcount_set
;
2010 struct target_mem_desc
*tgt
;
2011 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
2012 sizes
, kinds
, NULL
, short_mapkind
,
2013 refcount_set
, pragma_kind
);
2014 if (local_refcount_set
)
2015 htab_free (local_refcount_set
);
2020 attribute_hidden
struct target_mem_desc
*
2021 goacc_map_vars (struct gomp_device_descr
*devicep
,
2022 struct goacc_asyncqueue
*aq
, size_t mapnum
,
2023 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
2024 void *kinds
, bool short_mapkind
,
2025 enum gomp_map_vars_kind pragma_kind
)
2027 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
2028 sizes
, kinds
, NULL
, short_mapkind
, NULL
,
2029 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
/* Tear down a target_mem_desc's device-side storage.
   NOTE(review): this extraction has dropped source lines (the embedded
   original line numbers jump 2033 -> 2035 -> 2037), so the body below is
   incomplete; verify against the full file before relying on it.  */
2033 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
2035 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
2037 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
/* Release one reference to the target_mem_desc passed as PTR (opaque so
   it can be used as a queue callback).  When no other references remain
   the descriptor is unmapped via gomp_unmap_tgt.  Returns true iff the
   descriptor was actually unmapped.
   NOTE(review): lines are missing from this extraction (numbers jump
   2050 -> 2054); presumably the branch that merely decrements the
   refcount was dropped — confirm against the full file.  */
2044 gomp_unref_tgt (void *ptr
)
2046 bool is_tgt_unmapped
= false;
2048 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
/* More than one reference: the descriptor must stay mapped (the
   decrement itself is in the dropped lines).  */
2050 if (tgt
->refcount
> 1)
2054 gomp_unmap_tgt (tgt
);
2055 is_tgt_unmapped
= true;
2058 return is_tgt_unmapped
;
/* Void-returning wrapper around gomp_unref_tgt, discarding its
   "was the descriptor unmapped" result; suitable as a queue callback.  */
static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
/* Remove splay-tree key K from tree SP, preserving auxiliary state:
   a stored link key is re-inserted into the tree, and any attach-count
   array owned by K's aux record is freed.
   NOTE(review): lines are missing from this extraction (numbers jump
   2070 -> 2073); the aux accesses below are presumably guarded by a
   dropped "if (k->aux)" — confirm against the full file.  */
2068 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
2070 splay_tree_remove (sp
, k
);
/* Put the stored link key back so that mapping survives removal of
   this data mapping.  */
2073 if (k
->aux
->link_key
)
2074 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
2075 if (k
->aux
->attach_count
)
2076 free (k
->aux
->attach_count
);
/* Remove the mapping identified by splay-tree key K from DEVICEP's memory
   map and drop the reference on the owning target_mem_desc — via an
   asynchronous queue callback when AQ is non-NULL, otherwise
   synchronously.  Returns whether the underlying target_mem_desc was
   unmapped.
   NOTE(review): this extraction has dropped source lines throughout (the
   embedded original line numbers are non-contiguous); verify against the
   full file before relying on the text below.  */
2082 static inline __attribute__((always_inline
)) bool
2083 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2084 struct goacc_asyncqueue
*aq
)
2086 bool is_tgt_unmapped
= false;
/* Structure-element keys share one refcount; removal must start from
   the FIRST element of the sequence.  */
2088 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
2090 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
2091 /* Infer the splay_tree_key of the first structelem key using the
2092 pointer to the first structelem_refcount. */
2093 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
2094 - offsetof (struct splay_tree_key_s
,
2095 structelem_refcount
));
2096 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
2098 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
2099 with the splay_tree_keys embedded inside. */
2100 splay_tree_node node
=
2101 (splay_tree_node
) ((char *) k
2102 - offsetof (struct splay_tree_node_s
, key
));
2105 /* Starting from the _FIRST key, and continue for all following
2107 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
2108 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
/* Non-structelem case: remove just this one key.  */
2115 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
/* With an async queue, defer the unref until queued operations that may
   still use the target memory have completed.  */
2118 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2121 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
2122 return is_tgt_unmapped
;
2125 attribute_hidden
bool
2126 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
2128 return gomp_remove_var_internal (devicep
, k
, NULL
);
2131 /* Remove a variable asynchronously. This actually removes the variable
2132 mapping immediately, but retains the linked target_mem_desc until the
2133 asynchronous operation has completed (as it may still refer to target
2134 memory). The device lock must be held before entry, and remains locked on
2137 attribute_hidden
void
2138 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2139 struct goacc_asyncqueue
*aq
)
2141 (void) gomp_remove_var_internal (devicep
, k
, aq
);
2144 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2145 variables back from device to host: if it is false, it is assumed that this
2146 has been done already. */
/* Unmap the variables described by TGT.  If DO_COPYFROM is true, relevant
   data is copied back from the device to the host; otherwise that is
   assumed to have happened already.  Detachments are performed first,
   then per-entry refcount decrements, copy-backs, and removals.
   NOTE(review): this extraction has dropped many source lines (the
   embedded original line numbers are non-contiguous — braces, some
   conditions and the loop declarations are missing); verify against the
   full file before relying on the text below.  */
2148 static inline __attribute__((always_inline
)) void
2149 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2150 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
2152 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
2154 if (tgt
->list_count
== 0)
2160 gomp_mutex_lock (&devicep
->lock
);
/* A finalized device has nothing left to unmap.  */
2161 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2163 gomp_mutex_unlock (&devicep
->lock
);
2171 /* We must perform detachments before any copies back to the host. */
2172 for (i
= 0; i
< tgt
->list_count
; i
++)
2174 splay_tree_key k
= tgt
->list
[i
].key
;
2176 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
2177 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
2178 + tgt
->list
[i
].offset
,
/* Second pass: refcounts, copy-back, removal.  */
2182 for (i
= 0; i
< tgt
->list_count
; i
++)
2184 splay_tree_key k
= tgt
->list
[i
].key
;
2188 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2189 counts ('n->refcount', 'n->dynamic_refcount'). */
2190 if (tgt
->list
[i
].is_attach
)
2193 bool do_copy
, do_remove
;
2194 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
2196 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
2197 || tgt
->list
[i
].always_copy_from
)
2198 gomp_copy_dev2host (devicep
, aq
,
2199 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
2200 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2201 + tgt
->list
[i
].offset
),
2202 tgt
->list
[i
].length
);
2205 struct target_mem_desc
*k_tgt
= k
->tgt
;
2206 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2207 /* It would be bad if TGT got unmapped while we're still iterating
2208 over its LIST_COUNT, and also expect to use it in the following
2210 assert (!is_tgt_unmapped
/* With an async queue, defer the final unref of TGT until queued
   operations have completed.  */
2216 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2219 gomp_unref_tgt ((void *) tgt
);
2221 gomp_mutex_unlock (&devicep
->lock
);
2225 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2226 htab_t
*refcount_set
)
2228 /* This management of a local refcount_set is for convenience of callers
2229 who do not share a refcount_set over multiple map/unmap uses. */
2230 htab_t local_refcount_set
= NULL
;
2231 if (refcount_set
== NULL
)
2233 local_refcount_set
= htab_create (tgt
->list_count
);
2234 refcount_set
= &local_refcount_set
;
2237 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2239 if (local_refcount_set
)
2240 htab_free (local_refcount_set
);
2243 attribute_hidden
void
2244 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2245 struct goacc_asyncqueue
*aq
)
2247 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
/* Implement 'target update'-style data motion: for each of the MAPNUM
   host address ranges, copy to and/or from the already-mapped device
   region according to the map KINDS.  Errors out if a range is not
   mapped (or, for 'present' kinds, reports the present-clause failure).
   NOTE(review): this extraction has dropped many source lines (the
   embedded original line numbers are non-contiguous — braces, loop
   declarations and some arguments are missing); verify against the full
   file before relying on the text below.  */
2251 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2252 size_t *sizes
, void *kinds
, bool short_mapkind
)
2255 struct splay_tree_key_s cur_node
;
/* Short (unsigned short) kinds use 8 type bits, legacy kinds 3.  */
2256 const int typemask
= short_mapkind
? 0xff : 0x7;
2264 gomp_mutex_lock (&devicep
->lock
);
2265 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2267 gomp_mutex_unlock (&devicep
->lock
);
2271 for (i
= 0; i
< mapnum
; i
++)
2274 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2275 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2276 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2279 int kind
= get_kind (short_mapkind
, kinds
, i
);
/* The mapped region must fully cover the requested range.  */
2280 if (n
->host_start
> cur_node
.host_start
2281 || n
->host_end
< cur_node
.host_end
)
2283 gomp_mutex_unlock (&devicep
->lock
);
2284 gomp_fatal ("Trying to update [%p..%p) object when "
2285 "only [%p..%p) is mapped",
2286 (void *) cur_node
.host_start
,
2287 (void *) cur_node
.host_end
,
2288 (void *) n
->host_start
,
2289 (void *) n
->host_end
);
/* Regions with attached pointers are updated pointer-by-pointer so
   attachments are not clobbered.  */
2292 if (n
->aux
&& n
->aux
->attach_count
)
2294 uintptr_t addr
= cur_node
.host_start
;
2295 while (addr
< cur_node
.host_end
)
2297 /* We have to be careful not to overwrite still attached
2298 pointers during host<->device updates. */
2299 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2300 if (n
->aux
->attach_count
[i
] == 0)
2302 void *devaddr
= (void *) (n
->tgt
->tgt_start
2304 + addr
- n
->host_start
);
2305 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2306 gomp_copy_host2dev (devicep
, NULL
,
2307 devaddr
, (void *) addr
,
2308 sizeof (void *), false, NULL
);
2309 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2310 gomp_copy_dev2host (devicep
, NULL
,
2311 (void *) addr
, devaddr
,
2314 addr
+= sizeof (void *);
/* Plain regions: a single bulk copy in each requested direction.  */
2319 void *hostaddr
= (void *) cur_node
.host_start
;
2320 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2321 + cur_node
.host_start
2323 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2325 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2326 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2328 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2329 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
/* No mapping found: only an error for 'present' kinds.  */
2334 int kind
= get_kind (short_mapkind
, kinds
, i
);
2336 if (GOMP_MAP_PRESENT_P (kind
))
2338 /* We already looked up the memory region above and it
2340 gomp_mutex_unlock (&devicep
->lock
);
2341 gomp_fatal ("present clause: !omp_target_is_present "
2343 (void *) hostaddrs
[i
], devicep
->target_id
);
2347 gomp_mutex_unlock (&devicep
->lock
);
2350 static struct gomp_offload_icv_list
*
2351 gomp_get_offload_icv_item (int dev_num
)
2353 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2354 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2360 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2361 depending on the device num and the variable hierarchy
2362 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2363 device and thus no item with that device number is contained in
2364 gomp_offload_icv_list, then a new item is created and added to the list. */
2366 static struct gomp_offload_icvs
*
2367 get_gomp_offload_icvs (int dev_num
)
2369 struct gomp_icv_list
*dev
2370 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2371 struct gomp_icv_list
*all
2372 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2373 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2374 struct gomp_offload_icv_list
*offload_icvs
2375 = gomp_get_offload_icv_item (dev_num
);
2377 if (offload_icvs
!= NULL
)
2378 return &offload_icvs
->icvs
;
2380 struct gomp_offload_icv_list
*new
2381 = (struct gomp_offload_icv_list
*) gomp_malloc (sizeof (struct gomp_offload_icv_list
));
2383 new->device_num
= dev_num
;
2384 new->icvs
.device_num
= dev_num
;
2385 new->next
= gomp_offload_icv_list
;
2387 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2388 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2389 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2390 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2391 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2392 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2394 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
2397 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2398 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2399 else if (dev
!= NULL
2400 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2401 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2402 else if (all
!= NULL
2403 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2404 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2406 new->icvs
.teams_thread_limit
2407 = gomp_default_icv_values
.teams_thread_limit_var
;
2410 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2411 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2412 else if (dev
!= NULL
2413 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2414 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2415 else if (all
!= NULL
2416 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2417 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2419 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
2421 gomp_offload_icv_list
= new;
2425 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2426 And insert to splay tree the mapping between addresses from HOST_TABLE and
2427 from loaded target image. We rely in the host and device compiler
2428 emitting variable and functions in the same order. */
2431 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2432 const void *host_table
, const void *target_data
,
2433 bool is_register_lock
)
2435 void **host_func_table
= ((void ***) host_table
)[0];
2436 void **host_funcs_end
= ((void ***) host_table
)[1];
2437 void **host_var_table
= ((void ***) host_table
)[2];
2438 void **host_vars_end
= ((void ***) host_table
)[3];
2440 /* The func table contains only addresses, the var table contains addresses
2441 and corresponding sizes. */
2442 int num_funcs
= host_funcs_end
- host_func_table
;
2443 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2445 /* Load image to device and get target addresses for the image. */
2446 struct addr_pair
*target_table
= NULL
;
2447 uint64_t *rev_target_fn_table
= NULL
;
2448 int i
, num_target_entries
;
2450 /* With reverse offload, insert also target-host addresses. */
2451 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2454 = devicep
->load_image_func (devicep
->target_id
, version
,
2455 target_data
, &target_table
,
2456 rev_lookup
? &rev_target_fn_table
: NULL
);
2458 if (num_target_entries
!= num_funcs
+ num_vars
2459 /* "+1" due to the additional ICV struct. */
2460 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2462 gomp_mutex_unlock (&devicep
->lock
);
2463 if (is_register_lock
)
2464 gomp_mutex_unlock (®ister_lock
);
2465 gomp_fatal ("Cannot map target functions or variables"
2466 " (expected %u, have %u)", num_funcs
+ num_vars
,
2467 num_target_entries
);
2470 /* Insert host-target address mapping into splay tree. */
2471 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2472 /* "+1" due to the additional ICV struct. */
2473 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2474 * sizeof (*tgt
->array
));
2475 if (rev_target_fn_table
)
2476 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2478 tgt
->rev_array
= NULL
;
2479 tgt
->refcount
= REFCOUNT_INFINITY
;
2482 tgt
->to_free
= NULL
;
2484 tgt
->list_count
= 0;
2485 tgt
->device_descr
= devicep
;
2486 splay_tree_node array
= tgt
->array
;
2487 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2489 for (i
= 0; i
< num_funcs
; i
++)
2491 splay_tree_key k
= &array
->key
;
2492 k
->host_start
= (uintptr_t) host_func_table
[i
];
2493 k
->host_end
= k
->host_start
+ 1;
2495 k
->tgt_offset
= target_table
[i
].start
;
2496 k
->refcount
= REFCOUNT_INFINITY
;
2497 k
->dynamic_refcount
= 0;
2500 array
->right
= NULL
;
2501 splay_tree_insert (&devicep
->mem_map
, array
);
2502 if (rev_target_fn_table
)
2504 reverse_splay_tree_key k2
= &rev_array
->key
;
2505 k2
->dev
= rev_target_fn_table
[i
];
2507 rev_array
->left
= NULL
;
2508 rev_array
->right
= NULL
;
2510 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2516 /* Most significant bit of the size in host and target tables marks
2517 "omp declare target link" variables. */
2518 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2519 const uintptr_t size_mask
= ~link_bit
;
2521 for (i
= 0; i
< num_vars
; i
++)
2523 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2524 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2525 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2527 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2529 gomp_mutex_unlock (&devicep
->lock
);
2530 if (is_register_lock
)
2531 gomp_mutex_unlock (®ister_lock
);
2532 gomp_fatal ("Cannot map target variables (size mismatch)");
2535 splay_tree_key k
= &array
->key
;
2536 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2538 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2540 k
->tgt_offset
= target_var
->start
;
2541 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2542 k
->dynamic_refcount
= 0;
2545 array
->right
= NULL
;
2546 splay_tree_insert (&devicep
->mem_map
, array
);
2550 /* Last entry is for a ICVs variable.
2551 Tolerate case where plugin does not return those entries. */
2552 if (num_funcs
+ num_vars
< num_target_entries
)
2554 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2556 /* Start address will be non-zero for the ICVs variable if
2557 the variable was found in this image. */
2558 if (var
->start
!= 0)
2560 /* The index of the devicep within devices[] is regarded as its
2561 'device number', which is different from the per-device type
2562 devicep->target_id. */
2563 int dev_num
= (int) (devicep
- &devices
[0]);
2564 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2565 size_t var_size
= var
->end
- var
->start
;
2566 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2568 gomp_mutex_unlock (&devicep
->lock
);
2569 if (is_register_lock
)
2570 gomp_mutex_unlock (®ister_lock
);
2571 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2574 /* Copy the ICVs variable to place on device memory, hereby
2575 actually designating its device number into effect. */
2576 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2577 var_size
, false, NULL
);
2578 splay_tree_key k
= &array
->key
;
2579 k
->host_start
= (uintptr_t) icvs
;
2581 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2583 k
->tgt_offset
= var
->start
;
2584 k
->refcount
= REFCOUNT_INFINITY
;
2585 k
->dynamic_refcount
= 0;
2588 array
->right
= NULL
;
2589 splay_tree_insert (&devicep
->mem_map
, array
);
2594 free (target_table
);
2597 /* Unload the mappings described by target_data from device DEVICE_P.
2598 The device must be locked. */
2601 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2603 const void *host_table
, const void *target_data
)
2605 void **host_func_table
= ((void ***) host_table
)[0];
2606 void **host_funcs_end
= ((void ***) host_table
)[1];
2607 void **host_var_table
= ((void ***) host_table
)[2];
2608 void **host_vars_end
= ((void ***) host_table
)[3];
2610 /* The func table contains only addresses, the var table contains addresses
2611 and corresponding sizes. */
2612 int num_funcs
= host_funcs_end
- host_func_table
;
2613 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2615 struct splay_tree_key_s k
;
2616 splay_tree_key node
= NULL
;
2618 /* Find mapping at start of node array */
2619 if (num_funcs
|| num_vars
)
2621 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2622 : (uintptr_t) host_var_table
[0]);
2623 k
.host_end
= k
.host_start
+ 1;
2624 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2627 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2629 gomp_mutex_unlock (&devicep
->lock
);
2630 gomp_fatal ("image unload fail");
2632 if (devicep
->mem_map_rev
.root
)
2634 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2636 assert (node
&& node
->tgt
&& node
->tgt
->rev_array
);
2637 assert (devicep
->mem_map_rev
.root
->key
.k
->tgt
== node
->tgt
);
2638 free (node
->tgt
->rev_array
);
2639 devicep
->mem_map_rev
.root
= NULL
;
2642 /* Remove mappings from splay tree. */
2644 for (i
= 0; i
< num_funcs
; i
++)
2646 k
.host_start
= (uintptr_t) host_func_table
[i
];
2647 k
.host_end
= k
.host_start
+ 1;
2648 splay_tree_remove (&devicep
->mem_map
, &k
);
2651 /* Most significant bit of the size in host and target tables marks
2652 "omp declare target link" variables. */
2653 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2654 const uintptr_t size_mask
= ~link_bit
;
2655 bool is_tgt_unmapped
= false;
2657 for (i
= 0; i
< num_vars
; i
++)
2659 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2661 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2663 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2664 splay_tree_remove (&devicep
->mem_map
, &k
);
2667 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2668 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2672 if (node
&& !is_tgt_unmapped
)
2680 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2682 char *end
= buf
+ size
, *p
= buf
;
2683 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2684 p
+= snprintf (p
, end
- p
, "unified_address");
2685 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2686 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2687 (p
== buf
? "" : ", "));
2688 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2689 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2690 (p
== buf
? "" : ", "));
2693 /* This function should be called from every offload image while loading.
2694 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2695 the target, and DATA. */
2698 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2699 int target_type
, const void *data
)
2703 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2704 gomp_fatal ("Library too old for offload (version %u < %u)",
2705 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2708 const void *target_data
;
2709 if (GOMP_VERSION_LIB (version
) > 1)
2711 omp_req
= (int) (size_t) ((void **) data
)[0];
2712 target_data
= &((void **) data
)[1];
2720 gomp_mutex_lock (®ister_lock
);
2722 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
2724 char buf1
[sizeof ("unified_address, unified_shared_memory, "
2725 "reverse_offload")];
2726 char buf2
[sizeof ("unified_address, unified_shared_memory, "
2727 "reverse_offload")];
2728 gomp_requires_to_name (buf2
, sizeof (buf2
),
2729 omp_req
!= GOMP_REQUIRES_TARGET_USED
2730 ? omp_req
: omp_requires_mask
);
2731 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
2732 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
2734 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
2735 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2736 "in multiple compilation units: '%s' vs. '%s'",
2740 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2741 "some compilation units", buf2
);
2743 omp_requires_mask
= omp_req
;
2745 /* Load image to all initialized devices. */
2746 for (i
= 0; i
< num_devices
; i
++)
2748 struct gomp_device_descr
*devicep
= &devices
[i
];
2749 gomp_mutex_lock (&devicep
->lock
);
2750 if (devicep
->type
== target_type
2751 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2752 gomp_load_image_to_device (devicep
, version
,
2753 host_table
, target_data
, true);
2754 gomp_mutex_unlock (&devicep
->lock
);
2757 /* Insert image to array of pending images. */
2759 = gomp_realloc_unlock (offload_images
,
2760 (num_offload_images
+ 1)
2761 * sizeof (struct offload_image_descr
));
2762 offload_images
[num_offload_images
].version
= version
;
2763 offload_images
[num_offload_images
].type
= target_type
;
2764 offload_images
[num_offload_images
].host_table
= host_table
;
2765 offload_images
[num_offload_images
].target_data
= target_data
;
2767 num_offload_images
++;
2768 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: register an offload image using version 0 of the
   table format.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2780 /* This function should be called from every offload image while unloading.
2781 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2782 the target, and DATA. */
2785 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2786 int target_type
, const void *data
)
2790 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2791 gomp_fatal ("Library too old for offload (version %u < %u)",
2792 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2794 const void *target_data
;
2795 if (GOMP_VERSION_LIB (version
) > 1)
2796 target_data
= &((void **) data
)[1];
2800 gomp_mutex_lock (®ister_lock
);
2802 /* Unload image from all initialized devices. */
2803 for (i
= 0; i
< num_devices
; i
++)
2805 struct gomp_device_descr
*devicep
= &devices
[i
];
2806 gomp_mutex_lock (&devicep
->lock
);
2807 if (devicep
->type
== target_type
2808 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2809 gomp_unload_image_from_device (devicep
, version
,
2810 host_table
, target_data
);
2811 gomp_mutex_unlock (&devicep
->lock
);
2814 /* Remove image from array of pending images. */
2815 for (i
= 0; i
< num_offload_images
; i
++)
2816 if (offload_images
[i
].target_data
== target_data
)
2818 offload_images
[i
] = offload_images
[--num_offload_images
];
2822 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregister an offload image using version 0 of the
   table format.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2834 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2835 must be locked on entry, and remains locked on return. */
2837 attribute_hidden
void
2838 gomp_init_device (struct gomp_device_descr
*devicep
)
2841 if (!devicep
->init_device_func (devicep
->target_id
))
2843 gomp_mutex_unlock (&devicep
->lock
);
2844 gomp_fatal ("device initialization failed");
2847 /* Load to device all images registered by the moment. */
2848 for (i
= 0; i
< num_offload_images
; i
++)
2850 struct offload_image_descr
*image
= &offload_images
[i
];
2851 if (image
->type
== devicep
->type
)
2852 gomp_load_image_to_device (devicep
, image
->version
,
2853 image
->host_table
, image
->target_data
,
2857 /* Initialize OpenACC asynchronous queues. */
2858 goacc_init_asyncqueues (devicep
);
2860 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2863 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2864 must be locked on entry, and remains locked on return. */
2866 attribute_hidden
bool
2867 gomp_fini_device (struct gomp_device_descr
*devicep
)
2869 bool ret
= goacc_fini_asyncqueues (devicep
);
2870 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2871 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2875 attribute_hidden
void
2876 gomp_unload_device (struct gomp_device_descr
*devicep
)
2878 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2882 /* Unload from device all images registered at the moment. */
2883 for (i
= 0; i
< num_offload_images
; i
++)
2885 struct offload_image_descr
*image
= &offload_images
[i
];
2886 if (image
->type
== devicep
->type
)
2887 gomp_unload_image_from_device (devicep
, image
->version
,
2889 image
->target_data
);
2894 /* Host fallback for GOMP_target{,_ext} routines. */
2897 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2898 struct gomp_device_descr
*devicep
, void **args
)
2900 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2902 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2904 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2905 "be used for offloading");
2908 memset (thr
, '\0', sizeof (*thr
));
2909 if (gomp_places_list
)
2911 thr
->place
= old_thr
.place
;
2912 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2917 intptr_t id
= (intptr_t) *args
++, val
;
2918 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2919 val
= (intptr_t) *args
++;
2921 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2922 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2924 id
&= GOMP_TARGET_ARG_ID_MASK
;
2925 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2927 val
= val
> INT_MAX
? INT_MAX
: val
;
2929 gomp_icv (true)->thread_limit_var
= val
;
2934 gomp_free_thread (thr
);
2938 /* Calculate alignment and size requirements of a private copy of data shared
2939 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2942 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2943 unsigned short *kinds
, size_t *tgt_align
,
2947 for (i
= 0; i
< mapnum
; i
++)
2948 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2950 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2951 if (*tgt_align
< align
)
2953 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2954 *tgt_size
+= sizes
[i
];
2958 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2961 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2962 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2965 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2967 tgt
+= tgt_align
- al
;
2970 for (i
= 0; i
< mapnum
; i
++)
2971 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2973 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2974 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2975 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2976 hostaddrs
[i
] = tgt
+ tgt_size
;
2977 tgt_size
= tgt_size
+ sizes
[i
];
2978 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2980 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2986 /* Helper function of GOMP_target{,_ext} routines. */
2989 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2990 void (*host_fn
) (void *))
2992 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2993 return (void *) host_fn
;
2996 gomp_mutex_lock (&devicep
->lock
);
2997 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2999 gomp_mutex_unlock (&devicep
->lock
);
3003 struct splay_tree_key_s k
;
3004 k
.host_start
= (uintptr_t) host_fn
;
3005 k
.host_end
= k
.host_start
+ 1;
3006 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
3007 gomp_mutex_unlock (&devicep
->lock
);
3011 return (void *) tgt_fn
->tgt_offset
;
3015 /* Called when encountering a target directive. If DEVICE
3016 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
3017 GOMP_DEVICE_HOST_FALLBACK (or any value
3018 larger than last available hw device), use host fallback.
3019 FN is address of host code, UNUSED is part of the current ABI, but
3020 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
3021 with MAPNUM entries, with addresses of the host objects,
3022 sizes of the host objects (resp. for pointer kind pointer bias
3023 and assumed sizeof (void *) size) and kinds. */
3026 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
3027 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
3028 unsigned char *kinds
)
3030 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3034 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3035 /* All shared memory devices should use the GOMP_target_ext function. */
3036 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
3037 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
3038 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
3040 htab_t refcount_set
= htab_create (mapnum
);
3041 struct target_mem_desc
*tgt_vars
3042 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3043 &refcount_set
, GOMP_MAP_VARS_TARGET
);
3044 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
3046 htab_clear (refcount_set
);
3047 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3048 htab_free (refcount_set
);
3051 static inline unsigned int
3052 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
3054 /* If we cannot run asynchronously, simply ignore nowait. */
3055 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
3056 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
3062 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
3064 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3068 void *host_ptr
= &item
->icvs
;
3069 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
3070 if (dev_ptr
!= NULL
)
3071 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
3072 sizeof (struct gomp_offload_icvs
));
3075 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
3076 and several arguments have been added:
3077 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
3078 DEPEND is array of dependencies, see GOMP_task for details.
3080 ARGS is a pointer to an array consisting of a variable number of both
3081 device-independent and device-specific arguments, which can take one two
3082 elements where the first specifies for which device it is intended, the type
3083 and optionally also the value. If the value is not present in the first
3084 one, the whole second element the actual value. The last element of the
3085 array is a single NULL. Among the device independent can be for example
3086 NUM_TEAMS and THREAD_LIMIT.
3088 NUM_TEAMS is positive if GOMP_teams will be called in the body with
3089 that value, or 1 if teams construct is not present, or 0, if
3090 teams construct does not have num_teams clause and so the choice is
3091 implementation defined, and -1 if it can't be determined on the host
3092 what value will GOMP_teams have on the device.
3093 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
3094 body with that value, or 0, if teams construct does not have thread_limit
3095 clause or the teams construct is not present, or -1 if it can't be
3096 determined on the host what value will GOMP_teams have on the device. */
3099 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
3100 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3101 unsigned int flags
, void **depend
, void **args
)
3103 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3104 size_t tgt_align
= 0, tgt_size
= 0;
3105 bool fpc_done
= false;
3107 /* Obtain the original TEAMS and THREADS values from ARGS. */
3108 intptr_t orig_teams
= 1, orig_threads
= 0;
3109 size_t num_args
= 0, len
= 1, teams_len
= 1, threads_len
= 1;
3110 void **tmpargs
= args
;
3113 intptr_t id
= (intptr_t) *tmpargs
++, val
;
3114 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3116 val
= (intptr_t) *tmpargs
++;
3121 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
3125 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
3127 val
= val
> INT_MAX
? INT_MAX
: val
;
3128 if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
)
3133 else if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
)
3140 intptr_t new_teams
= orig_teams
, new_threads
= orig_threads
;
3141 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3142 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3143 value could not be determined. No change.
3144 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3145 Set device-specific value.
3146 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3148 if (orig_teams
== -2)
3150 else if (orig_teams
== 0)
3152 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3154 new_teams
= item
->icvs
.nteams
;
3156 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3157 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3158 e.g. a THREAD_LIMIT clause. */
3159 if (orig_teams
> -2 && orig_threads
== 0)
3161 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3163 new_threads
= item
->icvs
.teams_thread_limit
;
3166 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3168 void **new_args
= args
;
3169 if (orig_teams
!= new_teams
|| orig_threads
!= new_threads
)
3171 size_t tms_len
= (orig_teams
== new_teams
3173 : (new_teams
> -(1 << 15) && new_teams
< (1 << 15)
3175 size_t ths_len
= (orig_threads
== new_threads
3177 : (new_threads
> -(1 << 15) && new_threads
< (1 << 15)
3179 /* One additional item after the last arg must be NULL. */
3180 size_t new_args_cnt
= num_args
- teams_len
- threads_len
+ tms_len
3182 new_args
= (void **) gomp_alloca (new_args_cnt
* sizeof (void*));
3185 void **tmp_new_args
= new_args
;
3186 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3187 too if they have not been changed and skipped otherwise. */
3190 intptr_t id
= (intptr_t) *tmpargs
;
3191 if (((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
3192 && orig_teams
!= new_teams
)
3193 || ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
3194 && orig_threads
!= new_threads
))
3197 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3202 *tmp_new_args
++ = *tmpargs
++;
3203 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3204 *tmp_new_args
++ = *tmpargs
++;
3208 /* Add the new TEAMS arg to the new args list if it has been changed. */
3209 if (orig_teams
!= new_teams
)
3211 intptr_t new_val
= new_teams
;
3214 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3215 | GOMP_TARGET_ARG_NUM_TEAMS
;
3216 *tmp_new_args
++ = (void *) new_val
;
3220 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3221 | GOMP_TARGET_ARG_NUM_TEAMS
);
3222 *tmp_new_args
++ = (void *) new_val
;
3226 /* Add the new THREADS arg to the new args list if it has been changed. */
3227 if (orig_threads
!= new_threads
)
3229 intptr_t new_val
= new_threads
;
3232 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3233 | GOMP_TARGET_ARG_THREAD_LIMIT
;
3234 *tmp_new_args
++ = (void *) new_val
;
3238 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3239 | GOMP_TARGET_ARG_THREAD_LIMIT
);
3240 *tmp_new_args
++ = (void *) new_val
;
3244 *tmp_new_args
= NULL
;
3247 flags
= clear_unsupported_flags (devicep
, flags
);
3249 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
3251 struct gomp_thread
*thr
= gomp_thread ();
3252 /* Create a team if we don't have any around, as nowait
3253 target tasks make sense to run asynchronously even when
3254 outside of any parallel. */
3255 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
3257 struct gomp_team
*team
= gomp_new_team (1);
3258 struct gomp_task
*task
= thr
->task
;
3259 struct gomp_task
**implicit_task
= &task
;
3260 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
3261 team
->prev_ts
= thr
->ts
;
3262 thr
->ts
.team
= team
;
3263 thr
->ts
.team_id
= 0;
3264 thr
->ts
.work_share
= &team
->work_shares
[0];
3265 thr
->ts
.last_work_share
= NULL
;
3266 #ifdef HAVE_SYNC_BUILTINS
3267 thr
->ts
.single_count
= 0;
3269 thr
->ts
.static_trip
= 0;
3270 thr
->task
= &team
->implicit_task
[0];
3271 gomp_init_task (thr
->task
, NULL
, icv
);
3272 while (*implicit_task
3273 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
3274 implicit_task
= &(*implicit_task
)->parent
;
3277 thr
->task
= *implicit_task
;
3279 free (*implicit_task
);
3280 thr
->task
= &team
->implicit_task
[0];
3283 pthread_setspecific (gomp_thread_destructor
, thr
);
3284 if (implicit_task
!= &task
)
3286 *implicit_task
= thr
->task
;
3291 && !thr
->task
->final_task
)
3293 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
3294 sizes
, kinds
, flags
, depend
, new_args
,
3295 GOMP_TARGET_TASK_BEFORE_MAP
);
3300 /* If there are depend clauses, but nowait is not present
3301 (or we are in a final task), block the parent task until the
3302 dependencies are resolved and then just continue with the rest
3303 of the function as if it is a merged task. */
3306 struct gomp_thread
*thr
= gomp_thread ();
3307 if (thr
->task
&& thr
->task
->depend_hash
)
3309 /* If we might need to wait, copy firstprivate now. */
3310 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3311 &tgt_align
, &tgt_size
);
3314 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3315 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3316 tgt_align
, tgt_size
);
3319 gomp_task_maybe_wait_for_dependencies (depend
);
3325 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3326 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
3327 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3331 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3332 &tgt_align
, &tgt_size
);
3335 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3336 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3337 tgt_align
, tgt_size
);
3340 gomp_target_fallback (fn
, hostaddrs
, devicep
, new_args
);
3344 struct target_mem_desc
*tgt_vars
;
3345 htab_t refcount_set
= NULL
;
3347 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3351 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3352 &tgt_align
, &tgt_size
);
3355 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3356 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3357 tgt_align
, tgt_size
);
3364 refcount_set
= htab_create (mapnum
);
3365 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
3366 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
3368 devicep
->run_func (devicep
->target_id
, fn_addr
,
3369 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
3373 htab_clear (refcount_set
);
3374 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3377 htab_free (refcount_set
);
3379 /* Copy back ICVs from device to host.
3380 HOST_PTR is expected to exist since it was added in
3381 gomp_load_image_to_device if not already available. */
3382 gomp_copy_back_icvs (devicep
, device
);
3387 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3388 keeping track of all variable handling - assuming that reverse offload occurs
3389 ony very rarely. Downside is that the reverse search is slow. */
3391 struct gomp_splay_tree_rev_lookup_data
{
3392 uintptr_t tgt_start
;
3398 gomp_splay_tree_rev_lookup (splay_tree_key key
, void *d
)
3400 struct gomp_splay_tree_rev_lookup_data
*data
;
3401 data
= (struct gomp_splay_tree_rev_lookup_data
*)d
;
3402 uintptr_t tgt_start
= key
->tgt
->tgt_start
+ key
->tgt_offset
;
3404 if (tgt_start
> data
->tgt_start
|| key
->tgt
->list_count
== 0)
3408 for (j
= 0; j
< key
->tgt
->list_count
; j
++)
3409 if (key
->tgt
->list
[j
].key
== key
)
3411 assert (j
< key
->tgt
->list_count
);
3412 uintptr_t tgt_end
= tgt_start
+ key
->tgt
->list
[j
].length
;
3414 if ((tgt_start
== data
->tgt_start
&& tgt_end
== data
->tgt_end
)
3415 || (tgt_end
> data
->tgt_start
&& tgt_start
< data
->tgt_end
))
3423 static inline splay_tree_key
3424 gomp_map_rev_lookup (splay_tree mem_map
, uint64_t tgt_start
, uint64_t tgt_end
,
3427 struct gomp_splay_tree_rev_lookup_data data
;
3429 data
.tgt_start
= tgt_start
;
3430 data
.tgt_end
= tgt_end
;
3432 if (tgt_start
!= tgt_end
)
3434 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3439 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3440 if (data
.key
!= NULL
|| zero_len
)
3445 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3452 bool present
, aligned
;
3456 /* Search just mapped reverse-offload data; returns index if found,
3460 gomp_map_cdata_lookup_int (struct cpy_data
*d
, uint64_t *devaddrs
,
3461 unsigned short *kinds
, uint64_t *sizes
, size_t n
,
3462 uint64_t tgt_start
, uint64_t tgt_end
)
3464 const bool short_mapkind
= true;
3465 const int typemask
= short_mapkind
? 0xff : 0x7;
3467 for (i
= 0; i
< n
; i
++)
3469 bool is_struct
= ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3470 == GOMP_MAP_STRUCT
);
3473 dev_end
= d
[i
].devaddr
+ sizes
[i
];
3476 if (i
+ sizes
[i
] < n
)
3477 dev_end
= d
[i
+ sizes
[i
]].devaddr
+ sizes
[i
+ sizes
[i
]];
3479 dev_end
= devaddrs
[i
+ sizes
[i
]] + sizes
[i
+ sizes
[i
]];
3481 if ((d
[i
].devaddr
== tgt_start
&& dev_end
== tgt_end
)
3482 || (dev_end
> tgt_start
&& d
[i
].devaddr
< tgt_end
))
3491 gomp_map_cdata_lookup (struct cpy_data
*d
, uint64_t *devaddrs
,
3492 unsigned short *kinds
, uint64_t *sizes
,
3493 size_t n
, uint64_t tgt_start
, uint64_t tgt_end
,
3497 if (tgt_start
!= tgt_end
)
3498 return gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3499 tgt_start
, tgt_end
);
3501 i
= gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3502 tgt_start
, tgt_end
);
3503 if (i
< n
|| zero_len
)
3508 return gomp_map_cdata_lookup_int (d
, devaddrs
, kinds
, sizes
, n
,
3509 tgt_start
, tgt_end
);
3512 /* Handle reverse offload. This is called by the device plugins for a
3513 reverse offload; it is not called if the outer target runs on the host.
3514 The mapping is simplified device-affecting constructs (except for target
3515 with device(ancestor:1)) must not be encountered; in particular not
3516 target (enter/exit) data. */
3519 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3520 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3521 void (*dev_to_host_cpy
) (void *, const void *, size_t, void*),
3522 void (*host_to_dev_cpy
) (void *, const void *, size_t, void*),
3525 /* Return early if there is no offload code. */
3526 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3528 /* Currently, this fails because of calculate_firstprivate_requirements
3529 below; it could be fixed but additional code needs to be updated to
3530 handle 32bit hosts - thus, it is not worthwhile. */
3531 if (sizeof (void *) != sizeof (uint64_t))
3532 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3534 struct cpy_data
*cdata
= NULL
;
3537 unsigned short *kinds
;
3538 const bool short_mapkind
= true;
3539 const int typemask
= short_mapkind
? 0xff : 0x7;
3540 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3542 reverse_splay_tree_key n
;
3543 struct reverse_splay_tree_key_s k
;
3546 gomp_mutex_lock (&devicep
->lock
);
3547 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3548 gomp_mutex_unlock (&devicep
->lock
);
3551 gomp_fatal ("Cannot find reverse-offload function");
3552 void (*host_fn
)() = (void (*)()) n
->k
->host_start
;
3554 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) || mapnum
== 0)
3556 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3557 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3558 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3562 devaddrs
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3563 sizes
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3564 kinds
= (unsigned short *) gomp_malloc (mapnum
* sizeof (unsigned short));
3565 if (dev_to_host_cpy
)
3567 dev_to_host_cpy (devaddrs
, (const void *) (uintptr_t) devaddrs_ptr
,
3568 mapnum
* sizeof (uint64_t), token
);
3569 dev_to_host_cpy (sizes
, (const void *) (uintptr_t) sizes_ptr
,
3570 mapnum
* sizeof (uint64_t), token
);
3571 dev_to_host_cpy (kinds
, (const void *) (uintptr_t) kinds_ptr
,
3572 mapnum
* sizeof (unsigned short), token
);
3576 gomp_copy_dev2host (devicep
, NULL
, devaddrs
,
3577 (const void *) (uintptr_t) devaddrs_ptr
,
3578 mapnum
* sizeof (uint64_t));
3579 gomp_copy_dev2host (devicep
, NULL
, sizes
,
3580 (const void *) (uintptr_t) sizes_ptr
,
3581 mapnum
* sizeof (uint64_t));
3582 gomp_copy_dev2host (devicep
, NULL
, kinds
, (const void *) (uintptr_t) kinds_ptr
,
3583 mapnum
* sizeof (unsigned short));
3587 size_t tgt_align
= 0, tgt_size
= 0;
3589 /* If actually executed on 32bit systems, the casts lead to wrong code;
3590 but 32bit with offloading is not supported; see top of this function. */
3591 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3592 (void *) (uintptr_t) kinds
,
3593 &tgt_align
, &tgt_size
);
3597 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3598 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3600 tgt
+= tgt_align
- al
;
3602 for (uint64_t i
= 0; i
< mapnum
; i
++)
3603 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3604 && devaddrs
[i
] != 0)
3606 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3607 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3608 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3609 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3611 else if (dev_to_host_cpy
)
3612 dev_to_host_cpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3613 (size_t) sizes
[i
], token
);
3615 gomp_copy_dev2host (devicep
, NULL
, tgt
+ tgt_size
,
3616 (void *) (uintptr_t) devaddrs
[i
],
3618 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3619 tgt_size
= tgt_size
+ sizes
[i
];
3620 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3622 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3623 == GOMP_MAP_ATTACH
))
3625 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3626 = (uint64_t) devaddrs
[i
];
3632 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3634 size_t j
, struct_cpy
= 0;
3636 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3637 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3638 gomp_mutex_lock (&devicep
->lock
);
3639 for (uint64_t i
= 0; i
< mapnum
; i
++)
3641 if (devaddrs
[i
] == 0)
3644 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3647 case GOMP_MAP_FIRSTPRIVATE
:
3648 case GOMP_MAP_FIRSTPRIVATE_INT
:
3651 case GOMP_MAP_DELETE
:
3652 case GOMP_MAP_RELEASE
:
3653 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3654 /* Assume it is present; look it up - but ignore unless the
3655 present clause is there. */
3656 case GOMP_MAP_ALLOC
:
3658 case GOMP_MAP_FORCE_ALLOC
:
3659 case GOMP_MAP_FORCE_FROM
:
3660 case GOMP_MAP_ALWAYS_FROM
:
3662 case GOMP_MAP_TOFROM
:
3663 case GOMP_MAP_FORCE_TO
:
3664 case GOMP_MAP_FORCE_TOFROM
:
3665 case GOMP_MAP_ALWAYS_TO
:
3666 case GOMP_MAP_ALWAYS_TOFROM
:
3667 case GOMP_MAP_PRESENT_FROM
:
3668 case GOMP_MAP_PRESENT_TO
:
3669 case GOMP_MAP_PRESENT_TOFROM
:
3670 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
3671 case GOMP_MAP_ALWAYS_PRESENT_TO
:
3672 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
3673 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3674 cdata
[i
].devaddr
= devaddrs
[i
];
3675 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3676 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
3677 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3679 devaddrs
[i
] + sizes
[i
], zero_len
);
3683 cdata
[i
].present
= true;
3684 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
3688 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3690 devaddrs
[i
] + sizes
[i
], zero_len
);
3691 cdata
[i
].present
= n2
!= NULL
;
3693 if (!cdata
[i
].present
&& GOMP_MAP_PRESENT_P (kind
))
3695 gomp_mutex_unlock (&devicep
->lock
);
3696 #ifdef HAVE_INTTYPES_H
3697 gomp_fatal ("present clause: no corresponding data on "
3698 "parent device at %p with size %"PRIu64
,
3699 (void *) (uintptr_t) devaddrs
[i
],
3700 (uint64_t) sizes
[i
]);
3702 gomp_fatal ("present clause: no corresponding data on "
3703 "parent device at %p with size %lu",
3704 (void *) (uintptr_t) devaddrs
[i
],
3705 (unsigned long) sizes
[i
]);
3709 else if (!cdata
[i
].present
3710 && kind
!= GOMP_MAP_DELETE
3711 && kind
!= GOMP_MAP_RELEASE
3712 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
3714 cdata
[i
].aligned
= true;
3715 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3717 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3720 else if (n2
!= NULL
)
3721 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3722 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3723 if (((!cdata
[i
].present
|| struct_cpy
)
3724 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
3725 || kind
== GOMP_MAP_FORCE_TO
3726 || kind
== GOMP_MAP_FORCE_TOFROM
3727 || GOMP_MAP_ALWAYS_TO_P (kind
))
3729 if (dev_to_host_cpy
)
3730 dev_to_host_cpy ((void *) (uintptr_t) devaddrs
[i
],
3731 (void *) (uintptr_t) cdata
[i
].devaddr
,
3734 gomp_copy_dev2host (devicep
, NULL
,
3735 (void *) (uintptr_t) devaddrs
[i
],
3736 (void *) (uintptr_t) cdata
[i
].devaddr
,
3742 case GOMP_MAP_ATTACH
:
3743 case GOMP_MAP_POINTER
:
3744 case GOMP_MAP_ALWAYS_POINTER
:
3745 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3746 devaddrs
[i
] + sizes
[i
],
3747 devaddrs
[i
] + sizes
[i
]
3748 + sizeof (void*), false);
3749 cdata
[i
].present
= n2
!= NULL
;
3750 cdata
[i
].devaddr
= devaddrs
[i
];
3752 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3753 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3756 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3757 devaddrs
[i
] + sizes
[i
],
3758 devaddrs
[i
] + sizes
[i
]
3759 + sizeof (void*), false);
3762 cdata
[i
].present
= true;
3763 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3764 - cdata
[j
].devaddr
);
3767 if (!cdata
[i
].present
)
3768 devaddrs
[i
] = (uintptr_t) gomp_malloc (sizeof (void*));
3769 /* Assume that when present, the pointer is already correct. */
3771 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
3774 case GOMP_MAP_TO_PSET
:
3775 /* Assume that when present, the pointers are fine and no 'to:'
3777 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3778 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
3780 cdata
[i
].present
= n2
!= NULL
;
3781 cdata
[i
].devaddr
= devaddrs
[i
];
3783 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3784 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3787 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3789 devaddrs
[i
] + sizes
[i
], false);
3792 cdata
[i
].present
= true;
3793 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3794 - cdata
[j
].devaddr
);
3797 if (!cdata
[i
].present
)
3799 cdata
[i
].aligned
= true;
3800 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3802 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3804 if (dev_to_host_cpy
)
3805 dev_to_host_cpy ((void *) (uintptr_t) devaddrs
[i
],
3806 (void *) (uintptr_t) cdata
[i
].devaddr
,
3809 gomp_copy_dev2host (devicep
, NULL
,
3810 (void *) (uintptr_t) devaddrs
[i
],
3811 (void *) (uintptr_t) cdata
[i
].devaddr
,
3814 for (j
= i
+ 1; j
< mapnum
; j
++)
3816 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
3817 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
3818 && !GOMP_MAP_POINTER_P (kind
))
3820 if (devaddrs
[j
] < devaddrs
[i
])
3822 if (cdata
[i
].present
)
3824 if (devaddrs
[j
] == 0)
3826 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
3831 cdata
[i
].present
= true;
3832 cdata
[j
].devaddr
= devaddrs
[j
];
3833 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
3835 devaddrs
[j
] + sizeof (void*),
3838 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
3839 - cdata
[k
].devaddr
);
3842 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3844 devaddrs
[j
] + sizeof (void*),
3848 gomp_mutex_unlock (&devicep
->lock
);
3849 gomp_fatal ("Pointer target wasn't mapped");
3851 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
3852 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3854 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
3855 = (void *) (uintptr_t) devaddrs
[j
];
3859 case GOMP_MAP_STRUCT
:
3860 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
3861 devaddrs
[i
+ sizes
[i
]]
3862 + sizes
[i
+ sizes
[i
]], false);
3863 cdata
[i
].present
= n2
!= NULL
;
3864 cdata
[i
].devaddr
= devaddrs
[i
];
3865 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
3868 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
3870 + sizes
[i
+ sizes
[i
]]);
3871 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3872 cdata
[i
].aligned
= true;
3873 devaddrs
[i
] = (uintptr_t) gomp_aligned_alloc (align
, sz
);
3874 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
3877 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3878 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3881 gomp_mutex_unlock (&devicep
->lock
);
3882 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
3885 gomp_mutex_unlock (&devicep
->lock
);
3890 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3892 uint64_t struct_cpy
= 0;
3893 bool clean_struct
= false;
3894 for (uint64_t i
= 0; i
< mapnum
; i
++)
3896 if (cdata
[i
].devaddr
== 0)
3898 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3899 bool copy
= !cdata
[i
].present
|| struct_cpy
;
3902 case GOMP_MAP_FORCE_FROM
:
3903 case GOMP_MAP_FORCE_TOFROM
:
3904 case GOMP_MAP_ALWAYS_FROM
:
3905 case GOMP_MAP_ALWAYS_TOFROM
:
3906 case GOMP_MAP_PRESENT_FROM
:
3907 case GOMP_MAP_PRESENT_TOFROM
:
3908 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
3909 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
3913 case GOMP_MAP_TOFROM
:
3914 if (copy
&& host_to_dev_cpy
)
3915 host_to_dev_cpy ((void *) (uintptr_t) cdata
[i
].devaddr
,
3916 (void *) (uintptr_t) devaddrs
[i
],
3919 gomp_copy_host2dev (devicep
, NULL
,
3920 (void *) (uintptr_t) cdata
[i
].devaddr
,
3921 (void *) (uintptr_t) devaddrs
[i
],
3922 sizes
[i
], false, NULL
);
3931 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
3933 clean_struct
= true;
3934 struct_cpy
= sizes
[i
];
3936 else if (!cdata
[i
].present
&& cdata
[i
].aligned
)
3937 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3938 else if (!cdata
[i
].present
)
3939 free ((void *) (uintptr_t) devaddrs
[i
]);
3942 for (uint64_t i
= 0; i
< mapnum
; i
++)
3943 if (!cdata
[i
].present
3944 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3945 == GOMP_MAP_STRUCT
))
3947 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
3948 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3957 /* Host fallback for GOMP_target_data{,_ext} routines. */
3960 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
3962 struct gomp_task_icv
*icv
= gomp_icv (false);
3964 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3966 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3967 "be used for offloading");
3969 if (icv
->target_data
)
3971 /* Even when doing a host fallback, if there are any active
3972 #pragma omp target data constructs, need to remember the
3973 new #pragma omp target data, otherwise GOMP_target_end_data
3974 would get out of sync. */
3975 struct target_mem_desc
*tgt
3976 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3977 NULL
, GOMP_MAP_VARS_DATA
);
3978 tgt
->prev
= icv
->target_data
;
3979 icv
->target_data
= tgt
;
3984 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3985 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3987 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3990 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3991 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3992 return gomp_target_data_fallback (devicep
);
3994 struct target_mem_desc
*tgt
3995 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3996 NULL
, GOMP_MAP_VARS_DATA
);
3997 struct gomp_task_icv
*icv
= gomp_icv (true);
3998 tgt
->prev
= icv
->target_data
;
3999 icv
->target_data
= tgt
;
4003 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
4004 size_t *sizes
, unsigned short *kinds
)
4006 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4009 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4010 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4011 return gomp_target_data_fallback (devicep
);
4013 struct target_mem_desc
*tgt
4014 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
4015 NULL
, GOMP_MAP_VARS_DATA
);
4016 struct gomp_task_icv
*icv
= gomp_icv (true);
4017 tgt
->prev
= icv
->target_data
;
4018 icv
->target_data
= tgt
;
4022 GOMP_target_end_data (void)
4024 struct gomp_task_icv
*icv
= gomp_icv (false);
4025 if (icv
->target_data
)
4027 struct target_mem_desc
*tgt
= icv
->target_data
;
4028 icv
->target_data
= tgt
->prev
;
4029 gomp_unmap_vars (tgt
, true, NULL
);
4034 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
4035 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
4037 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4040 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4041 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4044 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
4048 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
4049 size_t *sizes
, unsigned short *kinds
,
4050 unsigned int flags
, void **depend
)
4052 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4054 /* If there are depend clauses, but nowait is not present,
4055 block the parent task until the dependencies are resolved
4056 and then just continue with the rest of the function as if it
4057 is a merged task. Until we are able to schedule task during
4058 variable mapping or unmapping, ignore nowait if depend clauses
4062 struct gomp_thread
*thr
= gomp_thread ();
4063 if (thr
->task
&& thr
->task
->depend_hash
)
4065 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4067 && !thr
->task
->final_task
)
4069 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4070 mapnum
, hostaddrs
, sizes
, kinds
,
4071 flags
| GOMP_TARGET_FLAG_UPDATE
,
4072 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
4077 struct gomp_team
*team
= thr
->ts
.team
;
4078 /* If parallel or taskgroup has been cancelled, don't start new
4080 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4082 if (gomp_team_barrier_cancelled (&team
->barrier
))
4084 if (thr
->task
->taskgroup
)
4086 if (thr
->task
->taskgroup
->cancelled
)
4088 if (thr
->task
->taskgroup
->workshare
4089 && thr
->task
->taskgroup
->prev
4090 && thr
->task
->taskgroup
->prev
->cancelled
)
4095 gomp_task_maybe_wait_for_dependencies (depend
);
4101 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4102 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4105 struct gomp_thread
*thr
= gomp_thread ();
4106 struct gomp_team
*team
= thr
->ts
.team
;
4107 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4108 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4110 if (gomp_team_barrier_cancelled (&team
->barrier
))
4112 if (thr
->task
->taskgroup
)
4114 if (thr
->task
->taskgroup
->cancelled
)
4116 if (thr
->task
->taskgroup
->workshare
4117 && thr
->task
->taskgroup
->prev
4118 && thr
->task
->taskgroup
->prev
->cancelled
)
4123 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
4127 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
4128 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
4129 htab_t
*refcount_set
)
4131 const int typemask
= 0xff;
4133 gomp_mutex_lock (&devicep
->lock
);
4134 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
4136 gomp_mutex_unlock (&devicep
->lock
);
4140 for (i
= 0; i
< mapnum
; i
++)
4141 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
4143 struct splay_tree_key_s cur_node
;
4144 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
4145 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
4146 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
4149 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
4154 splay_tree_key remove_vars
[mapnum
];
4156 for (i
= 0; i
< mapnum
; i
++)
4158 struct splay_tree_key_s cur_node
;
4159 unsigned char kind
= kinds
[i
] & typemask
;
4163 case GOMP_MAP_ALWAYS_FROM
:
4164 case GOMP_MAP_DELETE
:
4165 case GOMP_MAP_RELEASE
:
4166 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
4167 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
4168 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
4169 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
4170 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4171 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
4172 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
4173 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
4177 bool delete_p
= (kind
== GOMP_MAP_DELETE
4178 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
4179 bool do_copy
, do_remove
;
4180 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
4183 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
4184 || kind
== GOMP_MAP_ALWAYS_FROM
)
4186 if (k
->aux
&& k
->aux
->attach_count
)
4188 /* We have to be careful not to overwrite still attached
4189 pointers during the copyback to host. */
4190 uintptr_t addr
= k
->host_start
;
4191 while (addr
< k
->host_end
)
4193 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
4194 if (k
->aux
->attach_count
[i
] == 0)
4195 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
4196 (void *) (k
->tgt
->tgt_start
4198 + addr
- k
->host_start
),
4200 addr
+= sizeof (void *);
4204 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
4205 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
4206 + cur_node
.host_start
4208 cur_node
.host_end
- cur_node
.host_start
);
4211 /* Structure elements lists are removed altogether at once, which
4212 may cause immediate deallocation of the target_mem_desc, causing
4213 errors if we still have following element siblings to copy back.
4214 While we're at it, it also seems more disciplined to simply
4215 queue all removals together for processing below.
4217 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4218 not have this problem, since they maintain an additional
4219 tgt->refcount = 1 reference to the target_mem_desc to start with.
4222 remove_vars
[nrmvars
++] = k
;
4225 case GOMP_MAP_DETACH
:
4228 gomp_mutex_unlock (&devicep
->lock
);
4229 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4234 for (int i
= 0; i
< nrmvars
; i
++)
4235 gomp_remove_var (devicep
, remove_vars
[i
]);
4237 gomp_mutex_unlock (&devicep
->lock
);
4241 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
4242 size_t *sizes
, unsigned short *kinds
,
4243 unsigned int flags
, void **depend
)
4245 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4247 /* If there are depend clauses, but nowait is not present,
4248 block the parent task until the dependencies are resolved
4249 and then just continue with the rest of the function as if it
4250 is a merged task. Until we are able to schedule task during
4251 variable mapping or unmapping, ignore nowait if depend clauses
4255 struct gomp_thread
*thr
= gomp_thread ();
4256 if (thr
->task
&& thr
->task
->depend_hash
)
4258 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4260 && !thr
->task
->final_task
)
4262 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4263 mapnum
, hostaddrs
, sizes
, kinds
,
4264 flags
, depend
, NULL
,
4265 GOMP_TARGET_TASK_DATA
))
4270 struct gomp_team
*team
= thr
->ts
.team
;
4271 /* If parallel or taskgroup has been cancelled, don't start new
4273 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4275 if (gomp_team_barrier_cancelled (&team
->barrier
))
4277 if (thr
->task
->taskgroup
)
4279 if (thr
->task
->taskgroup
->cancelled
)
4281 if (thr
->task
->taskgroup
->workshare
4282 && thr
->task
->taskgroup
->prev
4283 && thr
->task
->taskgroup
->prev
->cancelled
)
4288 gomp_task_maybe_wait_for_dependencies (depend
);
4294 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4295 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4298 struct gomp_thread
*thr
= gomp_thread ();
4299 struct gomp_team
*team
= thr
->ts
.team
;
4300 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4301 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4303 if (gomp_team_barrier_cancelled (&team
->barrier
))
4305 if (thr
->task
->taskgroup
)
4307 if (thr
->task
->taskgroup
->cancelled
)
4309 if (thr
->task
->taskgroup
->workshare
4310 && thr
->task
->taskgroup
->prev
4311 && thr
->task
->taskgroup
->prev
->cancelled
)
4316 htab_t refcount_set
= htab_create (mapnum
);
4318 /* The variables are mapped separately such that they can be released
4321 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4322 for (i
= 0; i
< mapnum
; i
++)
4323 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4325 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
4326 &kinds
[i
], true, &refcount_set
,
4327 GOMP_MAP_VARS_ENTER_DATA
);
4330 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
4332 for (j
= i
+ 1; j
< mapnum
; j
++)
4333 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
4334 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
4336 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
4337 &kinds
[i
], true, &refcount_set
,
4338 GOMP_MAP_VARS_ENTER_DATA
);
4341 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
4343 /* An attach operation must be processed together with the mapped
4344 base-pointer list item. */
4345 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4346 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4350 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4351 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4353 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
4354 htab_free (refcount_set
);
4358 gomp_target_task_fn (void *data
)
4360 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
4361 struct gomp_device_descr
*devicep
= ttask
->devicep
;
4363 if (ttask
->fn
!= NULL
)
4367 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4368 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
4369 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
4371 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
4372 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
4377 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
4380 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
4384 void *actual_arguments
;
4385 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4388 actual_arguments
= ttask
->hostaddrs
;
4392 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
4393 NULL
, ttask
->sizes
, ttask
->kinds
, true,
4394 NULL
, GOMP_MAP_VARS_TARGET
);
4395 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
4397 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
4399 assert (devicep
->async_run_func
);
4400 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
4401 ttask
->args
, (void *) ttask
);
4404 else if (devicep
== NULL
4405 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4406 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4410 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
4411 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4412 ttask
->kinds
, true);
4415 htab_t refcount_set
= htab_create (ttask
->mapnum
);
4416 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4417 for (i
= 0; i
< ttask
->mapnum
; i
++)
4418 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4420 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
4421 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
4422 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4423 i
+= ttask
->sizes
[i
];
4426 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
4427 &ttask
->kinds
[i
], true, &refcount_set
,
4428 GOMP_MAP_VARS_ENTER_DATA
);
4430 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4431 ttask
->kinds
, &refcount_set
);
4432 htab_free (refcount_set
);
4438 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
4442 struct gomp_task_icv
*icv
= gomp_icv (true);
4443 icv
->thread_limit_var
4444 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4450 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
4451 unsigned int thread_limit
, bool first
)
4453 struct gomp_thread
*thr
= gomp_thread ();
4458 struct gomp_task_icv
*icv
= gomp_icv (true);
4459 icv
->thread_limit_var
4460 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4462 (void) num_teams_high
;
4463 if (num_teams_low
== 0)
4465 thr
->num_teams
= num_teams_low
- 1;
4468 else if (thr
->team_num
== thr
->num_teams
)
4476 omp_target_alloc (size_t size
, int device_num
)
4478 if (device_num
== omp_initial_device
4479 || device_num
== gomp_get_num_devices ())
4480 return malloc (size
);
4482 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4483 if (devicep
== NULL
)
4486 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4487 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4488 return malloc (size
);
4490 gomp_mutex_lock (&devicep
->lock
);
4491 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
4492 gomp_mutex_unlock (&devicep
->lock
);
4497 omp_target_free (void *device_ptr
, int device_num
)
4499 if (device_num
== omp_initial_device
4500 || device_num
== gomp_get_num_devices ())
4506 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4507 if (devicep
== NULL
|| device_ptr
== NULL
)
4510 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4511 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4517 gomp_mutex_lock (&devicep
->lock
);
4518 gomp_free_device_memory (devicep
, device_ptr
);
4519 gomp_mutex_unlock (&devicep
->lock
);
4523 gomp_usm_alloc (size_t size
, int device_num
)
4525 if (device_num
== gomp_get_num_devices ())
4526 return malloc (size
);
4528 struct gomp_device_descr
*devicep
= resolve_device (device_num
, true);
4529 if (devicep
== NULL
)
4532 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4533 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4534 return malloc (size
);
4537 gomp_mutex_lock (&devicep
->lock
);
4538 if (devicep
->usm_alloc_func
)
4539 ret
= devicep
->usm_alloc_func (devicep
->target_id
, size
);
4540 gomp_mutex_unlock (&devicep
->lock
);
4545 gomp_usm_free (void *device_ptr
, int device_num
)
4547 if (device_ptr
== NULL
)
4550 if (device_num
== gomp_get_num_devices ())
4556 struct gomp_device_descr
*devicep
= resolve_device (device_num
, true);
4557 if (devicep
== NULL
)
4560 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4561 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4567 gomp_mutex_lock (&devicep
->lock
);
4568 if (devicep
->usm_free_func
4569 && !devicep
->usm_free_func (devicep
->target_id
, device_ptr
))
4571 gomp_mutex_unlock (&devicep
->lock
);
4572 gomp_fatal ("error in freeing device memory block at %p", device_ptr
);
4574 gomp_mutex_unlock (&devicep
->lock
);
4578 omp_target_is_present (const void *ptr
, int device_num
)
4580 if (device_num
== omp_initial_device
4581 || device_num
== gomp_get_num_devices ())
4584 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4585 if (devicep
== NULL
)
4591 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4592 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4595 gomp_mutex_lock (&devicep
->lock
);
4596 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4597 struct splay_tree_key_s cur_node
;
4599 cur_node
.host_start
= (uintptr_t) ptr
;
4600 cur_node
.host_end
= cur_node
.host_start
;
4601 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4602 int ret
= n
!= NULL
;
4603 gomp_mutex_unlock (&devicep
->lock
);
4608 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
4609 struct gomp_device_descr
**dst_devicep
,
4610 struct gomp_device_descr
**src_devicep
)
4612 if (dst_device_num
!= gomp_get_num_devices ()
4613 /* Above gomp_get_num_devices has to be called unconditionally. */
4614 && dst_device_num
!= omp_initial_device
)
4616 *dst_devicep
= resolve_device (dst_device_num
, false);
4617 if (*dst_devicep
== NULL
)
4620 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4621 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4622 *dst_devicep
= NULL
;
4625 if (src_device_num
!= num_devices_openmp
4626 && src_device_num
!= omp_initial_device
)
4628 *src_devicep
= resolve_device (src_device_num
, false);
4629 if (*src_devicep
== NULL
)
4632 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4633 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4634 *src_devicep
= NULL
;
4641 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
4642 size_t dst_offset
, size_t src_offset
,
4643 struct gomp_device_descr
*dst_devicep
,
4644 struct gomp_device_descr
*src_devicep
)
4647 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
4649 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
4652 if (src_devicep
== NULL
)
4654 gomp_mutex_lock (&dst_devicep
->lock
);
4655 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4656 (char *) dst
+ dst_offset
,
4657 (char *) src
+ src_offset
, length
);
4658 gomp_mutex_unlock (&dst_devicep
->lock
);
4659 return (ret
? 0 : EINVAL
);
4661 if (dst_devicep
== NULL
)
4663 gomp_mutex_lock (&src_devicep
->lock
);
4664 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4665 (char *) dst
+ dst_offset
,
4666 (char *) src
+ src_offset
, length
);
4667 gomp_mutex_unlock (&src_devicep
->lock
);
4668 return (ret
? 0 : EINVAL
);
4670 if (src_devicep
== dst_devicep
)
4672 gomp_mutex_lock (&src_devicep
->lock
);
4673 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4674 (char *) dst
+ dst_offset
,
4675 (char *) src
+ src_offset
, length
);
4676 gomp_mutex_unlock (&src_devicep
->lock
);
4677 return (ret
? 0 : EINVAL
);
4683 omp_target_memcpy (void *dst
, const void *src
, size_t length
, size_t dst_offset
,
4684 size_t src_offset
, int dst_device_num
, int src_device_num
)
4686 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4687 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4688 &dst_devicep
, &src_devicep
);
4693 ret
= omp_target_memcpy_copy (dst
, src
, length
, dst_offset
, src_offset
,
4694 dst_devicep
, src_devicep
);
4706 struct gomp_device_descr
*dst_devicep
;
4707 struct gomp_device_descr
*src_devicep
;
4708 } omp_target_memcpy_data
;
4711 omp_target_memcpy_async_helper (void *args
)
4713 omp_target_memcpy_data
*a
= args
;
4714 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
4715 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
4716 gomp_fatal ("omp_target_memcpy failed");
4720 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
4721 size_t dst_offset
, size_t src_offset
,
4722 int dst_device_num
, int src_device_num
,
4723 int depobj_count
, omp_depend_t
*depobj_list
)
4725 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4726 unsigned int flags
= 0;
4727 void *depend
[depobj_count
+ 5];
4729 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4730 &dst_devicep
, &src_devicep
);
4732 omp_target_memcpy_data s
= {
4736 .dst_offset
= dst_offset
,
4737 .src_offset
= src_offset
,
4738 .dst_devicep
= dst_devicep
,
4739 .src_devicep
= src_devicep
4745 if (depobj_count
> 0 && depobj_list
!= NULL
)
4747 flags
|= GOMP_TASK_FLAG_DEPEND
;
4749 depend
[1] = (void *) (uintptr_t) depobj_count
;
4750 depend
[2] = depend
[3] = depend
[4] = 0;
4751 for (i
= 0; i
< depobj_count
; ++i
)
4752 depend
[i
+ 5] = &depobj_list
[i
];
4755 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
4756 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4762 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
4763 int num_dims
, const size_t *volume
,
4764 const size_t *dst_offsets
,
4765 const size_t *src_offsets
,
4766 const size_t *dst_dimensions
,
4767 const size_t *src_dimensions
,
4768 struct gomp_device_descr
*dst_devicep
,
4769 struct gomp_device_descr
*src_devicep
)
4771 size_t dst_slice
= element_size
;
4772 size_t src_slice
= element_size
;
4773 size_t j
, dst_off
, src_off
, length
;
4778 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
4779 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
4780 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
4782 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
4784 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
4788 else if (src_devicep
== NULL
)
4789 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4790 (char *) dst
+ dst_off
,
4791 (const char *) src
+ src_off
,
4793 else if (dst_devicep
== NULL
)
4794 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4795 (char *) dst
+ dst_off
,
4796 (const char *) src
+ src_off
,
4798 else if (src_devicep
== dst_devicep
)
4799 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4800 (char *) dst
+ dst_off
,
4801 (const char *) src
+ src_off
,
4805 return ret
? 0 : EINVAL
;
4808 /* FIXME: it would be nice to have some plugin function to handle
4809 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4810 be handled in the generic recursion below, and for host-host it
4811 should be used even for any num_dims >= 2. */
4813 for (i
= 1; i
< num_dims
; i
++)
4814 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
4815 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
4817 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
4818 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
4820 for (j
= 0; j
< volume
[0]; j
++)
4822 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
4823 (const char *) src
+ src_off
,
4824 element_size
, num_dims
- 1,
4825 volume
+ 1, dst_offsets
+ 1,
4826 src_offsets
+ 1, dst_dimensions
+ 1,
4827 src_dimensions
+ 1, dst_devicep
,
4831 dst_off
+= dst_slice
;
4832 src_off
+= src_slice
;
4838 omp_target_memcpy_rect_check (void *dst
, const void *src
, int dst_device_num
,
4840 struct gomp_device_descr
**dst_devicep
,
4841 struct gomp_device_descr
**src_devicep
)
4846 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4847 dst_devicep
, src_devicep
);
4851 if (*src_devicep
!= NULL
&& *dst_devicep
!= NULL
&& *src_devicep
!= *dst_devicep
)
4858 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
4859 size_t element_size
, int num_dims
,
4860 const size_t *volume
, const size_t *dst_offsets
,
4861 const size_t *src_offsets
,
4862 const size_t *dst_dimensions
,
4863 const size_t *src_dimensions
,
4864 struct gomp_device_descr
*dst_devicep
,
4865 struct gomp_device_descr
*src_devicep
)
4868 gomp_mutex_lock (&src_devicep
->lock
);
4869 else if (dst_devicep
)
4870 gomp_mutex_lock (&dst_devicep
->lock
);
4871 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
4872 volume
, dst_offsets
, src_offsets
,
4873 dst_dimensions
, src_dimensions
,
4874 dst_devicep
, src_devicep
);
4876 gomp_mutex_unlock (&src_devicep
->lock
);
4877 else if (dst_devicep
)
4878 gomp_mutex_unlock (&dst_devicep
->lock
);
4884 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
4885 int num_dims
, const size_t *volume
,
4886 const size_t *dst_offsets
,
4887 const size_t *src_offsets
,
4888 const size_t *dst_dimensions
,
4889 const size_t *src_dimensions
,
4890 int dst_device_num
, int src_device_num
)
4892 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4894 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4895 src_device_num
, &dst_devicep
,
4901 int ret
= omp_target_memcpy_rect_copy (dst
, src
, element_size
, num_dims
,
4902 volume
, dst_offsets
, src_offsets
,
4903 dst_dimensions
, src_dimensions
,
4904 dst_devicep
, src_devicep
);
4913 size_t element_size
;
4914 const size_t *volume
;
4915 const size_t *dst_offsets
;
4916 const size_t *src_offsets
;
4917 const size_t *dst_dimensions
;
4918 const size_t *src_dimensions
;
4919 struct gomp_device_descr
*dst_devicep
;
4920 struct gomp_device_descr
*src_devicep
;
4922 } omp_target_memcpy_rect_data
;
4925 omp_target_memcpy_rect_async_helper (void *args
)
4927 omp_target_memcpy_rect_data
*a
= args
;
4928 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
4929 a
->num_dims
, a
->volume
, a
->dst_offsets
,
4930 a
->src_offsets
, a
->dst_dimensions
,
4931 a
->src_dimensions
, a
->dst_devicep
,
4934 gomp_fatal ("omp_target_memcpy_rect failed");
4938 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
4939 int num_dims
, const size_t *volume
,
4940 const size_t *dst_offsets
,
4941 const size_t *src_offsets
,
4942 const size_t *dst_dimensions
,
4943 const size_t *src_dimensions
,
4944 int dst_device_num
, int src_device_num
,
4945 int depobj_count
, omp_depend_t
*depobj_list
)
4947 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4949 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4950 src_device_num
, &dst_devicep
,
4952 void *depend
[depobj_count
+ 5];
4955 omp_target_memcpy_rect_data s
= {
4958 .element_size
= element_size
,
4959 .num_dims
= num_dims
,
4961 .dst_offsets
= dst_offsets
,
4962 .src_offsets
= src_offsets
,
4963 .dst_dimensions
= dst_dimensions
,
4964 .src_dimensions
= src_dimensions
,
4965 .dst_devicep
= dst_devicep
,
4966 .src_devicep
= src_devicep
4972 if (depobj_count
> 0 && depobj_list
!= NULL
)
4974 flags
|= GOMP_TASK_FLAG_DEPEND
;
4976 depend
[1] = (void *) (uintptr_t) depobj_count
;
4977 depend
[2] = depend
[3] = depend
[4] = 0;
4978 for (i
= 0; i
< depobj_count
; ++i
)
4979 depend
[i
+ 5] = &depobj_list
[i
];
4982 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
4983 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4989 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
4990 size_t size
, size_t device_offset
, int device_num
)
4992 if (device_num
== omp_initial_device
4993 || device_num
== gomp_get_num_devices ())
4996 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4997 if (devicep
== NULL
)
5000 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5001 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5004 gomp_mutex_lock (&devicep
->lock
);
5006 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5007 struct splay_tree_key_s cur_node
;
5010 cur_node
.host_start
= (uintptr_t) host_ptr
;
5011 cur_node
.host_end
= cur_node
.host_start
+ size
;
5012 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
5015 if (n
->tgt
->tgt_start
+ n
->tgt_offset
5016 == (uintptr_t) device_ptr
+ device_offset
5017 && n
->host_start
<= cur_node
.host_start
5018 && n
->host_end
>= cur_node
.host_end
)
5023 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
5024 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
5028 tgt
->to_free
= NULL
;
5030 tgt
->list_count
= 0;
5031 tgt
->device_descr
= devicep
;
5032 splay_tree_node array
= tgt
->array
;
5033 splay_tree_key k
= &array
->key
;
5034 k
->host_start
= cur_node
.host_start
;
5035 k
->host_end
= cur_node
.host_end
;
5037 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
5038 k
->refcount
= REFCOUNT_INFINITY
;
5039 k
->dynamic_refcount
= 0;
5042 array
->right
= NULL
;
5043 splay_tree_insert (&devicep
->mem_map
, array
);
5046 gomp_mutex_unlock (&devicep
->lock
);
5051 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
5053 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5054 if (devicep
== NULL
)
5057 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5060 gomp_mutex_lock (&devicep
->lock
);
5062 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5063 struct splay_tree_key_s cur_node
;
5066 cur_node
.host_start
= (uintptr_t) ptr
;
5067 cur_node
.host_end
= cur_node
.host_start
;
5068 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
5070 && n
->host_start
== cur_node
.host_start
5071 && n
->refcount
== REFCOUNT_INFINITY
5072 && n
->tgt
->tgt_start
== 0
5073 && n
->tgt
->to_free
== NULL
5074 && n
->tgt
->refcount
== 1
5075 && n
->tgt
->list_count
== 0)
5077 splay_tree_remove (&devicep
->mem_map
, n
);
5078 gomp_unmap_tgt (n
->tgt
);
5082 gomp_mutex_unlock (&devicep
->lock
);
5087 omp_get_mapped_ptr (const void *ptr
, int device_num
)
5089 if (device_num
== omp_initial_device
5090 || device_num
== omp_get_initial_device ())
5091 return (void *) ptr
;
5093 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5094 if (devicep
== NULL
)
5097 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5098 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5099 return (void *) ptr
;
5101 gomp_mutex_lock (&devicep
->lock
);
5103 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5104 struct splay_tree_key_s cur_node
;
5107 cur_node
.host_start
= (uintptr_t) ptr
;
5108 cur_node
.host_end
= cur_node
.host_start
;
5109 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
5113 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
5114 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
5117 gomp_mutex_unlock (&devicep
->lock
);
5123 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
5125 if (device_num
== omp_initial_device
5126 || device_num
== gomp_get_num_devices ())
5129 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5130 if (devicep
== NULL
)
5133 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5136 if (devicep
->is_usm_ptr_func
&& devicep
->is_usm_ptr_func ((void *) ptr
))
5143 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
5146 if (device_num
== omp_initial_device
5147 || device_num
== gomp_get_num_devices ())
5148 return gomp_pause_host ();
5150 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5151 if (devicep
== NULL
)
5154 /* Do nothing for target devices for now. */
5159 omp_pause_resource_all (omp_pause_resource_t kind
)
5162 if (gomp_pause_host ())
5164 /* Do nothing for target devices for now. */
5168 ialias (omp_pause_resource
)
5169 ialias (omp_pause_resource_all
)
5172 GOMP_evaluate_target_device (int device_num
, const char *kind
,
5173 const char *arch
, const char *isa
)
5178 device_num
= omp_get_default_device ();
5180 if (kind
&& strcmp (kind
, "any") == 0)
5183 gomp_debug (1, "%s: device_num = %u, kind=%s, arch=%s, isa=%s",
5184 __FUNCTION__
, device_num
, kind
, arch
, isa
);
5186 if (omp_get_device_num () == device_num
)
5187 result
= GOMP_evaluate_current_device (kind
, arch
, isa
);
5190 if (!omp_is_initial_device ())
5191 /* Accelerators are not expected to know about other devices. */
5195 struct gomp_device_descr
*device
= resolve_device (device_num
, true);
5198 else if (device
->evaluate_device_func
)
5199 result
= device
->evaluate_device_func (device_num
, kind
, arch
,
5204 gomp_debug (1, " -> %s\n", result
? "true" : "false");
5208 #ifdef PLUGIN_SUPPORT
5210 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
5212 The handles of the found functions are stored in the corresponding fields
5213 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5216 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
5217 const char *plugin_name
)
5219 const char *err
= NULL
, *last_missing
= NULL
;
5221 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
5223 #if OFFLOAD_DEFAULTED
5229 /* Check if all required functions are available in the plugin and store
5230 their handlers. None of the symbols can legitimately be NULL,
5231 so we don't need to check dlerror all the time. */
5233 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5235 /* Similar, but missing functions are not an error. Return false if
5236 failed, true otherwise. */
5237 #define DLSYM_OPT(f, n) \
5238 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5239 || (last_missing = #n, 0))
5242 if (device
->version_func () != GOMP_VERSION
)
5244 err
= "plugin version mismatch";
5251 DLSYM (get_num_devices
);
5252 DLSYM (init_device
);
5253 DLSYM (fini_device
);
5255 DLSYM (unload_image
);
5258 DLSYM_OPT (usm_alloc
, usm_alloc
);
5259 DLSYM_OPT (usm_free
, usm_free
);
5260 DLSYM_OPT (is_usm_ptr
, is_usm_ptr
);
5263 DLSYM (evaluate_device
);
5264 device
->capabilities
= device
->get_caps_func ();
5265 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5268 DLSYM_OPT (async_run
, async_run
);
5269 DLSYM_OPT (can_run
, can_run
);
5272 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5274 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
5275 || !DLSYM_OPT (openacc
.create_thread_data
,
5276 openacc_create_thread_data
)
5277 || !DLSYM_OPT (openacc
.destroy_thread_data
,
5278 openacc_destroy_thread_data
)
5279 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
5280 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
5281 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
5282 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
5283 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
5284 || !DLSYM_OPT (openacc
.async
.queue_callback
,
5285 openacc_async_queue_callback
)
5286 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
5287 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
5288 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
5289 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
5291 /* Require all the OpenACC handlers if we have
5292 GOMP_OFFLOAD_CAP_OPENACC_200. */
5293 err
= "plugin missing OpenACC handler function";
5298 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
5299 openacc_cuda_get_current_device
);
5300 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
5301 openacc_cuda_get_current_context
);
5302 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
5303 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
5304 if (cuda
&& cuda
!= 4)
5306 /* Make sure all the CUDA functions are there if any of them are. */
5307 err
= "plugin missing OpenACC CUDA handler function";
5319 gomp_error ("while loading %s: %s", plugin_name
, err
);
5321 gomp_error ("missing function was %s", last_missing
);
5323 dlclose (plugin_handle
);
5328 /* This function finalizes all initialized devices. */
5331 gomp_target_fini (void)
5334 for (i
= 0; i
< num_devices
; i
++)
5337 struct gomp_device_descr
*devicep
= &devices
[i
];
5338 gomp_mutex_lock (&devicep
->lock
);
5339 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
5340 ret
= gomp_fini_device (devicep
);
5341 gomp_mutex_unlock (&devicep
->lock
);
5343 gomp_fatal ("device finalization failed");
5347 /* This function initializes the runtime for offloading.
5348 It parses the list of offload plugins, and tries to load these.
5349 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5350 will be set, and the array DEVICES initialized, containing descriptors for
5351 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
5355 gomp_target_init (void)
5357 const char *prefix
="libgomp-plugin-";
5358 const char *suffix
= SONAME_SUFFIX (1);
5359 const char *cur
, *next
;
5361 int i
, new_num_devs
;
5362 int num_devs
= 0, num_devs_openmp
;
5363 struct gomp_device_descr
*devs
= NULL
;
5365 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
5368 cur
= OFFLOAD_PLUGINS
;
5372 struct gomp_device_descr current_device
;
5373 size_t prefix_len
, suffix_len
, cur_len
;
5375 next
= strchr (cur
, ',');
5377 prefix_len
= strlen (prefix
);
5378 cur_len
= next
? next
- cur
: strlen (cur
);
5379 suffix_len
= strlen (suffix
);
5381 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
5388 memcpy (plugin_name
, prefix
, prefix_len
);
5389 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
5390 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
5392 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
5394 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
5395 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
5396 if (gomp_debug_var
> 0 && new_num_devs
< 0)
5399 int type
= current_device
.get_type_func ();
5400 for (int img
= 0; img
< num_offload_images
; img
++)
5401 if (type
== offload_images
[img
].type
)
5405 char buf
[sizeof ("unified_address, unified_shared_memory, "
5406 "reverse_offload")];
5407 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
5408 char *name
= (char *) malloc (cur_len
+ 1);
5409 memcpy (name
, cur
, cur_len
);
5410 name
[cur_len
] = '\0';
5412 "%s devices present but 'omp requires %s' "
5413 "cannot be fulfilled\n", name
, buf
);
5417 else if (new_num_devs
>= 1)
5419 /* Augment DEVICES and NUM_DEVICES. */
5421 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
5422 * sizeof (struct gomp_device_descr
));
5430 current_device
.name
= current_device
.get_name_func ();
5431 /* current_device.capabilities has already been set. */
5432 current_device
.type
= current_device
.get_type_func ();
5433 current_device
.mem_map
.root
= NULL
;
5434 current_device
.mem_map_rev
.root
= NULL
;
5435 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
5436 for (i
= 0; i
< new_num_devs
; i
++)
5438 current_device
.target_id
= i
;
5439 devs
[num_devs
] = current_device
;
5440 gomp_mutex_init (&devs
[num_devs
].lock
);
5451 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5452 NUM_DEVICES_OPENMP. */
5453 struct gomp_device_descr
*devs_s
5454 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
5461 num_devs_openmp
= 0;
5462 for (i
= 0; i
< num_devs
; i
++)
5463 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5464 devs_s
[num_devs_openmp
++] = devs
[i
];
5465 int num_devs_after_openmp
= num_devs_openmp
;
5466 for (i
= 0; i
< num_devs
; i
++)
5467 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5468 devs_s
[num_devs_after_openmp
++] = devs
[i
];
5472 for (i
= 0; i
< num_devs
; i
++)
5474 /* The 'devices' array can be moved (by the realloc call) until we have
5475 found all the plugins, so registering with the OpenACC runtime (which
5476 takes a copy of the pointer argument) must be delayed until now. */
5477 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5478 goacc_register (&devs
[i
]);
5481 num_devices
= num_devs
;
5482 num_devices_openmp
= num_devs_openmp
;
5484 if (atexit (gomp_target_fini
) != 0)
5485 gomp_fatal ("atexit failed");
5488 #else /* PLUGIN_SUPPORT */
5489 /* If dlfcn.h is unavailable we always fallback to host execution.
5490 GOMP_target* routines are just stubs for this case. */
5492 gomp_target_init (void)
5495 #endif /* PLUGIN_SUPPORT */