libgomp/target.c
1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support for offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_c
51 #include "splay-tree.h"
52
53
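/* Instantiate a hash table keyed on reference-count pointers.  It is used
   as the 'refcount set' that ensures a mapping's reference count is
   adjusted at most once per construct (see gomp_increment_refcount).  */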
54 typedef uintptr_t *hash_entry_type;
55 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
56 static inline void htab_free (void *ptr) { free (ptr); }
57 #include "hashtab.h"
58
59 ialias_redirect (GOMP_task)
60
61 static inline hashval_t
62 htab_hash (hash_entry_type element)
63 {
64 return hash_pointer ((void *) element);
65 }
66
67 static inline bool
68 htab_eq (hash_entry_type x, hash_entry_type y)
69 {
70 return x == y;
71 }
72
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
74
75 static void gomp_target_init (void);
76
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
79
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock;
82
83 /* This structure describes an offload image.
84 It contains the type of the target device, a pointer to the host table
85 descriptor, and a pointer to the target data. */
86 struct offload_image_descr {
87 unsigned version;
88 enum offload_target_type type;
89 const void *host_table;
90 const void *target_data;
91 };
92
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr *offload_images;
95
96 /* Total number of offload images. */
97 static int num_offload_images;
98
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr *devices;
101
102 /* Total number of available devices. */
103 static int num_devices;
104
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp;
107
108 /* Mask of clauses from OpenMP 'requires' directives. */
109 static int omp_requires_mask;
110
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
112
113 static void *
114 gomp_realloc_unlock (void *old, size_t size)
115 {
116 void *ret = realloc (old, size);
117 if (ret == NULL)
118 {
119 gomp_mutex_unlock (&register_lock);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
121 }
122 return ret;
123 }
124
125 attribute_hidden void
126 gomp_init_targets_once (void)
127 {
128 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
129 }
130
131 attribute_hidden int
132 gomp_get_num_devices (void)
133 {
134 gomp_init_targets_once ();
135 return num_devices_openmp;
136 }
137
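/* Translate DEVICE_ID into a device descriptor, remapping GOMP_DEVICE_ICV
   to the default-device ICV when REMAPPED.  Return NULL when the host
   should be used instead (initial device, host fallback, or no such
   device); abort when offloading is mandatory but the device is
   unavailable or finalized.  The device is initialized on first use.  */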
138 static struct gomp_device_descr *
139 resolve_device (int device_id, bool remapped)
140 {
141 if (remapped && device_id == GOMP_DEVICE_ICV)
142 {
143 struct gomp_task_icv *icv = gomp_icv (false);
144 device_id = icv->default_device_var;
145 remapped = false;
146 }
147
148 if (device_id < 0)
149 {
150 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device))
152 return NULL;
153 if (device_id == omp_invalid_device)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
158
159 return NULL;
160 }
161 else if (device_id >= gomp_get_num_devices ())
162 {
163 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id != num_devices_openmp)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
167
168 return NULL;
169 }
170
171 gomp_mutex_lock (&devices[device_id].lock);
172 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
173 gomp_init_device (&devices[device_id]);
174 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
175 {
176 gomp_mutex_unlock (&devices[device_id].lock);
177
178 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
181
182 return NULL;
183 }
184 gomp_mutex_unlock (&devices[device_id].lock);
185
186 return &devices[device_id];
187 }
188
189
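/* Look KEY up in MEM_MAP.  For a zero-length key (host_start == host_end)
   also try the one-byte ranges starting and ending at that address, so
   that mappings which begin or end exactly there are still found.  */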
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
192 {
193 if (key->host_start != key->host_end)
194 return splay_tree_lookup (mem_map, key);
195
196 key->host_end++;
197 splay_tree_key n = splay_tree_lookup (mem_map, key);
198 key->host_end--;
199 if (n)
200 return n;
201 key->host_start--;
202 n = splay_tree_lookup (mem_map, key);
203 key->host_start++;
204 if (n)
205 return n;
206 return splay_tree_lookup (mem_map, key);
207 }
208
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
211 {
212 return reverse_splay_tree_lookup (mem_map_rev, key);
213 }
214
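/* Like gomp_map_lookup, but for a zero-length key only try the one-byte
   range starting at the address (used for zero-length array sections).  */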
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
217 {
218 if (key->host_start != key->host_end)
219 return splay_tree_lookup (mem_map, key);
220
221 key->host_end++;
222 splay_tree_key n = splay_tree_lookup (mem_map, key);
223 key->host_end--;
224 return n;
225 }
226
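/* Copy SIZE bytes via COPY_FUNC; DST/DSTADDR and SRC/SRCADDR name and
   address the destination and source for diagnostics.  On failure the
   device lock is released and the process aborts via gomp_fatal.  */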
227 static inline void
228 gomp_device_copy (struct gomp_device_descr *devicep,
229 bool (*copy_func) (int, void *, const void *, size_t),
230 const char *dst, void *dstaddr,
231 const char *src, const void *srcaddr,
232 size_t size)
233 {
234 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
235 {
236 gomp_mutex_unlock (&devicep->lock);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
239 }
240 }
241
242 static inline void
243 goacc_device_copy_async (struct gomp_device_descr *devicep,
244 bool (*copy_func) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue *),
246 const char *dst, void *dstaddr,
247 const char *src, const void *srcaddr,
248 const void *srcaddr_orig,
249 size_t size, struct goacc_asyncqueue *aq)
250 {
251 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
252 {
253 gomp_mutex_unlock (&devicep->lock);
254 if (srcaddr_orig && srcaddr_orig != srcaddr)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src, srcaddr_orig, srcaddr_orig + size,
259 src, srcaddr, srcaddr + size,
260 dst, dstaddr, dstaddr + size);
261 else
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src, srcaddr, srcaddr + size,
265 dst, dstaddr, dstaddr + size);
266 }
267 }
268
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
271
272 struct gomp_coalesce_chunk
273 {
274 /* The starting and ending point of a coalesced chunk of memory. */
275 size_t start, end;
276 };
277
278 struct gomp_coalesce_buf
279 {
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
282 void *buf;
283 struct target_mem_desc *tgt;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk *chunks;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
289 be performed. */
290 long chunk_cnt;
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
294 long use_cnt;
295 };
296
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
300
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
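/* Example (illustrative numbers only): with the limits above, pending
   regions at tgt_start-relative offsets [0, 100) and [2100, 2300) are
   merged into a single chunk [0, 2300), because the 2000-byte gap is below
   MAX_COALESCE_BUF_GAP, and can later be sent in one host-to-device
   transfer; a 64 KiB region bypasses the buffer entirely since it exceeds
   MAX_COALESCE_BUF_SIZE.  */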
308
309 /* Add region with device tgt_start relative offset and length to CBUF.
310
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
313 example).
314 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
315 is it more performant to use libgomp CBUF buffering or individual device
316 asynchronous copying?) */
317
318 static inline void
319 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
320 {
321 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
322 return;
323 if (cbuf->chunk_cnt)
324 {
325 if (cbuf->chunk_cnt < 0)
326 return;
327 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
328 {
329 cbuf->chunk_cnt = -1;
330 return;
331 }
332 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
333 {
334 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
335 cbuf->use_cnt++;
336 return;
337 }
338 /* If the last chunk is only used by one mapping, discard it,
339 as it will be one host to device copy anyway and
340 memcpying it around will only waste cycles. */
341 if (cbuf->use_cnt == 1)
342 cbuf->chunk_cnt--;
343 }
344 cbuf->chunks[cbuf->chunk_cnt].start = start;
345 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
346 cbuf->chunk_cnt++;
347 cbuf->use_cnt = 1;
348 }
349
350 /* Return true for mapping kinds which need to copy data from the
351 host to device for regions that weren't previously mapped. */
352
353 static inline bool
354 gomp_to_device_kind_p (int kind)
355 {
356 switch (kind)
357 {
358 case GOMP_MAP_ALLOC:
359 case GOMP_MAP_FROM:
360 case GOMP_MAP_FORCE_ALLOC:
361 case GOMP_MAP_FORCE_FROM:
362 case GOMP_MAP_ALWAYS_FROM:
363 case GOMP_MAP_PRESENT_FROM:
364 case GOMP_MAP_ALWAYS_PRESENT_FROM:
365 return false;
366 default:
367 return true;
368 }
369 }
370
371 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
372 non-NULL), when the source data is on the stack or may otherwise be deallocated
373 before the asynchronous copy takes place, EPHEMERAL must be passed as
374 TRUE. */
375
376 attribute_hidden void
377 gomp_copy_host2dev (struct gomp_device_descr *devicep,
378 struct goacc_asyncqueue *aq,
379 void *d, const void *h, size_t sz,
380 bool ephemeral, struct gomp_coalesce_buf *cbuf)
381 {
382 if (__builtin_expect (aq != NULL, 0))
383 {
384 /* See 'gomp_coalesce_buf_add'. */
385 assert (!cbuf);
386
387 void *h_buf = (void *) h;
388 if (ephemeral)
389 {
390 /* We're queueing up an asynchronous copy from data that may
391 disappear before the transfer takes place (e.g. because it is a
392 stack local in a function that is no longer executing). Make a
393 copy of the data into a temporary buffer in those cases. */
394 h_buf = gomp_malloc (sz);
395 memcpy (h_buf, h, sz);
396 }
397 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
398 "dev", d, "host", h_buf, h, sz, aq);
399 if (ephemeral)
400 /* Free temporary buffer once the transfer has completed. */
401 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
402
403 return;
404 }
405
406 if (cbuf)
407 {
408 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
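/* Binary-search the sorted, disjoint chunks array for a chunk covering
   DOFF; if found, stage the data into the coalesce buffer instead of
   issuing an immediate device copy.  */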
409 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
410 {
411 long first = 0;
412 long last = cbuf->chunk_cnt - 1;
413 while (first <= last)
414 {
415 long middle = (first + last) >> 1;
416 if (cbuf->chunks[middle].end <= doff)
417 first = middle + 1;
418 else if (cbuf->chunks[middle].start <= doff)
419 {
420 if (doff + sz > cbuf->chunks[middle].end)
421 {
422 gomp_mutex_unlock (&devicep->lock);
423 gomp_fatal ("internal libgomp cbuf error");
424 }
425 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
426 h, sz);
427 return;
428 }
429 else
430 last = middle - 1;
431 }
432 }
433 }
434
435 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
436 }
437
438 attribute_hidden void
439 gomp_copy_dev2host (struct gomp_device_descr *devicep,
440 struct goacc_asyncqueue *aq,
441 void *h, const void *d, size_t sz)
442 {
443 if (__builtin_expect (aq != NULL, 0))
444 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
445 "host", h, "dev", d, NULL, sz, aq);
446 else
447 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
448 }
449
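/* Free the device memory block at DEVPTR via DEVICEP's free hook; on
   failure release the device lock and abort.  */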
450 static void
451 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
452 {
453 if (!devicep->free_func (devicep->target_id, devptr))
454 {
455 gomp_mutex_unlock (&devicep->lock);
456 gomp_fatal ("error in freeing device memory block at %p", devptr);
457 }
458 }
459
460 /* Increment reference count of a splay_tree_key region K by 1.
461 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
462 increment the value if refcount is not yet contained in the set (used for
463 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
464 once for each construct). */
465
466 static inline void
467 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
468 {
469 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
470 return;
471
472 uintptr_t *refcount_ptr = &k->refcount;
473
474 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
475 refcount_ptr = &k->structelem_refcount;
476 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
477 refcount_ptr = k->structelem_refcount_ptr;
478
479 if (refcount_set)
480 {
481 if (htab_find (*refcount_set, refcount_ptr))
482 return;
483 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
484 *slot = refcount_ptr;
485 }
486
487 *refcount_ptr += 1;
488 return;
489 }
490
491 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
492 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
493 track already seen refcounts, and only adjust the value if refcount is not
494 yet contained in the set (like gomp_increment_refcount).
495
496 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
497 it is already zero and we know we decremented it earlier. This signals that
498 associated maps should be copied back to host.
499
500 *DO_REMOVE is set to true when this is the first handling of this refcount
501 and we are setting it to zero. This signals a removal of this key from the
502 splay-tree map.
503
504 Copy and removal are separated due to cases like handling of structure
505 elements, e.g. each map of a structure element representing a possible copy
506 out of a structure field has to be handled individually, but we only signal
507 removal for one (the first encountered) sibling map. */
508
509 static inline void
510 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
511 bool *do_copy, bool *do_remove)
512 {
513 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
514 {
515 *do_copy = *do_remove = false;
516 return;
517 }
518
519 uintptr_t *refcount_ptr = &k->refcount;
520
521 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
522 refcount_ptr = &k->structelem_refcount;
523 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
524 refcount_ptr = k->structelem_refcount_ptr;
525
526 bool new_encountered_refcount;
527 bool set_to_zero = false;
528 bool is_zero = false;
529
530 uintptr_t orig_refcount = *refcount_ptr;
531
532 if (refcount_set)
533 {
534 if (htab_find (*refcount_set, refcount_ptr))
535 {
536 new_encountered_refcount = false;
537 goto end;
538 }
539
540 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
541 *slot = refcount_ptr;
542 new_encountered_refcount = true;
543 }
544 else
545 /* If no refcount_set being used, assume all keys are being decremented
546 for the first time. */
547 new_encountered_refcount = true;
548
549 if (delete_p)
550 *refcount_ptr = 0;
551 else if (*refcount_ptr > 0)
552 *refcount_ptr -= 1;
553
554 end:
555 if (*refcount_ptr == 0)
556 {
557 if (orig_refcount > 0)
558 set_to_zero = true;
559
560 is_zero = true;
561 }
562
563 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
564 *do_remove = (new_encountered_refcount && set_to_zero);
565 }
566
567 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
568 gomp_map_0len_lookup found oldn for newn.
569 Helper function of gomp_map_vars. */
570
571 static inline void
572 gomp_map_vars_existing (struct gomp_device_descr *devicep,
573 struct goacc_asyncqueue *aq, splay_tree_key oldn,
574 splay_tree_key newn, struct target_var_desc *tgt_var,
575 unsigned char kind, bool always_to_flag, bool implicit,
576 struct gomp_coalesce_buf *cbuf,
577 htab_t *refcount_set)
578 {
579 assert (kind != GOMP_MAP_ATTACH
580 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
581
582 tgt_var->key = oldn;
583 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
584 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
585 tgt_var->is_attach = false;
586 tgt_var->offset = newn->host_start - oldn->host_start;
587
588 /* For implicit maps, old contained in new is valid. */
589 bool implicit_subset = (implicit
590 && newn->host_start <= oldn->host_start
591 && oldn->host_end <= newn->host_end);
592 if (implicit_subset)
593 tgt_var->length = oldn->host_end - oldn->host_start;
594 else
595 tgt_var->length = newn->host_end - newn->host_start;
596
597 if (GOMP_MAP_FORCE_P (kind)
598 /* For implicit maps, old contained in new is valid. */
599 || !(implicit_subset
600 /* Otherwise, new contained inside old is considered valid. */
601 || (oldn->host_start <= newn->host_start
602 && newn->host_end <= oldn->host_end)))
603 {
604 gomp_mutex_unlock (&devicep->lock);
605 gomp_fatal ("Trying to map into device [%p..%p) object when "
606 "[%p..%p) is already mapped",
607 (void *) newn->host_start, (void *) newn->host_end,
608 (void *) oldn->host_start, (void *) oldn->host_end);
609 }
610
611 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
612 {
613 /* Implicit + always should not happen. If this does occur, the
614 address/length adjustment below remains a TODO. */
615 assert (!implicit_subset);
616
617 if (oldn->aux && oldn->aux->attach_count)
618 {
619 /* We have to be careful not to overwrite still attached pointers
620 during the copyback to host. */
621 uintptr_t addr = newn->host_start;
622 while (addr < newn->host_end)
623 {
624 size_t i = (addr - oldn->host_start) / sizeof (void *);
625 if (oldn->aux->attach_count[i] == 0)
626 gomp_copy_host2dev (devicep, aq,
627 (void *) (oldn->tgt->tgt_start
628 + oldn->tgt_offset
629 + addr - oldn->host_start),
630 (void *) addr,
631 sizeof (void *), false, cbuf);
632 addr += sizeof (void *);
633 }
634 }
635 else
636 gomp_copy_host2dev (devicep, aq,
637 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
638 + newn->host_start - oldn->host_start),
639 (void *) newn->host_start,
640 newn->host_end - newn->host_start, false, cbuf);
641 }
642
643 gomp_increment_refcount (oldn, refcount_set);
644 }
645
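/* Return the mapping kind of entry IDX in KINDS.  With SHORT_MAPKIND the
   entries are 16-bit values which may carry the GOMP_MAP_IMPLICIT flag;
   that flag is masked out here (see get_implicit below).  Otherwise the
   entries are single bytes.  */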
646 static int
647 get_kind (bool short_mapkind, void *kinds, int idx)
648 {
649 if (!short_mapkind)
650 return ((unsigned char *) kinds)[idx];
651
652 int val = ((unsigned short *) kinds)[idx];
653 if (GOMP_MAP_IMPLICIT_P (val))
654 val &= ~GOMP_MAP_IMPLICIT;
655 return val;
656 }
657
658
659 static bool
660 get_implicit (bool short_mapkind, void *kinds, int idx)
661 {
662 if (!short_mapkind)
663 return false;
664
665 int val = ((unsigned short *) kinds)[idx];
666 return GOMP_MAP_IMPLICIT_P (val);
667 }
668
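/* Initialize the device copy of a pointer: write to device address
   TGT->tgt_start + TARGET_OFFSET the device address corresponding to the
   host pointer HOST_PTR (biased by BIAS).  A NULL host pointer is
   translated to NULL; if the pointer target is not mapped, write 0 for
   zero-length array sections, the host address itself for USM-visible
   memory, and abort otherwise.  */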
669 static void
670 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
671 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
672 struct gomp_coalesce_buf *cbuf,
673 bool allow_zero_length_array_sections)
674 {
675 struct gomp_device_descr *devicep = tgt->device_descr;
676 struct splay_tree_s *mem_map = &devicep->mem_map;
677 struct splay_tree_key_s cur_node;
678
679 cur_node.host_start = host_ptr;
680 if (cur_node.host_start == (uintptr_t) NULL)
681 {
682 cur_node.tgt_offset = (uintptr_t) NULL;
683 gomp_copy_host2dev (devicep, aq,
684 (void *) (tgt->tgt_start + target_offset),
685 (void *) &cur_node.tgt_offset, sizeof (void *),
686 true, cbuf);
687 return;
688 }
689 /* Add bias to the pointer value. */
690 cur_node.host_start += bias;
691 cur_node.host_end = cur_node.host_start;
692 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
693 if (n == NULL)
694 {
695 if (allow_zero_length_array_sections)
696 cur_node.tgt_offset = 0;
697 else if (devicep->is_usm_ptr_func
698 && devicep->is_usm_ptr_func ((void*)cur_node.host_start))
699 cur_node.tgt_offset = cur_node.host_start;
700 else
701 {
702 gomp_mutex_unlock (&devicep->lock);
703 gomp_fatal ("Pointer target of array section wasn't mapped");
704 }
705 }
706 else
707 {
708 cur_node.host_start -= n->host_start;
709 cur_node.tgt_offset
710 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
711 /* At this point tgt_offset is target address of the
712 array section. Now subtract bias to get what we want
713 to initialize the pointer with. */
714 cur_node.tgt_offset -= bias;
715 }
716 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
717 (void *) &cur_node.tgt_offset, sizeof (void *),
718 true, cbuf);
719 }
720
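/* Handle structure element I of a GOMP_MAP_STRUCT sequence whose
   enclosing structure mapping N already exists.  The element (or, for
   zero-sized elements, an adjacent byte) must be found in the same
   target_mem_desc at a consistent offset; otherwise it is a mapping
   error.  */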
721 static void
722 gomp_map_fields_existing (struct target_mem_desc *tgt,
723 struct goacc_asyncqueue *aq, splay_tree_key n,
724 size_t first, size_t i, void **hostaddrs,
725 size_t *sizes, void *kinds,
726 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
727 {
728 struct gomp_device_descr *devicep = tgt->device_descr;
729 struct splay_tree_s *mem_map = &devicep->mem_map;
730 struct splay_tree_key_s cur_node;
731 int kind;
732 bool implicit;
733 const bool short_mapkind = true;
734 const int typemask = short_mapkind ? 0xff : 0x7;
735
736 cur_node.host_start = (uintptr_t) hostaddrs[i];
737 cur_node.host_end = cur_node.host_start + sizes[i];
738 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
739 kind = get_kind (short_mapkind, kinds, i);
740 implicit = get_implicit (short_mapkind, kinds, i);
741 if (n2
742 && n2->tgt == n->tgt
743 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
744 {
745 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
746 kind & typemask, false, implicit, cbuf,
747 refcount_set);
748 return;
749 }
750 if (sizes[i] == 0)
751 {
752 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
753 {
754 cur_node.host_start--;
755 n2 = splay_tree_lookup (mem_map, &cur_node);
756 cur_node.host_start++;
757 if (n2
758 && n2->tgt == n->tgt
759 && n2->host_start - n->host_start
760 == n2->tgt_offset - n->tgt_offset)
761 {
762 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
763 kind & typemask, false, implicit, cbuf,
764 refcount_set);
765 return;
766 }
767 }
768 cur_node.host_end++;
769 n2 = splay_tree_lookup (mem_map, &cur_node);
770 cur_node.host_end--;
771 if (n2
772 && n2->tgt == n->tgt
773 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
774 {
775 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
776 kind & typemask, false, implicit, cbuf,
777 refcount_set);
778 return;
779 }
780 }
781 gomp_mutex_unlock (&devicep->lock);
782 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
783 "other mapped elements from the same structure weren't mapped "
784 "together with it", (void *) cur_node.host_start,
785 (void *) cur_node.host_end);
786 }
787
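/* Record an attachment of the pointer at host address ATTACH_TO within
   the mapped region N.  On the first attachment the device copy of the
   pointer is overwritten with the device address of its target, which is
   looked up with BIAS added to tolerate array-section offsets.  */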
788 attribute_hidden void
789 gomp_attach_pointer (struct gomp_device_descr *devicep,
790 struct goacc_asyncqueue *aq, splay_tree mem_map,
791 splay_tree_key n, uintptr_t attach_to, size_t bias,
792 struct gomp_coalesce_buf *cbufp,
793 bool allow_zero_length_array_sections)
794 {
795 struct splay_tree_key_s s;
796 size_t size, idx;
797
798 if (n == NULL)
799 {
800 gomp_mutex_unlock (&devicep->lock);
801 gomp_fatal ("enclosing struct not mapped for attach");
802 }
803
804 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
805 /* We might have a pointer in a packed struct: however we cannot have more
806 than one such pointer in each pointer-sized portion of the struct, so
807 this is safe. */
808 idx = (attach_to - n->host_start) / sizeof (void *);
809
810 if (!n->aux)
811 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
812
813 if (!n->aux->attach_count)
814 n->aux->attach_count
815 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
816
817 if (n->aux->attach_count[idx] < UINTPTR_MAX)
818 n->aux->attach_count[idx]++;
819 else
820 {
821 gomp_mutex_unlock (&devicep->lock);
822 gomp_fatal ("attach count overflow");
823 }
824
825 if (n->aux->attach_count[idx] == 1)
826 {
827 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
828 - n->host_start;
829 uintptr_t target = (uintptr_t) *(void **) attach_to;
830 splay_tree_key tn;
831 uintptr_t data;
832
833 if ((void *) target == NULL)
834 {
835 gomp_mutex_unlock (&devicep->lock);
836 gomp_fatal ("attempt to attach null pointer");
837 }
838
839 s.host_start = target + bias;
840 s.host_end = s.host_start + 1;
841 tn = splay_tree_lookup (mem_map, &s);
842
843 if (!tn)
844 {
845 if (allow_zero_length_array_sections)
846 /* When allowing attachment to zero-length array sections, we
847 allow attaching to NULL pointers when the target region is not
848 mapped. */
849 data = 0;
850 else
851 {
852 gomp_mutex_unlock (&devicep->lock);
853 gomp_fatal ("pointer target not mapped for attach");
854 }
855 }
856 else
857 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
858
859 gomp_debug (1,
860 "%s: attaching host %p, target %p (struct base %p) to %p\n",
861 __FUNCTION__, (void *) attach_to, (void *) devptr,
862 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
863
864 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
865 sizeof (void *), true, cbufp);
866 }
867 else
868 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
869 (void *) attach_to, (int) n->aux->attach_count[idx]);
870 }
871
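/* Undo an attachment recorded by gomp_attach_pointer: decrement the
   attachment count for DETACH_FROM (forcing it to one first if FINALIZE,
   so the detach always completes) and, once it reaches zero, restore the
   device copy of the pointer to the current host pointer value.  */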
872 attribute_hidden void
873 gomp_detach_pointer (struct gomp_device_descr *devicep,
874 struct goacc_asyncqueue *aq, splay_tree_key n,
875 uintptr_t detach_from, bool finalize,
876 struct gomp_coalesce_buf *cbufp)
877 {
878 size_t idx;
879
880 if (n == NULL)
881 {
882 gomp_mutex_unlock (&devicep->lock);
883 gomp_fatal ("enclosing struct not mapped for detach");
884 }
885
886 idx = (detach_from - n->host_start) / sizeof (void *);
887
888 if (!n->aux || !n->aux->attach_count)
889 {
890 gomp_mutex_unlock (&devicep->lock);
891 gomp_fatal ("no attachment counters for struct");
892 }
893
894 if (finalize)
895 n->aux->attach_count[idx] = 1;
896
897 if (n->aux->attach_count[idx] == 0)
898 {
899 gomp_mutex_unlock (&devicep->lock);
900 gomp_fatal ("attach count underflow");
901 }
902 else
903 n->aux->attach_count[idx]--;
904
905 if (n->aux->attach_count[idx] == 0)
906 {
907 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
908 - n->host_start;
909 uintptr_t target = (uintptr_t) *(void **) detach_from;
910
911 gomp_debug (1,
912 "%s: detaching host %p, target %p (struct base %p) to %p\n",
913 __FUNCTION__, (void *) detach_from, (void *) devptr,
914 (void *) (n->tgt->tgt_start + n->tgt_offset),
915 (void *) target);
916
917 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
918 sizeof (void *), true, cbufp);
919 }
920 else
921 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
922 (void *) detach_from, (int) n->aux->attach_count[idx]);
923 }
924
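/* Return the device address to pass for map entry I: for mapped entries,
   the key's device address plus the recorded offset; for inlined or USM
   entries, the host address itself; for pointers, 0; for struct bases, an
   address derived from the following entry; otherwise an offset into
   TGT's own block.  */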
925 attribute_hidden uintptr_t
926 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
927 {
928 if (tgt->list[i].key != NULL)
929 return tgt->list[i].key->tgt->tgt_start
930 + tgt->list[i].key->tgt_offset
931 + tgt->list[i].offset;
932
933 switch (tgt->list[i].offset)
934 {
935 case OFFSET_INLINED:
936 case OFFSET_USM:
937 return (uintptr_t) hostaddrs[i];
938
939 case OFFSET_POINTER:
940 return 0;
941
942 case OFFSET_STRUCT:
943 return tgt->list[i + 1].key->tgt->tgt_start
944 + tgt->list[i + 1].key->tgt_offset
945 + tgt->list[i + 1].offset
946 + (uintptr_t) hostaddrs[i]
947 - (uintptr_t) hostaddrs[i + 1];
948
949 default:
950 return tgt->tgt_start + tgt->list[i].offset;
951 }
952 }
953
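/* Map MAPNUM host address/size/kind tuples onto DEVICEP for a construct
   of kind PRAGMA_KIND.  The first loop over the entries classifies them,
   reuses existing mappings and computes the size and alignment of the
   device block needed for entries not yet mapped; the second loop
   allocates splay-tree keys for those entries and performs the
   host-to-device copies, optionally staging small transfers through a
   coalesce buffer.  */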
954 static inline __attribute__((always_inline)) struct target_mem_desc *
955 gomp_map_vars_internal (struct gomp_device_descr *devicep,
956 struct goacc_asyncqueue *aq, size_t mapnum,
957 void **hostaddrs, void **devaddrs, size_t *sizes,
958 void *kinds, struct goacc_ncarray_info *nca_info,
959 bool short_mapkind, htab_t *refcount_set,
960 enum gomp_map_vars_kind pragma_kind)
961 {
962 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
963 size_t nca_data_row_num = (nca_info ? nca_info->num_data_rows : 0);
964 bool has_firstprivate = false;
965 bool has_always_ptrset = false;
966 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
967 const int rshift = short_mapkind ? 8 : 3;
968 const int typemask = short_mapkind ? 0xff : 0x7;
969 struct splay_tree_s *mem_map = &devicep->mem_map;
970 struct splay_tree_key_s cur_node;
971 struct target_mem_desc *tgt
972 = gomp_malloc (sizeof (*tgt)
973 + sizeof (tgt->list[0]) * (mapnum + nca_data_row_num));
974 tgt->list_count = mapnum + nca_data_row_num;
975 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
976 tgt->device_descr = devicep;
977 tgt->prev = NULL;
978 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
979
980 if (mapnum == 0)
981 {
982 tgt->tgt_start = 0;
983 tgt->tgt_end = 0;
984 return tgt;
985 }
986
987 tgt_align = sizeof (void *);
988 tgt_size = 0;
989 cbuf.chunks = NULL;
990 cbuf.chunk_cnt = -1;
991 cbuf.use_cnt = 0;
992 cbuf.buf = NULL;
993 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
994 {
995 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
996 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
997 cbuf.chunk_cnt = 0;
998 }
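/* For a target construct, the beginning of the device block holds an
   array of MAPNUM device pointers that is handed to the target routine;
   reserve space for it and make it the first coalescing chunk.  */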
999 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1000 {
1001 size_t align = 4 * sizeof (void *);
1002 tgt_align = align;
1003 tgt_size = mapnum * sizeof (void *);
1004 cbuf.chunk_cnt = 1;
1005 cbuf.use_cnt = 1 + (mapnum > 1);
1006 cbuf.chunks[0].start = 0;
1007 cbuf.chunks[0].end = tgt_size;
1008 }
1009
1010 gomp_mutex_lock (&devicep->lock);
1011 if (devicep->state == GOMP_DEVICE_FINALIZED)
1012 {
1013 gomp_mutex_unlock (&devicep->lock);
1014 free (tgt);
1015 return NULL;
1016 }
1017
1018 for (i = 0; i < mapnum; i++)
1019 {
1020 int kind = get_kind (short_mapkind, kinds, i);
1021 bool implicit = get_implicit (short_mapkind, kinds, i);
1022 tgt->list[i].offset = 0;
1023 if (hostaddrs[i] == NULL
1024 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1025 {
1026 tgt->list[i].key = NULL;
1027 tgt->list[i].offset = OFFSET_INLINED;
1028 continue;
1029 }
1030 else if (devicep->is_usm_ptr_func
1031 && devicep->is_usm_ptr_func (hostaddrs[i]))
1032 {
1033 /* The memory is visible from both host and target
1034 so nothing needs to be moved. */
1035 tgt->list[i].key = NULL;
1036 tgt->list[i].offset = OFFSET_USM;
1037 continue;
1038 }
1039 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1040 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1041 {
1042 tgt->list[i].key = NULL;
1043 if (!not_found_cnt)
1044 {
1045 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1046 on a separate construct prior to using use_device_{addr,ptr}.
1047 In OpenMP 5.0, map directives need to be ordered by the
1048 middle-end before the use_device_* clauses. If
1049 !not_found_cnt, all mappings requested (if any) are already
1050 mapped, so use_device_{addr,ptr} can be resolved right away.
1051 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1052 now but would succeed after performing the mappings in the
1053 following loop. We can't defer this always to the second
1054 loop, because it is not even invoked when !not_found_cnt
1055 after the first loop. */
1056 cur_node.host_start = (uintptr_t) hostaddrs[i];
1057 cur_node.host_end = cur_node.host_start;
1058 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1059 if (n != NULL)
1060 {
1061 cur_node.host_start -= n->host_start;
1062 hostaddrs[i]
1063 = (void *) (n->tgt->tgt_start + n->tgt_offset
1064 + cur_node.host_start);
1065 }
1066 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1067 {
1068 gomp_mutex_unlock (&devicep->lock);
1069 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1070 }
1071 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1072 /* If not present, continue using the host address. */
1073 ;
1074 else
1075 __builtin_unreachable ();
1076 tgt->list[i].offset = OFFSET_INLINED;
1077 }
1078 else
1079 tgt->list[i].offset = 0;
1080 continue;
1081 }
1082 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1083 {
1084 size_t first = i + 1;
1085 size_t last = i + sizes[i];
1086 cur_node.host_start = (uintptr_t) hostaddrs[i];
1087 cur_node.host_end = (uintptr_t) hostaddrs[last]
1088 + sizes[last];
1089 tgt->list[i].key = NULL;
1090 tgt->list[i].offset = OFFSET_STRUCT;
1091 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1092 if (n == NULL)
1093 {
1094 size_t align = (size_t) 1 << (kind >> rshift);
1095 if (tgt_align < align)
1096 tgt_align = align;
1097 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1098 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1099 tgt_size += cur_node.host_end - cur_node.host_start;
1100 not_found_cnt += last - i;
1101 for (i = first; i <= last; i++)
1102 {
1103 tgt->list[i].key = NULL;
1104 if (!aq
1105 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1106 & typemask))
1107 gomp_coalesce_buf_add (&cbuf,
1108 tgt_size - cur_node.host_end
1109 + (uintptr_t) hostaddrs[i],
1110 sizes[i]);
1111 }
1112 i--;
1113 continue;
1114 }
1115 for (i = first; i <= last; i++)
1116 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1117 sizes, kinds, NULL, refcount_set);
1118 i--;
1119 continue;
1120 }
1121 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1122 {
1123 tgt->list[i].key = NULL;
1124 tgt->list[i].offset = OFFSET_POINTER;
1125 has_firstprivate = true;
1126 continue;
1127 }
1128 else if ((kind & typemask) == GOMP_MAP_ATTACH
1129 || ((kind & typemask)
1130 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1131 {
1132 tgt->list[i].key = NULL;
1133 has_firstprivate = true;
1134 continue;
1135 }
1136 else if (GOMP_MAP_NONCONTIG_ARRAY_P (kind & typemask))
1137 {
1138 /* Ignore non-contiguous arrays for now, we process them together
1139 later. */
1140 tgt->list[i].key = NULL;
1141 tgt->list[i].offset = 0;
1142 not_found_cnt++;
1143
1144 /* The map for the non-contiguous array itself is never copied from
1145 during unmapping, it's the data rows that count. Set copy-from
1146 flags to false here. */
1147 tgt->list[i].copy_from = false;
1148 tgt->list[i].always_copy_from = false;
1149 tgt->list[i].is_attach = false;
1150
1151 size_t align = (size_t) 1 << (kind >> rshift);
1152 if (tgt_align < align)
1153 tgt_align = align;
1154
1155 continue;
1156 }
1157
1158 cur_node.host_start = (uintptr_t) hostaddrs[i];
1159 if (!GOMP_MAP_POINTER_P (kind & typemask))
1160 cur_node.host_end = cur_node.host_start + sizes[i];
1161 else
1162 cur_node.host_end = cur_node.host_start + sizeof (void *);
1163 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1164 {
1165 tgt->list[i].key = NULL;
1166
1167 size_t align = (size_t) 1 << (kind >> rshift);
1168 if (tgt_align < align)
1169 tgt_align = align;
1170 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1171 if (!aq)
1172 gomp_coalesce_buf_add (&cbuf, tgt_size,
1173 cur_node.host_end - cur_node.host_start);
1174 tgt_size += cur_node.host_end - cur_node.host_start;
1175 has_firstprivate = true;
1176 continue;
1177 }
1178 splay_tree_key n;
1179 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1180 {
1181 n = gomp_map_0len_lookup (mem_map, &cur_node);
1182 if (!n)
1183 {
1184 tgt->list[i].key = NULL;
1185 tgt->list[i].offset = OFFSET_POINTER;
1186 continue;
1187 }
1188 }
1189 else
1190 n = splay_tree_lookup (mem_map, &cur_node);
1191 if (n && n->refcount != REFCOUNT_LINK)
1192 {
1193 int always_to_cnt = 0;
1194 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1195 {
1196 bool has_nullptr = false;
1197 size_t j;
1198 for (j = 0; j < n->tgt->list_count; j++)
1199 if (n->tgt->list[j].key == n)
1200 {
1201 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1202 break;
1203 }
1204 if (n->tgt->list_count == 0)
1205 {
1206 /* 'declare target'; assume has_nullptr; it could also be a
1207 statically assigned pointer, but then it should point to
1208 the equivalent variable on the host. */
1209 assert (n->refcount == REFCOUNT_INFINITY);
1210 has_nullptr = true;
1211 }
1212 else
1213 assert (j < n->tgt->list_count);
1214 /* Re-map the data if there is an 'always' modifier or if a
1215 null pointer was there and now a non-null one has been found; that
1216 permits transparent re-mapping for Fortran array descriptors
1217 which were previously mapped unallocated. */
1218 for (j = i + 1; j < mapnum; j++)
1219 {
1220 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1221 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1222 && (!has_nullptr
1223 || !GOMP_MAP_POINTER_P (ptr_kind)
1224 || *(void **) hostaddrs[j] == NULL))
1225 break;
1226 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1227 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1228 > cur_node.host_end))
1229 break;
1230 else
1231 {
1232 has_always_ptrset = true;
1233 ++always_to_cnt;
1234 }
1235 }
1236 }
1237 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1238 kind & typemask, always_to_cnt > 0, implicit,
1239 NULL, refcount_set);
1240 i += always_to_cnt;
1241 }
1242 else
1243 {
1244 tgt->list[i].key = NULL;
1245
1246 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1247 {
1248 /* Not present, hence skip the entry, including its MAP_POINTER
1249 when one exists. */
1250 tgt->list[i].offset = OFFSET_POINTER;
1251 if (i + 1 < mapnum
1252 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1253 == GOMP_MAP_POINTER))
1254 {
1255 ++i;
1256 tgt->list[i].key = NULL;
1257 tgt->list[i].offset = 0;
1258 }
1259 continue;
1260 }
1261 size_t align = (size_t) 1 << (kind >> rshift);
1262 not_found_cnt++;
1263 if (tgt_align < align)
1264 tgt_align = align;
1265 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1266 if (!aq
1267 && gomp_to_device_kind_p (kind & typemask))
1268 gomp_coalesce_buf_add (&cbuf, tgt_size,
1269 cur_node.host_end - cur_node.host_start);
1270 tgt_size += cur_node.host_end - cur_node.host_start;
1271 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1272 {
1273 size_t j;
1274 int kind;
1275 for (j = i + 1; j < mapnum; j++)
1276 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1277 kinds, j)) & typemask))
1278 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1279 break;
1280 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1281 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1282 > cur_node.host_end))
1283 break;
1284 else
1285 {
1286 tgt->list[j].key = NULL;
1287 i++;
1288 }
1289 }
1290 }
1291 }
1292
1293 /* For non-contiguous arrays. Each data row is one target item, separated
1294 from the normal map clause items, hence we order them after mapnum. */
1295 if (nca_info)
1296 {
1297 struct target_var_desc *next_var_desc = &tgt->list[mapnum];
1298 for (i = 0; i < nca_info->num_ncarray; i++)
1299 {
1300 struct goacc_ncarray *nca = &nca_info->ncarray[i];
1301 int kind = get_kind (short_mapkind, kinds, nca->map_index);
1302 size_t align = (size_t) 1 << (kind >> rshift);
1303 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1304 tgt_size += nca->ptrblock_size;
1305
1306 for (size_t j = 0; j < nca->data_row_num; j++)
1307 {
1308 struct target_var_desc *row_desc = next_var_desc++;
1309 void *row = nca->data_rows[j];
1310 cur_node.host_start = (uintptr_t) row;
1311 cur_node.host_end = cur_node.host_start + nca->data_row_size;
1312 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1313 if (n)
1314 {
1315 assert (n->refcount != REFCOUNT_LINK);
1316 gomp_map_vars_existing (devicep, aq, n, &cur_node, row_desc,
1317 kind & typemask, false, false,
1318 /* TODO: cbuf? */ NULL,
1319 refcount_set);
1320 }
1321 else
1322 {
1323 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1324 tgt_size += nca->data_row_size;
1325 not_found_cnt++;
1326 }
1327 }
1328 }
1329 assert (next_var_desc == &tgt->list[mapnum + nca_info->num_data_rows]);
1330 }
1331
1332 if (devaddrs)
1333 {
1334 if (mapnum != 1)
1335 {
1336 gomp_mutex_unlock (&devicep->lock);
1337 gomp_fatal ("unexpected aggregation");
1338 }
1339 tgt->to_free = devaddrs[0];
1340 tgt->tgt_start = (uintptr_t) tgt->to_free;
1341 tgt->tgt_end = tgt->tgt_start + sizes[0];
1342 }
1343 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1344 {
1345 /* Allocate tgt_align aligned tgt_size block of memory. */
1346 /* FIXME: Perhaps change interface to allocate properly aligned
1347 memory. */
1348 tgt->to_free = devicep->alloc_func (devicep->target_id,
1349 tgt_size + tgt_align - 1);
1350 if (!tgt->to_free)
1351 {
1352 gomp_mutex_unlock (&devicep->lock);
1353 gomp_fatal ("device memory allocation fail");
1354 }
1355
1356 tgt->tgt_start = (uintptr_t) tgt->to_free;
1357 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1358 tgt->tgt_end = tgt->tgt_start + tgt_size;
1359
1360 if (cbuf.use_cnt == 1)
1361 cbuf.chunk_cnt--;
1362 if (cbuf.chunk_cnt > 0)
1363 {
1364 cbuf.buf
1365 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1366 if (cbuf.buf)
1367 {
1368 cbuf.tgt = tgt;
1369 cbufp = &cbuf;
1370 }
1371 }
1372 }
1373 else
1374 {
1375 tgt->to_free = NULL;
1376 tgt->tgt_start = 0;
1377 tgt->tgt_end = 0;
1378 }
1379
1380 tgt_size = 0;
1381 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1382 tgt_size = mapnum * sizeof (void *);
1383
1384 tgt->array = NULL;
1385 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1386 {
1387 if (not_found_cnt)
1388 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1389 splay_tree_node array = tgt->array;
1390 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1391 uintptr_t field_tgt_base = 0;
1392 splay_tree_key field_tgt_structelem_first = NULL;
1393
1394 for (i = 0; i < mapnum; i++)
1395 if (has_always_ptrset
1396 && tgt->list[i].key
1397 && (get_kind (short_mapkind, kinds, i) & typemask)
1398 == GOMP_MAP_TO_PSET)
1399 {
1400 splay_tree_key k = tgt->list[i].key;
1401 bool has_nullptr = false;
1402 size_t j;
1403 for (j = 0; j < k->tgt->list_count; j++)
1404 if (k->tgt->list[j].key == k)
1405 {
1406 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1407 break;
1408 }
1409 if (k->tgt->list_count == 0)
1410 has_nullptr = true;
1411 else
1412 assert (j < k->tgt->list_count);
1413
1414 tgt->list[i].has_null_ptr_assoc = false;
1415 for (j = i + 1; j < mapnum; j++)
1416 {
1417 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1418 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1419 && (!has_nullptr
1420 || !GOMP_MAP_POINTER_P (ptr_kind)
1421 || *(void **) hostaddrs[j] == NULL))
1422 break;
1423 else if ((uintptr_t) hostaddrs[j] < k->host_start
1424 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1425 > k->host_end))
1426 break;
1427 else
1428 {
1429 if (*(void **) hostaddrs[j] == NULL)
1430 tgt->list[i].has_null_ptr_assoc = true;
1431 tgt->list[j].key = k;
1432 tgt->list[j].copy_from = false;
1433 tgt->list[j].always_copy_from = false;
1434 tgt->list[j].is_attach = false;
1435 gomp_increment_refcount (k, refcount_set);
1436 gomp_map_pointer (k->tgt, aq,
1437 (uintptr_t) *(void **) hostaddrs[j],
1438 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1439 - k->host_start),
1440 sizes[j], cbufp, false);
1441 }
1442 }
1443 i = j - 1;
1444 }
1445 else if (tgt->list[i].key == NULL)
1446 {
1447 int kind = get_kind (short_mapkind, kinds, i);
1448 bool implicit = get_implicit (short_mapkind, kinds, i);
1449 if (hostaddrs[i] == NULL)
1450 continue;
1451 if (tgt->list[i].offset == OFFSET_USM)
1452 continue;
1453 switch (kind & typemask)
1454 {
1455 size_t align, len, first, last;
1456 splay_tree_key n;
1457 case GOMP_MAP_FIRSTPRIVATE:
1458 align = (size_t) 1 << (kind >> rshift);
1459 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1460 tgt->list[i].offset = tgt_size;
1461 len = sizes[i];
1462 gomp_copy_host2dev (devicep, aq,
1463 (void *) (tgt->tgt_start + tgt_size),
1464 (void *) hostaddrs[i], len, false, cbufp);
1465 /* Save device address in hostaddrs[i] to permit later availability
1466 when doing a deep-firstprivate with pointer attach. */
1467 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1468 tgt_size += len;
1469
1470 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1471 firstprivate to hostaddrs[i+1], which is assumed to contain a
1472 device address. */
1473 if (i + 1 < mapnum
1474 && (GOMP_MAP_ATTACH
1475 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1476 {
1477 uintptr_t target = (uintptr_t) hostaddrs[i];
1478 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1479 gomp_copy_host2dev (devicep, aq, devptr, &target,
1480 sizeof (void *), false, cbufp);
1481 ++i;
1482 }
1483 continue;
1484 case GOMP_MAP_FIRSTPRIVATE_INT:
1485 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1486 continue;
1487 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1488 /* The OpenACC 'host_data' construct only allows 'use_device'
1489 "mapping" clauses, so in the first loop, 'not_found_cnt'
1490 must always have been zero, so all OpenACC 'use_device'
1491 clauses have already been handled. (We can only easily test
1492 'use_device' with 'if_present' clause here.) */
1493 assert (tgt->list[i].offset == OFFSET_INLINED);
1494 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1495 code conceptually simple, similar to the first loop. */
1496 case GOMP_MAP_USE_DEVICE_PTR:
1497 if (tgt->list[i].offset == 0)
1498 {
1499 cur_node.host_start = (uintptr_t) hostaddrs[i];
1500 cur_node.host_end = cur_node.host_start;
1501 n = gomp_map_lookup (mem_map, &cur_node);
1502 if (n != NULL)
1503 {
1504 cur_node.host_start -= n->host_start;
1505 hostaddrs[i]
1506 = (void *) (n->tgt->tgt_start + n->tgt_offset
1507 + cur_node.host_start);
1508 }
1509 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1510 {
1511 gomp_mutex_unlock (&devicep->lock);
1512 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1513 }
1514 else if ((kind & typemask)
1515 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1516 /* If not present, continue using the host address. */
1517 ;
1518 else
1519 __builtin_unreachable ();
1520 tgt->list[i].offset = OFFSET_INLINED;
1521 }
1522 continue;
1523 case GOMP_MAP_STRUCT:
1524 first = i + 1;
1525 last = i + sizes[i];
1526 cur_node.host_start = (uintptr_t) hostaddrs[i];
1527 cur_node.host_end = (uintptr_t) hostaddrs[last]
1528 + sizes[last];
1529 if (tgt->list[first].key != NULL)
1530 continue;
1531 n = splay_tree_lookup (mem_map, &cur_node);
1532 if (n == NULL)
1533 {
1534 size_t align = (size_t) 1 << (kind >> rshift);
1535 tgt_size -= (uintptr_t) hostaddrs[first]
1536 - (uintptr_t) hostaddrs[i];
1537 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1538 tgt_size += (uintptr_t) hostaddrs[first]
1539 - (uintptr_t) hostaddrs[i];
1540 field_tgt_base = (uintptr_t) hostaddrs[first];
1541 field_tgt_offset = tgt_size;
1542 field_tgt_clear = last;
1543 field_tgt_structelem_first = NULL;
1544 tgt_size += cur_node.host_end
1545 - (uintptr_t) hostaddrs[first];
1546 continue;
1547 }
1548 for (i = first; i <= last; i++)
1549 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1550 sizes, kinds, cbufp, refcount_set);
1551 i--;
1552 continue;
1553 case GOMP_MAP_ALWAYS_POINTER:
1554 cur_node.host_start = (uintptr_t) hostaddrs[i];
1555 cur_node.host_end = cur_node.host_start + sizeof (void *);
1556 n = splay_tree_lookup (mem_map, &cur_node);
1557 if (n == NULL
1558 || n->host_start > cur_node.host_start
1559 || n->host_end < cur_node.host_end)
1560 {
1561 gomp_mutex_unlock (&devicep->lock);
1562 gomp_fatal ("always pointer not mapped");
1563 }
1564 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1565 != GOMP_MAP_ALWAYS_POINTER)
1566 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1567 if (cur_node.tgt_offset)
1568 cur_node.tgt_offset -= sizes[i];
1569 gomp_copy_host2dev (devicep, aq,
1570 (void *) (n->tgt->tgt_start
1571 + n->tgt_offset
1572 + cur_node.host_start
1573 - n->host_start),
1574 (void *) &cur_node.tgt_offset,
1575 sizeof (void *), true, cbufp);
1576 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1577 + cur_node.host_start - n->host_start;
1578 continue;
1579 case GOMP_MAP_IF_PRESENT:
1580 /* Not present - otherwise handled above. Skip over its
1581 MAP_POINTER as well. */
1582 if (i + 1 < mapnum
1583 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1584 == GOMP_MAP_POINTER))
1585 ++i;
1586 continue;
1587 case GOMP_MAP_ATTACH:
1588 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1589 {
1590 cur_node.host_start = (uintptr_t) hostaddrs[i];
1591 cur_node.host_end = cur_node.host_start + sizeof (void *);
1592 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1593 if (n != NULL)
1594 {
1595 tgt->list[i].key = n;
1596 tgt->list[i].offset = cur_node.host_start - n->host_start;
1597 tgt->list[i].length = n->host_end - n->host_start;
1598 tgt->list[i].copy_from = false;
1599 tgt->list[i].always_copy_from = false;
1600 tgt->list[i].is_attach = true;
1601 /* OpenACC 'attach'/'detach' doesn't affect
1602 structured/dynamic reference counts ('n->refcount',
1603 'n->dynamic_refcount'). */
1604
1605 bool zlas
1606 = ((kind & typemask)
1607 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1608 gomp_attach_pointer (devicep, aq, mem_map, n,
1609 (uintptr_t) hostaddrs[i], sizes[i],
1610 cbufp, zlas);
1611 }
1612 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1613 {
1614 gomp_mutex_unlock (&devicep->lock);
1615 gomp_fatal ("outer struct not mapped for attach");
1616 }
1617 continue;
1618 }
1619 default:
1620 if (tgt->list[i].offset == OFFSET_INLINED
1621 && !array)
1622 continue;
1623 break;
1624 }
1625
1626 if (GOMP_MAP_NONCONTIG_ARRAY_P (kind & typemask))
1627 {
1628 tgt->list[i].key = &array->key;
1629 tgt->list[i].key->tgt = tgt;
1630 array++;
1631 continue;
1632 }
1633
1634 splay_tree_key k = &array->key;
1635 k->host_start = (uintptr_t) hostaddrs[i];
1636 if (!GOMP_MAP_POINTER_P (kind & typemask))
1637 k->host_end = k->host_start + sizes[i];
1638 else
1639 k->host_end = k->host_start + sizeof (void *);
1640 splay_tree_key n = splay_tree_lookup (mem_map, k);
1641 if (n && n->refcount != REFCOUNT_LINK)
1642 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1643 kind & typemask, false, implicit, cbufp,
1644 refcount_set);
1645 else
1646 {
1647 k->aux = NULL;
1648 if (n && n->refcount == REFCOUNT_LINK)
1649 {
1650 /* Replace target address of the pointer with target address
1651 of mapped object in the splay tree. */
1652 splay_tree_remove (mem_map, n);
1653 k->aux
1654 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1655 k->aux->link_key = n;
1656 }
1657 size_t align = (size_t) 1 << (kind >> rshift);
1658 tgt->list[i].key = k;
1659 k->tgt = tgt;
1660 k->refcount = 0;
1661 k->dynamic_refcount = 0;
1662 if (field_tgt_clear != FIELD_TGT_EMPTY)
1663 {
1664 k->tgt_offset = k->host_start - field_tgt_base
1665 + field_tgt_offset;
1666 if (openmp_p)
1667 {
1668 k->refcount = REFCOUNT_STRUCTELEM;
1669 if (field_tgt_structelem_first == NULL)
1670 {
1671 /* Set to first structure element of sequence. */
1672 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1673 field_tgt_structelem_first = k;
1674 }
1675 else
1676 /* Point to refcount of leading element, but do not
1677 increment again. */
1678 k->structelem_refcount_ptr
1679 = &field_tgt_structelem_first->structelem_refcount;
1680
1681 if (i == field_tgt_clear)
1682 {
1683 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1684 field_tgt_structelem_first = NULL;
1685 }
1686 }
1687 if (i == field_tgt_clear)
1688 field_tgt_clear = FIELD_TGT_EMPTY;
1689 }
1690 else
1691 {
1692 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1693 k->tgt_offset = tgt_size;
1694 tgt_size += k->host_end - k->host_start;
1695 }
1696 /* First increment, from 0 to 1. gomp_increment_refcount
1697 encapsulates the different increment cases, so use this
1698 instead of directly setting 1 during initialization. */
1699 gomp_increment_refcount (k, refcount_set);
1700
1701 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1702 tgt->list[i].always_copy_from
1703 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1704 tgt->list[i].is_attach = false;
1705 tgt->list[i].offset = 0;
1706 tgt->list[i].length = k->host_end - k->host_start;
1707 tgt->refcount++;
1708 array->left = NULL;
1709 array->right = NULL;
1710 splay_tree_insert (mem_map, array);
1711 switch (kind & typemask)
1712 {
1713 case GOMP_MAP_ALLOC:
1714 case GOMP_MAP_FROM:
1715 case GOMP_MAP_FORCE_ALLOC:
1716 case GOMP_MAP_FORCE_FROM:
1717 case GOMP_MAP_ALWAYS_FROM:
1718 break;
1719 case GOMP_MAP_TO:
1720 case GOMP_MAP_TOFROM:
1721 case GOMP_MAP_FORCE_TO:
1722 case GOMP_MAP_FORCE_TOFROM:
1723 case GOMP_MAP_ALWAYS_TO:
1724 case GOMP_MAP_ALWAYS_TOFROM:
1725 gomp_copy_host2dev (devicep, aq,
1726 (void *) (tgt->tgt_start
1727 + k->tgt_offset),
1728 (void *) k->host_start,
1729 k->host_end - k->host_start,
1730 false, cbufp);
1731 break;
1732 case GOMP_MAP_POINTER:
1733 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1734 gomp_map_pointer
1735 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1736 k->tgt_offset, sizes[i], cbufp,
1737 ((kind & typemask)
1738 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1739 break;
1740 case GOMP_MAP_TO_PSET:
1741 gomp_copy_host2dev (devicep, aq,
1742 (void *) (tgt->tgt_start
1743 + k->tgt_offset),
1744 (void *) k->host_start,
1745 k->host_end - k->host_start,
1746 false, cbufp);
1747 tgt->list[i].has_null_ptr_assoc = false;
1748
1749 for (j = i + 1; j < mapnum; j++)
1750 {
1751 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1752 & typemask);
1753 if (!GOMP_MAP_POINTER_P (ptr_kind)
1754 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1755 break;
1756 else if ((uintptr_t) hostaddrs[j] < k->host_start
1757 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1758 > k->host_end))
1759 break;
1760 else
1761 {
1762 tgt->list[j].key = k;
1763 tgt->list[j].copy_from = false;
1764 tgt->list[j].always_copy_from = false;
1765 tgt->list[j].is_attach = false;
1766 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1767 /* For OpenMP, the use of refcount_sets causes
1768 errors if we set k->refcount = 1 above but also
1769 increment it again here, for decrementing will
1770 not properly match, since we decrement only once
1771 for each key's refcount. Therefore avoid this
1772 increment for OpenMP constructs. */
1773 if (!openmp_p)
1774 gomp_increment_refcount (k, refcount_set);
1775 gomp_map_pointer (tgt, aq,
1776 (uintptr_t) *(void **) hostaddrs[j],
1777 k->tgt_offset
1778 + ((uintptr_t) hostaddrs[j]
1779 - k->host_start),
1780 sizes[j], cbufp, false);
1781 }
1782 }
1783 i = j - 1;
1784 break;
1785 case GOMP_MAP_FORCE_PRESENT:
1786 {
1787 /* We already looked up the memory region above and it
1788 was missing. */
1789 size_t size = k->host_end - k->host_start;
1790 gomp_mutex_unlock (&devicep->lock);
1791 #ifdef HAVE_INTTYPES_H
1792 gomp_fatal ("present clause: !acc_is_present (%p, "
1793 "%"PRIu64" (0x%"PRIx64"))",
1794 (void *) k->host_start,
1795 (uint64_t) size, (uint64_t) size);
1796 #else
1797 gomp_fatal ("present clause: !acc_is_present (%p, "
1798 "%lu (0x%lx))", (void *) k->host_start,
1799 (unsigned long) size, (unsigned long) size);
1800 #endif
1801 }
1802 break;
1803 case GOMP_MAP_PRESENT_ALLOC:
1804 case GOMP_MAP_PRESENT_TO:
1805 case GOMP_MAP_PRESENT_FROM:
1806 case GOMP_MAP_PRESENT_TOFROM:
1807 case GOMP_MAP_ALWAYS_PRESENT_TO:
1808 case GOMP_MAP_ALWAYS_PRESENT_FROM:
1809 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
1810 /* We already looked up the memory region above and it
1811 was missing. */
1812 gomp_mutex_unlock (&devicep->lock);
1813 gomp_fatal ("present clause: !omp_target_is_present "
1814 "(%p, %d)",
1815 (void *) k->host_start, devicep->target_id);
1816 break;
1817 case GOMP_MAP_FORCE_DEVICEPTR:
1818 assert (k->host_end - k->host_start == sizeof (void *));
1819 gomp_copy_host2dev (devicep, aq,
1820 (void *) (tgt->tgt_start
1821 + k->tgt_offset),
1822 (void *) k->host_start,
1823 sizeof (void *), false, cbufp);
1824 break;
1825 default:
1826 gomp_mutex_unlock (&devicep->lock);
1827 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1828 kind);
1829 }
1830
1831 if (k->aux && k->aux->link_key)
1832 {
1833 /* Set link pointer on target to the device address of the
1834 mapped object. */
1835 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1836 /* We intentionally do not use coalescing here, as it's not
1837 data allocated by the current call to this function. */
1838 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1839 &tgt_addr, sizeof (void *), true, NULL);
1840 }
1841 array++;
1842 }
1843 }
1844
1845 /* Processing of non-contiguous array rows. */
1846 if (nca_info)
1847 {
1848 struct target_var_desc *next_var_desc = &tgt->list[mapnum];
1849 for (i = 0; i < nca_info->num_ncarray; i++)
1850 {
1851 struct goacc_ncarray *nca = &nca_info->ncarray[i];
1852 int kind = get_kind (short_mapkind, kinds, nca->map_index);
1853 size_t align = (size_t) 1 << (kind >> rshift);
1854 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1855
1856 assert (nca->ptr == hostaddrs[nca->map_index]);
1857
1858 /* For the map of the non-contiguous array itself, adjust so that
1859 the passed device address points to the beginning of the
1860 ptrblock. Remember to adjust the first-dimension's bias here. */
1861 tgt->list[nca->map_index].key->tgt_offset
1862 = tgt_size - nca->descr->dims[0].base;
1863
1864 void *target_ptrblock = (void*) tgt->tgt_start + tgt_size;
1865 tgt_size += nca->ptrblock_size;
1866
1867 /* Add splay key for each data row in current non-contiguous
1868 array. */
1869 for (size_t j = 0; j < nca->data_row_num; j++)
1870 {
1871 struct target_var_desc *row_desc = next_var_desc++;
1872 void *row = nca->data_rows[j];
1873 cur_node.host_start = (uintptr_t) row;
1874 cur_node.host_end = cur_node.host_start + nca->data_row_size;
1875 splay_tree_key k = splay_tree_lookup (mem_map, &cur_node);
1876 if (k)
1877 {
1878 assert (k->refcount != REFCOUNT_LINK);
1879 gomp_map_vars_existing (devicep, aq, k, &cur_node, row_desc,
1880 kind & typemask, false, false,
1881 cbufp, refcount_set);
1882 }
1883 else
1884 {
1885 tgt->refcount++;
1886 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1887
1888 k = &array->key;
1889 k->host_start = (uintptr_t) row;
1890 k->host_end = k->host_start + nca->data_row_size;
1891
1892 k->tgt = tgt;
1893 k->refcount = 1;
1894 k->dynamic_refcount = 0;
1895 k->aux = NULL;
1896 k->tgt_offset = tgt_size;
1897
1898 tgt_size += nca->data_row_size;
1899
1900 row_desc->key = k;
1901 row_desc->copy_from
1902 = GOMP_MAP_COPY_FROM_P (kind & typemask);
1903 row_desc->always_copy_from
1904 = GOMP_MAP_COPY_FROM_P (kind & typemask);
1905 row_desc->is_attach = false;
1906 row_desc->offset = 0;
1907 row_desc->length = nca->data_row_size;
1908
1909 array->left = NULL;
1910 array->right = NULL;
1911 splay_tree_insert (mem_map, array);
1912
1913 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1914 gomp_copy_host2dev (devicep, aq,
1915 (void *) tgt->tgt_start + k->tgt_offset,
1916 (void *) k->host_start,
1917 nca->data_row_size, false,
1918 cbufp);
1919 array++;
1920 }
1921 nca->tgt_data_rows[j]
1922 = (void *) (k->tgt->tgt_start + k->tgt_offset);
1923 }
1924
1925 /* Now that we have the target memory allocated, and the target offsets
1926 of all row blocks assigned and calculated, we can construct the
1927 accelerator-side ptrblock and copy it in. */
1928 if (nca->ptrblock_size)
1929 {
1930 void *ptrblock = goacc_noncontig_array_create_ptrblock
1931 (nca, target_ptrblock);
1932 gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock,
1933 nca->ptrblock_size, false, cbufp);
1934 free (ptrblock);
1935 }
1936 }
1937 }
1938 }
1939
1940 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1941 {
1942 for (i = 0; i < mapnum; i++)
1943 {
1944 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1945 gomp_copy_host2dev (devicep, aq,
1946 (void *) (tgt->tgt_start + i * sizeof (void *)),
1947 (void *) &cur_node.tgt_offset, sizeof (void *),
1948 true, cbufp);
1949 }
1950 }
1951
1952 if (cbufp)
1953 {
1954 /* See 'gomp_coalesce_buf_add'. */
1955 assert (!aq);
1956
1957 long c = 0;
1958 for (c = 0; c < cbuf.chunk_cnt; ++c)
1959 gomp_copy_host2dev (devicep, aq,
1960 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1961 (char *) cbuf.buf + (cbuf.chunks[c].start
1962 - cbuf.chunks[0].start),
1963 cbuf.chunks[c].end - cbuf.chunks[c].start,
1964 true, NULL);
1965 free (cbuf.buf);
1966 cbuf.buf = NULL;
1967 cbufp = NULL;
1968 }
1969
1970 /* If the variable from "omp target enter data" map-list was already mapped,
1971 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1972 gomp_exit_data. */
1973 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1974 {
1975 free (tgt);
1976 tgt = NULL;
1977 }
1978
1979 gomp_mutex_unlock (&devicep->lock);
1980 return tgt;
1981 }
1982
1983 attribute_hidden struct target_mem_desc *
1984 gomp_map_vars_openacc (struct gomp_device_descr *devicep,
1985 struct goacc_asyncqueue *aq, size_t mapnum,
1986 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1987 void *nca_info)
1988 {
1989 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, NULL,
1990 sizes, (void *) kinds,
1991 (struct goacc_ncarray_info *) nca_info,
1992 true, NULL, GOMP_MAP_VARS_OPENACC);
1993 }
1994
1995 static struct target_mem_desc *
1996 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1997 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1998 bool short_mapkind, htab_t *refcount_set,
1999 enum gomp_map_vars_kind pragma_kind)
2000 {
2001 /* This management of a local refcount_set is for convenience of callers
2002 who do not share a refcount_set over multiple map/unmap uses. */
2003 htab_t local_refcount_set = NULL;
2004 if (refcount_set == NULL)
2005 {
2006 local_refcount_set = htab_create (mapnum);
2007 refcount_set = &local_refcount_set;
2008 }
2009
2010 struct target_mem_desc *tgt;
2011 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
2012 sizes, kinds, NULL, short_mapkind,
2013 refcount_set, pragma_kind);
2014 if (local_refcount_set)
2015 htab_free (local_refcount_set);
2016
2017 return tgt;
2018 }
2019
2020 attribute_hidden struct target_mem_desc *
2021 goacc_map_vars (struct gomp_device_descr *devicep,
2022 struct goacc_asyncqueue *aq, size_t mapnum,
2023 void **hostaddrs, void **devaddrs, size_t *sizes,
2024 void *kinds, bool short_mapkind,
2025 enum gomp_map_vars_kind pragma_kind)
2026 {
2027 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
2028 sizes, kinds, NULL, short_mapkind, NULL,
2029 GOMP_MAP_VARS_OPENACC | pragma_kind);
2030 }
2031
2032 static void
2033 gomp_unmap_tgt (struct target_mem_desc *tgt)
2034 {
2035 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
2036 if (tgt->tgt_end)
2037 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
2038
2039 free (tgt->array);
2040 free (tgt);
2041 }
2042
2043 static bool
2044 gomp_unref_tgt (void *ptr)
2045 {
2046 bool is_tgt_unmapped = false;
2047
2048 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
2049
2050 if (tgt->refcount > 1)
2051 tgt->refcount--;
2052 else
2053 {
2054 gomp_unmap_tgt (tgt);
2055 is_tgt_unmapped = true;
2056 }
2057
2058 return is_tgt_unmapped;
2059 }
2060
2061 static void
2062 gomp_unref_tgt_void (void *ptr)
2063 {
2064 (void) gomp_unref_tgt (ptr);
2065 }
2066
2067 static void
2068 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
2069 {
2070 splay_tree_remove (sp, k);
2071 if (k->aux)
2072 {
2073 if (k->aux->link_key)
2074 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
2075 if (k->aux->attach_count)
2076 free (k->aux->attach_count);
2077 free (k->aux);
2078 k->aux = NULL;
2079 }
2080 }
2081
2082 static inline __attribute__((always_inline)) bool
2083 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
2084 struct goacc_asyncqueue *aq)
2085 {
2086 bool is_tgt_unmapped = false;
2087
2088 if (REFCOUNT_STRUCTELEM_P (k->refcount))
2089 {
2090 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
2091 /* Infer the splay_tree_key of the first structelem key using the
2092 pointer to the first structelem_refcount. */
2093 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
2094 - offsetof (struct splay_tree_key_s,
2095 structelem_refcount));
2096 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
2097
2098 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
2099 with the splay_tree_keys embedded inside. */
2100 splay_tree_node node =
2101 (splay_tree_node) ((char *) k
2102 - offsetof (struct splay_tree_node_s, key));
2103 while (true)
2104 {
2105 /* Start from the _FIRST key and continue for all following
2106 sibling keys. */
2107 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2108 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
2109 break;
2110 else
2111 k = &(++node)->key;
2112 }
2113 }
2114 else
2115 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2116
2117 if (aq)
2118 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2119 (void *) k->tgt);
2120 else
2121 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2122 return is_tgt_unmapped;
2123 }
2124
2125 attribute_hidden bool
2126 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2127 {
2128 return gomp_remove_var_internal (devicep, k, NULL);
2129 }
2130
2131 /* Remove a variable asynchronously. This actually removes the variable
2132 mapping immediately, but retains the linked target_mem_desc until the
2133 asynchronous operation has completed (as it may still refer to target
2134 memory). The device lock must be held before entry, and remains locked on
2135 exit. */
2136
2137 attribute_hidden void
2138 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2139 struct goacc_asyncqueue *aq)
2140 {
2141 (void) gomp_remove_var_internal (devicep, k, aq);
2142 }
2143
2144 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2145 variables back from device to host; if it is false, it is assumed that this
2146 has been done already. */
2147
2148 static inline __attribute__((always_inline)) void
2149 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2150 htab_t *refcount_set, struct goacc_asyncqueue *aq)
2151 {
2152 struct gomp_device_descr *devicep = tgt->device_descr;
2153
2154 if (tgt->list_count == 0)
2155 {
2156 free (tgt);
2157 return;
2158 }
2159
2160 gomp_mutex_lock (&devicep->lock);
2161 if (devicep->state == GOMP_DEVICE_FINALIZED)
2162 {
2163 gomp_mutex_unlock (&devicep->lock);
2164 free (tgt->array);
2165 free (tgt);
2166 return;
2167 }
2168
2169 size_t i;
2170
2171 /* We must perform detachments before any copies back to the host. */
2172 for (i = 0; i < tgt->list_count; i++)
2173 {
2174 splay_tree_key k = tgt->list[i].key;
2175
2176 if (k != NULL && tgt->list[i].is_attach)
2177 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2178 + tgt->list[i].offset,
2179 false, NULL);
2180 }
2181
2182 for (i = 0; i < tgt->list_count; i++)
2183 {
2184 splay_tree_key k = tgt->list[i].key;
2185 if (k == NULL)
2186 continue;
2187
2188 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2189 counts ('n->refcount', 'n->dynamic_refcount'). */
2190 if (tgt->list[i].is_attach)
2191 continue;
2192
2193 bool do_copy, do_remove;
2194 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2195
2196 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2197 || tgt->list[i].always_copy_from)
2198 gomp_copy_dev2host (devicep, aq,
2199 (void *) (k->host_start + tgt->list[i].offset),
2200 (void *) (k->tgt->tgt_start + k->tgt_offset
2201 + tgt->list[i].offset),
2202 tgt->list[i].length);
2203 if (do_remove)
2204 {
2205 struct target_mem_desc *k_tgt = k->tgt;
2206 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2207 /* It would be bad if TGT got unmapped while we're still iterating
2208 over its LIST_COUNT entries and still expect to use it in the
2209 following code. */
2210 assert (!is_tgt_unmapped
2211 || k_tgt != tgt);
2212 }
2213 }
2214
2215 if (aq)
2216 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2217 (void *) tgt);
2218 else
2219 gomp_unref_tgt ((void *) tgt);
2220
2221 gomp_mutex_unlock (&devicep->lock);
2222 }
2223
2224 static void
2225 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2226 htab_t *refcount_set)
2227 {
2228 /* This management of a local refcount_set is for convenience of callers
2229 who do not share a refcount_set over multiple map/unmap uses. */
2230 htab_t local_refcount_set = NULL;
2231 if (refcount_set == NULL)
2232 {
2233 local_refcount_set = htab_create (tgt->list_count);
2234 refcount_set = &local_refcount_set;
2235 }
2236
2237 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2238
2239 if (local_refcount_set)
2240 htab_free (local_refcount_set);
2241 }
2242
2243 attribute_hidden void
2244 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2245 struct goacc_asyncqueue *aq)
2246 {
2247 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2248 }
2249
2250 static void
2251 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2252 size_t *sizes, void *kinds, bool short_mapkind)
2253 {
2254 size_t i;
2255 struct splay_tree_key_s cur_node;
2256 const int typemask = short_mapkind ? 0xff : 0x7;
2257
2258 if (!devicep)
2259 return;
2260
2261 if (mapnum == 0)
2262 return;
2263
2264 gomp_mutex_lock (&devicep->lock);
2265 if (devicep->state == GOMP_DEVICE_FINALIZED)
2266 {
2267 gomp_mutex_unlock (&devicep->lock);
2268 return;
2269 }
2270
2271 for (i = 0; i < mapnum; i++)
2272 if (sizes[i])
2273 {
2274 cur_node.host_start = (uintptr_t) hostaddrs[i];
2275 cur_node.host_end = cur_node.host_start + sizes[i];
2276 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2277 if (n)
2278 {
2279 int kind = get_kind (short_mapkind, kinds, i);
2280 if (n->host_start > cur_node.host_start
2281 || n->host_end < cur_node.host_end)
2282 {
2283 gomp_mutex_unlock (&devicep->lock);
2284 gomp_fatal ("Trying to update [%p..%p) object when "
2285 "only [%p..%p) is mapped",
2286 (void *) cur_node.host_start,
2287 (void *) cur_node.host_end,
2288 (void *) n->host_start,
2289 (void *) n->host_end);
2290 }
2291
2292 if (n->aux && n->aux->attach_count)
2293 {
2294 uintptr_t addr = cur_node.host_start;
2295 while (addr < cur_node.host_end)
2296 {
2297 /* We have to be careful not to overwrite still attached
2298 pointers during host<->device updates. */
2299 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2300 if (n->aux->attach_count[i] == 0)
2301 {
2302 void *devaddr = (void *) (n->tgt->tgt_start
2303 + n->tgt_offset
2304 + addr - n->host_start);
2305 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2306 gomp_copy_host2dev (devicep, NULL,
2307 devaddr, (void *) addr,
2308 sizeof (void *), false, NULL);
2309 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2310 gomp_copy_dev2host (devicep, NULL,
2311 (void *) addr, devaddr,
2312 sizeof (void *));
2313 }
2314 addr += sizeof (void *);
2315 }
2316 }
2317 else
2318 {
2319 void *hostaddr = (void *) cur_node.host_start;
2320 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2321 + cur_node.host_start
2322 - n->host_start);
2323 size_t size = cur_node.host_end - cur_node.host_start;
2324
2325 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2326 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2327 false, NULL);
2328 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2329 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2330 }
2331 }
2332 else
2333 {
2334 int kind = get_kind (short_mapkind, kinds, i);
2335
2336 if (GOMP_MAP_PRESENT_P (kind))
2337 {
2338 /* We already looked up the memory region above and it
2339 was missing. */
2340 gomp_mutex_unlock (&devicep->lock);
2341 gomp_fatal ("present clause: !omp_target_is_present "
2342 "(%p, %d)",
2343 (void *) hostaddrs[i], devicep->target_id);
2344 }
2345 }
2346 }
2347 gomp_mutex_unlock (&devicep->lock);
2348 }
2349
2350 static struct gomp_offload_icv_list *
2351 gomp_get_offload_icv_item (int dev_num)
2352 {
2353 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2354 while (l != NULL && l->device_num != dev_num)
2355 l = l->next;
2356
2357 return l;
2358 }
2359
2360 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2361 depending on the device number and the variable hierarchy
2362 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2363 device and thus no item with that device number is contained in
2364 gomp_offload_icv_list, then a new item is created and added to the list. */
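/* For example (an illustrative sketch of the selection logic implemented
   below, not additional behavior): for dev_num == 42, the nteams ICV is
   taken from the _DEV_42 entry if its GOMP_ICV_NTEAMS flag is set, else
   from the _DEV entry, else from the _ALL entry, and finally from
   gomp_default_icv_values.nteams_var.  */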
2365
2366 static struct gomp_offload_icvs *
2367 get_gomp_offload_icvs (int dev_num)
2368 {
2369 struct gomp_icv_list *dev
2370 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2371 struct gomp_icv_list *all
2372 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2373 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2374 struct gomp_offload_icv_list *offload_icvs
2375 = gomp_get_offload_icv_item (dev_num);
2376
2377 if (offload_icvs != NULL)
2378 return &offload_icvs->icvs;
2379
2380 struct gomp_offload_icv_list *new
2381 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2382
2383 new->device_num = dev_num;
2384 new->icvs.device_num = dev_num;
2385 new->next = gomp_offload_icv_list;
2386
2387 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2388 new->icvs.nteams = dev_x->icvs.nteams_var;
2389 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2390 new->icvs.nteams = dev->icvs.nteams_var;
2391 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2392 new->icvs.nteams = all->icvs.nteams_var;
2393 else
2394 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2395
2396 if (dev_x != NULL
2397 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2398 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2399 else if (dev != NULL
2400 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2401 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2402 else if (all != NULL
2403 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2404 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2405 else
2406 new->icvs.teams_thread_limit
2407 = gomp_default_icv_values.teams_thread_limit_var;
2408
2409 if (dev_x != NULL
2410 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2411 new->icvs.default_device = dev_x->icvs.default_device_var;
2412 else if (dev != NULL
2413 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2414 new->icvs.default_device = dev->icvs.default_device_var;
2415 else if (all != NULL
2416 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2417 new->icvs.default_device = all->icvs.default_device_var;
2418 else
2419 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2420
2421 gomp_offload_icv_list = new;
2422 return &new->icvs;
2423 }
2424
2425 /* Load the image pointed to by TARGET_DATA to the device specified by
2426 DEVICEP, and insert into the splay tree the mapping between addresses
2427 from HOST_TABLE and from the loaded target image. We rely on the host
2428 and device compilers emitting variables and functions in the same order. */
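/* A sketch of the assumed HOST_TABLE layout (derived from the accesses
   below; the symbol names are hypothetical placeholders):

     void *host_table[] = { &__offload_func_table, &__offload_funcs_end,
                            &__offload_var_table, &__offload_vars_end };

   i.e. four consecutive pointers delimiting the function table (addresses
   only) and the variable table (address/size pairs), so that
   (host_vars_end - host_var_table) / 2 yields the number of variables.  */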
2429
2430 static void
2431 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2432 const void *host_table, const void *target_data,
2433 bool is_register_lock)
2434 {
2435 void **host_func_table = ((void ***) host_table)[0];
2436 void **host_funcs_end = ((void ***) host_table)[1];
2437 void **host_var_table = ((void ***) host_table)[2];
2438 void **host_vars_end = ((void ***) host_table)[3];
2439
2440 /* The func table contains only addresses, the var table contains addresses
2441 and corresponding sizes. */
2442 int num_funcs = host_funcs_end - host_func_table;
2443 int num_vars = (host_vars_end - host_var_table) / 2;
2444
2445 /* Load image to device and get target addresses for the image. */
2446 struct addr_pair *target_table = NULL;
2447 uint64_t *rev_target_fn_table = NULL;
2448 int i, num_target_entries;
2449
2450 /* With reverse offload, insert also target-host addresses. */
2451 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2452
2453 num_target_entries
2454 = devicep->load_image_func (devicep->target_id, version,
2455 target_data, &target_table,
2456 rev_lookup ? &rev_target_fn_table : NULL);
2457
2458 if (num_target_entries != num_funcs + num_vars
2459 /* "+1" due to the additional ICV struct. */
2460 && num_target_entries != num_funcs + num_vars + 1)
2461 {
2462 gomp_mutex_unlock (&devicep->lock);
2463 if (is_register_lock)
2464 gomp_mutex_unlock (&register_lock);
2465 gomp_fatal ("Cannot map target functions or variables"
2466 " (expected %u, have %u)", num_funcs + num_vars,
2467 num_target_entries);
2468 }
2469
2470 /* Insert host-target address mapping into splay tree. */
2471 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2472 /* "+1" due to the additional ICV struct. */
2473 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2474 * sizeof (*tgt->array));
2475 if (rev_target_fn_table)
2476 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2477 else
2478 tgt->rev_array = NULL;
2479 tgt->refcount = REFCOUNT_INFINITY;
2480 tgt->tgt_start = 0;
2481 tgt->tgt_end = 0;
2482 tgt->to_free = NULL;
2483 tgt->prev = NULL;
2484 tgt->list_count = 0;
2485 tgt->device_descr = devicep;
2486 splay_tree_node array = tgt->array;
2487 reverse_splay_tree_node rev_array = tgt->rev_array;
2488
2489 for (i = 0; i < num_funcs; i++)
2490 {
2491 splay_tree_key k = &array->key;
2492 k->host_start = (uintptr_t) host_func_table[i];
2493 k->host_end = k->host_start + 1;
2494 k->tgt = tgt;
2495 k->tgt_offset = target_table[i].start;
2496 k->refcount = REFCOUNT_INFINITY;
2497 k->dynamic_refcount = 0;
2498 k->aux = NULL;
2499 array->left = NULL;
2500 array->right = NULL;
2501 splay_tree_insert (&devicep->mem_map, array);
2502 if (rev_target_fn_table)
2503 {
2504 reverse_splay_tree_key k2 = &rev_array->key;
2505 k2->dev = rev_target_fn_table[i];
2506 k2->k = k;
2507 rev_array->left = NULL;
2508 rev_array->right = NULL;
2509 if (k2->dev != 0)
2510 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2511 rev_array++;
2512 }
2513 array++;
2514 }
2515
2516 /* Most significant bit of the size in host and target tables marks
2517 "omp declare target link" variables. */
2518 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2519 const uintptr_t size_mask = ~link_bit;
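/* E.g. (illustration only): on a 64-bit host, an "omp declare target link"
   variable of size 8 would be recorded in the table as
   ((uintptr_t) 1 << 63) | 8; masking with size_mask recovers the real size,
   while link_bit flags the link semantics.  */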
2520
2521 for (i = 0; i < num_vars; i++)
2522 {
2523 struct addr_pair *target_var = &target_table[num_funcs + i];
2524 uintptr_t target_size = target_var->end - target_var->start;
2525 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2526
2527 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2528 {
2529 gomp_mutex_unlock (&devicep->lock);
2530 if (is_register_lock)
2531 gomp_mutex_unlock (&register_lock);
2532 gomp_fatal ("Cannot map target variables (size mismatch)");
2533 }
2534
2535 splay_tree_key k = &array->key;
2536 k->host_start = (uintptr_t) host_var_table[i * 2];
2537 k->host_end
2538 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2539 k->tgt = tgt;
2540 k->tgt_offset = target_var->start;
2541 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2542 k->dynamic_refcount = 0;
2543 k->aux = NULL;
2544 array->left = NULL;
2545 array->right = NULL;
2546 splay_tree_insert (&devicep->mem_map, array);
2547 array++;
2548 }
2549
2550 /* The last entry is for an ICVs variable.
2551 Tolerate the case where the plugin does not return that entry. */
2552 if (num_funcs + num_vars < num_target_entries)
2553 {
2554 struct addr_pair *var = &target_table[num_funcs + num_vars];
2555
2556 /* Start address will be non-zero for the ICVs variable if
2557 the variable was found in this image. */
2558 if (var->start != 0)
2559 {
2560 /* The index of the devicep within devices[] is regarded as its
2561 'device number', which is different from the per-device type
2562 devicep->target_id. */
2563 int dev_num = (int) (devicep - &devices[0]);
2564 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2565 size_t var_size = var->end - var->start;
2566 if (var_size != sizeof (struct gomp_offload_icvs))
2567 {
2568 gomp_mutex_unlock (&devicep->lock);
2569 if (is_register_lock)
2570 gomp_mutex_unlock (&register_lock);
2571 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2572 "format");
2573 }
2574 /* Copy the ICVs variable to its place in device memory, thereby
2575 actually putting its device number into effect. */
2576 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2577 var_size, false, NULL);
2578 splay_tree_key k = &array->key;
2579 k->host_start = (uintptr_t) icvs;
2580 k->host_end =
2581 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2582 k->tgt = tgt;
2583 k->tgt_offset = var->start;
2584 k->refcount = REFCOUNT_INFINITY;
2585 k->dynamic_refcount = 0;
2586 k->aux = NULL;
2587 array->left = NULL;
2588 array->right = NULL;
2589 splay_tree_insert (&devicep->mem_map, array);
2590 array++;
2591 }
2592 }
2593
2594 free (target_table);
2595 }
2596
2597 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
2598 The device must be locked. */
2599
2600 static void
2601 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2602 unsigned version,
2603 const void *host_table, const void *target_data)
2604 {
2605 void **host_func_table = ((void ***) host_table)[0];
2606 void **host_funcs_end = ((void ***) host_table)[1];
2607 void **host_var_table = ((void ***) host_table)[2];
2608 void **host_vars_end = ((void ***) host_table)[3];
2609
2610 /* The func table contains only addresses, the var table contains addresses
2611 and corresponding sizes. */
2612 int num_funcs = host_funcs_end - host_func_table;
2613 int num_vars = (host_vars_end - host_var_table) / 2;
2614
2615 struct splay_tree_key_s k;
2616 splay_tree_key node = NULL;
2617
2618 /* Find the mapping at the start of the node array. */
2619 if (num_funcs || num_vars)
2620 {
2621 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2622 : (uintptr_t) host_var_table[0]);
2623 k.host_end = k.host_start + 1;
2624 node = splay_tree_lookup (&devicep->mem_map, &k);
2625 }
2626
2627 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2628 {
2629 gomp_mutex_unlock (&devicep->lock);
2630 gomp_fatal ("image unload fail");
2631 }
2632 if (devicep->mem_map_rev.root)
2633 {
2634 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2635 real allocation. */
2636 assert (node && node->tgt && node->tgt->rev_array);
2637 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2638 free (node->tgt->rev_array);
2639 devicep->mem_map_rev.root = NULL;
2640 }
2641
2642 /* Remove mappings from splay tree. */
2643 int i;
2644 for (i = 0; i < num_funcs; i++)
2645 {
2646 k.host_start = (uintptr_t) host_func_table[i];
2647 k.host_end = k.host_start + 1;
2648 splay_tree_remove (&devicep->mem_map, &k);
2649 }
2650
2651 /* Most significant bit of the size in host and target tables marks
2652 "omp declare target link" variables. */
2653 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2654 const uintptr_t size_mask = ~link_bit;
2655 bool is_tgt_unmapped = false;
2656
2657 for (i = 0; i < num_vars; i++)
2658 {
2659 k.host_start = (uintptr_t) host_var_table[i * 2];
2660 k.host_end
2661 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2662
2663 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2664 splay_tree_remove (&devicep->mem_map, &k);
2665 else
2666 {
2667 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2668 is_tgt_unmapped = gomp_remove_var (devicep, n);
2669 }
2670 }
2671
2672 if (node && !is_tgt_unmapped)
2673 {
2674 free (node->tgt);
2675 free (node);
2676 }
2677 }
2678
2679 static void
2680 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2681 {
2682 char *end = buf + size, *p = buf;
2683 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2684 p += snprintf (p, end - p, "unified_address");
2685 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2686 p += snprintf (p, end - p, "%sunified_shared_memory",
2687 (p == buf ? "" : ", "));
2688 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2689 p += snprintf (p, end - p, "%sreverse_offload",
2690 (p == buf ? "" : ", "));
2691 }
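/* A usage sketch (mirroring the calls further below): with
   requires_mask == (GOMP_REQUIRES_UNIFIED_ADDRESS
                     | GOMP_REQUIRES_REVERSE_OFFLOAD),
   the buffer would end up containing "unified_address, reverse_offload".  */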
2692
2693 /* This function should be called from every offload image while loading.
2694 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2695 the target, and DATA. */
2696
2697 void
2698 GOMP_offload_register_ver (unsigned version, const void *host_table,
2699 int target_type, const void *data)
2700 {
2701 int i;
2702
2703 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2704 gomp_fatal ("Library too old for offload (version %u < %u)",
2705 GOMP_VERSION, GOMP_VERSION_LIB (version));
2706
2707 int omp_req;
2708 const void *target_data;
2709 if (GOMP_VERSION_LIB (version) > 1)
2710 {
2711 omp_req = (int) (size_t) ((void **) data)[0];
2712 target_data = &((void **) data)[1];
2713 }
2714 else
2715 {
2716 omp_req = 0;
2717 target_data = data;
2718 }
2719
2720 gomp_mutex_lock (&register_lock);
2721
2722 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2723 {
2724 char buf1[sizeof ("unified_address, unified_shared_memory, "
2725 "reverse_offload")];
2726 char buf2[sizeof ("unified_address, unified_shared_memory, "
2727 "reverse_offload")];
2728 gomp_requires_to_name (buf2, sizeof (buf2),
2729 omp_req != GOMP_REQUIRES_TARGET_USED
2730 ? omp_req : omp_requires_mask);
2731 if (omp_req != GOMP_REQUIRES_TARGET_USED
2732 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2733 {
2734 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2735 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2736 "in multiple compilation units: '%s' vs. '%s'",
2737 buf1, buf2);
2738 }
2739 else
2740 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2741 "some compilation units", buf2);
2742 }
2743 omp_requires_mask = omp_req;
2744
2745 /* Load image to all initialized devices. */
2746 for (i = 0; i < num_devices; i++)
2747 {
2748 struct gomp_device_descr *devicep = &devices[i];
2749 gomp_mutex_lock (&devicep->lock);
2750 if (devicep->type == target_type
2751 && devicep->state == GOMP_DEVICE_INITIALIZED)
2752 gomp_load_image_to_device (devicep, version,
2753 host_table, target_data, true);
2754 gomp_mutex_unlock (&devicep->lock);
2755 }
2756
2757 /* Insert image to array of pending images. */
2758 offload_images
2759 = gomp_realloc_unlock (offload_images,
2760 (num_offload_images + 1)
2761 * sizeof (struct offload_image_descr));
2762 offload_images[num_offload_images].version = version;
2763 offload_images[num_offload_images].type = target_type;
2764 offload_images[num_offload_images].host_table = host_table;
2765 offload_images[num_offload_images].target_data = target_data;
2766
2767 num_offload_images++;
2768 gomp_mutex_unlock (&register_lock);
2769 }
2770
2771 /* Legacy entry point. */
2772
2773 void
2774 GOMP_offload_register (const void *host_table, int target_type,
2775 const void *target_data)
2776 {
2777 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2778 }
2779
2780 /* This function should be called from every offload image while unloading.
2781 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2782 the target, and DATA. */
2783
2784 void
2785 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2786 int target_type, const void *data)
2787 {
2788 int i;
2789
2790 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2791 gomp_fatal ("Library too old for offload (version %u < %u)",
2792 GOMP_VERSION, GOMP_VERSION_LIB (version));
2793
2794 const void *target_data;
2795 if (GOMP_VERSION_LIB (version) > 1)
2796 target_data = &((void **) data)[1];
2797 else
2798 target_data = data;
2799
2800 gomp_mutex_lock (&register_lock);
2801
2802 /* Unload image from all initialized devices. */
2803 for (i = 0; i < num_devices; i++)
2804 {
2805 struct gomp_device_descr *devicep = &devices[i];
2806 gomp_mutex_lock (&devicep->lock);
2807 if (devicep->type == target_type
2808 && devicep->state == GOMP_DEVICE_INITIALIZED)
2809 gomp_unload_image_from_device (devicep, version,
2810 host_table, target_data);
2811 gomp_mutex_unlock (&devicep->lock);
2812 }
2813
2814 /* Remove image from array of pending images. */
2815 for (i = 0; i < num_offload_images; i++)
2816 if (offload_images[i].target_data == target_data)
2817 {
2818 offload_images[i] = offload_images[--num_offload_images];
2819 break;
2820 }
2821
2822 gomp_mutex_unlock (&register_lock);
2823 }
2824
2825 /* Legacy entry point. */
2826
2827 void
2828 GOMP_offload_unregister (const void *host_table, int target_type,
2829 const void *target_data)
2830 {
2831 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2832 }
2833
2834 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2835 must be locked on entry, and remains locked on return. */
2836
2837 attribute_hidden void
2838 gomp_init_device (struct gomp_device_descr *devicep)
2839 {
2840 int i;
2841 if (!devicep->init_device_func (devicep->target_id))
2842 {
2843 gomp_mutex_unlock (&devicep->lock);
2844 gomp_fatal ("device initialization failed");
2845 }
2846
2847 /* Load to the device all images registered so far. */
2848 for (i = 0; i < num_offload_images; i++)
2849 {
2850 struct offload_image_descr *image = &offload_images[i];
2851 if (image->type == devicep->type)
2852 gomp_load_image_to_device (devicep, image->version,
2853 image->host_table, image->target_data,
2854 false);
2855 }
2856
2857 /* Initialize OpenACC asynchronous queues. */
2858 goacc_init_asyncqueues (devicep);
2859
2860 devicep->state = GOMP_DEVICE_INITIALIZED;
2861 }
2862
2863 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2864 must be locked on entry, and remains locked on return. */
2865
2866 attribute_hidden bool
2867 gomp_fini_device (struct gomp_device_descr *devicep)
2868 {
2869 bool ret = goacc_fini_asyncqueues (devicep);
2870 ret &= devicep->fini_device_func (devicep->target_id);
2871 devicep->state = GOMP_DEVICE_FINALIZED;
2872 return ret;
2873 }
2874
2875 attribute_hidden void
2876 gomp_unload_device (struct gomp_device_descr *devicep)
2877 {
2878 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2879 {
2880 unsigned i;
2881
2882 /* Unload from the device all images currently registered. */
2883 for (i = 0; i < num_offload_images; i++)
2884 {
2885 struct offload_image_descr *image = &offload_images[i];
2886 if (image->type == devicep->type)
2887 gomp_unload_image_from_device (devicep, image->version,
2888 image->host_table,
2889 image->target_data);
2890 }
2891 }
2892 }
2893
2894 /* Host fallback for GOMP_target{,_ext} routines. */
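/* (A usage note, derived from the check below: when the user sets
   OMP_TARGET_OFFLOAD=mandatory, i.e. gomp_target_offload_var is
   GOMP_TARGET_OFFLOAD_MANDATORY, falling back here while DEVICEP is
   non-NULL is a fatal error rather than a silent host run.)  */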
2895
2896 static void
2897 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2898 struct gomp_device_descr *devicep, void **args)
2899 {
2900 struct gomp_thread old_thr, *thr = gomp_thread ();
2901
2902 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2903 && devicep != NULL)
2904 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2905 "be used for offloading");
2906
2907 old_thr = *thr;
2908 memset (thr, '\0', sizeof (*thr));
2909 if (gomp_places_list)
2910 {
2911 thr->place = old_thr.place;
2912 thr->ts.place_partition_len = gomp_places_list_len;
2913 }
2914 if (args)
2915 while (*args)
2916 {
2917 intptr_t id = (intptr_t) *args++, val;
2918 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2919 val = (intptr_t) *args++;
2920 else
2921 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2922 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2923 continue;
2924 id &= GOMP_TARGET_ARG_ID_MASK;
2925 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2926 continue;
2927 val = val > INT_MAX ? INT_MAX : val;
2928 if (val)
2929 gomp_icv (true)->thread_limit_var = val;
2930 break;
2931 }
2932
2933 fn (hostaddrs);
2934 gomp_free_thread (thr);
2935 *thr = old_thr;
2936 }
2937
2938 /* Calculate alignment and size requirements of a private copy of data shared
2939 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2940
2941 static inline void
2942 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2943 unsigned short *kinds, size_t *tgt_align,
2944 size_t *tgt_size)
2945 {
2946 size_t i;
2947 for (i = 0; i < mapnum; i++)
2948 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2949 {
2950 size_t align = (size_t) 1 << (kinds[i] >> 8);
2951 if (*tgt_align < align)
2952 *tgt_align = align;
2953 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2954 *tgt_size += sizes[i];
2955 }
2956 }
2957
2958 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2959
2960 static inline void
2961 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2962 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2963 size_t tgt_size)
2964 {
2965 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2966 if (al)
2967 tgt += tgt_align - al;
2968 tgt_size = 0;
2969 size_t i;
2970 for (i = 0; i < mapnum; i++)
2971 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2972 {
2973 size_t align = (size_t) 1 << (kinds[i] >> 8);
2974 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2975 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2976 hostaddrs[i] = tgt + tgt_size;
2977 tgt_size = tgt_size + sizes[i];
2978 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2979 {
2980 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2981 ++i;
2982 }
2983 }
2984 }
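/* A minimal sketch of the usual staging sequence for the two helpers above
   (the same pattern is used by the GOMP_target_ext paths below):

     size_t tgt_align = 0, tgt_size = 0;
     calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                          &tgt_align, &tgt_size);
     if (tgt_align)
       {
         char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
         copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                 tgt_align, tgt_size);
       }
*/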
2985
2986 /* Helper function of GOMP_target{,_ext} routines. */
2987
2988 static void *
2989 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2990 void (*host_fn) (void *))
2991 {
2992 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2993 return (void *) host_fn;
2994 else
2995 {
2996 gomp_mutex_lock (&devicep->lock);
2997 if (devicep->state == GOMP_DEVICE_FINALIZED)
2998 {
2999 gomp_mutex_unlock (&devicep->lock);
3000 return NULL;
3001 }
3002
3003 struct splay_tree_key_s k;
3004 k.host_start = (uintptr_t) host_fn;
3005 k.host_end = k.host_start + 1;
3006 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
3007 gomp_mutex_unlock (&devicep->lock);
3008 if (tgt_fn == NULL)
3009 return NULL;
3010
3011 return (void *) tgt_fn->tgt_offset;
3012 }
3013 }
3014
3015 /* Called when encountering a target directive. If DEVICE
3016 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
3017 GOMP_DEVICE_HOST_FALLBACK (or any value
3018 larger than the last available hw device), use host fallback.
3019 FN is the address of the host code, UNUSED is part of the current ABI, but
3020 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
3021 with MAPNUM entries, containing the addresses of the host objects,
3022 the sizes of the host objects (resp., for pointer kinds, the pointer bias,
3023 with sizeof (void *) assumed as the size) and the kinds. */
3024
3025 void
3026 GOMP_target (int device, void (*fn) (void *), const void *unused,
3027 size_t mapnum, void **hostaddrs, size_t *sizes,
3028 unsigned char *kinds)
3029 {
3030 struct gomp_device_descr *devicep = resolve_device (device, true);
3031
3032 void *fn_addr;
3033 if (devicep == NULL
3034 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3035 /* All shared memory devices should use the GOMP_target_ext function. */
3036 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
3037 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
3038 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
3039
3040 htab_t refcount_set = htab_create (mapnum);
3041 struct target_mem_desc *tgt_vars
3042 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3043 &refcount_set, GOMP_MAP_VARS_TARGET);
3044 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
3045 NULL);
3046 htab_clear (refcount_set);
3047 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3048 htab_free (refcount_set);
3049 }
3050
3051 static inline unsigned int
3052 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
3053 {
3054 /* If we cannot run asynchronously, simply ignore nowait. */
3055 if (devicep != NULL && devicep->async_run_func == NULL)
3056 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
3057
3058 return flags;
3059 }
3060
3061 static void
3062 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
3063 {
3064 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3065 if (item == NULL)
3066 return;
3067
3068 void *host_ptr = &item->icvs;
3069 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
3070 if (dev_ptr != NULL)
3071 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
3072 sizeof (struct gomp_offload_icvs));
3073 }
3074
3075 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
3076 and several arguments have been added:
3077 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
3078 DEPEND is array of dependencies, see GOMP_task for details.
3079
3080 ARGS is a pointer to an array consisting of a variable number of both
3081 device-independent and device-specific arguments, each of which can take
3082 one or two elements where the first specifies for which device it is
3083 intended, the type and optionally also the value. If the value is not
3084 present in the first element, the whole second element is the actual
3085 value. The last element of the array is a single NULL. Among the
3086 device-independent arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
3087
3088 NUM_TEAMS is positive if GOMP_teams will be called in the body with
3089 that value, or 1 if the teams construct is not present, or 0 if the
3090 teams construct does not have a num_teams clause and so the choice is
3091 implementation defined, and -1 if it can't be determined on the host
3092 what value GOMP_teams will have on the device.
3093 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
3094 body with that value, or 0 if the teams construct does not have a
3095 thread_limit clause or the teams construct is not present, or -1 if it
3096 can't be determined on the host what value GOMP_teams will have on the device. */
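/* For illustration (a sketch that mirrors the encoding used further below,
   not a definitive ABI description): a NUM_TEAMS argument whose value fits
   into 16 bits (cf. the (1 << 15) checks below) is passed as a single element,

     (void *) ((val << GOMP_TARGET_ARG_VALUE_SHIFT) | GOMP_TARGET_ARG_NUM_TEAMS)

   whereas a larger value uses two elements, the first being
   (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM | GOMP_TARGET_ARG_NUM_TEAMS)
   and the second the value itself; the array is terminated by NULL.  */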
3097
3098 void
3099 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
3100 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3101 unsigned int flags, void **depend, void **args)
3102 {
3103 struct gomp_device_descr *devicep = resolve_device (device, true);
3104 size_t tgt_align = 0, tgt_size = 0;
3105 bool fpc_done = false;
3106
3107 /* Obtain the original TEAMS and THREADS values from ARGS. */
3108 intptr_t orig_teams = 1, orig_threads = 0;
3109 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
3110 void **tmpargs = args;
3111 while (*tmpargs)
3112 {
3113 intptr_t id = (intptr_t) *tmpargs++, val;
3114 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3115 {
3116 val = (intptr_t) *tmpargs++;
3117 len = 2;
3118 }
3119 else
3120 {
3121 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3122 len = 1;
3123 }
3124 num_args += len;
3125 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3126 continue;
3127 val = val > INT_MAX ? INT_MAX : val;
3128 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
3129 {
3130 orig_teams = val;
3131 teams_len = len;
3132 }
3133 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
3134 {
3135 orig_threads = val;
3136 threads_len = len;
3137 }
3138 }
3139
3140 intptr_t new_teams = orig_teams, new_threads = orig_threads;
3141 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3142 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3143 value could not be determined. No change.
3144 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3145 Set device-specific value.
3146 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3147 No change. */
3148 if (orig_teams == -2)
3149 new_teams = 1;
3150 else if (orig_teams == 0)
3151 {
3152 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3153 if (item != NULL)
3154 new_teams = item->icvs.nteams;
3155 }
3156 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3157 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3158 e.g. a THREAD_LIMIT clause. */
3159 if (orig_teams > -2 && orig_threads == 0)
3160 {
3161 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3162 if (item != NULL)
3163 new_threads = item->icvs.teams_thread_limit;
3164 }
3165
3166 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3167 updated. */
3168 void **new_args = args;
3169 if (orig_teams != new_teams || orig_threads != new_threads)
3170 {
3171 size_t tms_len = (orig_teams == new_teams
3172 ? teams_len
3173 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3174 ? 1 : 2));
3175 size_t ths_len = (orig_threads == new_threads
3176 ? threads_len
3177 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3178 ? 1 : 2));
3179 /* One additional item after the last arg must be NULL. */
3180 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3181 + ths_len + 1;
3182 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3183
3184 tmpargs = args;
3185 void **tmp_new_args = new_args;
3186 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3187 too if they have not been changed, and skipped otherwise. */
3188 while (*tmpargs)
3189 {
3190 intptr_t id = (intptr_t) *tmpargs;
3191 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3192 && orig_teams != new_teams)
3193 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3194 && orig_threads != new_threads))
3195 {
3196 tmpargs++;
3197 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3198 tmpargs++;
3199 }
3200 else
3201 {
3202 *tmp_new_args++ = *tmpargs++;
3203 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3204 *tmp_new_args++ = *tmpargs++;
3205 }
3206 }
3207
3208 /* Add the new TEAMS arg to the new args list if it has been changed. */
3209 if (orig_teams != new_teams)
3210 {
3211 intptr_t new_val = new_teams;
3212 if (tms_len == 1)
3213 {
3214 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3215 | GOMP_TARGET_ARG_NUM_TEAMS;
3216 *tmp_new_args++ = (void *) new_val;
3217 }
3218 else
3219 {
3220 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3221 | GOMP_TARGET_ARG_NUM_TEAMS);
3222 *tmp_new_args++ = (void *) new_val;
3223 }
3224 }
3225
3226 /* Add the new THREADS arg to the new args list if it has been changed. */
3227 if (orig_threads != new_threads)
3228 {
3229 intptr_t new_val = new_threads;
3230 if (ths_len == 1)
3231 {
3232 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3233 | GOMP_TARGET_ARG_THREAD_LIMIT;
3234 *tmp_new_args++ = (void *) new_val;
3235 }
3236 else
3237 {
3238 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3239 | GOMP_TARGET_ARG_THREAD_LIMIT);
3240 *tmp_new_args++ = (void *) new_val;
3241 }
3242 }
3243
3244 *tmp_new_args = NULL;
3245 }
3246
3247 flags = clear_unsupported_flags (devicep, flags);
3248
3249 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3250 {
3251 struct gomp_thread *thr = gomp_thread ();
3252 /* Create a team if we don't have any around, as nowait
3253 target tasks make sense to run asynchronously even when
3254 outside of any parallel. */
3255 if (__builtin_expect (thr->ts.team == NULL, 0))
3256 {
3257 struct gomp_team *team = gomp_new_team (1);
3258 struct gomp_task *task = thr->task;
3259 struct gomp_task **implicit_task = &task;
3260 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3261 team->prev_ts = thr->ts;
3262 thr->ts.team = team;
3263 thr->ts.team_id = 0;
3264 thr->ts.work_share = &team->work_shares[0];
3265 thr->ts.last_work_share = NULL;
3266 #ifdef HAVE_SYNC_BUILTINS
3267 thr->ts.single_count = 0;
3268 #endif
3269 thr->ts.static_trip = 0;
3270 thr->task = &team->implicit_task[0];
3271 gomp_init_task (thr->task, NULL, icv);
3272 while (*implicit_task
3273 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3274 implicit_task = &(*implicit_task)->parent;
3275 if (*implicit_task)
3276 {
3277 thr->task = *implicit_task;
3278 gomp_end_task ();
3279 free (*implicit_task);
3280 thr->task = &team->implicit_task[0];
3281 }
3282 else
3283 pthread_setspecific (gomp_thread_destructor, thr);
3284 if (implicit_task != &task)
3285 {
3286 *implicit_task = thr->task;
3287 thr->task = task;
3288 }
3289 }
3290 if (thr->ts.team
3291 && !thr->task->final_task)
3292 {
3293 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3294 sizes, kinds, flags, depend, new_args,
3295 GOMP_TARGET_TASK_BEFORE_MAP);
3296 return;
3297 }
3298 }
3299
3300 /* If there are depend clauses, but nowait is not present
3301 (or we are in a final task), block the parent task until the
3302 dependencies are resolved and then just continue with the rest
3303 of the function as if it is a merged task. */
3304 if (depend != NULL)
3305 {
3306 struct gomp_thread *thr = gomp_thread ();
3307 if (thr->task && thr->task->depend_hash)
3308 {
3309 /* If we might need to wait, copy firstprivate now. */
3310 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3311 &tgt_align, &tgt_size);
3312 if (tgt_align)
3313 {
3314 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3315 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3316 tgt_align, tgt_size);
3317 }
3318 fpc_done = true;
3319 gomp_task_maybe_wait_for_dependencies (depend);
3320 }
3321 }
3322
3323 void *fn_addr;
3324 if (devicep == NULL
3325 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3326 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3327 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3328 {
3329 if (!fpc_done)
3330 {
3331 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3332 &tgt_align, &tgt_size);
3333 if (tgt_align)
3334 {
3335 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3336 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3337 tgt_align, tgt_size);
3338 }
3339 }
3340 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3341 return;
3342 }
3343
3344 struct target_mem_desc *tgt_vars;
3345 htab_t refcount_set = NULL;
3346
3347 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3348 {
3349 if (!fpc_done)
3350 {
3351 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3352 &tgt_align, &tgt_size);
3353 if (tgt_align)
3354 {
3355 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3356 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3357 tgt_align, tgt_size);
3358 }
3359 }
3360 tgt_vars = NULL;
3361 }
3362 else
3363 {
3364 refcount_set = htab_create (mapnum);
3365 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3366 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3367 }
3368 devicep->run_func (devicep->target_id, fn_addr,
3369 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3370 new_args);
3371 if (tgt_vars)
3372 {
3373 htab_clear (refcount_set);
3374 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3375 }
3376 if (refcount_set)
3377 htab_free (refcount_set);
3378
3379 /* Copy back ICVs from device to host.
3380 HOST_PTR is expected to exist since it was added in
3381 gomp_load_image_to_device if not already available. */
3382 gomp_copy_back_icvs (devicep, device);
3383
3384 }
3385
3386
3387 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3388 keeping track of all variable handling, assuming that reverse offload occurs
3389 only very rarely. The downside is that the reverse search is slow. */
3390
3391 struct gomp_splay_tree_rev_lookup_data {
3392 uintptr_t tgt_start;
3393 uintptr_t tgt_end;
3394 splay_tree_key key;
3395 };
3396
3397 static int
3398 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3399 {
3400 struct gomp_splay_tree_rev_lookup_data *data;
3401 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3402 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3403
3404 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3405 return 0;
3406
3407 size_t j;
3408 for (j = 0; j < key->tgt->list_count; j++)
3409 if (key->tgt->list[j].key == key)
3410 break;
3411 assert (j < key->tgt->list_count);
3412 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3413
3414 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3415 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3416 {
3417 data->key = key;
3418 return 1;
3419 }
3420 return 0;
3421 }
3422
3423 static inline splay_tree_key
3424 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3425 bool zero_len)
3426 {
3427 struct gomp_splay_tree_rev_lookup_data data;
3428 data.key = NULL;
3429 data.tgt_start = tgt_start;
3430 data.tgt_end = tgt_end;
3431
3432 if (tgt_start != tgt_end)
3433 {
3434 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3435 return data.key;
3436 }
3437
3438 data.tgt_end++;
3439 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3440 if (data.key != NULL || zero_len)
3441 return data.key;
3442 data.tgt_end--;
3443
3444 data.tgt_start--;
3445 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3446 return data.key;
3447 }
3448
3449 struct cpy_data
3450 {
3451 uint64_t devaddr;
3452 bool present, aligned;
3453 };
3454
3455
3456 /* Search just-mapped reverse-offload data; returns the index if found,
3457 otherwise a value >= n. */
3458
3459 static inline int
3460 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3461 unsigned short *kinds, uint64_t *sizes, size_t n,
3462 uint64_t tgt_start, uint64_t tgt_end)
3463 {
3464 const bool short_mapkind = true;
3465 const int typemask = short_mapkind ? 0xff : 0x7;
3466 size_t i;
3467 for (i = 0; i < n; i++)
3468 {
3469 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3470 == GOMP_MAP_STRUCT);
3471 uint64_t dev_end;
3472 if (!is_struct)
3473 dev_end = d[i].devaddr + sizes[i];
3474 else
3475 {
3476 if (i + sizes[i] < n)
3477 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3478 else
3479 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3480 }
3481 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3482 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3483 break;
3484 if (is_struct)
3485 i += sizes[i];
3486 }
3487 return i;
3488 }
3489
3490 static inline int
3491 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3492 unsigned short *kinds, uint64_t *sizes,
3493 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3494 bool zero_len)
3495 {
3496 size_t i;
3497 if (tgt_start != tgt_end)
3498 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3499 tgt_start, tgt_end);
3500 tgt_end++;
3501 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3502 tgt_start, tgt_end);
3503 if (i < n || zero_len)
3504 return i;
3505 tgt_end--;
3506
3507 tgt_start--;
3508 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3509 tgt_start, tgt_end);
3510 }
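/* Note on the zero-length handling above (a descriptive sketch): for
   TGT_START == TGT_END the range is first widened to
   [tgt_start, tgt_start + 1); only if nothing is found and ZERO_LEN is
   false is [tgt_start - 1, tgt_start) tried as well, matching the
   splay-tree variant gomp_map_rev_lookup.  */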
3511
3512 /* Handle reverse offload. This is called by the device plugins for a
3513 reverse offload; it is not called if the outer target runs on the host.
3514 The mapping is simplified: device-affecting constructs (except for target
3515 with device(ancestor:1)) must not be encountered; in particular not
3516 target (enter/exit) data. */
3517
3518 void
3519 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3520 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3521 void (*dev_to_host_cpy) (void *, const void *, size_t, void*),
3522 void (*host_to_dev_cpy) (void *, const void *, size_t, void*),
3523 void *token)
3524 {
3525 /* Return early if there is no offload code. */
3526 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3527 return;
3528 /* Currently, this fails because of calculate_firstprivate_requirements
3529 below; it could be fixed, but additional code would need to be updated
3530 to handle 32-bit hosts - thus, it is not worthwhile. */
3531 if (sizeof (void *) != sizeof (uint64_t))
3532 gomp_fatal ("Reverse offload for 32-bit hosts is not supported.");
3533
3534 struct cpy_data *cdata = NULL;
3535 uint64_t *devaddrs;
3536 uint64_t *sizes;
3537 unsigned short *kinds;
3538 const bool short_mapkind = true;
3539 const int typemask = short_mapkind ? 0xff : 0x7;
3540 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3541
3542 reverse_splay_tree_key n;
3543 struct reverse_splay_tree_key_s k;
3544 k.dev = fn_ptr;
3545
3546 gomp_mutex_lock (&devicep->lock);
3547 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3548 gomp_mutex_unlock (&devicep->lock);
3549
3550 if (n == NULL)
3551 gomp_fatal ("Cannot find reverse-offload function");
3552 void (*host_fn)() = (void (*)()) n->k->host_start;
3553
3554 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3555 {
3556 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3557 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3558 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3559 }
3560 else
3561 {
3562 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3563 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3564 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3565 if (dev_to_host_cpy)
3566 {
3567 dev_to_host_cpy (devaddrs, (const void *) (uintptr_t) devaddrs_ptr,
3568 mapnum * sizeof (uint64_t), token);
3569 dev_to_host_cpy (sizes, (const void *) (uintptr_t) sizes_ptr,
3570 mapnum * sizeof (uint64_t), token);
3571 dev_to_host_cpy (kinds, (const void *) (uintptr_t) kinds_ptr,
3572 mapnum * sizeof (unsigned short), token);
3573 }
3574 else
3575 {
3576 gomp_copy_dev2host (devicep, NULL, devaddrs,
3577 (const void *) (uintptr_t) devaddrs_ptr,
3578 mapnum * sizeof (uint64_t));
3579 gomp_copy_dev2host (devicep, NULL, sizes,
3580 (const void *) (uintptr_t) sizes_ptr,
3581 mapnum * sizeof (uint64_t));
3582 gomp_copy_dev2host (devicep, NULL, kinds, (const void *) (uintptr_t) kinds_ptr,
3583 mapnum * sizeof (unsigned short));
3584 }
3585 }
3586
3587 size_t tgt_align = 0, tgt_size = 0;
3588
3589 /* If actually executed on 32-bit systems, the casts lead to wrong code;
3590 but offloading on 32-bit hosts is not supported; see top of this function. */
3591 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3592 (void *) (uintptr_t) kinds,
3593 &tgt_align, &tgt_size);
3594
3595 if (tgt_align)
3596 {
3597 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3598 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3599 if (al)
3600 tgt += tgt_align - al;
3601 tgt_size = 0;
3602 for (uint64_t i = 0; i < mapnum; i++)
3603 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3604 && devaddrs[i] != 0)
3605 {
3606 size_t align = (size_t) 1 << (kinds[i] >> 8);
3607 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3608 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3609 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3610 (size_t) sizes[i]);
3611 else if (dev_to_host_cpy)
3612 dev_to_host_cpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3613 (size_t) sizes[i], token);
3614 else
3615 gomp_copy_dev2host (devicep, NULL, tgt + tgt_size,
3616 (void *) (uintptr_t) devaddrs[i],
3617 (size_t) sizes[i]);
3618 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3619 tgt_size = tgt_size + sizes[i];
3620 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3621 && i + 1 < mapnum
3622 && ((get_kind (short_mapkind, kinds, i) & typemask)
3623 == GOMP_MAP_ATTACH))
3624 {
3625 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3626 = (uint64_t) devaddrs[i];
3627 ++i;
3628 }
3629 }
3630 }
3631
3632 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3633 {
3634 size_t j, struct_cpy = 0;
3635 splay_tree_key n2;
3636 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3637 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3638 gomp_mutex_lock (&devicep->lock);
3639 for (uint64_t i = 0; i < mapnum; i++)
3640 {
3641 if (devaddrs[i] == 0)
3642 continue;
3643 n = NULL;
3644 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3645 switch (kind)
3646 {
3647 case GOMP_MAP_FIRSTPRIVATE:
3648 case GOMP_MAP_FIRSTPRIVATE_INT:
3649 continue;
3650
3651 case GOMP_MAP_DELETE:
3652 case GOMP_MAP_RELEASE:
3653 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3654 /* Assume it is present; look it up - but a failed lookup is only an
3655 error if the present modifier is there. */
3656 case GOMP_MAP_ALLOC:
3657 case GOMP_MAP_FROM:
3658 case GOMP_MAP_FORCE_ALLOC:
3659 case GOMP_MAP_FORCE_FROM:
3660 case GOMP_MAP_ALWAYS_FROM:
3661 case GOMP_MAP_TO:
3662 case GOMP_MAP_TOFROM:
3663 case GOMP_MAP_FORCE_TO:
3664 case GOMP_MAP_FORCE_TOFROM:
3665 case GOMP_MAP_ALWAYS_TO:
3666 case GOMP_MAP_ALWAYS_TOFROM:
3667 case GOMP_MAP_PRESENT_FROM:
3668 case GOMP_MAP_PRESENT_TO:
3669 case GOMP_MAP_PRESENT_TOFROM:
3670 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3671 case GOMP_MAP_ALWAYS_PRESENT_TO:
3672 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3673 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3674 cdata[i].devaddr = devaddrs[i];
3675 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3676 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3677 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3678 devaddrs[i],
3679 devaddrs[i] + sizes[i], zero_len);
3680 if (j < i)
3681 {
3682 n2 = NULL;
3683 cdata[i].present = true;
3684 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3685 }
3686 else
3687 {
3688 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3689 devaddrs[i],
3690 devaddrs[i] + sizes[i], zero_len);
3691 cdata[i].present = n2 != NULL;
3692 }
3693 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3694 {
3695 gomp_mutex_unlock (&devicep->lock);
3696 #ifdef HAVE_INTTYPES_H
3697 gomp_fatal ("present clause: no corresponding data on "
3698 "parent device at %p with size %"PRIu64,
3699 (void *) (uintptr_t) devaddrs[i],
3700 (uint64_t) sizes[i]);
3701 #else
3702 gomp_fatal ("present clause: no corresponding data on "
3703 "parent device at %p with size %lu",
3704 (void *) (uintptr_t) devaddrs[i],
3705 (unsigned long) sizes[i]);
3706 #endif
3707 break;
3708 }
3709 else if (!cdata[i].present
3710 && kind != GOMP_MAP_DELETE
3711 && kind != GOMP_MAP_RELEASE
3712 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3713 {
3714 cdata[i].aligned = true;
3715 size_t align = (size_t) 1 << (kinds[i] >> 8);
3716 devaddrs[i]
3717 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3718 sizes[i]);
3719 }
3720 else if (n2 != NULL)
3721 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3722 - (n2->tgt->tgt_start + n2->tgt_offset));
3723 if (((!cdata[i].present || struct_cpy)
3724 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3725 || kind == GOMP_MAP_FORCE_TO
3726 || kind == GOMP_MAP_FORCE_TOFROM
3727 || GOMP_MAP_ALWAYS_TO_P (kind))
3728 {
3729 if (dev_to_host_cpy)
3730 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3731 (void *) (uintptr_t) cdata[i].devaddr,
3732 sizes[i], token);
3733 else
3734 gomp_copy_dev2host (devicep, NULL,
3735 (void *) (uintptr_t) devaddrs[i],
3736 (void *) (uintptr_t) cdata[i].devaddr,
3737 sizes[i]);
3738 }
3739 if (struct_cpy)
3740 struct_cpy--;
3741 break;
3742 case GOMP_MAP_ATTACH:
3743 case GOMP_MAP_POINTER:
3744 case GOMP_MAP_ALWAYS_POINTER:
3745 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3746 devaddrs[i] + sizes[i],
3747 devaddrs[i] + sizes[i]
3748 + sizeof (void*), false);
3749 cdata[i].present = n2 != NULL;
3750 cdata[i].devaddr = devaddrs[i];
3751 if (n2)
3752 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3753 - (n2->tgt->tgt_start + n2->tgt_offset));
3754 else
3755 {
3756 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3757 devaddrs[i] + sizes[i],
3758 devaddrs[i] + sizes[i]
3759 + sizeof (void*), false);
3760 if (j < i)
3761 {
3762 cdata[i].present = true;
3763 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3764 - cdata[j].devaddr);
3765 }
3766 }
3767 if (!cdata[i].present)
3768 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3769 /* Assume that when present, the pointer is already correct. */
3770 if (!n2)
3771 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3772 = devaddrs[i-1];
3773 break;
3774 case GOMP_MAP_TO_PSET:
3775 /* Assume that when present, the pointers are fine and no 'to:'
3776 is required. */
3777 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3778 devaddrs[i], devaddrs[i] + sizes[i],
3779 false);
3780 cdata[i].present = n2 != NULL;
3781 cdata[i].devaddr = devaddrs[i];
3782 if (n2)
3783 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3784 - (n2->tgt->tgt_start + n2->tgt_offset));
3785 else
3786 {
3787 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3788 devaddrs[i],
3789 devaddrs[i] + sizes[i], false);
3790 if (j < i)
3791 {
3792 cdata[i].present = true;
3793 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3794 - cdata[j].devaddr);
3795 }
3796 }
3797 if (!cdata[i].present)
3798 {
3799 cdata[i].aligned = true;
3800 size_t align = (size_t) 1 << (kinds[i] >> 8);
3801 devaddrs[i]
3802 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3803 sizes[i]);
3804 if (dev_to_host_cpy)
3805 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3806 (void *) (uintptr_t) cdata[i].devaddr,
3807 sizes[i], token);
3808 else
3809 gomp_copy_dev2host (devicep, NULL,
3810 (void *) (uintptr_t) devaddrs[i],
3811 (void *) (uintptr_t) cdata[i].devaddr,
3812 sizes[i]);
3813 }
3814 for (j = i + 1; j < mapnum; j++)
3815 {
3816 kind = get_kind (short_mapkind, kinds, j) & typemask;
3817 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3818 && !GOMP_MAP_POINTER_P (kind))
3819 break;
3820 if (devaddrs[j] < devaddrs[i])
3821 break;
3822 if (cdata[i].present)
3823 continue;
3824 if (devaddrs[j] == 0)
3825 {
3826 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3827 continue;
3828 }
3829 int k;
3830 n2 = NULL;
3831 cdata[i].present = true;
3832 cdata[j].devaddr = devaddrs[j];
3833 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3834 devaddrs[j],
3835 devaddrs[j] + sizeof (void*),
3836 false);
3837 if (k < j)
3838 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3839 - cdata[k].devaddr);
3840 else
3841 {
3842 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3843 devaddrs[j],
3844 devaddrs[j] + sizeof (void*),
3845 false);
3846 if (n2 == NULL)
3847 {
3848 gomp_mutex_unlock (&devicep->lock);
3849 gomp_fatal ("Pointer target wasn't mapped");
3850 }
3851 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3852 - (n2->tgt->tgt_start + n2->tgt_offset));
3853 }
3854 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3855 = (void *) (uintptr_t) devaddrs[j];
3856 }
3857 i = j - 1;
3858 break;
3859 case GOMP_MAP_STRUCT:
3860 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3861 devaddrs[i + sizes[i]]
3862 + sizes[i + sizes[i]], false);
3863 cdata[i].present = n2 != NULL;
3864 cdata[i].devaddr = devaddrs[i];
3865 struct_cpy = cdata[i].present ? 0 : sizes[i];
3866 if (!n2)
3867 {
3868 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3869 - devaddrs[i+1]
3870 + sizes[i + sizes[i]]);
3871 size_t align = (size_t) 1 << (kinds[i] >> 8);
3872 cdata[i].aligned = true;
3873 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3874 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3875 }
3876 else
3877 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3878 - (n2->tgt->tgt_start + n2->tgt_offset));
3879 break;
3880 default:
3881 gomp_mutex_unlock (&devicep->lock);
3882 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3883 }
3884 }
3885 gomp_mutex_unlock (&devicep->lock);
3886 }
3887
3888 host_fn (devaddrs);
3889
3890 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3891 {
3892 uint64_t struct_cpy = 0;
3893 bool clean_struct = false;
3894 for (uint64_t i = 0; i < mapnum; i++)
3895 {
3896 if (cdata[i].devaddr == 0)
3897 continue;
3898 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3899 bool copy = !cdata[i].present || struct_cpy;
3900 switch (kind)
3901 {
3902 case GOMP_MAP_FORCE_FROM:
3903 case GOMP_MAP_FORCE_TOFROM:
3904 case GOMP_MAP_ALWAYS_FROM:
3905 case GOMP_MAP_ALWAYS_TOFROM:
3906 case GOMP_MAP_PRESENT_FROM:
3907 case GOMP_MAP_PRESENT_TOFROM:
3908 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3909 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3910 copy = true;
3911 /* FALLTHRU */
3912 case GOMP_MAP_FROM:
3913 case GOMP_MAP_TOFROM:
3914 if (copy && host_to_dev_cpy)
3915 host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr,
3916 (void *) (uintptr_t) devaddrs[i],
3917 sizes[i], token);
3918 else if (copy)
3919 gomp_copy_host2dev (devicep, NULL,
3920 (void *) (uintptr_t) cdata[i].devaddr,
3921 (void *) (uintptr_t) devaddrs[i],
3922 sizes[i], false, NULL);
3923 default:
3924 break;
3925 }
3926 if (struct_cpy)
3927 {
3928 struct_cpy--;
3929 continue;
3930 }
3931 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3932 {
3933 clean_struct = true;
3934 struct_cpy = sizes[i];
3935 }
3936 else if (!cdata[i].present && cdata[i].aligned)
3937 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3938 else if (!cdata[i].present)
3939 free ((void *) (uintptr_t) devaddrs[i]);
3940 }
3941 if (clean_struct)
3942 for (uint64_t i = 0; i < mapnum; i++)
3943 if (!cdata[i].present
3944 && ((get_kind (short_mapkind, kinds, i) & typemask)
3945 == GOMP_MAP_STRUCT))
3946 {
3947 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3948 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3949 }
3950
3951 free (devaddrs);
3952 free (sizes);
3953 free (kinds);
3954 }
3955 }
3956
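/* Illustrative user-level sketch (hedged; assumes a compiler and device
   plugin with reverse-offload support): gomp_target_rev above is reached
   when code running on the device offloads back to the host with
   device(ancestor: 1), e.g.

     #pragma omp requires reverse_offload

     extern void hello (int);   // host-only routine

     void
     foo (void)
     {
       #pragma omp target
       for (int i = 0; i < 4; i++)
         {
           #pragma omp target device (ancestor : 1) firstprivate (i)
           hello (i);
         }
     }
*/
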
3957 /* Host fallback for GOMP_target_data{,_ext} routines. */
3958
3959 static void
3960 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3961 {
3962 struct gomp_task_icv *icv = gomp_icv (false);
3963
3964 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3965 && devicep != NULL)
3966 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3967 "be used for offloading");
3968
3969 if (icv->target_data)
3970 {
3971 /* Even when doing a host fallback, if there are any active
3972 #pragma omp target data constructs, we need to remember the
3973 new #pragma omp target data, otherwise GOMP_target_end_data
3974 would get out of sync. */
3975 struct target_mem_desc *tgt
3976 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3977 NULL, GOMP_MAP_VARS_DATA);
3978 tgt->prev = icv->target_data;
3979 icv->target_data = tgt;
3980 }
3981 }
3982
3983 void
3984 GOMP_target_data (int device, const void *unused, size_t mapnum,
3985 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3986 {
3987 struct gomp_device_descr *devicep = resolve_device (device, true);
3988
3989 if (devicep == NULL
3990 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3991 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3992 return gomp_target_data_fallback (devicep);
3993
3994 struct target_mem_desc *tgt
3995 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3996 NULL, GOMP_MAP_VARS_DATA);
3997 struct gomp_task_icv *icv = gomp_icv (true);
3998 tgt->prev = icv->target_data;
3999 icv->target_data = tgt;
4000 }
4001
4002 void
4003 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
4004 size_t *sizes, unsigned short *kinds)
4005 {
4006 struct gomp_device_descr *devicep = resolve_device (device, true);
4007
4008 if (devicep == NULL
4009 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4010 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4011 return gomp_target_data_fallback (devicep);
4012
4013 struct target_mem_desc *tgt
4014 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
4015 NULL, GOMP_MAP_VARS_DATA);
4016 struct gomp_task_icv *icv = gomp_icv (true);
4017 tgt->prev = icv->target_data;
4018 icv->target_data = tgt;
4019 }
4020
4021 void
4022 GOMP_target_end_data (void)
4023 {
4024 struct gomp_task_icv *icv = gomp_icv (false);
4025 if (icv->target_data)
4026 {
4027 struct target_mem_desc *tgt = icv->target_data;
4028 icv->target_data = tgt->prev;
4029 gomp_unmap_vars (tgt, true, NULL);
4030 }
4031 }
4032
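/* Illustrative sketch (user-level view; the exact calls above are emitted by
   the compiler): a structured

     #pragma omp target data map(tofrom: a[0:n])
     {
       ...
     }

   region enters through GOMP_target_data_ext and leaves through
   GOMP_target_end_data, which pops the most recently pushed target_mem_desc
   from the ICV chain and unmaps it.  */
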
4033 void
4034 GOMP_target_update (int device, const void *unused, size_t mapnum,
4035 void **hostaddrs, size_t *sizes, unsigned char *kinds)
4036 {
4037 struct gomp_device_descr *devicep = resolve_device (device, true);
4038
4039 if (devicep == NULL
4040 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4041 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4042 return;
4043
4044 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
4045 }
4046
4047 void
4048 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
4049 size_t *sizes, unsigned short *kinds,
4050 unsigned int flags, void **depend)
4051 {
4052 struct gomp_device_descr *devicep = resolve_device (device, true);
4053
4054 /* If there are depend clauses, but nowait is not present,
4055 block the parent task until the dependencies are resolved
4056 and then just continue with the rest of the function as if it
4057 were a merged task. Until we are able to schedule tasks during
4058 variable mapping or unmapping, nowait is ignored if depend
4059 clauses are not present. */
4060 if (depend != NULL)
4061 {
4062 struct gomp_thread *thr = gomp_thread ();
4063 if (thr->task && thr->task->depend_hash)
4064 {
4065 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4066 && thr->ts.team
4067 && !thr->task->final_task)
4068 {
4069 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4070 mapnum, hostaddrs, sizes, kinds,
4071 flags | GOMP_TARGET_FLAG_UPDATE,
4072 depend, NULL, GOMP_TARGET_TASK_DATA))
4073 return;
4074 }
4075 else
4076 {
4077 struct gomp_team *team = thr->ts.team;
4078 /* If parallel or taskgroup has been cancelled, don't start new
4079 tasks. */
4080 if (__builtin_expect (gomp_cancel_var, 0) && team)
4081 {
4082 if (gomp_team_barrier_cancelled (&team->barrier))
4083 return;
4084 if (thr->task->taskgroup)
4085 {
4086 if (thr->task->taskgroup->cancelled)
4087 return;
4088 if (thr->task->taskgroup->workshare
4089 && thr->task->taskgroup->prev
4090 && thr->task->taskgroup->prev->cancelled)
4091 return;
4092 }
4093 }
4094
4095 gomp_task_maybe_wait_for_dependencies (depend);
4096 }
4097 }
4098 }
4099
4100 if (devicep == NULL
4101 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4102 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4103 return;
4104
4105 struct gomp_thread *thr = gomp_thread ();
4106 struct gomp_team *team = thr->ts.team;
4107 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4108 if (__builtin_expect (gomp_cancel_var, 0) && team)
4109 {
4110 if (gomp_team_barrier_cancelled (&team->barrier))
4111 return;
4112 if (thr->task->taskgroup)
4113 {
4114 if (thr->task->taskgroup->cancelled)
4115 return;
4116 if (thr->task->taskgroup->workshare
4117 && thr->task->taskgroup->prev
4118 && thr->task->taskgroup->prev->cancelled)
4119 return;
4120 }
4121 }
4122
4123 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
4124 }
4125
4126 static void
4127 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
4128 void **hostaddrs, size_t *sizes, unsigned short *kinds,
4129 htab_t *refcount_set)
4130 {
4131 const int typemask = 0xff;
4132 size_t i;
4133 gomp_mutex_lock (&devicep->lock);
4134 if (devicep->state == GOMP_DEVICE_FINALIZED)
4135 {
4136 gomp_mutex_unlock (&devicep->lock);
4137 return;
4138 }
4139
4140 for (i = 0; i < mapnum; i++)
4141 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
4142 {
4143 struct splay_tree_key_s cur_node;
4144 cur_node.host_start = (uintptr_t) hostaddrs[i];
4145 cur_node.host_end = cur_node.host_start + sizeof (void *);
4146 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
4147
4148 if (n)
4149 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
4150 false, NULL);
4151 }
4152
4153 int nrmvars = 0;
4154 splay_tree_key remove_vars[mapnum];
4155
4156 for (i = 0; i < mapnum; i++)
4157 {
4158 struct splay_tree_key_s cur_node;
4159 unsigned char kind = kinds[i] & typemask;
4160 switch (kind)
4161 {
4162 case GOMP_MAP_FROM:
4163 case GOMP_MAP_ALWAYS_FROM:
4164 case GOMP_MAP_DELETE:
4165 case GOMP_MAP_RELEASE:
4166 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4167 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4168 cur_node.host_start = (uintptr_t) hostaddrs[i];
4169 cur_node.host_end = cur_node.host_start + sizes[i];
4170 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4171 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4172 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4173 : splay_tree_lookup (&devicep->mem_map, &cur_node);
4174 if (!k)
4175 continue;
4176
4177 bool delete_p = (kind == GOMP_MAP_DELETE
4178 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4179 bool do_copy, do_remove;
4180 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4181 &do_remove);
4182
4183 if ((kind == GOMP_MAP_FROM && do_copy)
4184 || kind == GOMP_MAP_ALWAYS_FROM)
4185 {
4186 if (k->aux && k->aux->attach_count)
4187 {
4188 /* We have to be careful not to overwrite still attached
4189 pointers during the copyback to host. */
4190 uintptr_t addr = k->host_start;
4191 while (addr < k->host_end)
4192 {
4193 size_t i = (addr - k->host_start) / sizeof (void *);
4194 if (k->aux->attach_count[i] == 0)
4195 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4196 (void *) (k->tgt->tgt_start
4197 + k->tgt_offset
4198 + addr - k->host_start),
4199 sizeof (void *));
4200 addr += sizeof (void *);
4201 }
4202 }
4203 else
4204 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4205 (void *) (k->tgt->tgt_start + k->tgt_offset
4206 + cur_node.host_start
4207 - k->host_start),
4208 cur_node.host_end - cur_node.host_start);
4209 }
4210
4211 /* Structure element lists are removed all at once, which may cause
4212 immediate deallocation of the target_mem_desc and thus errors if we
4213 still have following element siblings to copy back.
4214 While we're at it, it also seems more disciplined to simply
4215 queue all removals together for processing below.
4216
4217 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4218 not have this problem, since it maintains an additional
4219 tgt->refcount = 1 reference to the target_mem_desc to start with.
4220 */
4221 if (do_remove)
4222 remove_vars[nrmvars++] = k;
4223 break;
4224
4225 case GOMP_MAP_DETACH:
4226 break;
4227 default:
4228 gomp_mutex_unlock (&devicep->lock);
4229 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4230 kind);
4231 }
4232 }
4233
4234 for (int i = 0; i < nrmvars; i++)
4235 gomp_remove_var (devicep, remove_vars[i]);
4236
4237 gomp_mutex_unlock (&devicep->lock);
4238 }
4239
4240 void
4241 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4242 size_t *sizes, unsigned short *kinds,
4243 unsigned int flags, void **depend)
4244 {
4245 struct gomp_device_descr *devicep = resolve_device (device, true);
4246
4247 /* If there are depend clauses, but nowait is not present,
4248 block the parent task until the dependencies are resolved
4249 and then just continue with the rest of the function as if it
4250 were a merged task. Until we are able to schedule tasks during
4251 variable mapping or unmapping, nowait is ignored if depend
4252 clauses are not present. */
4253 if (depend != NULL)
4254 {
4255 struct gomp_thread *thr = gomp_thread ();
4256 if (thr->task && thr->task->depend_hash)
4257 {
4258 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4259 && thr->ts.team
4260 && !thr->task->final_task)
4261 {
4262 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4263 mapnum, hostaddrs, sizes, kinds,
4264 flags, depend, NULL,
4265 GOMP_TARGET_TASK_DATA))
4266 return;
4267 }
4268 else
4269 {
4270 struct gomp_team *team = thr->ts.team;
4271 /* If parallel or taskgroup has been cancelled, don't start new
4272 tasks. */
4273 if (__builtin_expect (gomp_cancel_var, 0) && team)
4274 {
4275 if (gomp_team_barrier_cancelled (&team->barrier))
4276 return;
4277 if (thr->task->taskgroup)
4278 {
4279 if (thr->task->taskgroup->cancelled)
4280 return;
4281 if (thr->task->taskgroup->workshare
4282 && thr->task->taskgroup->prev
4283 && thr->task->taskgroup->prev->cancelled)
4284 return;
4285 }
4286 }
4287
4288 gomp_task_maybe_wait_for_dependencies (depend);
4289 }
4290 }
4291 }
4292
4293 if (devicep == NULL
4294 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4295 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4296 return;
4297
4298 struct gomp_thread *thr = gomp_thread ();
4299 struct gomp_team *team = thr->ts.team;
4300 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4301 if (__builtin_expect (gomp_cancel_var, 0) && team)
4302 {
4303 if (gomp_team_barrier_cancelled (&team->barrier))
4304 return;
4305 if (thr->task->taskgroup)
4306 {
4307 if (thr->task->taskgroup->cancelled)
4308 return;
4309 if (thr->task->taskgroup->workshare
4310 && thr->task->taskgroup->prev
4311 && thr->task->taskgroup->prev->cancelled)
4312 return;
4313 }
4314 }
4315
4316 htab_t refcount_set = htab_create (mapnum);
4317
4318 /* The variables are mapped separately such that they can be released
4319 independently. */
4320 size_t i, j;
4321 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4322 for (i = 0; i < mapnum; i++)
4323 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4324 {
4325 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4326 &kinds[i], true, &refcount_set,
4327 GOMP_MAP_VARS_ENTER_DATA);
4328 i += sizes[i];
4329 }
4330 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4331 {
4332 for (j = i + 1; j < mapnum; j++)
4333 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4334 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4335 break;
4336 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4337 &kinds[i], true, &refcount_set,
4338 GOMP_MAP_VARS_ENTER_DATA);
4339 i += j - i - 1;
4340 }
4341 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
4342 {
4343 /* An attach operation must be processed together with the mapped
4344 base-pointer list item. */
4345 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4346 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4347 i += 1;
4348 }
4349 else
4350 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4351 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4352 else
4353 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4354 htab_free (refcount_set);
4355 }
4356
4357 bool
4358 gomp_target_task_fn (void *data)
4359 {
4360 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4361 struct gomp_device_descr *devicep = ttask->devicep;
4362
4363 if (ttask->fn != NULL)
4364 {
4365 void *fn_addr;
4366 if (devicep == NULL
4367 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4368 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4369 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4370 {
4371 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4372 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4373 ttask->args);
4374 return false;
4375 }
4376
4377 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4378 {
4379 if (ttask->tgt)
4380 gomp_unmap_vars (ttask->tgt, true, NULL);
4381 return false;
4382 }
4383
4384 void *actual_arguments;
4385 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4386 {
4387 ttask->tgt = NULL;
4388 actual_arguments = ttask->hostaddrs;
4389 }
4390 else
4391 {
4392 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4393 NULL, ttask->sizes, ttask->kinds, true,
4394 NULL, GOMP_MAP_VARS_TARGET);
4395 actual_arguments = (void *) ttask->tgt->tgt_start;
4396 }
4397 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4398
4399 assert (devicep->async_run_func);
4400 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4401 ttask->args, (void *) ttask);
4402 return true;
4403 }
4404 else if (devicep == NULL
4405 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4406 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4407 return false;
4408
4409 size_t i;
4410 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4411 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4412 ttask->kinds, true);
4413 else
4414 {
4415 htab_t refcount_set = htab_create (ttask->mapnum);
4416 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4417 for (i = 0; i < ttask->mapnum; i++)
4418 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4419 {
4420 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4421 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4422 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4423 i += ttask->sizes[i];
4424 }
4425 else
4426 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4427 &ttask->kinds[i], true, &refcount_set,
4428 GOMP_MAP_VARS_ENTER_DATA);
4429 else
4430 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4431 ttask->kinds, &refcount_set);
4432 htab_free (refcount_set);
4433 }
4434 return false;
4435 }
4436
4437 void
4438 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4439 {
4440 if (thread_limit)
4441 {
4442 struct gomp_task_icv *icv = gomp_icv (true);
4443 icv->thread_limit_var
4444 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4445 }
4446 (void) num_teams;
4447 }
4448
4449 bool
4450 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4451 unsigned int thread_limit, bool first)
4452 {
4453 struct gomp_thread *thr = gomp_thread ();
4454 if (first)
4455 {
4456 if (thread_limit)
4457 {
4458 struct gomp_task_icv *icv = gomp_icv (true);
4459 icv->thread_limit_var
4460 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4461 }
4462 (void) num_teams_high;
4463 if (num_teams_low == 0)
4464 num_teams_low = 1;
4465 thr->num_teams = num_teams_low - 1;
4466 thr->team_num = 0;
4467 }
4468 else if (thr->team_num == thr->num_teams)
4469 return false;
4470 else
4471 ++thr->team_num;
4472 return true;
4473 }
4474
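/* Illustrative sketch (an assumption about the compiler-generated host
   fallback, not a verbatim expansion): GOMP_teams4 is meant to be called
   repeatedly, so that

     #pragma omp teams num_teams(4:8) thread_limit(16)
     body ();

   can be lowered roughly as

     if (GOMP_teams4 (4, 8, 16, true))
       do
         body ();
       while (GOMP_teams4 (4, 8, 16, false));

   The first call sets the thread-limit ICV and the team count; later calls
   advance the team number until all teams have executed.  */
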
4475 void *
4476 omp_target_alloc (size_t size, int device_num)
4477 {
4478 if (device_num == omp_initial_device
4479 || device_num == gomp_get_num_devices ())
4480 return malloc (size);
4481
4482 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4483 if (devicep == NULL)
4484 return NULL;
4485
4486 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4487 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4488 return malloc (size);
4489
4490 gomp_mutex_lock (&devicep->lock);
4491 void *ret = devicep->alloc_func (devicep->target_id, size);
4492 gomp_mutex_unlock (&devicep->lock);
4493 return ret;
4494 }
4495
4496 void
4497 omp_target_free (void *device_ptr, int device_num)
4498 {
4499 if (device_num == omp_initial_device
4500 || device_num == gomp_get_num_devices ())
4501 {
4502 free (device_ptr);
4503 return;
4504 }
4505
4506 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4507 if (devicep == NULL || device_ptr == NULL)
4508 return;
4509
4510 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4511 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4512 {
4513 free (device_ptr);
4514 return;
4515 }
4516
4517 gomp_mutex_lock (&devicep->lock);
4518 gomp_free_device_memory (devicep, device_ptr);
4519 gomp_mutex_unlock (&devicep->lock);
4520 }
4521
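/* Illustrative usage sketch (standard OpenMP API; not used inside libgomp):

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     size_t bytes = 1024 * sizeof (double);
     double *hbuf = malloc (bytes);
     void *dbuf = omp_target_alloc (bytes, dev);
     if (hbuf && dbuf
         && omp_target_memcpy (dbuf, hbuf, bytes, 0, 0, dev, host) == 0)
       {
         // ... run target regions using is_device_ptr (dbuf) ...
         omp_target_memcpy (hbuf, dbuf, bytes, 0, 0, host, dev);
       }
     omp_target_free (dbuf, dev);
     free (hbuf);
*/
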
4522 void *
4523 gomp_usm_alloc (size_t size, int device_num)
4524 {
4525 if (device_num == gomp_get_num_devices ())
4526 return malloc (size);
4527
4528 struct gomp_device_descr *devicep = resolve_device (device_num, true);
4529 if (devicep == NULL)
4530 return NULL;
4531
4532 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4533 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4534 return malloc (size);
4535
4536 void *ret = NULL;
4537 gomp_mutex_lock (&devicep->lock);
4538 if (devicep->usm_alloc_func)
4539 ret = devicep->usm_alloc_func (devicep->target_id, size);
4540 gomp_mutex_unlock (&devicep->lock);
4541 return ret;
4542 }
4543
4544 void
4545 gomp_usm_free (void *device_ptr, int device_num)
4546 {
4547 if (device_ptr == NULL)
4548 return;
4549
4550 if (device_num == gomp_get_num_devices ())
4551 {
4552 free (device_ptr);
4553 return;
4554 }
4555
4556 struct gomp_device_descr *devicep = resolve_device (device_num, true);
4557 if (devicep == NULL)
4558 return;
4559
4560 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4561 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4562 {
4563 free (device_ptr);
4564 return;
4565 }
4566
4567 gomp_mutex_lock (&devicep->lock);
4568 if (devicep->usm_free_func
4569 && !devicep->usm_free_func (devicep->target_id, device_ptr))
4570 {
4571 gomp_mutex_unlock (&devicep->lock);
4572 gomp_fatal ("error in freeing device memory block at %p", device_ptr);
4573 }
4574 gomp_mutex_unlock (&devicep->lock);
4575 }
4576
4577 int
4578 omp_target_is_present (const void *ptr, int device_num)
4579 {
4580 if (device_num == omp_initial_device
4581 || device_num == gomp_get_num_devices ())
4582 return 1;
4583
4584 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4585 if (devicep == NULL)
4586 return 0;
4587
4588 if (ptr == NULL)
4589 return 1;
4590
4591 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4592 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4593 return 1;
4594
4595 gomp_mutex_lock (&devicep->lock);
4596 struct splay_tree_s *mem_map = &devicep->mem_map;
4597 struct splay_tree_key_s cur_node;
4598
4599 cur_node.host_start = (uintptr_t) ptr;
4600 cur_node.host_end = cur_node.host_start;
4601 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4602 int ret = n != NULL;
4603 gomp_mutex_unlock (&devicep->lock);
4604 return ret;
4605 }
4606
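/* Illustrative usage sketch (standard OpenMP API):

     int dev = omp_get_default_device ();
     double a[100];
     #pragma omp target enter data map(to: a) device(dev)
     int present = omp_target_is_present (a, dev);   // nonzero now
     #pragma omp target exit data map(release: a) device(dev)

   For shared-memory devices and the initial device the routine above
   always returns nonzero.  */
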
4607 static int
4608 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4609 struct gomp_device_descr **dst_devicep,
4610 struct gomp_device_descr **src_devicep)
4611 {
4612 if (dst_device_num != gomp_get_num_devices ()
4613 /* The gomp_get_num_devices call above has to be made unconditionally. */
4614 && dst_device_num != omp_initial_device)
4615 {
4616 *dst_devicep = resolve_device (dst_device_num, false);
4617 if (*dst_devicep == NULL)
4618 return EINVAL;
4619
4620 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4621 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4622 *dst_devicep = NULL;
4623 }
4624
4625 if (src_device_num != num_devices_openmp
4626 && src_device_num != omp_initial_device)
4627 {
4628 *src_devicep = resolve_device (src_device_num, false);
4629 if (*src_devicep == NULL)
4630 return EINVAL;
4631
4632 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4633 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4634 *src_devicep = NULL;
4635 }
4636
4637 return 0;
4638 }
4639
4640 static int
4641 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4642 size_t dst_offset, size_t src_offset,
4643 struct gomp_device_descr *dst_devicep,
4644 struct gomp_device_descr *src_devicep)
4645 {
4646 bool ret;
4647 if (src_devicep == NULL && dst_devicep == NULL)
4648 {
4649 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4650 return 0;
4651 }
4652 if (src_devicep == NULL)
4653 {
4654 gomp_mutex_lock (&dst_devicep->lock);
4655 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4656 (char *) dst + dst_offset,
4657 (char *) src + src_offset, length);
4658 gomp_mutex_unlock (&dst_devicep->lock);
4659 return (ret ? 0 : EINVAL);
4660 }
4661 if (dst_devicep == NULL)
4662 {
4663 gomp_mutex_lock (&src_devicep->lock);
4664 ret = src_devicep->dev2host_func (src_devicep->target_id,
4665 (char *) dst + dst_offset,
4666 (char *) src + src_offset, length);
4667 gomp_mutex_unlock (&src_devicep->lock);
4668 return (ret ? 0 : EINVAL);
4669 }
4670 if (src_devicep == dst_devicep)
4671 {
4672 gomp_mutex_lock (&src_devicep->lock);
4673 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4674 (char *) dst + dst_offset,
4675 (char *) src + src_offset, length);
4676 gomp_mutex_unlock (&src_devicep->lock);
4677 return (ret ? 0 : EINVAL);
4678 }
4679 return EINVAL;
4680 }
4681
4682 int
4683 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4684 size_t src_offset, int dst_device_num, int src_device_num)
4685 {
4686 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4687 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4688 &dst_devicep, &src_devicep);
4689
4690 if (ret)
4691 return ret;
4692
4693 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4694 dst_devicep, src_devicep);
4695
4696 return ret;
4697 }
4698
4699 typedef struct
4700 {
4701 void *dst;
4702 const void *src;
4703 size_t length;
4704 size_t dst_offset;
4705 size_t src_offset;
4706 struct gomp_device_descr *dst_devicep;
4707 struct gomp_device_descr *src_devicep;
4708 } omp_target_memcpy_data;
4709
4710 static void
4711 omp_target_memcpy_async_helper (void *args)
4712 {
4713 omp_target_memcpy_data *a = args;
4714 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4715 a->src_offset, a->dst_devicep, a->src_devicep))
4716 gomp_fatal ("omp_target_memcpy failed");
4717 }
4718
4719 int
4720 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4721 size_t dst_offset, size_t src_offset,
4722 int dst_device_num, int src_device_num,
4723 int depobj_count, omp_depend_t *depobj_list)
4724 {
4725 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4726 unsigned int flags = 0;
4727 void *depend[depobj_count + 5];
4728 int i;
4729 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4730 &dst_devicep, &src_devicep);
4731
4732 omp_target_memcpy_data s = {
4733 .dst = dst,
4734 .src = src,
4735 .length = length,
4736 .dst_offset = dst_offset,
4737 .src_offset = src_offset,
4738 .dst_devicep = dst_devicep,
4739 .src_devicep = src_devicep
4740 };
4741
4742 if (check)
4743 return check;
4744
4745 if (depobj_count > 0 && depobj_list != NULL)
4746 {
4747 flags |= GOMP_TASK_FLAG_DEPEND;
4748 depend[0] = 0;
4749 depend[1] = (void *) (uintptr_t) depobj_count;
4750 depend[2] = depend[3] = depend[4] = 0;
4751 for (i = 0; i < depobj_count; ++i)
4752 depend[i + 5] = &depobj_list[i];
4753 }
4754
4755 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4756 __alignof__ (s), true, flags, depend, 0, NULL);
4757
4758 return 0;
4759 }
4760
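/* Illustrative usage sketch (OpenMP 5.1 API; HBUF, BYTES, DEV and HOST are
   assumed to be set up as in the omp_target_alloc sketch further above):
   the helper is queued via GOMP_task, so completion can be awaited with a
   taskwait.

     void *dbuf = omp_target_alloc (bytes, dev);
     omp_target_memcpy_async (dbuf, hbuf, bytes, 0, 0, dev, host,
                              0, NULL);
     #pragma omp taskwait
*/
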
4761 static int
4762 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4763 int num_dims, const size_t *volume,
4764 const size_t *dst_offsets,
4765 const size_t *src_offsets,
4766 const size_t *dst_dimensions,
4767 const size_t *src_dimensions,
4768 struct gomp_device_descr *dst_devicep,
4769 struct gomp_device_descr *src_devicep)
4770 {
4771 size_t dst_slice = element_size;
4772 size_t src_slice = element_size;
4773 size_t j, dst_off, src_off, length;
4774 int i, ret;
4775
4776 if (num_dims == 1)
4777 {
4778 if (__builtin_mul_overflow (element_size, volume[0], &length)
4779 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4780 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4781 return EINVAL;
4782 if (dst_devicep == NULL && src_devicep == NULL)
4783 {
4784 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4785 length);
4786 ret = 1;
4787 }
4788 else if (src_devicep == NULL)
4789 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4790 (char *) dst + dst_off,
4791 (const char *) src + src_off,
4792 length);
4793 else if (dst_devicep == NULL)
4794 ret = src_devicep->dev2host_func (src_devicep->target_id,
4795 (char *) dst + dst_off,
4796 (const char *) src + src_off,
4797 length);
4798 else if (src_devicep == dst_devicep)
4799 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4800 (char *) dst + dst_off,
4801 (const char *) src + src_off,
4802 length);
4803 else
4804 ret = 0;
4805 return ret ? 0 : EINVAL;
4806 }
4807
4808 /* FIXME: it would be nice to have some plugin function to handle
4809 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4810 be handled in the generic recursion below, and for host-host it
4811 should be used even for any num_dims >= 2. */
4812
4813 for (i = 1; i < num_dims; i++)
4814 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4815 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4816 return EINVAL;
4817 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4818 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4819 return EINVAL;
4820 for (j = 0; j < volume[0]; j++)
4821 {
4822 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4823 (const char *) src + src_off,
4824 element_size, num_dims - 1,
4825 volume + 1, dst_offsets + 1,
4826 src_offsets + 1, dst_dimensions + 1,
4827 src_dimensions + 1, dst_devicep,
4828 src_devicep);
4829 if (ret)
4830 return ret;
4831 dst_off += dst_slice;
4832 src_off += src_slice;
4833 }
4834 return 0;
4835 }
4836
4837 static int
4838 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4839 int src_device_num,
4840 struct gomp_device_descr **dst_devicep,
4841 struct gomp_device_descr **src_devicep)
4842 {
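/* Per OpenMP, when both DST and SRC are NULL the routine reports the
   maximum number of dimensions supported; this implementation places no
   practical limit, hence INT_MAX.  */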
4843 if (!dst && !src)
4844 return INT_MAX;
4845
4846 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4847 dst_devicep, src_devicep);
4848 if (ret)
4849 return ret;
4850
4851 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
4852 return EINVAL;
4853
4854 return 0;
4855 }
4856
4857 static int
4858 omp_target_memcpy_rect_copy (void *dst, const void *src,
4859 size_t element_size, int num_dims,
4860 const size_t *volume, const size_t *dst_offsets,
4861 const size_t *src_offsets,
4862 const size_t *dst_dimensions,
4863 const size_t *src_dimensions,
4864 struct gomp_device_descr *dst_devicep,
4865 struct gomp_device_descr *src_devicep)
4866 {
4867 if (src_devicep)
4868 gomp_mutex_lock (&src_devicep->lock);
4869 else if (dst_devicep)
4870 gomp_mutex_lock (&dst_devicep->lock);
4871 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4872 volume, dst_offsets, src_offsets,
4873 dst_dimensions, src_dimensions,
4874 dst_devicep, src_devicep);
4875 if (src_devicep)
4876 gomp_mutex_unlock (&src_devicep->lock);
4877 else if (dst_devicep)
4878 gomp_mutex_unlock (&dst_devicep->lock);
4879
4880 return ret;
4881 }
4882
4883 int
4884 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4885 int num_dims, const size_t *volume,
4886 const size_t *dst_offsets,
4887 const size_t *src_offsets,
4888 const size_t *dst_dimensions,
4889 const size_t *src_dimensions,
4890 int dst_device_num, int src_device_num)
4891 {
4892 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4893
4894 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4895 src_device_num, &dst_devicep,
4896 &src_devicep);
4897
4898 if (check)
4899 return check;
4900
4901 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4902 volume, dst_offsets, src_offsets,
4903 dst_dimensions, src_dimensions,
4904 dst_devicep, src_devicep);
4905
4906 return ret;
4907 }
4908
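/* Illustrative usage sketch (standard OpenMP API; DST, SRC, DST_DEV and
   SRC_DEV are assumed): copy a 2x3 block of doubles starting at row 1,
   column 2 of a 10x10 source array into the top-left corner of a 10x10
   destination array.

     size_t volume[2]      = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dst_dims[2]    = { 10, 10 };
     size_t src_dims[2]    = { 10, 10 };
     omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                             dst_offsets, src_offsets, dst_dims, src_dims,
                             dst_dev, src_dev);
*/
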
4909 typedef struct
4910 {
4911 void *dst;
4912 const void *src;
4913 size_t element_size;
4914 const size_t *volume;
4915 const size_t *dst_offsets;
4916 const size_t *src_offsets;
4917 const size_t *dst_dimensions;
4918 const size_t *src_dimensions;
4919 struct gomp_device_descr *dst_devicep;
4920 struct gomp_device_descr *src_devicep;
4921 int num_dims;
4922 } omp_target_memcpy_rect_data;
4923
4924 static void
4925 omp_target_memcpy_rect_async_helper (void *args)
4926 {
4927 omp_target_memcpy_rect_data *a = args;
4928 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4929 a->num_dims, a->volume, a->dst_offsets,
4930 a->src_offsets, a->dst_dimensions,
4931 a->src_dimensions, a->dst_devicep,
4932 a->src_devicep);
4933 if (ret)
4934 gomp_fatal ("omp_target_memcpy_rect failed");
4935 }
4936
4937 int
4938 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4939 int num_dims, const size_t *volume,
4940 const size_t *dst_offsets,
4941 const size_t *src_offsets,
4942 const size_t *dst_dimensions,
4943 const size_t *src_dimensions,
4944 int dst_device_num, int src_device_num,
4945 int depobj_count, omp_depend_t *depobj_list)
4946 {
4947 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4948 unsigned flags = 0;
4949 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4950 src_device_num, &dst_devicep,
4951 &src_devicep);
4952 void *depend[depobj_count + 5];
4953 int i;
4954
4955 omp_target_memcpy_rect_data s = {
4956 .dst = dst,
4957 .src = src,
4958 .element_size = element_size,
4959 .num_dims = num_dims,
4960 .volume = volume,
4961 .dst_offsets = dst_offsets,
4962 .src_offsets = src_offsets,
4963 .dst_dimensions = dst_dimensions,
4964 .src_dimensions = src_dimensions,
4965 .dst_devicep = dst_devicep,
4966 .src_devicep = src_devicep
4967 };
4968
4969 if (check)
4970 return check;
4971
4972 if (depobj_count > 0 && depobj_list != NULL)
4973 {
4974 flags |= GOMP_TASK_FLAG_DEPEND;
4975 depend[0] = 0;
4976 depend[1] = (void *) (uintptr_t) depobj_count;
4977 depend[2] = depend[3] = depend[4] = 0;
4978 for (i = 0; i < depobj_count; ++i)
4979 depend[i + 5] = &depobj_list[i];
4980 }
4981
4982 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4983 __alignof__ (s), true, flags, depend, 0, NULL);
4984
4985 return 0;
4986 }
4987
4988 int
4989 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4990 size_t size, size_t device_offset, int device_num)
4991 {
4992 if (device_num == omp_initial_device
4993 || device_num == gomp_get_num_devices ())
4994 return EINVAL;
4995
4996 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4997 if (devicep == NULL)
4998 return EINVAL;
4999
5000 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5001 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5002 return EINVAL;
5003
5004 gomp_mutex_lock (&devicep->lock);
5005
5006 struct splay_tree_s *mem_map = &devicep->mem_map;
5007 struct splay_tree_key_s cur_node;
5008 int ret = EINVAL;
5009
5010 cur_node.host_start = (uintptr_t) host_ptr;
5011 cur_node.host_end = cur_node.host_start + size;
5012 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
5013 if (n)
5014 {
5015 if (n->tgt->tgt_start + n->tgt_offset
5016 == (uintptr_t) device_ptr + device_offset
5017 && n->host_start <= cur_node.host_start
5018 && n->host_end >= cur_node.host_end)
5019 ret = 0;
5020 }
5021 else
5022 {
5023 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
5024 tgt->array = gomp_malloc (sizeof (*tgt->array));
5025 tgt->refcount = 1;
5026 tgt->tgt_start = 0;
5027 tgt->tgt_end = 0;
5028 tgt->to_free = NULL;
5029 tgt->prev = NULL;
5030 tgt->list_count = 0;
5031 tgt->device_descr = devicep;
5032 splay_tree_node array = tgt->array;
5033 splay_tree_key k = &array->key;
5034 k->host_start = cur_node.host_start;
5035 k->host_end = cur_node.host_end;
5036 k->tgt = tgt;
5037 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
5038 k->refcount = REFCOUNT_INFINITY;
5039 k->dynamic_refcount = 0;
5040 k->aux = NULL;
5041 array->left = NULL;
5042 array->right = NULL;
5043 splay_tree_insert (&devicep->mem_map, array);
5044 ret = 0;
5045 }
5046 gomp_mutex_unlock (&devicep->lock);
5047 return ret;
5048 }
5049
5050 int
5051 omp_target_disassociate_ptr (const void *ptr, int device_num)
5052 {
5053 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5054 if (devicep == NULL)
5055 return EINVAL;
5056
5057 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5058 return EINVAL;
5059
5060 gomp_mutex_lock (&devicep->lock);
5061
5062 struct splay_tree_s *mem_map = &devicep->mem_map;
5063 struct splay_tree_key_s cur_node;
5064 int ret = EINVAL;
5065
5066 cur_node.host_start = (uintptr_t) ptr;
5067 cur_node.host_end = cur_node.host_start;
5068 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
5069 if (n
5070 && n->host_start == cur_node.host_start
5071 && n->refcount == REFCOUNT_INFINITY
5072 && n->tgt->tgt_start == 0
5073 && n->tgt->to_free == NULL
5074 && n->tgt->refcount == 1
5075 && n->tgt->list_count == 0)
5076 {
5077 splay_tree_remove (&devicep->mem_map, n);
5078 gomp_unmap_tgt (n->tgt);
5079 ret = 0;
5080 }
5081
5082 gomp_mutex_unlock (&devicep->lock);
5083 return ret;
5084 }
5085
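/* Illustrative usage sketch (standard OpenMP API; HBUF, N and DEV are
   assumed): pre-allocate device memory, associate it with a host buffer so
   that target regions reuse the existing mapping, then tear it down again.
   Note that associated storage is not copied implicitly; an 'always' map
   modifier (or omp_target_memcpy) is needed to transfer the data.

     void *dbuf = omp_target_alloc (n * sizeof (double), dev);
     omp_target_associate_ptr (hbuf, dbuf, n * sizeof (double), 0, dev);
     #pragma omp target map(always, tofrom: hbuf[0:n]) device(dev)
     {
       // works on the associated device storage
     }
     omp_target_disassociate_ptr (hbuf, dev);
     omp_target_free (dbuf, dev);
*/
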
5086 void *
5087 omp_get_mapped_ptr (const void *ptr, int device_num)
5088 {
5089 if (device_num == omp_initial_device
5090 || device_num == omp_get_initial_device ())
5091 return (void *) ptr;
5092
5093 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5094 if (devicep == NULL)
5095 return NULL;
5096
5097 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5098 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5099 return (void *) ptr;
5100
5101 gomp_mutex_lock (&devicep->lock);
5102
5103 struct splay_tree_s *mem_map = &devicep->mem_map;
5104 struct splay_tree_key_s cur_node;
5105 void *ret = NULL;
5106
5107 cur_node.host_start = (uintptr_t) ptr;
5108 cur_node.host_end = cur_node.host_start;
5109 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5110
5111 if (n)
5112 {
5113 uintptr_t offset = cur_node.host_start - n->host_start;
5114 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
5115 }
5116
5117 gomp_mutex_unlock (&devicep->lock);
5118
5119 return ret;
5120 }
5121
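/* Illustrative usage sketch (OpenMP 5.1 API; DEV is assumed):

     double a[100];
     #pragma omp target enter data map(to: a) device(dev)
     void *dptr = omp_get_mapped_ptr (&a[10], dev);
     // dptr now points at the device copy of a[10]; for the initial device
     // and shared-memory devices the input pointer is returned unchanged.
*/
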
5122 int
5123 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5124 {
5125 if (device_num == omp_initial_device
5126 || device_num == gomp_get_num_devices ())
5127 return true;
5128
5129 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5130 if (devicep == NULL)
5131 return false;
5132
5133 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5134 return true;
5135
5136 if (devicep->is_usm_ptr_func && devicep->is_usm_ptr_func ((void *) ptr))
5137 return true;
5138
5139 return false;
5140 }
5141
5142 int
5143 omp_pause_resource (omp_pause_resource_t kind, int device_num)
5144 {
5145 (void) kind;
5146 if (device_num == omp_initial_device
5147 || device_num == gomp_get_num_devices ())
5148 return gomp_pause_host ();
5149
5150 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5151 if (devicep == NULL)
5152 return -1;
5153
5154 /* Do nothing for target devices for now. */
5155 return 0;
5156 }
5157
5158 int
5159 omp_pause_resource_all (omp_pause_resource_t kind)
5160 {
5161 (void) kind;
5162 if (gomp_pause_host ())
5163 return -1;
5164 /* Do nothing for target devices for now. */
5165 return 0;
5166 }
5167
5168 ialias (omp_pause_resource)
5169 ialias (omp_pause_resource_all)
5170
5171 bool
5172 GOMP_evaluate_target_device (int device_num, const char *kind,
5173 const char *arch, const char *isa)
5174 {
5175 bool result = true;
5176
5177 if (device_num < 0)
5178 device_num = omp_get_default_device ();
5179
5180 if (kind && strcmp (kind, "any") == 0)
5181 kind = NULL;
5182
5183 gomp_debug (1, "%s: device_num = %u, kind=%s, arch=%s, isa=%s",
5184 __FUNCTION__, device_num, kind, arch, isa);
5185
5186 if (omp_get_device_num () == device_num)
5187 result = GOMP_evaluate_current_device (kind, arch, isa);
5188 else
5189 {
5190 if (!omp_is_initial_device ())
5191 /* Accelerators are not expected to know about other devices. */
5192 result = false;
5193 else
5194 {
5195 struct gomp_device_descr *device = resolve_device (device_num, true);
5196 if (device == NULL)
5197 result = false;
5198 else if (device->evaluate_device_func)
5199 result = device->evaluate_device_func (device_num, kind, arch,
5200 isa);
5201 }
5202 }
5203
5204 gomp_debug (1, " -> %s\n", result ? "true" : "false");
5205 return result;
5206 }
5207
5208 #ifdef PLUGIN_SUPPORT
5209
5210 /* This function tries to load a plugin for DEVICE. The name of the plugin
5211 is passed in PLUGIN_NAME.
5212 The handles of the found functions are stored in the corresponding fields
5213 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5214
5215 static bool
5216 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5217 const char *plugin_name)
5218 {
5219 const char *err = NULL, *last_missing = NULL;
5220
5221 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5222 if (!plugin_handle)
5223 #if OFFLOAD_DEFAULTED
5224 return 0;
5225 #else
5226 goto dl_fail;
5227 #endif
5228
5229 /* Check if all required functions are available in the plugin and store
5230 their handles. None of the symbols can legitimately be NULL,
5231 so we don't need to check dlerror all the time. */
5232 #define DLSYM(f) \
5233 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5234 goto dl_fail
5235 /* Similar, but missing functions are not an error. Evaluates to false
5236 if the lookup failed, true otherwise. */
5237 #define DLSYM_OPT(f, n) \
5238 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5239 || (last_missing = #n, 0))
5240
5241 DLSYM (version);
5242 if (device->version_func () != GOMP_VERSION)
5243 {
5244 err = "plugin version mismatch";
5245 goto fail;
5246 }
5247
5248 DLSYM (get_name);
5249 DLSYM (get_caps);
5250 DLSYM (get_type);
5251 DLSYM (get_num_devices);
5252 DLSYM (init_device);
5253 DLSYM (fini_device);
5254 DLSYM (load_image);
5255 DLSYM (unload_image);
5256 DLSYM (alloc);
5257 DLSYM (free);
5258 DLSYM_OPT (usm_alloc, usm_alloc);
5259 DLSYM_OPT (usm_free, usm_free);
5260 DLSYM_OPT (is_usm_ptr, is_usm_ptr);
5261 DLSYM (dev2host);
5262 DLSYM (host2dev);
5263 DLSYM (evaluate_device);
5264 device->capabilities = device->get_caps_func ();
5265 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5266 {
5267 DLSYM (run);
5268 DLSYM_OPT (async_run, async_run);
5269 DLSYM_OPT (can_run, can_run);
5270 DLSYM (dev2dev);
5271 }
5272 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5273 {
5274 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5275 || !DLSYM_OPT (openacc.create_thread_data,
5276 openacc_create_thread_data)
5277 || !DLSYM_OPT (openacc.destroy_thread_data,
5278 openacc_destroy_thread_data)
5279 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5280 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5281 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5282 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5283 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5284 || !DLSYM_OPT (openacc.async.queue_callback,
5285 openacc_async_queue_callback)
5286 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5287 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5288 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5289 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5290 {
5291 /* Require all the OpenACC handlers if we have
5292 GOMP_OFFLOAD_CAP_OPENACC_200. */
5293 err = "plugin missing OpenACC handler function";
5294 goto fail;
5295 }
5296
5297 unsigned cuda = 0;
5298 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5299 openacc_cuda_get_current_device);
5300 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5301 openacc_cuda_get_current_context);
5302 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5303 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5304 if (cuda && cuda != 4)
5305 {
5306 /* Make sure all the CUDA functions are there if any of them are. */
5307 err = "plugin missing OpenACC CUDA handler function";
5308 goto fail;
5309 }
5310 }
5311 #undef DLSYM
5312 #undef DLSYM_OPT
5313
5314 return 1;
5315
5316 dl_fail:
5317 err = dlerror ();
5318 fail:
5319 gomp_error ("while loading %s: %s", plugin_name, err);
5320 if (last_missing)
5321 gomp_error ("missing function was %s", last_missing);
5322 if (plugin_handle)
5323 dlclose (plugin_handle);
5324
5325 return 0;
5326 }
5327
5328 /* This function finalizes all initialized devices. */
5329
5330 static void
5331 gomp_target_fini (void)
5332 {
5333 int i;
5334 for (i = 0; i < num_devices; i++)
5335 {
5336 bool ret = true;
5337 struct gomp_device_descr *devicep = &devices[i];
5338 gomp_mutex_lock (&devicep->lock);
5339 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5340 ret = gomp_fini_device (devicep);
5341 gomp_mutex_unlock (&devicep->lock);
5342 if (!ret)
5343 gomp_fatal ("device finalization failed");
5344 }
5345 }
5346
5347 /* This function initializes the runtime for offloading.
5348 It parses the list of offload plugins and tries to load them.
5349 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5350 will be set, and the array DEVICES initialized, containing descriptors for
5351 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
5352 by the others. */
5353
5354 static void
5355 gomp_target_init (void)
5356 {
5357 const char *prefix = "libgomp-plugin-";
5358 const char *suffix = SONAME_SUFFIX (1);
5359 const char *cur, *next;
5360 char *plugin_name;
5361 int i, new_num_devs;
5362 int num_devs = 0, num_devs_openmp;
5363 struct gomp_device_descr *devs = NULL;
5364
5365 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5366 return;
5367
5368 cur = OFFLOAD_PLUGINS;
5369 if (*cur)
5370 do
5371 {
5372 struct gomp_device_descr current_device;
5373 size_t prefix_len, suffix_len, cur_len;
5374
5375 next = strchr (cur, ',');
5376
5377 prefix_len = strlen (prefix);
5378 cur_len = next ? next - cur : strlen (cur);
5379 suffix_len = strlen (suffix);
5380
5381 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5382 if (!plugin_name)
5383 {
5384 num_devs = 0;
5385 break;
5386 }
5387
5388 memcpy (plugin_name, prefix, prefix_len);
5389 memcpy (plugin_name + prefix_len, cur, cur_len);
5390 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
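	  /* Editor's note (illustrative assumption: SONAME_SUFFIX (1) expands
	     to something like ".so.1" on ELF targets): an OFFLOAD_PLUGINS
	     entry such as "nvptx" therefore yields the PLUGIN_NAME
	     "libgomp-plugin-nvptx.so.1".  */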
5391
5392 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5393 {
5394 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5395 new_num_devs = current_device.get_num_devices_func (omp_req);
5396 if (gomp_debug_var > 0 && new_num_devs < 0)
5397 {
5398 bool found = false;
5399 int type = current_device.get_type_func ();
5400 for (int img = 0; img < num_offload_images; img++)
5401 if (type == offload_images[img].type)
5402 found = true;
5403 if (found)
5404 {
5405 char buf[sizeof ("unified_address, unified_shared_memory, "
5406 "reverse_offload")];
5407 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5408 char *name = (char *) malloc (cur_len + 1);
5409 memcpy (name, cur, cur_len);
5410 name[cur_len] = '\0';
5411 gomp_debug (1,
5412 "%s devices present but 'omp requires %s' "
5413 "cannot be fulfilled\n", name, buf);
5414 free (name);
5415 }
5416 }
5417 else if (new_num_devs >= 1)
5418 {
5419 /* Augment DEVICES and NUM_DEVICES. */
5420
5421 devs = realloc (devs, (num_devs + new_num_devs)
5422 * sizeof (struct gomp_device_descr));
5423 if (!devs)
5424 {
5425 num_devs = 0;
5426 free (plugin_name);
5427 break;
5428 }
5429
5430 current_device.name = current_device.get_name_func ();
5431 /* current_device.capabilities has already been set. */
5432 current_device.type = current_device.get_type_func ();
5433 current_device.mem_map.root = NULL;
5434 current_device.mem_map_rev.root = NULL;
5435 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5436 for (i = 0; i < new_num_devs; i++)
5437 {
5438 current_device.target_id = i;
5439 devs[num_devs] = current_device;
5440 gomp_mutex_init (&devs[num_devs].lock);
5441 num_devs++;
5442 }
5443 }
5444 }
5445
5446 free (plugin_name);
5447 cur = next + 1;
5448 }
5449 while (next);
5450
5451 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5452 NUM_DEVICES_OPENMP. */
5453 struct gomp_device_descr *devs_s
5454 = malloc (num_devs * sizeof (struct gomp_device_descr));
5455 if (!devs_s)
5456 {
5457 num_devs = 0;
5458 free (devs);
5459 devs = NULL;
5460 }
5461 num_devs_openmp = 0;
5462 for (i = 0; i < num_devs; i++)
5463 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5464 devs_s[num_devs_openmp++] = devs[i];
5465 int num_devs_after_openmp = num_devs_openmp;
5466 for (i = 0; i < num_devs; i++)
5467 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5468 devs_s[num_devs_after_openmp++] = devs[i];
5469 free (devs);
5470 devs = devs_s;
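  /* Editor's note: after this stable partition, DEVS holds the
     GOMP_OFFLOAD_CAP_OPENMP_400 devices in slots [0, num_devs_openmp) and
     the remaining devices after them, so the OpenMP runtime can address its
     devices as the first NUM_DEVICES_OPENMP entries.  */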
5471
5472 for (i = 0; i < num_devs; i++)
5473 {
5474 /* The 'devices' array can be moved (by the realloc call) until we have
5475 found all the plugins, so registering with the OpenACC runtime (which
5476 takes a copy of the pointer argument) must be delayed until now. */
5477 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5478 goacc_register (&devs[i]);
5479 }
5480
5481 num_devices = num_devs;
5482 num_devices_openmp = num_devs_openmp;
5483 devices = devs;
5484 if (atexit (gomp_target_fini) != 0)
5485 gomp_fatal ("atexit failed");
5486 }
5487
5488 #else /* PLUGIN_SUPPORT */
5489 /* If dlfcn.h is unavailable, we always fall back to host execution.
5490 GOMP_target* routines are just stubs for this case. */
5491 static void
5492 gomp_target_init (void)
5493 {
5494 }
5495 #endif /* PLUGIN_SUPPORT */