1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support code for offloading.  */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 typedef uintptr_t *hash_entry_type;
49 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
50 static inline void htab_free (void *ptr) { free (ptr); }
51 #include "hashtab.h"
52
53 static inline hashval_t
54 htab_hash (hash_entry_type element)
55 {
56 return hash_pointer ((void *) element);
57 }
58
59 static inline bool
60 htab_eq (hash_entry_type x, hash_entry_type y)
61 {
62 return x == y;
63 }
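/* The hash table defined above is used as a set of already-adjusted
   reference-count pointers: gomp_increment_refcount and
   gomp_decrement_refcount record each refcount they touch so that a
   region's refcount is adjusted at most once per construct, per the
   OpenMP 5.0 semantics described below.  */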
64
65 #define FIELD_TGT_EMPTY (~(size_t) 0)
66
67 static void gomp_target_init (void);
68
69 /* The whole initialization code for offloading plugins is only run once.  */
70 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
71
72 /* Mutex for offload image registration. */
73 static gomp_mutex_t register_lock;
74
75 /* This structure describes an offload image.
76 It contains type of the target device, pointer to host table descriptor, and
77 pointer to target data. */
78 struct offload_image_descr {
79 unsigned version;
80 enum offload_target_type type;
81 const void *host_table;
82 const void *target_data;
83 };
84
85 /* Array of descriptors of offload images. */
86 static struct offload_image_descr *offload_images;
87
88 /* Total number of offload images. */
89 static int num_offload_images;
90
91 /* Array of descriptors for all available devices. */
92 static struct gomp_device_descr *devices;
93
94 /* Total number of available devices. */
95 static int num_devices;
96
97 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
98 static int num_devices_openmp;
99
100 /* Mask of requires directive clause values, summarized from .gnu.gomp.requires
101 section. Offload plugins are queried with this mask to see if all required
102 features are supported. */
103 static unsigned int gomp_requires_mask;
104
105 /* Start/end of .gnu.gomp.requires section of program, defined in
106 crtoffloadbegin/end.o. */
107 __attribute__((weak))
108 extern const unsigned int __requires_mask_table[];
109 __attribute__((weak))
110 extern const unsigned int __requires_mask_table_end[];
111
112 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
113
114 static void *
115 gomp_realloc_unlock (void *old, size_t size)
116 {
117 void *ret = realloc (old, size);
118 if (ret == NULL)
119 {
120 gomp_mutex_unlock (&register_lock);
121 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
122 }
123 return ret;
124 }
125
126 attribute_hidden void
127 gomp_init_targets_once (void)
128 {
129 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
130 }
131
132 attribute_hidden int
133 gomp_get_num_devices (void)
134 {
135 gomp_init_targets_once ();
136 return num_devices_openmp;
137 }
138
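/* Map DEVICE_ID (or the default-device-var ICV for GOMP_DEVICE_ICV) to a
   device descriptor, initializing the device if it has not been used yet.
   Returns NULL for the host fallback, out-of-range ids, or a finalized
   device; with OMP_TARGET_OFFLOAD=mandatory most of those cases are fatal
   instead.  */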
139 static struct gomp_device_descr *
140 resolve_device (int device_id)
141 {
142 if (device_id == GOMP_DEVICE_ICV)
143 {
144 struct gomp_task_icv *icv = gomp_icv (false);
145 device_id = icv->default_device_var;
146 }
147
148 if (device_id < 0 || device_id >= gomp_get_num_devices ())
149 {
150 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
151 && device_id != GOMP_DEVICE_HOST_FALLBACK
152 && device_id != num_devices_openmp)
153 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
154 "but device not found");
155
156 return NULL;
157 }
158
159 gomp_mutex_lock (&devices[device_id].lock);
160 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
161 gomp_init_device (&devices[device_id]);
162 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
163 {
164 gomp_mutex_unlock (&devices[device_id].lock);
165
166 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
167 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
168 "but device is finalized");
169
170 return NULL;
171 }
172 gomp_mutex_unlock (&devices[device_id].lock);
173
174 return &devices[device_id];
175 }
176
177
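/* Look up KEY in MEM_MAP.  A zero-length key (host_start == host_end) would
   not intersect any mapping, so it is temporarily widened: first by one byte
   past the end, then by one byte before the start, so that it still matches a
   mapping it merely abuts.  gomp_map_0len_lookup below only widens the end,
   as used for zero-length array sections.  */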
178 static inline splay_tree_key
179 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
180 {
181 if (key->host_start != key->host_end)
182 return splay_tree_lookup (mem_map, key);
183
184 key->host_end++;
185 splay_tree_key n = splay_tree_lookup (mem_map, key);
186 key->host_end--;
187 if (n)
188 return n;
189 key->host_start--;
190 n = splay_tree_lookup (mem_map, key);
191 key->host_start++;
192 if (n)
193 return n;
194 return splay_tree_lookup (mem_map, key);
195 }
196
197 static inline splay_tree_key
198 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
199 {
200 if (key->host_start != key->host_end)
201 return splay_tree_lookup (mem_map, key);
202
203 key->host_end++;
204 splay_tree_key n = splay_tree_lookup (mem_map, key);
205 key->host_end--;
206 return n;
207 }
208
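/* Copy SIZE bytes from SRCADDR to DSTADDR using the plugin hook COPY_FUNC;
   DST and SRC only name the two sides in the error message.  On failure the
   device lock is released before calling gomp_fatal.  */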
209 static inline void
210 gomp_device_copy (struct gomp_device_descr *devicep,
211 bool (*copy_func) (int, void *, const void *, size_t),
212 const char *dst, void *dstaddr,
213 const char *src, const void *srcaddr,
214 size_t size)
215 {
216 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
217 {
218 gomp_mutex_unlock (&devicep->lock);
219 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
220 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
221 }
222 }
223
224 static inline void
225 goacc_device_copy_async (struct gomp_device_descr *devicep,
226 bool (*copy_func) (int, void *, const void *, size_t,
227 struct goacc_asyncqueue *),
228 const char *dst, void *dstaddr,
229 const char *src, const void *srcaddr,
230 const void *srcaddr_orig,
231 size_t size, struct goacc_asyncqueue *aq)
232 {
233 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
234 {
235 gomp_mutex_unlock (&devicep->lock);
236 if (srcaddr_orig && srcaddr_orig != srcaddr)
237 gomp_fatal ("Copying of %s object [%p..%p)"
238 " via buffer %s object [%p..%p)"
239 " to %s object [%p..%p) failed",
240 src, srcaddr_orig, srcaddr_orig + size,
241 src, srcaddr, srcaddr + size,
242 dst, dstaddr, dstaddr + size);
243 else
244 gomp_fatal ("Copying of %s object [%p..%p)"
245 " to %s object [%p..%p) failed",
246 src, srcaddr, srcaddr + size,
247 dst, dstaddr, dstaddr + size);
248 }
249 }
250
251 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
252 host to device memory transfers. */
253
254 struct gomp_coalesce_chunk
255 {
256 /* The starting and ending point of a coalesced chunk of memory. */
257 size_t start, end;
258 };
259
260 struct gomp_coalesce_buf
261 {
262 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
263 it will be copied to the device. */
264 void *buf;
265 struct target_mem_desc *tgt;
266 /* Array with offsets, chunks[i].start is the starting offset and
267 chunks[i].end ending offset relative to tgt->tgt_start device address
268 of chunks which are to be copied to buf and later copied to device. */
269 struct gomp_coalesce_chunk *chunks;
270 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
271 be performed. */
272 long chunk_cnt;
273 /* During construction of chunks array, how many memory regions are within
274 the last chunk. If there is just one memory region for a chunk, we copy
275 it directly to device rather than going through buf. */
276 long use_cnt;
277 };
278
279 /* Maximum size of memory region considered for coalescing. Larger copies
280 are performed directly. */
281 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
282
283 /* Maximum size of a gap in between regions to consider them being copied
284 within the same chunk. All the device offsets considered are within
285 newly allocated device memory, so it isn't fatal if we copy some padding
286 in between from host to device. The gaps come either from alignment
287 padding or from memory regions which are not supposed to be copied from
288 host to device (e.g. map(alloc:), map(from:) etc.). */
289 #define MAX_COALESCE_BUF_GAP (4 * 1024)
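/* For example (illustrative numbers only): with a 512-byte region at device
   offset 0 and a 256-byte region at offset 1024, the 512-byte gap is below
   MAX_COALESCE_BUF_GAP, so gomp_coalesce_buf_add merges both into one chunk
   covering [0, 1280) and a single host-to-device transfer; a region larger
   than MAX_COALESCE_BUF_SIZE would instead be copied directly.  */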
290
291 /* Add a region with device tgt_start-relative offset START and length LEN to CBUF.
292
293 This must not be used for asynchronous copies, because the host data might
294 not be computed yet (by an earlier asynchronous compute region, for
295 example).
296 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
297 is it more performant to use libgomp CBUF buffering or individual device
298 asynchronous copying?)  */
299
300 static inline void
301 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
302 {
303 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
304 return;
305 if (cbuf->chunk_cnt)
306 {
307 if (cbuf->chunk_cnt < 0)
308 return;
309 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
310 {
311 cbuf->chunk_cnt = -1;
312 return;
313 }
314 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
315 {
316 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
317 cbuf->use_cnt++;
318 return;
319 }
320 /* If the last chunk is only used by one mapping, discard it,
321 as it will be one host to device copy anyway and
322 memcpying it around will only waste cycles. */
323 if (cbuf->use_cnt == 1)
324 cbuf->chunk_cnt--;
325 }
326 cbuf->chunks[cbuf->chunk_cnt].start = start;
327 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
328 cbuf->chunk_cnt++;
329 cbuf->use_cnt = 1;
330 }
331
332 /* Return true for mapping kinds which need to copy data from the
333 host to device for regions that weren't previously mapped. */
334
335 static inline bool
336 gomp_to_device_kind_p (int kind)
337 {
338 switch (kind)
339 {
340 case GOMP_MAP_ALLOC:
341 case GOMP_MAP_FROM:
342 case GOMP_MAP_FORCE_ALLOC:
343 case GOMP_MAP_FORCE_FROM:
344 case GOMP_MAP_ALWAYS_FROM:
345 return false;
346 default:
347 return true;
348 }
349 }
350
351 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
352 non-NULL), when the source data is on the stack or may otherwise be deallocated
353 before the asynchronous copy takes place, EPHEMERAL must be passed as
354 TRUE. */
355
356 attribute_hidden void
357 gomp_copy_host2dev (struct gomp_device_descr *devicep,
358 struct goacc_asyncqueue *aq,
359 void *d, const void *h, size_t sz,
360 bool ephemeral, struct gomp_coalesce_buf *cbuf)
361 {
362 if (__builtin_expect (aq != NULL, 0))
363 {
364 /* See 'gomp_coalesce_buf_add'. */
365 assert (!cbuf);
366
367 void *h_buf = (void *) h;
368 if (ephemeral)
369 {
370 /* We're queueing up an asynchronous copy from data that may
371 disappear before the transfer takes place (i.e. because it is a
372 stack local in a function that is no longer executing). Make a
373 copy of the data into a temporary buffer in those cases. */
374 h_buf = gomp_malloc (sz);
375 memcpy (h_buf, h, sz);
376 }
377 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
378 "dev", d, "host", h_buf, h, sz, aq);
379 if (ephemeral)
380 /* Free temporary buffer once the transfer has completed. */
381 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
382
383 return;
384 }
385
386 if (cbuf)
387 {
388 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
389 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
390 {
391 long first = 0;
392 long last = cbuf->chunk_cnt - 1;
393 while (first <= last)
394 {
395 long middle = (first + last) >> 1;
396 if (cbuf->chunks[middle].end <= doff)
397 first = middle + 1;
398 else if (cbuf->chunks[middle].start <= doff)
399 {
400 if (doff + sz > cbuf->chunks[middle].end)
401 {
402 gomp_mutex_unlock (&devicep->lock);
403 gomp_fatal ("internal libgomp cbuf error");
404 }
405 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
406 h, sz);
407 return;
408 }
409 else
410 last = middle - 1;
411 }
412 }
413 }
414
415 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
416 }
417
418 attribute_hidden void
419 gomp_copy_dev2host (struct gomp_device_descr *devicep,
420 struct goacc_asyncqueue *aq,
421 void *h, const void *d, size_t sz)
422 {
423 if (__builtin_expect (aq != NULL, 0))
424 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
425 "host", h, "dev", d, NULL, sz, aq);
426 else
427 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
428 }
429
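/* Free the device memory block DEVPTR on DEVICEP via the plugin's free hook,
   releasing the device lock and aborting if the hook reports failure.  */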
430 static void
431 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
432 {
433 if (!devicep->free_func (devicep->target_id, devptr))
434 {
435 gomp_mutex_unlock (&devicep->lock);
436 gomp_fatal ("error in freeing device memory block at %p", devptr);
437 }
438 }
439
440 /* Increment reference count of a splay_tree_key region K by 1.
441 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
442 increment the value if refcount is not yet contained in the set (used for
443 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
444 once for each construct). */
445
446 static inline void
447 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
448 {
449 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
450 return;
451
452 uintptr_t *refcount_ptr = &k->refcount;
453
454 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
455 refcount_ptr = &k->structelem_refcount;
456 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
457 refcount_ptr = k->structelem_refcount_ptr;
458
459 if (refcount_set)
460 {
461 if (htab_find (*refcount_set, refcount_ptr))
462 return;
463 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
464 *slot = refcount_ptr;
465 }
466
467 *refcount_ptr += 1;
468 return;
469 }
470
471 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
472 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
473 track already seen refcounts, and only adjust the value if refcount is not
474 yet contained in the set (like gomp_increment_refcount).
475
476 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
477 it is already zero and we know we decremented it earlier. This signals that
478 associated maps should be copied back to host.
479
480 *DO_REMOVE is set to true when this is the first handling of this refcount
481 and we are setting it to zero. This signals a removal of this key from the
482 splay-tree map.
483
484 Copy and removal are separated due to cases like handling of structure
485 elements, e.g. each map of a structure element representing a possible copy
486 out of a structure field has to be handled individually, but we only signal
487 removal for one (the first encountered) sibling map.  */
488
489 static inline void
490 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
491 bool *do_copy, bool *do_remove)
492 {
493 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
494 {
495 *do_copy = *do_remove = false;
496 return;
497 }
498
499 uintptr_t *refcount_ptr = &k->refcount;
500
501 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
502 refcount_ptr = &k->structelem_refcount;
503 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
504 refcount_ptr = k->structelem_refcount_ptr;
505
506 bool new_encountered_refcount;
507 bool set_to_zero = false;
508 bool is_zero = false;
509
510 uintptr_t orig_refcount = *refcount_ptr;
511
512 if (refcount_set)
513 {
514 if (htab_find (*refcount_set, refcount_ptr))
515 {
516 new_encountered_refcount = false;
517 goto end;
518 }
519
520 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
521 *slot = refcount_ptr;
522 new_encountered_refcount = true;
523 }
524 else
525 /* If no refcount_set being used, assume all keys are being decremented
526 for the first time. */
527 new_encountered_refcount = true;
528
529 if (delete_p)
530 *refcount_ptr = 0;
531 else if (*refcount_ptr > 0)
532 *refcount_ptr -= 1;
533
534 end:
535 if (*refcount_ptr == 0)
536 {
537 if (orig_refcount > 0)
538 set_to_zero = true;
539
540 is_zero = true;
541 }
542
543 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
544 *do_remove = (new_encountered_refcount && set_to_zero);
545 }
546
547 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
548 gomp_map_0len_lookup found oldn for newn.
549 Helper function of gomp_map_vars. */
550
551 static inline void
552 gomp_map_vars_existing (struct gomp_device_descr *devicep,
553 struct goacc_asyncqueue *aq, splay_tree_key oldn,
554 splay_tree_key newn, struct target_var_desc *tgt_var,
555 unsigned char kind, bool always_to_flag, bool implicit,
556 struct gomp_coalesce_buf *cbuf,
557 htab_t *refcount_set)
558 {
559 assert (kind != GOMP_MAP_ATTACH
560 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
561
562 tgt_var->key = oldn;
563 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
564 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
565 tgt_var->is_attach = false;
566 tgt_var->offset = newn->host_start - oldn->host_start;
567
568 /* For implicit maps, old contained in new is valid. */
569 bool implicit_subset = (implicit
570 && newn->host_start <= oldn->host_start
571 && oldn->host_end <= newn->host_end);
572 if (implicit_subset)
573 tgt_var->length = oldn->host_end - oldn->host_start;
574 else
575 tgt_var->length = newn->host_end - newn->host_start;
576
577 if ((kind & GOMP_MAP_FLAG_FORCE)
578 /* For implicit maps, old contained in new is valid. */
579 || !(implicit_subset
580 /* Otherwise, new contained inside old is considered valid. */
581 || (oldn->host_start <= newn->host_start
582 && newn->host_end <= oldn->host_end)))
583 {
584 gomp_mutex_unlock (&devicep->lock);
585 gomp_fatal ("Trying to map into device [%p..%p) object when "
586 "[%p..%p) is already mapped",
587 (void *) newn->host_start, (void *) newn->host_end,
588 (void *) oldn->host_start, (void *) oldn->host_end);
589 }
590
591 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
592 {
593 /* Implicit + always should not happen. If it does occur, the
594 address/length adjustment below is a TODO.  */
595 assert (!implicit_subset);
596
597 if (oldn->aux && oldn->aux->attach_count)
598 {
599 /* We have to be careful not to overwrite still attached pointers
600 during the copyback to host. */
601 uintptr_t addr = newn->host_start;
602 while (addr < newn->host_end)
603 {
604 size_t i = (addr - oldn->host_start) / sizeof (void *);
605 if (oldn->aux->attach_count[i] == 0)
606 gomp_copy_host2dev (devicep, aq,
607 (void *) (oldn->tgt->tgt_start
608 + oldn->tgt_offset
609 + addr - oldn->host_start),
610 (void *) addr,
611 sizeof (void *), false, cbuf);
612 addr += sizeof (void *);
613 }
614 }
615 else
616 gomp_copy_host2dev (devicep, aq,
617 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
618 + newn->host_start - oldn->host_start),
619 (void *) newn->host_start,
620 newn->host_end - newn->host_start, false, cbuf);
621 }
622
623 gomp_increment_refcount (oldn, refcount_set);
624 }
625
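/* Return the mapping kind for element IDX of KINDS.  With short (16-bit) map
   kinds the GOMP_MAP_IMPLICIT flag is stripped here; get_implicit below
   reports that flag separately.  */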
626 static int
627 get_kind (bool short_mapkind, void *kinds, int idx)
628 {
629 if (!short_mapkind)
630 return ((unsigned char *) kinds)[idx];
631
632 int val = ((unsigned short *) kinds)[idx];
633 if (GOMP_MAP_IMPLICIT_P (val))
634 val &= ~GOMP_MAP_IMPLICIT;
635 return val;
636 }
637
638
639 static bool
640 get_implicit (bool short_mapkind, void *kinds, int idx)
641 {
642 if (!short_mapkind)
643 return false;
644
645 int val = ((unsigned short *) kinds)[idx];
646 return GOMP_MAP_IMPLICIT_P (val);
647 }
648
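/* Initialize the pointer stored on the device at TGT->tgt_start
   + TARGET_OFFSET.  The host pointer value HOST_PTR (plus BIAS) is looked up
   in the device's memory map and translated to the corresponding device
   address minus BIAS; a NULL host pointer, or an unmapped target when
   ALLOW_ZERO_LENGTH_ARRAY_SECTIONS is set, stores NULL/0 instead.  */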
649 static void
650 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
651 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
652 struct gomp_coalesce_buf *cbuf,
653 bool allow_zero_length_array_sections)
654 {
655 struct gomp_device_descr *devicep = tgt->device_descr;
656 struct splay_tree_s *mem_map = &devicep->mem_map;
657 struct splay_tree_key_s cur_node;
658
659 cur_node.host_start = host_ptr;
660 if (cur_node.host_start == (uintptr_t) NULL)
661 {
662 cur_node.tgt_offset = (uintptr_t) NULL;
663 gomp_copy_host2dev (devicep, aq,
664 (void *) (tgt->tgt_start + target_offset),
665 (void *) &cur_node.tgt_offset, sizeof (void *),
666 true, cbuf);
667 return;
668 }
669 /* Add bias to the pointer value. */
670 cur_node.host_start += bias;
671 cur_node.host_end = cur_node.host_start;
672 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
673 if (n == NULL)
674 {
675 if (allow_zero_length_array_sections)
676 cur_node.tgt_offset = 0;
677 else
678 {
679 gomp_mutex_unlock (&devicep->lock);
680 gomp_fatal ("Pointer target of array section wasn't mapped");
681 }
682 }
683 else
684 {
685 cur_node.host_start -= n->host_start;
686 cur_node.tgt_offset
687 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
688 /* At this point tgt_offset is target address of the
689 array section. Now subtract bias to get what we want
690 to initialize the pointer with. */
691 cur_node.tgt_offset -= bias;
692 }
693 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
694 (void *) &cur_node.tgt_offset, sizeof (void *),
695 true, cbuf);
696 }
697
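/* Map structure element I of HOSTADDRS/SIZES/KINDS, given that the enclosing
   structure is already mapped as N.  The element must be present in the same
   target_mem_desc at the matching offset (zero-sized elements are also probed
   one byte before/after); otherwise it is a fatal error, because all mapped
   elements of a structure have to be mapped together.  */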
698 static void
699 gomp_map_fields_existing (struct target_mem_desc *tgt,
700 struct goacc_asyncqueue *aq, splay_tree_key n,
701 size_t first, size_t i, void **hostaddrs,
702 size_t *sizes, void *kinds,
703 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
704 {
705 struct gomp_device_descr *devicep = tgt->device_descr;
706 struct splay_tree_s *mem_map = &devicep->mem_map;
707 struct splay_tree_key_s cur_node;
708 int kind;
709 bool implicit;
710 const bool short_mapkind = true;
711 const int typemask = short_mapkind ? 0xff : 0x7;
712
713 cur_node.host_start = (uintptr_t) hostaddrs[i];
714 cur_node.host_end = cur_node.host_start + sizes[i];
715 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
716 kind = get_kind (short_mapkind, kinds, i);
717 implicit = get_implicit (short_mapkind, kinds, i);
718 if (n2
719 && n2->tgt == n->tgt
720 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
721 {
722 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
723 kind & typemask, false, implicit, cbuf,
724 refcount_set);
725 return;
726 }
727 if (sizes[i] == 0)
728 {
729 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
730 {
731 cur_node.host_start--;
732 n2 = splay_tree_lookup (mem_map, &cur_node);
733 cur_node.host_start++;
734 if (n2
735 && n2->tgt == n->tgt
736 && n2->host_start - n->host_start
737 == n2->tgt_offset - n->tgt_offset)
738 {
739 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
740 kind & typemask, false, implicit, cbuf,
741 refcount_set);
742 return;
743 }
744 }
745 cur_node.host_end++;
746 n2 = splay_tree_lookup (mem_map, &cur_node);
747 cur_node.host_end--;
748 if (n2
749 && n2->tgt == n->tgt
750 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
751 {
752 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
753 kind & typemask, false, implicit, cbuf,
754 refcount_set);
755 return;
756 }
757 }
758 gomp_mutex_unlock (&devicep->lock);
759 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
760 "other mapped elements from the same structure weren't mapped "
761 "together with it", (void *) cur_node.host_start,
762 (void *) cur_node.host_end);
763 }
764
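/* Perform OpenACC 'attach': make the device copy of the pointer at host
   address ATTACH_TO (inside the mapped region N) point to the device copy of
   its target (plus BIAS).  Per-pointer attachment counters stored in N->aux
   ensure the device pointer is only (re)written on the 0 -> 1 transition.  */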
765 attribute_hidden void
766 gomp_attach_pointer (struct gomp_device_descr *devicep,
767 struct goacc_asyncqueue *aq, splay_tree mem_map,
768 splay_tree_key n, uintptr_t attach_to, size_t bias,
769 struct gomp_coalesce_buf *cbufp,
770 bool allow_zero_length_array_sections)
771 {
772 struct splay_tree_key_s s;
773 size_t size, idx;
774
775 if (n == NULL)
776 {
777 gomp_mutex_unlock (&devicep->lock);
778 gomp_fatal ("enclosing struct not mapped for attach");
779 }
780
781 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
782 /* We might have a pointer in a packed struct: however we cannot have more
783 than one such pointer in each pointer-sized portion of the struct, so
784 this is safe. */
785 idx = (attach_to - n->host_start) / sizeof (void *);
786
787 if (!n->aux)
788 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
789
790 if (!n->aux->attach_count)
791 n->aux->attach_count
792 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
793
794 if (n->aux->attach_count[idx] < UINTPTR_MAX)
795 n->aux->attach_count[idx]++;
796 else
797 {
798 gomp_mutex_unlock (&devicep->lock);
799 gomp_fatal ("attach count overflow");
800 }
801
802 if (n->aux->attach_count[idx] == 1)
803 {
804 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
805 - n->host_start;
806 uintptr_t target = (uintptr_t) *(void **) attach_to;
807 splay_tree_key tn;
808 uintptr_t data;
809
810 if ((void *) target == NULL)
811 {
812 gomp_mutex_unlock (&devicep->lock);
813 gomp_fatal ("attempt to attach null pointer");
814 }
815
816 s.host_start = target + bias;
817 s.host_end = s.host_start + 1;
818 tn = splay_tree_lookup (mem_map, &s);
819
820 if (!tn)
821 {
822 if (allow_zero_length_array_sections)
823 /* When allowing attachment to zero-length array sections, we
824 allow attaching to NULL pointers when the target region is not
825 mapped. */
826 data = 0;
827 else
828 {
829 gomp_mutex_unlock (&devicep->lock);
830 gomp_fatal ("pointer target not mapped for attach");
831 }
832 }
833 else
834 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
835
836 gomp_debug (1,
837 "%s: attaching host %p, target %p (struct base %p) to %p\n",
838 __FUNCTION__, (void *) attach_to, (void *) devptr,
839 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
840
841 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
842 sizeof (void *), true, cbufp);
843 }
844 else
845 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
846 (void *) attach_to, (int) n->aux->attach_count[idx]);
847 }
848
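/* Undo gomp_attach_pointer: decrement the attachment counter for the pointer
   at host address DETACH_FROM inside region N (forced to a final detach when
   FINALIZE), and restore the current host pointer value on the device once
   the counter reaches zero.  */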
849 attribute_hidden void
850 gomp_detach_pointer (struct gomp_device_descr *devicep,
851 struct goacc_asyncqueue *aq, splay_tree_key n,
852 uintptr_t detach_from, bool finalize,
853 struct gomp_coalesce_buf *cbufp)
854 {
855 size_t idx;
856
857 if (n == NULL)
858 {
859 gomp_mutex_unlock (&devicep->lock);
860 gomp_fatal ("enclosing struct not mapped for detach");
861 }
862
863 idx = (detach_from - n->host_start) / sizeof (void *);
864
865 if (!n->aux || !n->aux->attach_count)
866 {
867 gomp_mutex_unlock (&devicep->lock);
868 gomp_fatal ("no attachment counters for struct");
869 }
870
871 if (finalize)
872 n->aux->attach_count[idx] = 1;
873
874 if (n->aux->attach_count[idx] == 0)
875 {
876 gomp_mutex_unlock (&devicep->lock);
877 gomp_fatal ("attach count underflow");
878 }
879 else
880 n->aux->attach_count[idx]--;
881
882 if (n->aux->attach_count[idx] == 0)
883 {
884 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
885 - n->host_start;
886 uintptr_t target = (uintptr_t) *(void **) detach_from;
887
888 gomp_debug (1,
889 "%s: detaching host %p, target %p (struct base %p) to %p\n",
890 __FUNCTION__, (void *) detach_from, (void *) devptr,
891 (void *) (n->tgt->tgt_start + n->tgt_offset),
892 (void *) target);
893
894 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
895 sizeof (void *), true, cbufp);
896 }
897 else
898 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
899 (void *) detach_from, (int) n->aux->attach_count[idx]);
900 }
901
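/* Return the device address to pass to the target region for mapping I of
   TGT: the existing mapping's device address plus offset, the host value
   itself for inlined firstprivate scalars, the device address of the
   enclosing structure for OFFSET_STRUCT, 0 for OFFSET_POINTER, or an offset
   into the newly allocated block otherwise.  */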
902 attribute_hidden uintptr_t
903 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
904 {
905 if (tgt->list[i].key != NULL)
906 return tgt->list[i].key->tgt->tgt_start
907 + tgt->list[i].key->tgt_offset
908 + tgt->list[i].offset;
909
910 switch (tgt->list[i].offset)
911 {
912 case OFFSET_INLINED:
913 return (uintptr_t) hostaddrs[i];
914
915 case OFFSET_POINTER:
916 return 0;
917
918 case OFFSET_STRUCT:
919 return tgt->list[i + 1].key->tgt->tgt_start
920 + tgt->list[i + 1].key->tgt_offset
921 + tgt->list[i + 1].offset
922 + (uintptr_t) hostaddrs[i]
923 - (uintptr_t) hostaddrs[i + 1];
924
925 default:
926 return tgt->tgt_start + tgt->list[i].offset;
927 }
928 }
929
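/* Core mapping routine behind gomp_map_vars and the OpenACC entry points.
   It makes two passes over the MAPNUM map entries: the first looks up
   already-mapped regions, sizes the block of device memory needed for the
   new ones and plans host-to-device copy coalescing; the second allocates
   splay-tree keys, performs the copies and resolves pointers/attachments.
   Returns the new target_mem_desc, or NULL if DEVICEP has been finalized.  */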
930 static inline __attribute__((always_inline)) struct target_mem_desc *
931 gomp_map_vars_internal (struct gomp_device_descr *devicep,
932 struct goacc_asyncqueue *aq, size_t mapnum,
933 void **hostaddrs, void **devaddrs, size_t *sizes,
934 void *kinds, struct goacc_ncarray_info *nca_info,
935 bool short_mapkind, htab_t *refcount_set,
936 enum gomp_map_vars_kind pragma_kind)
937 {
938 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
939 size_t nca_data_row_num = (nca_info ? nca_info->num_data_rows : 0);
940 bool has_firstprivate = false;
941 bool has_always_ptrset = false;
942 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
943 const int rshift = short_mapkind ? 8 : 3;
944 const int typemask = short_mapkind ? 0xff : 0x7;
945 struct splay_tree_s *mem_map = &devicep->mem_map;
946 struct splay_tree_key_s cur_node;
947 struct target_mem_desc *tgt
948 = gomp_malloc (sizeof (*tgt)
949 + sizeof (tgt->list[0]) * (mapnum + nca_data_row_num));
950 tgt->list_count = mapnum + nca_data_row_num;
951 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
952 tgt->device_descr = devicep;
953 tgt->prev = NULL;
954 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
955
956 if (mapnum == 0)
957 {
958 tgt->tgt_start = 0;
959 tgt->tgt_end = 0;
960 return tgt;
961 }
962
963 tgt_align = sizeof (void *);
964 tgt_size = 0;
965 cbuf.chunks = NULL;
966 cbuf.chunk_cnt = -1;
967 cbuf.use_cnt = 0;
968 cbuf.buf = NULL;
969 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
970 {
971 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
972 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
973 cbuf.chunk_cnt = 0;
974 }
975 if (pragma_kind == GOMP_MAP_VARS_TARGET)
976 {
977 size_t align = 4 * sizeof (void *);
978 tgt_align = align;
979 tgt_size = mapnum * sizeof (void *);
980 cbuf.chunk_cnt = 1;
981 cbuf.use_cnt = 1 + (mapnum > 1);
982 cbuf.chunks[0].start = 0;
983 cbuf.chunks[0].end = tgt_size;
984 }
985
986 gomp_mutex_lock (&devicep->lock);
987 if (devicep->state == GOMP_DEVICE_FINALIZED)
988 {
989 gomp_mutex_unlock (&devicep->lock);
990 free (tgt);
991 return NULL;
992 }
993
994 for (i = 0; i < mapnum; i++)
995 {
996 int kind = get_kind (short_mapkind, kinds, i);
997 bool implicit = get_implicit (short_mapkind, kinds, i);
998 if (hostaddrs[i] == NULL
999 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1000 {
1001 tgt->list[i].key = NULL;
1002 tgt->list[i].offset = OFFSET_INLINED;
1003 continue;
1004 }
1005 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1006 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1007 {
1008 tgt->list[i].key = NULL;
1009 if (!not_found_cnt)
1010 {
1011 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1012 on a separate construct prior to using use_device_{addr,ptr}.
1013 In OpenMP 5.0, map directives need to be ordered by the
1014 middle-end before the use_device_* clauses. If
1015 !not_found_cnt, all mappings requested (if any) are already
1016 mapped, so use_device_{addr,ptr} can be resolved right away.
1017 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1018 now but would succeed after performing the mappings in the
1019 following loop. We can't defer this always to the second
1020 loop, because it is not even invoked when !not_found_cnt
1021 after the first loop. */
1022 cur_node.host_start = (uintptr_t) hostaddrs[i];
1023 cur_node.host_end = cur_node.host_start;
1024 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1025 if (n != NULL)
1026 {
1027 cur_node.host_start -= n->host_start;
1028 hostaddrs[i]
1029 = (void *) (n->tgt->tgt_start + n->tgt_offset
1030 + cur_node.host_start);
1031 }
1032 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1033 {
1034 gomp_mutex_unlock (&devicep->lock);
1035 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1036 }
1037 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1038 /* If not present, continue using the host address. */
1039 ;
1040 else
1041 __builtin_unreachable ();
1042 tgt->list[i].offset = OFFSET_INLINED;
1043 }
1044 else
1045 tgt->list[i].offset = 0;
1046 continue;
1047 }
1048 else if (devicep->is_usm_ptr_func
1049 && devicep->is_usm_ptr_func (hostaddrs[i]))
1050 {
1051 /* The memory is visible from both host and target
1052 so nothing needs to be moved. */
1053 tgt->list[i].key = NULL;
1054 tgt->list[i].offset = OFFSET_INLINED;
1055 continue;
1056 }
1057 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1058 {
1059 size_t first = i + 1;
1060 size_t last = i + sizes[i];
1061 cur_node.host_start = (uintptr_t) hostaddrs[i];
1062 cur_node.host_end = (uintptr_t) hostaddrs[last]
1063 + sizes[last];
1064 tgt->list[i].key = NULL;
1065 tgt->list[i].offset = OFFSET_STRUCT;
1066 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1067 if (n == NULL)
1068 {
1069 size_t align = (size_t) 1 << (kind >> rshift);
1070 if (tgt_align < align)
1071 tgt_align = align;
1072 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1073 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1074 tgt_size += cur_node.host_end - cur_node.host_start;
1075 not_found_cnt += last - i;
1076 for (i = first; i <= last; i++)
1077 {
1078 tgt->list[i].key = NULL;
1079 if (!aq
1080 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1081 & typemask))
1082 gomp_coalesce_buf_add (&cbuf,
1083 tgt_size - cur_node.host_end
1084 + (uintptr_t) hostaddrs[i],
1085 sizes[i]);
1086 }
1087 i--;
1088 continue;
1089 }
1090 for (i = first; i <= last; i++)
1091 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1092 sizes, kinds, NULL, refcount_set);
1093 i--;
1094 continue;
1095 }
1096 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1097 {
1098 tgt->list[i].key = NULL;
1099 tgt->list[i].offset = OFFSET_POINTER;
1100 has_firstprivate = true;
1101 continue;
1102 }
1103 else if ((kind & typemask) == GOMP_MAP_ATTACH
1104 || ((kind & typemask)
1105 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1106 {
1107 tgt->list[i].key = NULL;
1108 has_firstprivate = true;
1109 continue;
1110 }
1111 else if (GOMP_MAP_NONCONTIG_ARRAY_P (kind & typemask))
1112 {
1113 /* Ignore non-contiguous arrays for now, we process them together
1114 later. */
1115 tgt->list[i].key = NULL;
1116 tgt->list[i].offset = 0;
1117 not_found_cnt++;
1118
1119 /* The map for the non-contiguous array itself is never copied from
1120 during unmapping, it's the data rows that count. Set copy-from
1121 flags to false here. */
1122 tgt->list[i].copy_from = false;
1123 tgt->list[i].always_copy_from = false;
1124 tgt->list[i].is_attach = false;
1125
1126 size_t align = (size_t) 1 << (kind >> rshift);
1127 if (tgt_align < align)
1128 tgt_align = align;
1129
1130 continue;
1131 }
1132
1133 cur_node.host_start = (uintptr_t) hostaddrs[i];
1134 if (!GOMP_MAP_POINTER_P (kind & typemask))
1135 cur_node.host_end = cur_node.host_start + sizes[i];
1136 else
1137 cur_node.host_end = cur_node.host_start + sizeof (void *);
1138 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1139 {
1140 tgt->list[i].key = NULL;
1141
1142 size_t align = (size_t) 1 << (kind >> rshift);
1143 if (tgt_align < align)
1144 tgt_align = align;
1145 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1146 if (!aq)
1147 gomp_coalesce_buf_add (&cbuf, tgt_size,
1148 cur_node.host_end - cur_node.host_start);
1149 tgt_size += cur_node.host_end - cur_node.host_start;
1150 has_firstprivate = true;
1151 continue;
1152 }
1153 splay_tree_key n;
1154 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1155 {
1156 n = gomp_map_0len_lookup (mem_map, &cur_node);
1157 if (!n)
1158 {
1159 tgt->list[i].key = NULL;
1160 tgt->list[i].offset = OFFSET_POINTER;
1161 continue;
1162 }
1163 }
1164 else
1165 n = splay_tree_lookup (mem_map, &cur_node);
1166 if (n && n->refcount != REFCOUNT_LINK)
1167 {
1168 int always_to_cnt = 0;
1169 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1170 {
1171 bool has_nullptr = false;
1172 size_t j;
1173 for (j = 0; j < n->tgt->list_count; j++)
1174 if (n->tgt->list[j].key == n)
1175 {
1176 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1177 break;
1178 }
1179 if (n->tgt->list_count == 0)
1180 {
1181 /* 'declare target'; assume has_nullptr; it could also be
1182 statically assigned pointer, but then it should point to
1183 the equivalent variable on the host.  */
1184 assert (n->refcount == REFCOUNT_INFINITY);
1185 has_nullptr = true;
1186 }
1187 else
1188 assert (j < n->tgt->list_count);
1189 /* Re-map the data if there is an 'always' modifier, or if a null
1190 pointer was there before and a non-null one has now been found; that
1191 permits transparent re-mapping for Fortran array descriptors
1192 which were previously mapped unallocated. */
1193 for (j = i + 1; j < mapnum; j++)
1194 {
1195 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1196 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1197 && (!has_nullptr
1198 || !GOMP_MAP_POINTER_P (ptr_kind)
1199 || *(void **) hostaddrs[j] == NULL))
1200 break;
1201 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1202 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1203 > cur_node.host_end))
1204 break;
1205 else
1206 {
1207 has_always_ptrset = true;
1208 ++always_to_cnt;
1209 }
1210 }
1211 }
1212 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1213 kind & typemask, always_to_cnt > 0, implicit,
1214 NULL, refcount_set);
1215 i += always_to_cnt;
1216 }
1217 else
1218 {
1219 tgt->list[i].key = NULL;
1220
1221 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1222 {
1223 /* Not present, hence skip the entry - including its MAP_POINTER,
1224 if one exists.  */
1225 tgt->list[i].offset = OFFSET_POINTER;
1226 if (i + 1 < mapnum
1227 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1228 == GOMP_MAP_POINTER))
1229 {
1230 ++i;
1231 tgt->list[i].key = NULL;
1232 tgt->list[i].offset = 0;
1233 }
1234 continue;
1235 }
1236 size_t align = (size_t) 1 << (kind >> rshift);
1237 not_found_cnt++;
1238 if (tgt_align < align)
1239 tgt_align = align;
1240 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1241 if (!aq
1242 && gomp_to_device_kind_p (kind & typemask))
1243 gomp_coalesce_buf_add (&cbuf, tgt_size,
1244 cur_node.host_end - cur_node.host_start);
1245 tgt_size += cur_node.host_end - cur_node.host_start;
1246 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1247 {
1248 size_t j;
1249 int kind;
1250 for (j = i + 1; j < mapnum; j++)
1251 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1252 kinds, j)) & typemask))
1253 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1254 break;
1255 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1256 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1257 > cur_node.host_end))
1258 break;
1259 else
1260 {
1261 tgt->list[j].key = NULL;
1262 i++;
1263 }
1264 }
1265 }
1266 }
1267
1268 /* For non-contiguous arrays, each data row is one target item, kept separate
1269 from the normal map clause items; hence we order them after mapnum.  */
1270 if (nca_info)
1271 {
1272 struct target_var_desc *next_var_desc = &tgt->list[mapnum];
1273 for (i = 0; i < nca_info->num_ncarray; i++)
1274 {
1275 struct goacc_ncarray *nca = &nca_info->ncarray[i];
1276 int kind = get_kind (short_mapkind, kinds, nca->map_index);
1277 size_t align = (size_t) 1 << (kind >> rshift);
1278 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1279 tgt_size += nca->ptrblock_size;
1280
1281 for (size_t j = 0; j < nca->data_row_num; j++)
1282 {
1283 struct target_var_desc *row_desc = next_var_desc++;
1284 void *row = nca->data_rows[j];
1285 cur_node.host_start = (uintptr_t) row;
1286 cur_node.host_end = cur_node.host_start + nca->data_row_size;
1287 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1288 if (n)
1289 {
1290 assert (n->refcount != REFCOUNT_LINK);
1291 gomp_map_vars_existing (devicep, aq, n, &cur_node, row_desc,
1292 kind & typemask, false, false,
1293 /* TODO: cbuf? */ NULL,
1294 refcount_set);
1295 }
1296 else
1297 {
1298 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1299 tgt_size += nca->data_row_size;
1300 not_found_cnt++;
1301 }
1302 }
1303 }
1304 assert (next_var_desc == &tgt->list[mapnum + nca_info->num_data_rows]);
1305 }
1306
1307 if (devaddrs)
1308 {
1309 if (mapnum != 1)
1310 {
1311 gomp_mutex_unlock (&devicep->lock);
1312 gomp_fatal ("unexpected aggregation");
1313 }
1314 tgt->to_free = devaddrs[0];
1315 tgt->tgt_start = (uintptr_t) tgt->to_free;
1316 tgt->tgt_end = tgt->tgt_start + sizes[0];
1317 }
1318 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1319 {
1320 /* Allocate tgt_align aligned tgt_size block of memory. */
1321 /* FIXME: Perhaps change interface to allocate properly aligned
1322 memory. */
1323 tgt->to_free = devicep->alloc_func (devicep->target_id,
1324 tgt_size + tgt_align - 1);
1325 if (!tgt->to_free)
1326 {
1327 gomp_mutex_unlock (&devicep->lock);
1328 gomp_fatal ("device memory allocation fail");
1329 }
1330
1331 tgt->tgt_start = (uintptr_t) tgt->to_free;
1332 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1333 tgt->tgt_end = tgt->tgt_start + tgt_size;
1334
1335 if (cbuf.use_cnt == 1)
1336 cbuf.chunk_cnt--;
1337 if (cbuf.chunk_cnt > 0)
1338 {
1339 cbuf.buf
1340 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1341 if (cbuf.buf)
1342 {
1343 cbuf.tgt = tgt;
1344 cbufp = &cbuf;
1345 }
1346 }
1347 }
1348 else
1349 {
1350 tgt->to_free = NULL;
1351 tgt->tgt_start = 0;
1352 tgt->tgt_end = 0;
1353 }
1354
1355 tgt_size = 0;
1356 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1357 tgt_size = mapnum * sizeof (void *);
1358
1359 tgt->array = NULL;
1360 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1361 {
1362 if (not_found_cnt)
1363 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1364 splay_tree_node array = tgt->array;
1365 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1366 uintptr_t field_tgt_base = 0;
1367 splay_tree_key field_tgt_structelem_first = NULL;
1368
1369 for (i = 0; i < mapnum; i++)
1370 if (has_always_ptrset
1371 && tgt->list[i].key
1372 && (get_kind (short_mapkind, kinds, i) & typemask)
1373 == GOMP_MAP_TO_PSET)
1374 {
1375 splay_tree_key k = tgt->list[i].key;
1376 bool has_nullptr = false;
1377 size_t j;
1378 for (j = 0; j < k->tgt->list_count; j++)
1379 if (k->tgt->list[j].key == k)
1380 {
1381 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1382 break;
1383 }
1384 if (k->tgt->list_count == 0)
1385 has_nullptr = true;
1386 else
1387 assert (j < k->tgt->list_count);
1388
1389 tgt->list[i].has_null_ptr_assoc = false;
1390 for (j = i + 1; j < mapnum; j++)
1391 {
1392 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1393 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1394 && (!has_nullptr
1395 || !GOMP_MAP_POINTER_P (ptr_kind)
1396 || *(void **) hostaddrs[j] == NULL))
1397 break;
1398 else if ((uintptr_t) hostaddrs[j] < k->host_start
1399 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1400 > k->host_end))
1401 break;
1402 else
1403 {
1404 if (*(void **) hostaddrs[j] == NULL)
1405 tgt->list[i].has_null_ptr_assoc = true;
1406 tgt->list[j].key = k;
1407 tgt->list[j].copy_from = false;
1408 tgt->list[j].always_copy_from = false;
1409 tgt->list[j].is_attach = false;
1410 gomp_increment_refcount (k, refcount_set);
1411 gomp_map_pointer (k->tgt, aq,
1412 (uintptr_t) *(void **) hostaddrs[j],
1413 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1414 - k->host_start),
1415 sizes[j], cbufp, false);
1416 }
1417 }
1418 i = j - 1;
1419 }
1420 else if (tgt->list[i].key == NULL)
1421 {
1422 int kind = get_kind (short_mapkind, kinds, i);
1423 bool implicit = get_implicit (short_mapkind, kinds, i);
1424 if (hostaddrs[i] == NULL)
1425 continue;
1426 switch (kind & typemask)
1427 {
1428 size_t align, len, first, last;
1429 splay_tree_key n;
1430 case GOMP_MAP_FIRSTPRIVATE:
1431 align = (size_t) 1 << (kind >> rshift);
1432 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1433 tgt->list[i].offset = tgt_size;
1434 len = sizes[i];
1435 gomp_copy_host2dev (devicep, aq,
1436 (void *) (tgt->tgt_start + tgt_size),
1437 (void *) hostaddrs[i], len, false, cbufp);
1438 /* Save the device address in hostaddrs[i] to permit later availability
1439 when doing a deep firstprivate with pointer attach.  */
1440 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1441 tgt_size += len;
1442
1443 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1444 firstprivate to hostaddrs[i+1], which is assumed to contain a
1445 device address. */
1446 if (i + 1 < mapnum
1447 && (GOMP_MAP_ATTACH
1448 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1449 {
1450 uintptr_t target = (uintptr_t) hostaddrs[i];
1451 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1452 gomp_copy_host2dev (devicep, aq, devptr, &target,
1453 sizeof (void *), false, cbufp);
1454 ++i;
1455 }
1456 continue;
1457 case GOMP_MAP_FIRSTPRIVATE_INT:
1458 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1459 continue;
1460 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1461 /* The OpenACC 'host_data' construct only allows 'use_device'
1462 "mapping" clauses, so in the first loop, 'not_found_cnt'
1463 must always have been zero, so all OpenACC 'use_device'
1464 clauses have already been handled. (We can only easily test
1465 'use_device' with 'if_present' clause here.) */
1466 assert (tgt->list[i].offset == OFFSET_INLINED);
1467 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1468 code conceptually simple, similar to the first loop. */
1469 case GOMP_MAP_USE_DEVICE_PTR:
1470 if (tgt->list[i].offset == 0)
1471 {
1472 cur_node.host_start = (uintptr_t) hostaddrs[i];
1473 cur_node.host_end = cur_node.host_start;
1474 n = gomp_map_lookup (mem_map, &cur_node);
1475 if (n != NULL)
1476 {
1477 cur_node.host_start -= n->host_start;
1478 hostaddrs[i]
1479 = (void *) (n->tgt->tgt_start + n->tgt_offset
1480 + cur_node.host_start);
1481 }
1482 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1483 {
1484 gomp_mutex_unlock (&devicep->lock);
1485 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1486 }
1487 else if ((kind & typemask)
1488 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1489 /* If not present, continue using the host address. */
1490 ;
1491 else
1492 __builtin_unreachable ();
1493 tgt->list[i].offset = OFFSET_INLINED;
1494 }
1495 continue;
1496 case GOMP_MAP_STRUCT:
1497 first = i + 1;
1498 last = i + sizes[i];
1499 cur_node.host_start = (uintptr_t) hostaddrs[i];
1500 cur_node.host_end = (uintptr_t) hostaddrs[last]
1501 + sizes[last];
1502 if (tgt->list[first].key != NULL)
1503 continue;
1504 n = splay_tree_lookup (mem_map, &cur_node);
1505 if (n == NULL)
1506 {
1507 size_t align = (size_t) 1 << (kind >> rshift);
1508 tgt_size -= (uintptr_t) hostaddrs[first]
1509 - (uintptr_t) hostaddrs[i];
1510 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1511 tgt_size += (uintptr_t) hostaddrs[first]
1512 - (uintptr_t) hostaddrs[i];
1513 field_tgt_base = (uintptr_t) hostaddrs[first];
1514 field_tgt_offset = tgt_size;
1515 field_tgt_clear = last;
1516 field_tgt_structelem_first = NULL;
1517 tgt_size += cur_node.host_end
1518 - (uintptr_t) hostaddrs[first];
1519 continue;
1520 }
1521 for (i = first; i <= last; i++)
1522 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1523 sizes, kinds, cbufp, refcount_set);
1524 i--;
1525 continue;
1526 case GOMP_MAP_ALWAYS_POINTER:
1527 cur_node.host_start = (uintptr_t) hostaddrs[i];
1528 cur_node.host_end = cur_node.host_start + sizeof (void *);
1529 n = splay_tree_lookup (mem_map, &cur_node);
1530 if (n == NULL
1531 || n->host_start > cur_node.host_start
1532 || n->host_end < cur_node.host_end)
1533 {
1534 gomp_mutex_unlock (&devicep->lock);
1535 gomp_fatal ("always pointer not mapped");
1536 }
1537 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1538 != GOMP_MAP_ALWAYS_POINTER)
1539 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1540 if (cur_node.tgt_offset)
1541 cur_node.tgt_offset -= sizes[i];
1542 gomp_copy_host2dev (devicep, aq,
1543 (void *) (n->tgt->tgt_start
1544 + n->tgt_offset
1545 + cur_node.host_start
1546 - n->host_start),
1547 (void *) &cur_node.tgt_offset,
1548 sizeof (void *), true, cbufp);
1549 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1550 + cur_node.host_start - n->host_start;
1551 continue;
1552 case GOMP_MAP_IF_PRESENT:
1553 /* Not present - otherwise handled above. Skip over its
1554 MAP_POINTER as well. */
1555 if (i + 1 < mapnum
1556 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1557 == GOMP_MAP_POINTER))
1558 ++i;
1559 continue;
1560 case GOMP_MAP_ATTACH:
1561 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1562 {
1563 cur_node.host_start = (uintptr_t) hostaddrs[i];
1564 cur_node.host_end = cur_node.host_start + sizeof (void *);
1565 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1566 if (n != NULL)
1567 {
1568 tgt->list[i].key = n;
1569 tgt->list[i].offset = cur_node.host_start - n->host_start;
1570 tgt->list[i].length = n->host_end - n->host_start;
1571 tgt->list[i].copy_from = false;
1572 tgt->list[i].always_copy_from = false;
1573 tgt->list[i].is_attach = true;
1574 /* OpenACC 'attach'/'detach' doesn't affect
1575 structured/dynamic reference counts ('n->refcount',
1576 'n->dynamic_refcount'). */
1577
1578 bool zlas
1579 = ((kind & typemask)
1580 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1581 gomp_attach_pointer (devicep, aq, mem_map, n,
1582 (uintptr_t) hostaddrs[i], sizes[i],
1583 cbufp, zlas);
1584 }
1585 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1586 {
1587 gomp_mutex_unlock (&devicep->lock);
1588 gomp_fatal ("outer struct not mapped for attach");
1589 }
1590 continue;
1591 }
1592 default:
1593 if (tgt->list[i].offset == OFFSET_INLINED
1594 && !array)
1595 continue;
1596 break;
1597 }
1598
1599 if (GOMP_MAP_NONCONTIG_ARRAY_P (kind & typemask))
1600 {
1601 tgt->list[i].key = &array->key;
1602 tgt->list[i].key->tgt = tgt;
1603 array++;
1604 continue;
1605 }
1606
1607 splay_tree_key k = &array->key;
1608 k->host_start = (uintptr_t) hostaddrs[i];
1609 if (!GOMP_MAP_POINTER_P (kind & typemask))
1610 k->host_end = k->host_start + sizes[i];
1611 else
1612 k->host_end = k->host_start + sizeof (void *);
1613 splay_tree_key n = splay_tree_lookup (mem_map, k);
1614 if (n && n->refcount != REFCOUNT_LINK)
1615 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1616 kind & typemask, false, implicit, cbufp,
1617 refcount_set);
1618 else
1619 {
1620 k->aux = NULL;
1621 if (n && n->refcount == REFCOUNT_LINK)
1622 {
1623 /* Replace target address of the pointer with target address
1624 of mapped object in the splay tree. */
1625 splay_tree_remove (mem_map, n);
1626 k->aux
1627 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1628 k->aux->link_key = n;
1629 }
1630 size_t align = (size_t) 1 << (kind >> rshift);
1631 tgt->list[i].key = k;
1632 k->tgt = tgt;
1633 k->refcount = 0;
1634 k->dynamic_refcount = 0;
1635 if (field_tgt_clear != FIELD_TGT_EMPTY)
1636 {
1637 k->tgt_offset = k->host_start - field_tgt_base
1638 + field_tgt_offset;
1639 if (openmp_p)
1640 {
1641 k->refcount = REFCOUNT_STRUCTELEM;
1642 if (field_tgt_structelem_first == NULL)
1643 {
1644 /* Set to first structure element of sequence. */
1645 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1646 field_tgt_structelem_first = k;
1647 }
1648 else
1649 /* Point to refcount of leading element, but do not
1650 increment again. */
1651 k->structelem_refcount_ptr
1652 = &field_tgt_structelem_first->structelem_refcount;
1653
1654 if (i == field_tgt_clear)
1655 {
1656 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1657 field_tgt_structelem_first = NULL;
1658 }
1659 }
1660 if (i == field_tgt_clear)
1661 field_tgt_clear = FIELD_TGT_EMPTY;
1662 }
1663 else
1664 {
1665 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1666 k->tgt_offset = tgt_size;
1667 tgt_size += k->host_end - k->host_start;
1668 }
1669 /* First increment, from 0 to 1. gomp_increment_refcount
1670 encapsulates the different increment cases, so use this
1671 instead of directly setting 1 during initialization. */
1672 gomp_increment_refcount (k, refcount_set);
1673
1674 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1675 tgt->list[i].always_copy_from
1676 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1677 tgt->list[i].is_attach = false;
1678 tgt->list[i].offset = 0;
1679 tgt->list[i].length = k->host_end - k->host_start;
1680 tgt->refcount++;
1681 array->left = NULL;
1682 array->right = NULL;
1683 splay_tree_insert (mem_map, array);
1684 switch (kind & typemask)
1685 {
1686 case GOMP_MAP_ALLOC:
1687 case GOMP_MAP_FROM:
1688 case GOMP_MAP_FORCE_ALLOC:
1689 case GOMP_MAP_FORCE_FROM:
1690 case GOMP_MAP_ALWAYS_FROM:
1691 break;
1692 case GOMP_MAP_TO:
1693 case GOMP_MAP_TOFROM:
1694 case GOMP_MAP_FORCE_TO:
1695 case GOMP_MAP_FORCE_TOFROM:
1696 case GOMP_MAP_ALWAYS_TO:
1697 case GOMP_MAP_ALWAYS_TOFROM:
1698 gomp_copy_host2dev (devicep, aq,
1699 (void *) (tgt->tgt_start
1700 + k->tgt_offset),
1701 (void *) k->host_start,
1702 k->host_end - k->host_start,
1703 false, cbufp);
1704 break;
1705 case GOMP_MAP_POINTER:
1706 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1707 gomp_map_pointer
1708 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1709 k->tgt_offset, sizes[i], cbufp,
1710 ((kind & typemask)
1711 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1712 break;
1713 case GOMP_MAP_TO_PSET:
1714 gomp_copy_host2dev (devicep, aq,
1715 (void *) (tgt->tgt_start
1716 + k->tgt_offset),
1717 (void *) k->host_start,
1718 k->host_end - k->host_start,
1719 false, cbufp);
1720 tgt->list[i].has_null_ptr_assoc = false;
1721
1722 for (j = i + 1; j < mapnum; j++)
1723 {
1724 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1725 & typemask);
1726 if (!GOMP_MAP_POINTER_P (ptr_kind)
1727 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1728 break;
1729 else if ((uintptr_t) hostaddrs[j] < k->host_start
1730 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1731 > k->host_end))
1732 break;
1733 else
1734 {
1735 tgt->list[j].key = k;
1736 tgt->list[j].copy_from = false;
1737 tgt->list[j].always_copy_from = false;
1738 tgt->list[j].is_attach = false;
1739 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1740 /* For OpenMP, the use of refcount_sets causes
1741 errors if we set k->refcount = 1 above but also
1742 increment it again here, because the decrements would
1743 then not match, since we decrement only once
1744 for each key's refcount. Therefore avoid this
1745 increment for OpenMP constructs. */
1746 if (!openmp_p)
1747 gomp_increment_refcount (k, refcount_set);
1748 gomp_map_pointer (tgt, aq,
1749 (uintptr_t) *(void **) hostaddrs[j],
1750 k->tgt_offset
1751 + ((uintptr_t) hostaddrs[j]
1752 - k->host_start),
1753 sizes[j], cbufp, false);
1754 }
1755 }
1756 i = j - 1;
1757 break;
1758 case GOMP_MAP_FORCE_PRESENT:
1759 {
1760 /* We already looked up the memory region above and it
1761 was missing. */
1762 size_t size = k->host_end - k->host_start;
1763 gomp_mutex_unlock (&devicep->lock);
1764 #ifdef HAVE_INTTYPES_H
1765 gomp_fatal ("present clause: !acc_is_present (%p, "
1766 "%"PRIu64" (0x%"PRIx64"))",
1767 (void *) k->host_start,
1768 (uint64_t) size, (uint64_t) size);
1769 #else
1770 gomp_fatal ("present clause: !acc_is_present (%p, "
1771 "%lu (0x%lx))", (void *) k->host_start,
1772 (unsigned long) size, (unsigned long) size);
1773 #endif
1774 }
1775 break;
1776 case GOMP_MAP_FORCE_DEVICEPTR:
1777 assert (k->host_end - k->host_start == sizeof (void *));
1778 gomp_copy_host2dev (devicep, aq,
1779 (void *) (tgt->tgt_start
1780 + k->tgt_offset),
1781 (void *) k->host_start,
1782 sizeof (void *), false, cbufp);
1783 break;
1784 default:
1785 gomp_mutex_unlock (&devicep->lock);
1786 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1787 kind);
1788 }
1789
1790 if (k->aux && k->aux->link_key)
1791 {
1792 /* Set link pointer on target to the device address of the
1793 mapped object. */
1794 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1795 /* We intentionally do not use coalescing here, as it's not
1796 data allocated by the current call to this function. */
1797 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1798 &tgt_addr, sizeof (void *), true, NULL);
1799 }
1800 array++;
1801 }
1802 }
1803
1804 /* Processing of non-contiguous array rows. */
1805 if (nca_info)
1806 {
1807 struct target_var_desc *next_var_desc = &tgt->list[mapnum];
1808 for (i = 0; i < nca_info->num_ncarray; i++)
1809 {
1810 struct goacc_ncarray *nca = &nca_info->ncarray[i];
1811 int kind = get_kind (short_mapkind, kinds, nca->map_index);
1812 size_t align = (size_t) 1 << (kind >> rshift);
1813 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1814
1815 assert (nca->ptr == hostaddrs[nca->map_index]);
1816
1817 /* For the map of the non-contiguous array itself, adjust so that
1818 the passed device address points to the beginning of the
1819 ptrblock. Remember to adjust the first-dimension's bias here. */
1820 tgt->list[nca->map_index].key->tgt_offset
1821 = tgt_size - nca->descr->dims[0].base;
1822
1823 void *target_ptrblock = (void *) tgt->tgt_start + tgt_size;
1824 tgt_size += nca->ptrblock_size;
1825
1826 /* Add a splay tree key for each data row in the current non-contiguous
1827 array. */
1828 for (size_t j = 0; j < nca->data_row_num; j++)
1829 {
1830 struct target_var_desc *row_desc = next_var_desc++;
1831 void *row = nca->data_rows[j];
1832 cur_node.host_start = (uintptr_t) row;
1833 cur_node.host_end = cur_node.host_start + nca->data_row_size;
1834 splay_tree_key k = splay_tree_lookup (mem_map, &cur_node);
1835 if (k)
1836 {
1837 assert (k->refcount != REFCOUNT_LINK);
1838 gomp_map_vars_existing (devicep, aq, k, &cur_node, row_desc,
1839 kind & typemask, false, false,
1840 cbufp, refcount_set);
1841 }
1842 else
1843 {
1844 tgt->refcount++;
1845 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1846
1847 k = &array->key;
1848 k->host_start = (uintptr_t) row;
1849 k->host_end = k->host_start + nca->data_row_size;
1850
1851 k->tgt = tgt;
1852 k->refcount = 1;
1853 k->dynamic_refcount = 0;
1854 k->aux = NULL;
1855 k->tgt_offset = tgt_size;
1856
1857 tgt_size += nca->data_row_size;
1858
1859 row_desc->key = k;
1860 row_desc->copy_from
1861 = GOMP_MAP_COPY_FROM_P (kind & typemask);
1862 row_desc->always_copy_from
1863 = GOMP_MAP_COPY_FROM_P (kind & typemask);
1864 row_desc->is_attach = false;
1865 row_desc->offset = 0;
1866 row_desc->length = nca->data_row_size;
1867
1868 array->left = NULL;
1869 array->right = NULL;
1870 splay_tree_insert (mem_map, array);
1871
1872 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1873 gomp_copy_host2dev (devicep, aq,
1874 (void *) tgt->tgt_start + k->tgt_offset,
1875 (void *) k->host_start,
1876 nca->data_row_size, false,
1877 cbufp);
1878 array++;
1879 }
1880 nca->tgt_data_rows[j]
1881 = (void *) (k->tgt->tgt_start + k->tgt_offset);
1882 }
1883
1884 /* Now that we have the target memory allocated and the target offsets of
1885 all row blocks assigned and calculated, we can construct the
1886 accelerator-side ptrblock and copy it in. */
1887 if (nca->ptrblock_size)
1888 {
1889 void *ptrblock = goacc_noncontig_array_create_ptrblock
1890 (nca, target_ptrblock);
1891 gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock,
1892 nca->ptrblock_size, false, cbufp);
1893 free (ptrblock);
1894 }
1895 }
1896 }
1897 }
1898
1899 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1900 {
1901 for (i = 0; i < mapnum; i++)
1902 {
1903 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1904 gomp_copy_host2dev (devicep, aq,
1905 (void *) (tgt->tgt_start + i * sizeof (void *)),
1906 (void *) &cur_node.tgt_offset, sizeof (void *),
1907 true, cbufp);
1908 }
1909 }
1910
1911 if (cbufp)
1912 {
1913 /* See 'gomp_coalesce_buf_add'. */
1914 assert (!aq);
1915
1916 long c = 0;
1917 for (c = 0; c < cbuf.chunk_cnt; ++c)
1918 gomp_copy_host2dev (devicep, aq,
1919 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1920 (char *) cbuf.buf + (cbuf.chunks[c].start
1921 - cbuf.chunks[0].start),
1922 cbuf.chunks[c].end - cbuf.chunks[c].start,
1923 true, NULL);
1924 free (cbuf.buf);
1925 cbuf.buf = NULL;
1926 cbufp = NULL;
1927 }
1928
1929 /* If the variable from "omp target enter data" map-list was already mapped,
1930 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1931 gomp_exit_data. */
1932 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1933 {
1934 free (tgt);
1935 tgt = NULL;
1936 }
1937
1938 gomp_mutex_unlock (&devicep->lock);
1939 return tgt;
1940 }
1941
1942 attribute_hidden struct target_mem_desc *
1943 gomp_map_vars_openacc (struct gomp_device_descr *devicep,
1944 struct goacc_asyncqueue *aq, size_t mapnum,
1945 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1946 void *nca_info)
1947 {
1948 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, NULL,
1949 sizes, (void *) kinds,
1950 (struct goacc_ncarray_info *) nca_info,
1951 true, NULL, GOMP_MAP_VARS_OPENACC);
1952 }
1953
1954 static struct target_mem_desc *
1955 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1956 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1957 bool short_mapkind, htab_t *refcount_set,
1958 enum gomp_map_vars_kind pragma_kind)
1959 {
1960 /* This management of a local refcount_set is for the convenience of callers
1961 who do not share a refcount_set over multiple map/unmap uses. */
1962 htab_t local_refcount_set = NULL;
1963 if (refcount_set == NULL)
1964 {
1965 local_refcount_set = htab_create (mapnum);
1966 refcount_set = &local_refcount_set;
1967 }
1968
1969 struct target_mem_desc *tgt;
1970 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1971 sizes, kinds, NULL, short_mapkind,
1972 refcount_set, pragma_kind);
1973 if (local_refcount_set)
1974 htab_free (local_refcount_set);
1975
1976 return tgt;
1977 }
1978
1979 attribute_hidden struct target_mem_desc *
1980 goacc_map_vars (struct gomp_device_descr *devicep,
1981 struct goacc_asyncqueue *aq, size_t mapnum,
1982 void **hostaddrs, void **devaddrs, size_t *sizes,
1983 void *kinds, bool short_mapkind,
1984 enum gomp_map_vars_kind pragma_kind)
1985 {
1986 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1987 sizes, kinds, NULL, short_mapkind, NULL,
1988 GOMP_MAP_VARS_OPENACC | pragma_kind);
1989 }
1990
1991 static void
1992 gomp_unmap_tgt (struct target_mem_desc *tgt)
1993 {
1994 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1995 if (tgt->tgt_end)
1996 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1997
1998 free (tgt->array);
1999 free (tgt);
2000 }
2001
2002 static bool
2003 gomp_unref_tgt (void *ptr)
2004 {
2005 bool is_tgt_unmapped = false;
2006
2007 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
2008
2009 if (tgt->refcount > 1)
2010 tgt->refcount--;
2011 else
2012 {
2013 gomp_unmap_tgt (tgt);
2014 is_tgt_unmapped = true;
2015 }
2016
2017 return is_tgt_unmapped;
2018 }
2019
2020 static void
2021 gomp_unref_tgt_void (void *ptr)
2022 {
2023 (void) gomp_unref_tgt (ptr);
2024 }
2025
2026 static void
2027 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
2028 {
2029 splay_tree_remove (sp, k);
2030 if (k->aux)
2031 {
2032 if (k->aux->link_key)
2033 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
2034 if (k->aux->attach_count)
2035 free (k->aux->attach_count);
2036 free (k->aux);
2037 k->aux = NULL;
2038 }
2039 }
2040
2041 static inline __attribute__((always_inline)) bool
2042 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
2043 struct goacc_asyncqueue *aq)
2044 {
2045 bool is_tgt_unmapped = false;
2046
2047 if (REFCOUNT_STRUCTELEM_P (k->refcount))
2048 {
2049 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
2050 /* Infer the splay_tree_key of the first structelem key using the
2051 pointer to the first structelem_refcount. */
2052 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
2053 - offsetof (struct splay_tree_key_s,
2054 structelem_refcount));
2055 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
2056
2057 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
2058 with the splay_tree_keys embedded inside. */
2059 splay_tree_node node =
2060 (splay_tree_node) ((char *) k
2061 - offsetof (struct splay_tree_node_s, key));
2062 while (true)
2063 {
2064 /* Starting from the _FIRST key, continue for all following
2065 sibling keys. */
2066 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2067 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
2068 break;
2069 else
2070 k = &(++node)->key;
2071 }
2072 }
2073 else
2074 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2075
2076 if (aq)
2077 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2078 (void *) k->tgt);
2079 else
2080 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2081 return is_tgt_unmapped;
2082 }
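
/* The REFCOUNT_STRUCTELEM handling above relies on the layout produced by
   gomp_map_vars_internal: the sibling keys of one GOMP_MAP_STRUCT mapping
   occupy consecutive splay_tree_nodes in tgt->array, and every sibling's
   structelem_refcount_ptr points at the _FIRST sibling's structelem_refcount
   field, so subtracting offsetof (struct splay_tree_key_s,
   structelem_refcount) recovers the _FIRST key, exactly as done above.  */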
2083
2084 attribute_hidden bool
2085 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2086 {
2087 return gomp_remove_var_internal (devicep, k, NULL);
2088 }
2089
2090 /* Remove a variable asynchronously. This actually removes the variable
2091 mapping immediately, but retains the linked target_mem_desc until the
2092 asynchronous operation has completed (as it may still refer to target
2093 memory). The device lock must be held before entry, and remains locked on
2094 exit. */
2095
2096 attribute_hidden void
2097 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2098 struct goacc_asyncqueue *aq)
2099 {
2100 (void) gomp_remove_var_internal (devicep, k, aq);
2101 }
2102
2103 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2104 variables back from device to host; if it is false, it is assumed that this
2105 has been done already. */
2106
2107 static inline __attribute__((always_inline)) void
2108 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2109 htab_t *refcount_set, struct goacc_asyncqueue *aq)
2110 {
2111 struct gomp_device_descr *devicep = tgt->device_descr;
2112
2113 if (tgt->list_count == 0)
2114 {
2115 free (tgt);
2116 return;
2117 }
2118
2119 gomp_mutex_lock (&devicep->lock);
2120 if (devicep->state == GOMP_DEVICE_FINALIZED)
2121 {
2122 gomp_mutex_unlock (&devicep->lock);
2123 free (tgt->array);
2124 free (tgt);
2125 return;
2126 }
2127
2128 size_t i;
2129
2130 /* We must perform detachments before any copies back to the host. */
2131 for (i = 0; i < tgt->list_count; i++)
2132 {
2133 splay_tree_key k = tgt->list[i].key;
2134
2135 if (k != NULL && tgt->list[i].is_attach)
2136 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2137 + tgt->list[i].offset,
2138 false, NULL);
2139 }
2140
2141 for (i = 0; i < tgt->list_count; i++)
2142 {
2143 splay_tree_key k = tgt->list[i].key;
2144 if (k == NULL)
2145 continue;
2146
2147 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2148 counts ('n->refcount', 'n->dynamic_refcount'). */
2149 if (tgt->list[i].is_attach)
2150 continue;
2151
2152 bool do_copy, do_remove;
2153 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2154
2155 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2156 || tgt->list[i].always_copy_from)
2157 gomp_copy_dev2host (devicep, aq,
2158 (void *) (k->host_start + tgt->list[i].offset),
2159 (void *) (k->tgt->tgt_start + k->tgt_offset
2160 + tgt->list[i].offset),
2161 tgt->list[i].length);
2162 if (do_remove)
2163 {
2164 struct target_mem_desc *k_tgt = k->tgt;
2165 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2166 /* It would be bad if TGT got unmapped while we're still iterating
2167 over its LIST_COUNT, since we also expect to use it in the following
2168 code. */
2169 assert (!is_tgt_unmapped
2170 || k_tgt != tgt);
2171 }
2172 }
2173
2174 if (aq)
2175 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2176 (void *) tgt);
2177 else
2178 gomp_unref_tgt ((void *) tgt);
2179
2180 gomp_mutex_unlock (&devicep->lock);
2181 }
2182
2183 static void
2184 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2185 htab_t *refcount_set)
2186 {
2187 /* This management of a local refcount_set is for the convenience of callers
2188 who do not share a refcount_set over multiple map/unmap uses. */
2189 htab_t local_refcount_set = NULL;
2190 if (refcount_set == NULL)
2191 {
2192 local_refcount_set = htab_create (tgt->list_count);
2193 refcount_set = &local_refcount_set;
2194 }
2195
2196 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2197
2198 if (local_refcount_set)
2199 htab_free (local_refcount_set);
2200 }
2201
2202 attribute_hidden void
2203 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2204 struct goacc_asyncqueue *aq)
2205 {
2206 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2207 }
2208
2209 static void
2210 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2211 size_t *sizes, void *kinds, bool short_mapkind)
2212 {
2213 size_t i;
2214 struct splay_tree_key_s cur_node;
2215 const int typemask = short_mapkind ? 0xff : 0x7;
2216
2217 if (!devicep)
2218 return;
2219
2220 if (mapnum == 0)
2221 return;
2222
2223 gomp_mutex_lock (&devicep->lock);
2224 if (devicep->state == GOMP_DEVICE_FINALIZED)
2225 {
2226 gomp_mutex_unlock (&devicep->lock);
2227 return;
2228 }
2229
2230 for (i = 0; i < mapnum; i++)
2231 if (sizes[i])
2232 {
2233 cur_node.host_start = (uintptr_t) hostaddrs[i];
2234 cur_node.host_end = cur_node.host_start + sizes[i];
2235 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2236 if (n)
2237 {
2238 int kind = get_kind (short_mapkind, kinds, i);
2239 if (n->host_start > cur_node.host_start
2240 || n->host_end < cur_node.host_end)
2241 {
2242 gomp_mutex_unlock (&devicep->lock);
2243 gomp_fatal ("Trying to update [%p..%p) object when "
2244 "only [%p..%p) is mapped",
2245 (void *) cur_node.host_start,
2246 (void *) cur_node.host_end,
2247 (void *) n->host_start,
2248 (void *) n->host_end);
2249 }
2250
2251 if (n->aux && n->aux->attach_count)
2252 {
2253 uintptr_t addr = cur_node.host_start;
2254 while (addr < cur_node.host_end)
2255 {
2256 /* We have to be careful not to overwrite still attached
2257 pointers during host<->device updates. */
2258 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2259 if (n->aux->attach_count[i] == 0)
2260 {
2261 void *devaddr = (void *) (n->tgt->tgt_start
2262 + n->tgt_offset
2263 + addr - n->host_start);
2264 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2265 gomp_copy_host2dev (devicep, NULL,
2266 devaddr, (void *) addr,
2267 sizeof (void *), false, NULL);
2268 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2269 gomp_copy_dev2host (devicep, NULL,
2270 (void *) addr, devaddr,
2271 sizeof (void *));
2272 }
2273 addr += sizeof (void *);
2274 }
2275 }
2276 else
2277 {
2278 void *hostaddr = (void *) cur_node.host_start;
2279 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2280 + cur_node.host_start
2281 - n->host_start);
2282 size_t size = cur_node.host_end - cur_node.host_start;
2283
2284 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2285 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2286 false, NULL);
2287 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2288 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2289 }
2290 }
2291 }
2292 gomp_mutex_unlock (&devicep->lock);
2293 }
2294
2295 /* Load the image pointed to by TARGET_DATA to the device specified by
2296 DEVICEP, and insert into the splay tree the mappings between addresses from
2297 HOST_TABLE and from the loaded target image. We rely on the host and device
2298 compilers emitting variables and functions in the same order. */
2299
2300 static void
2301 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2302 const void *host_table, const void *target_data,
2303 bool is_register_lock)
2304 {
2305 void **host_func_table = ((void ***) host_table)[0];
2306 void **host_funcs_end = ((void ***) host_table)[1];
2307 void **host_var_table = ((void ***) host_table)[2];
2308 void **host_vars_end = ((void ***) host_table)[3];
2309
2310 /* The func table contains only addresses, the var table contains addresses
2311 and corresponding sizes. */
2312 int num_funcs = host_funcs_end - host_func_table;
2313 int num_vars = (host_vars_end - host_var_table) / 2;
2314
2315 /* Currently the only 'other' entry is 'device_num'. */
2316 int num_others = 1;
2317
2318 /* Load image to device and get target addresses for the image. */
2319 struct addr_pair *target_table = NULL;
2320 int i, num_target_entries;
2321
2322 num_target_entries
2323 = devicep->load_image_func (devicep->target_id, version,
2324 target_data, &target_table);
2325
2326 if (num_target_entries != num_funcs + num_vars
2327 /* Others (device_num) are included as trailing entries in pair list. */
2328 && num_target_entries != num_funcs + num_vars + num_others)
2329 {
2330 gomp_mutex_unlock (&devicep->lock);
2331 if (is_register_lock)
2332 gomp_mutex_unlock (&register_lock);
2333 gomp_fatal ("Cannot map target functions or variables"
2334 " (expected %u, have %u)", num_funcs + num_vars,
2335 num_target_entries);
2336 }
2337
2338 /* Insert host-target address mapping into splay tree. */
2339 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2340 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2341 tgt->refcount = REFCOUNT_INFINITY;
2342 tgt->tgt_start = 0;
2343 tgt->tgt_end = 0;
2344 tgt->to_free = NULL;
2345 tgt->prev = NULL;
2346 tgt->list_count = 0;
2347 tgt->device_descr = devicep;
2348 splay_tree_node array = tgt->array;
2349
2350 for (i = 0; i < num_funcs; i++)
2351 {
2352 splay_tree_key k = &array->key;
2353 k->host_start = (uintptr_t) host_func_table[i];
2354 k->host_end = k->host_start + 1;
2355 k->tgt = tgt;
2356 k->tgt_offset = target_table[i].start;
2357 k->refcount = REFCOUNT_INFINITY;
2358 k->dynamic_refcount = 0;
2359 k->aux = NULL;
2360 array->left = NULL;
2361 array->right = NULL;
2362 splay_tree_insert (&devicep->mem_map, array);
2363 array++;
2364 }
2365
2366 /* Most significant bit of the size in host and target tables marks
2367 "omp declare target link" variables. */
2368 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2369 const uintptr_t size_mask = ~link_bit;
2370
2371 for (i = 0; i < num_vars; i++)
2372 {
2373 struct addr_pair *target_var = &target_table[num_funcs + i];
2374 uintptr_t target_size = target_var->end - target_var->start;
2375 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2376
2377 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2378 {
2379 gomp_mutex_unlock (&devicep->lock);
2380 if (is_register_lock)
2381 gomp_mutex_unlock (&register_lock);
2382 gomp_fatal ("Cannot map target variables (size mismatch)");
2383 }
2384
2385 splay_tree_key k = &array->key;
2386 k->host_start = (uintptr_t) host_var_table[i * 2];
2387 k->host_end
2388 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2389 k->tgt = tgt;
2390 k->tgt_offset = target_var->start;
2391 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2392 k->dynamic_refcount = 0;
2393 k->aux = NULL;
2394 array->left = NULL;
2395 array->right = NULL;
2396 splay_tree_insert (&devicep->mem_map, array);
2397 array++;
2398 }
2399
2400 /* The last entry is for the on-device 'device_num' variable. Tolerate the
2401 case where the plugin does not return this entry. */
2402 if (num_funcs + num_vars < num_target_entries)
2403 {
2404 struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2405 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2406 was found in this image. */
2407 if (device_num_var->start != 0)
2408 {
2409 /* The index of the devicep within devices[] is regarded as its
2410 'device number', which is different from the per-device type
2411 devicep->target_id. */
2412 int device_num_val = (int) (devicep - &devices[0]);
2413 if (device_num_var->end - device_num_var->start != sizeof (int))
2414 {
2415 gomp_mutex_unlock (&devicep->lock);
2416 if (is_register_lock)
2417 gomp_mutex_unlock (&register_lock);
2418 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2419 "format");
2420 }
2421
2422 /* Copy the device_num value to its place in device memory, thereby
2423 putting its device number into effect. */
2424 gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2425 &device_num_val, sizeof (int), false, NULL);
2426 }
2427 }
2428
2429 free (target_table);
2430 }
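
/* Schematically, the HOST_TABLE read above (and again in
   gomp_unload_image_from_device below) is a four-pointer descriptor:

     void *host_table[] = { funcs_begin, funcs_end, vars_begin, vars_end };

   The function table holds plain addresses, while the variable table holds
   address/size pairs, hence num_vars = (vars_end - vars_begin) / 2.  */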
2431
2432 /* Unload the mappings described by TARGET_DATA from device DEVICEP.
2433 The device must be locked. */
2434
2435 static void
2436 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2437 unsigned version,
2438 const void *host_table, const void *target_data)
2439 {
2440 void **host_func_table = ((void ***) host_table)[0];
2441 void **host_funcs_end = ((void ***) host_table)[1];
2442 void **host_var_table = ((void ***) host_table)[2];
2443 void **host_vars_end = ((void ***) host_table)[3];
2444
2445 /* The func table contains only addresses, the var table contains addresses
2446 and corresponding sizes. */
2447 int num_funcs = host_funcs_end - host_func_table;
2448 int num_vars = (host_vars_end - host_var_table) / 2;
2449
2450 struct splay_tree_key_s k;
2451 splay_tree_key node = NULL;
2452
2453 /* Find the mapping at the start of the node array. */
2454 if (num_funcs || num_vars)
2455 {
2456 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2457 : (uintptr_t) host_var_table[0]);
2458 k.host_end = k.host_start + 1;
2459 node = splay_tree_lookup (&devicep->mem_map, &k);
2460 }
2461
2462 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2463 {
2464 gomp_mutex_unlock (&devicep->lock);
2465 gomp_fatal ("image unload fail");
2466 }
2467
2468 /* Remove mappings from splay tree. */
2469 int i;
2470 for (i = 0; i < num_funcs; i++)
2471 {
2472 k.host_start = (uintptr_t) host_func_table[i];
2473 k.host_end = k.host_start + 1;
2474 splay_tree_remove (&devicep->mem_map, &k);
2475 }
2476
2477 /* Most significant bit of the size in host and target tables marks
2478 "omp declare target link" variables. */
2479 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2480 const uintptr_t size_mask = ~link_bit;
2481 bool is_tgt_unmapped = false;
2482
2483 for (i = 0; i < num_vars; i++)
2484 {
2485 k.host_start = (uintptr_t) host_var_table[i * 2];
2486 k.host_end
2487 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2488
2489 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2490 splay_tree_remove (&devicep->mem_map, &k);
2491 else
2492 {
2493 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2494 is_tgt_unmapped = gomp_remove_var (devicep, n);
2495 }
2496 }
2497
2498 if (node && !is_tgt_unmapped)
2499 {
2500 free (node->tgt);
2501 free (node);
2502 }
2503 }
2504
2505 /* This function should be called from every offload image while loading.
2506 It gets the descriptor of the host func and var tables HOST_TABLE, the
2507 TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin. */
2508
2509 void
2510 GOMP_offload_register_ver (unsigned version, const void *host_table,
2511 int target_type, const void *target_data)
2512 {
2513 int i;
2514
2515 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2516 gomp_fatal ("Library too old for offload (version %u < %u)",
2517 GOMP_VERSION, GOMP_VERSION_LIB (version));
2518
2519 gomp_mutex_lock (&register_lock);
2520
2521 /* Load image to all initialized devices. */
2522 for (i = 0; i < num_devices; i++)
2523 {
2524 struct gomp_device_descr *devicep = &devices[i];
2525 gomp_mutex_lock (&devicep->lock);
2526 if (devicep->type == target_type
2527 && devicep->state == GOMP_DEVICE_INITIALIZED)
2528 gomp_load_image_to_device (devicep, version,
2529 host_table, target_data, true);
2530 gomp_mutex_unlock (&devicep->lock);
2531 }
2532
2533 /* Insert the image into the array of pending images. */
2534 offload_images
2535 = gomp_realloc_unlock (offload_images,
2536 (num_offload_images + 1)
2537 * sizeof (struct offload_image_descr));
2538 offload_images[num_offload_images].version = version;
2539 offload_images[num_offload_images].type = target_type;
2540 offload_images[num_offload_images].host_table = host_table;
2541 offload_images[num_offload_images].target_data = target_data;
2542
2543 num_offload_images++;
2544 gomp_mutex_unlock (&register_lock);
2545 }
2546
2547 void
2548 GOMP_offload_register (const void *host_table, int target_type,
2549 const void *target_data)
2550 {
2551 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2552 }
2553
2554 /* This function should be called from every offload image while unloading.
2555 It gets the descriptor of the host func and var tables HOST_TABLE, the
2556 TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin. */
2557
2558 void
2559 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2560 int target_type, const void *target_data)
2561 {
2562 int i;
2563
2564 gomp_mutex_lock (&register_lock);
2565
2566 /* Unload image from all initialized devices. */
2567 for (i = 0; i < num_devices; i++)
2568 {
2569 struct gomp_device_descr *devicep = &devices[i];
2570 gomp_mutex_lock (&devicep->lock);
2571 if (devicep->type == target_type
2572 && devicep->state == GOMP_DEVICE_INITIALIZED)
2573 gomp_unload_image_from_device (devicep, version,
2574 host_table, target_data);
2575 gomp_mutex_unlock (&devicep->lock);
2576 }
2577
2578 /* Remove image from array of pending images. */
2579 for (i = 0; i < num_offload_images; i++)
2580 if (offload_images[i].target_data == target_data)
2581 {
2582 offload_images[i] = offload_images[--num_offload_images];
2583 break;
2584 }
2585
2586 gomp_mutex_unlock (&register_lock);
2587 }
2588
2589 void
2590 GOMP_offload_unregister (const void *host_table, int target_type,
2591 const void *target_data)
2592 {
2593 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2594 }
2595
2596 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2597 must be locked on entry, and remains locked on return. */
2598
2599 attribute_hidden void
2600 gomp_init_device (struct gomp_device_descr *devicep)
2601 {
2602 int i;
2603 if (!devicep->init_device_func (devicep->target_id))
2604 {
2605 gomp_mutex_unlock (&devicep->lock);
2606 gomp_fatal ("device initialization failed");
2607 }
2608
2609 unsigned int features = gomp_requires_mask;
2610 if (!devicep->supported_features_func (&features))
2611 {
2612 char buf[64], *end = buf + sizeof (buf), *p = buf;
2613 if (features & GOMP_REQUIRES_UNIFIED_ADDRESS)
2614 p += snprintf (p, end - p, "unified_address");
2615 if (features & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2616 p += snprintf (p, end - p, "%sunified_shared_memory",
2617 (p == buf ? "" : ", "));
2618 if (features & GOMP_REQUIRES_REVERSE_OFFLOAD)
2619 p += snprintf (p, end - p, "%sreverse_offload", (p == buf ? "" : ", "));
2620 gomp_error ("device does not support required features: %s", buf);
2621 }
2622
2623 /* Load to the device all images registered so far. */
2624 for (i = 0; i < num_offload_images; i++)
2625 {
2626 struct offload_image_descr *image = &offload_images[i];
2627 if (image->type == devicep->type)
2628 gomp_load_image_to_device (devicep, image->version,
2629 image->host_table, image->target_data,
2630 false);
2631 }
2632
2633 /* Initialize OpenACC asynchronous queues. */
2634 goacc_init_asyncqueues (devicep);
2635
2636 devicep->state = GOMP_DEVICE_INITIALIZED;
2637 }
2638
2639 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2640 must be locked on entry, and remains locked on return. */
2641
2642 attribute_hidden bool
2643 gomp_fini_device (struct gomp_device_descr *devicep)
2644 {
2645 bool ret = goacc_fini_asyncqueues (devicep);
2646 ret &= devicep->fini_device_func (devicep->target_id);
2647 devicep->state = GOMP_DEVICE_FINALIZED;
2648 return ret;
2649 }
2650
2651 attribute_hidden void
2652 gomp_unload_device (struct gomp_device_descr *devicep)
2653 {
2654 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2655 {
2656 unsigned i;
2657
2658 /* Unload from the device all currently registered images. */
2659 for (i = 0; i < num_offload_images; i++)
2660 {
2661 struct offload_image_descr *image = &offload_images[i];
2662 if (image->type == devicep->type)
2663 gomp_unload_image_from_device (devicep, image->version,
2664 image->host_table,
2665 image->target_data);
2666 }
2667 }
2668 }
2669
2670 /* Host fallback for GOMP_target{,_ext} routines. */
2671
2672 static void
2673 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2674 struct gomp_device_descr *devicep, void **args)
2675 {
2676 struct gomp_thread old_thr, *thr = gomp_thread ();
2677
2678 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2679 && devicep != NULL)
2680 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2681 "be used for offloading");
2682
2683 old_thr = *thr;
2684 memset (thr, '\0', sizeof (*thr));
2685 if (gomp_places_list)
2686 {
2687 thr->place = old_thr.place;
2688 thr->ts.place_partition_len = gomp_places_list_len;
2689 }
2690 if (args)
2691 while (*args)
2692 {
2693 intptr_t id = (intptr_t) *args++, val;
2694 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2695 val = (intptr_t) *args++;
2696 else
2697 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2698 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2699 continue;
2700 id &= GOMP_TARGET_ARG_ID_MASK;
2701 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2702 continue;
2703 val = val > INT_MAX ? INT_MAX : val;
2704 if (val)
2705 gomp_icv (true)->thread_limit_var = val;
2706 break;
2707 }
2708
2709 fn (hostaddrs);
2710 gomp_free_thread (thr);
2711 *thr = old_thr;
2712 }
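
/* Illustrative sketch of how one ARGS element is decoded, matching the loop
   above (the GOMP_TARGET_ARG_* names are the constants it already uses):

     intptr_t id = (intptr_t) *args++;
     intptr_t val = (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
                    ? (intptr_t) *args++                 // value in the next element
                    : id >> GOMP_TARGET_ARG_VALUE_SHIFT; // value packed into id

   id & GOMP_TARGET_ARG_DEVICE_MASK then selects which device the argument is
   meant for, and id & GOMP_TARGET_ARG_ID_MASK selects which argument it is,
   e.g. GOMP_TARGET_ARG_THREAD_LIMIT as handled above.  */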
2713
2714 /* Calculate alignment and size requirements of a private copy of data shared
2715 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2716
2717 static inline void
2718 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2719 unsigned short *kinds, size_t *tgt_align,
2720 size_t *tgt_size)
2721 {
2722 size_t i;
2723 for (i = 0; i < mapnum; i++)
2724 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2725 {
2726 size_t align = (size_t) 1 << (kinds[i] >> 8);
2727 if (*tgt_align < align)
2728 *tgt_align = align;
2729 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2730 *tgt_size += sizes[i];
2731 }
2732 }
2733
2734 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2735
2736 static inline void
2737 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2738 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2739 size_t tgt_size)
2740 {
2741 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2742 if (al)
2743 tgt += tgt_align - al;
2744 tgt_size = 0;
2745 size_t i;
2746 for (i = 0; i < mapnum; i++)
2747 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2748 {
2749 size_t align = (size_t) 1 << (kinds[i] >> 8);
2750 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2751 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2752 hostaddrs[i] = tgt + tgt_size;
2753 tgt_size = tgt_size + sizes[i];
2754 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2755 {
2756 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2757 ++i;
2758 }
2759 }
2760 }
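
/* Worked example of the packing scheme implemented by
   calculate_firstprivate_requirements and copy_firstprivate_data (sizes and
   alignments chosen purely for illustration): two firstprivate objects of
   size 4/alignment 4 and size 8/alignment 8 yield tgt_align = 8 and
   tgt_size = 4, rounded up to 8, plus 8 = 16 bytes.  copy_firstprivate_data
   then memcpy's each object to its aligned offset within the caller-provided
   buffer and redirects hostaddrs[i] to that private copy.  */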
2761
2762 /* Helper function of GOMP_target{,_ext} routines. */
2763
2764 static void *
2765 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2766 void (*host_fn) (void *))
2767 {
2768 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2769 return (void *) host_fn;
2770 else
2771 {
2772 gomp_mutex_lock (&devicep->lock);
2773 if (devicep->state == GOMP_DEVICE_FINALIZED)
2774 {
2775 gomp_mutex_unlock (&devicep->lock);
2776 return NULL;
2777 }
2778
2779 struct splay_tree_key_s k;
2780 k.host_start = (uintptr_t) host_fn;
2781 k.host_end = k.host_start + 1;
2782 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2783 gomp_mutex_unlock (&devicep->lock);
2784 if (tgt_fn == NULL)
2785 return NULL;
2786
2787 return (void *) tgt_fn->tgt_offset;
2788 }
2789 }
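
/* Note that the lookup above uses the one-byte range [host_fn, host_fn + 1),
   which matches the entries gomp_load_image_to_device creates for host
   function addresses, so a host function pointer resolves to the
   corresponding device entry point stored in tgt_offset.  */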
2790
2791 /* Called when encountering a target directive. If DEVICE
2792 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2793 GOMP_DEVICE_HOST_FALLBACK (or any value
2794 larger than the last available hw device), use the host fallback.
2795 FN is the address of the host code; UNUSED is part of the current ABI, but
2796 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2797 with MAPNUM entries, holding the addresses of the host objects,
2798 the sizes of the host objects (resp., for pointer kinds, the pointer bias
2799 and an assumed size of sizeof (void *)) and the kinds. */
2800
2801 void
2802 GOMP_target (int device, void (*fn) (void *), const void *unused,
2803 size_t mapnum, void **hostaddrs, size_t *sizes,
2804 unsigned char *kinds)
2805 {
2806 struct gomp_device_descr *devicep = resolve_device (device);
2807
2808 void *fn_addr;
2809 if (devicep == NULL
2810 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2811 /* All shared memory devices should use the GOMP_target_ext function. */
2812 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2813 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2814 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2815
2816 htab_t refcount_set = htab_create (mapnum);
2817 struct target_mem_desc *tgt_vars
2818 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2819 &refcount_set, GOMP_MAP_VARS_TARGET);
2820 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2821 NULL);
2822 htab_clear (refcount_set);
2823 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2824 htab_free (refcount_set);
2825 }
2826
2827 static inline unsigned int
2828 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2829 {
2830 /* If we cannot run asynchronously, simply ignore nowait. */
2831 if (devicep != NULL && devicep->async_run_func == NULL)
2832 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2833
2834 return flags;
2835 }
2836
2837 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2838 and several arguments have been added:
2839 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2840 DEPEND is an array of dependencies, see GOMP_task for details.
2841
2842 ARGS is a pointer to an array consisting of a variable number of both
2843 device-independent and device-specific arguments, which can take one or two
2844 elements where the first specifies for which device it is intended, the type
2845 and optionally also the value. If the value is not present in the first
2846 element, the whole second element is the actual value. The last element of
2847 the array is a single NULL. Among the device-independent arguments are, for
2848 example, NUM_TEAMS and THREAD_LIMIT.
2849
2850 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2851 that value, or 1 if the teams construct is not present, or 0 if the
2852 teams construct does not have a num_teams clause and so the choice is
2853 implementation defined, and -1 if it can't be determined on the host
2854 what value GOMP_teams will have on the device.
2855 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2856 body with that value, or 0 if the teams construct does not have a
2857 thread_limit clause or the teams construct is not present, or -1 if it
2858 can't be determined on the host what value GOMP_teams will have on the device. */
2859
2860 void
2861 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2862 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2863 unsigned int flags, void **depend, void **args)
2864 {
2865 struct gomp_device_descr *devicep = resolve_device (device);
2866 size_t tgt_align = 0, tgt_size = 0;
2867 bool fpc_done = false;
2868
2869 flags = clear_unsupported_flags (devicep, flags);
2870
2871 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2872 {
2873 struct gomp_thread *thr = gomp_thread ();
2874 /* Create a team if we don't have any around, as nowait
2875 target tasks make sense to run asynchronously even when
2876 outside of any parallel. */
2877 if (__builtin_expect (thr->ts.team == NULL, 0))
2878 {
2879 struct gomp_team *team = gomp_new_team (1);
2880 struct gomp_task *task = thr->task;
2881 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2882 team->prev_ts = thr->ts;
2883 thr->ts.team = team;
2884 thr->ts.team_id = 0;
2885 thr->ts.work_share = &team->work_shares[0];
2886 thr->ts.last_work_share = NULL;
2887 #ifdef HAVE_SYNC_BUILTINS
2888 thr->ts.single_count = 0;
2889 #endif
2890 thr->ts.static_trip = 0;
2891 thr->task = &team->implicit_task[0];
2892 gomp_init_task (thr->task, NULL, icv);
2893 if (task)
2894 {
2895 thr->task = task;
2896 gomp_end_task ();
2897 free (task);
2898 thr->task = &team->implicit_task[0];
2899 }
2900 else
2901 pthread_setspecific (gomp_thread_destructor, thr);
2902 }
2903 if (thr->ts.team
2904 && !thr->task->final_task)
2905 {
2906 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2907 sizes, kinds, flags, depend, args,
2908 GOMP_TARGET_TASK_BEFORE_MAP);
2909 return;
2910 }
2911 }
2912
2913 /* If there are depend clauses, but nowait is not present
2914 (or we are in a final task), block the parent task until the
2915 dependencies are resolved and then just continue with the rest
2916 of the function as if it is a merged task. */
2917 if (depend != NULL)
2918 {
2919 struct gomp_thread *thr = gomp_thread ();
2920 if (thr->task && thr->task->depend_hash)
2921 {
2922 /* If we might need to wait, copy firstprivate now. */
2923 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2924 &tgt_align, &tgt_size);
2925 if (tgt_align)
2926 {
2927 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2928 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2929 tgt_align, tgt_size);
2930 }
2931 fpc_done = true;
2932 gomp_task_maybe_wait_for_dependencies (depend);
2933 }
2934 }
2935
2936 void *fn_addr;
2937 if (devicep == NULL
2938 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2939 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2940 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2941 {
2942 if (!fpc_done)
2943 {
2944 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2945 &tgt_align, &tgt_size);
2946 if (tgt_align)
2947 {
2948 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2949 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2950 tgt_align, tgt_size);
2951 }
2952 }
2953 gomp_target_fallback (fn, hostaddrs, devicep, args);
2954 return;
2955 }
2956
2957 struct target_mem_desc *tgt_vars;
2958 htab_t refcount_set = NULL;
2959
2960 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2961 {
2962 if (!fpc_done)
2963 {
2964 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2965 &tgt_align, &tgt_size);
2966 if (tgt_align)
2967 {
2968 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2969 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2970 tgt_align, tgt_size);
2971 }
2972 }
2973 tgt_vars = NULL;
2974 }
2975 else
2976 {
2977 refcount_set = htab_create (mapnum);
2978 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2979 true, &refcount_set, GOMP_MAP_VARS_TARGET);
2980 }
2981 devicep->run_func (devicep->target_id, fn_addr,
2982 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2983 args);
2984 if (tgt_vars)
2985 {
2986 htab_clear (refcount_set);
2987 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2988 }
2989 if (refcount_set)
2990 htab_free (refcount_set);
2991 }
2992
2993 /* Host fallback for GOMP_target_data{,_ext} routines. */
2994
2995 static void
2996 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2997 {
2998 struct gomp_task_icv *icv = gomp_icv (false);
2999
3000 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3001 && devicep != NULL)
3002 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3003 "be used for offloading");
3004
3005 if (icv->target_data)
3006 {
3007 /* Even when doing a host fallback, if there are any active
3008 #pragma omp target data constructs, we need to remember the
3009 new #pragma omp target data, otherwise GOMP_target_end_data
3010 would get out of sync. */
3011 struct target_mem_desc *tgt
3012 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3013 NULL, GOMP_MAP_VARS_DATA);
3014 tgt->prev = icv->target_data;
3015 icv->target_data = tgt;
3016 }
3017 }
3018
3019 void
3020 GOMP_target_data (int device, const void *unused, size_t mapnum,
3021 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3022 {
3023 struct gomp_device_descr *devicep = resolve_device (device);
3024
3025 if (devicep == NULL
3026 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3027 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3028 return gomp_target_data_fallback (devicep);
3029
3030 struct target_mem_desc *tgt
3031 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3032 NULL, GOMP_MAP_VARS_DATA);
3033 struct gomp_task_icv *icv = gomp_icv (true);
3034 tgt->prev = icv->target_data;
3035 icv->target_data = tgt;
3036 }
3037
3038 void
3039 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3040 size_t *sizes, unsigned short *kinds)
3041 {
3042 struct gomp_device_descr *devicep = resolve_device (device);
3043
3044 if (devicep == NULL
3045 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3046 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3047 return gomp_target_data_fallback (devicep);
3048
3049 struct target_mem_desc *tgt
3050 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3051 NULL, GOMP_MAP_VARS_DATA);
3052 struct gomp_task_icv *icv = gomp_icv (true);
3053 tgt->prev = icv->target_data;
3054 icv->target_data = tgt;
3055 }
3056
3057 void
3058 GOMP_target_end_data (void)
3059 {
3060 struct gomp_task_icv *icv = gomp_icv (false);
3061 if (icv->target_data)
3062 {
3063 struct target_mem_desc *tgt = icv->target_data;
3064 icv->target_data = tgt->prev;
3065 gomp_unmap_vars (tgt, true, NULL);
3066 }
3067 }
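
/* The ICV's target_data field thus acts as a stack of active 'omp target
   data' regions linked through tgt->prev: GOMP_target_data{,_ext} (and, when
   needed, the host fallback) push a descriptor on entry, and this routine
   pops and unmaps the most recent one on exit.  */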
3068
3069 void
3070 GOMP_target_update (int device, const void *unused, size_t mapnum,
3071 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3072 {
3073 struct gomp_device_descr *devicep = resolve_device (device);
3074
3075 if (devicep == NULL
3076 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3077 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3078 return;
3079
3080 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3081 }
3082
3083 void
3084 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3085 size_t *sizes, unsigned short *kinds,
3086 unsigned int flags, void **depend)
3087 {
3088 struct gomp_device_descr *devicep = resolve_device (device);
3089
3090 /* If there are depend clauses, but nowait is not present,
3091 block the parent task until the dependencies are resolved
3092 and then just continue with the rest of the function as if it
3093 is a merged task. Until we are able to schedule tasks during
3094 variable mapping or unmapping, ignore nowait if depend clauses
3095 are not present. */
3096 if (depend != NULL)
3097 {
3098 struct gomp_thread *thr = gomp_thread ();
3099 if (thr->task && thr->task->depend_hash)
3100 {
3101 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3102 && thr->ts.team
3103 && !thr->task->final_task)
3104 {
3105 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3106 mapnum, hostaddrs, sizes, kinds,
3107 flags | GOMP_TARGET_FLAG_UPDATE,
3108 depend, NULL, GOMP_TARGET_TASK_DATA))
3109 return;
3110 }
3111 else
3112 {
3113 struct gomp_team *team = thr->ts.team;
3114 /* If parallel or taskgroup has been cancelled, don't start new
3115 tasks. */
3116 if (__builtin_expect (gomp_cancel_var, 0) && team)
3117 {
3118 if (gomp_team_barrier_cancelled (&team->barrier))
3119 return;
3120 if (thr->task->taskgroup)
3121 {
3122 if (thr->task->taskgroup->cancelled)
3123 return;
3124 if (thr->task->taskgroup->workshare
3125 && thr->task->taskgroup->prev
3126 && thr->task->taskgroup->prev->cancelled)
3127 return;
3128 }
3129 }
3130
3131 gomp_task_maybe_wait_for_dependencies (depend);
3132 }
3133 }
3134 }
3135
3136 if (devicep == NULL
3137 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3138 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3139 return;
3140
3141 struct gomp_thread *thr = gomp_thread ();
3142 struct gomp_team *team = thr->ts.team;
3143 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3144 if (__builtin_expect (gomp_cancel_var, 0) && team)
3145 {
3146 if (gomp_team_barrier_cancelled (&team->barrier))
3147 return;
3148 if (thr->task->taskgroup)
3149 {
3150 if (thr->task->taskgroup->cancelled)
3151 return;
3152 if (thr->task->taskgroup->workshare
3153 && thr->task->taskgroup->prev
3154 && thr->task->taskgroup->prev->cancelled)
3155 return;
3156 }
3157 }
3158
3159 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3160 }
3161
3162 static void
3163 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3164 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3165 htab_t *refcount_set)
3166 {
3167 const int typemask = 0xff;
3168 size_t i;
3169 gomp_mutex_lock (&devicep->lock);
3170 if (devicep->state == GOMP_DEVICE_FINALIZED)
3171 {
3172 gomp_mutex_unlock (&devicep->lock);
3173 return;
3174 }
3175
3176 for (i = 0; i < mapnum; i++)
3177 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3178 {
3179 struct splay_tree_key_s cur_node;
3180 cur_node.host_start = (uintptr_t) hostaddrs[i];
3181 cur_node.host_end = cur_node.host_start + sizeof (void *);
3182 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3183
3184 if (n)
3185 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3186 false, NULL);
3187 }
3188
3189 int nrmvars = 0;
3190 splay_tree_key remove_vars[mapnum];
3191
3192 for (i = 0; i < mapnum; i++)
3193 {
3194 struct splay_tree_key_s cur_node;
3195 unsigned char kind = kinds[i] & typemask;
3196 switch (kind)
3197 {
3198 case GOMP_MAP_FROM:
3199 case GOMP_MAP_ALWAYS_FROM:
3200 case GOMP_MAP_DELETE:
3201 case GOMP_MAP_RELEASE:
3202 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3203 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3204 cur_node.host_start = (uintptr_t) hostaddrs[i];
3205 cur_node.host_end = cur_node.host_start + sizes[i];
3206 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3207 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3208 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3209 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3210 if (!k)
3211 continue;
3212
3213 bool delete_p = (kind == GOMP_MAP_DELETE
3214 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3215 bool do_copy, do_remove;
3216 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3217 &do_remove);
3218
3219 if ((kind == GOMP_MAP_FROM && do_copy)
3220 || kind == GOMP_MAP_ALWAYS_FROM)
3221 {
3222 if (k->aux && k->aux->attach_count)
3223 {
3224 /* We have to be careful not to overwrite still attached
3225 pointers during the copyback to host. */
3226 uintptr_t addr = k->host_start;
3227 while (addr < k->host_end)
3228 {
3229 size_t i = (addr - k->host_start) / sizeof (void *);
3230 if (k->aux->attach_count[i] == 0)
3231 gomp_copy_dev2host (devicep, NULL, (void *) addr,
3232 (void *) (k->tgt->tgt_start
3233 + k->tgt_offset
3234 + addr - k->host_start),
3235 sizeof (void *));
3236 addr += sizeof (void *);
3237 }
3238 }
3239 else
3240 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3241 (void *) (k->tgt->tgt_start + k->tgt_offset
3242 + cur_node.host_start
3243 - k->host_start),
3244 cur_node.host_end - cur_node.host_start);
3245 }
3246
3247 /* Structure element lists are removed altogether at once, which
3248 may cause immediate deallocation of the target_mem_desc, causing
3249 errors if we still have following element siblings to copy back.
3250 While we're at it, it also seems more disciplined to simply
3251 queue all removals together for processing below.
3252
3253 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3254 not have this problem, since it maintains an additional
3255 tgt->refcount = 1 reference to the target_mem_desc to start with.
3256 */
3257 if (do_remove)
3258 remove_vars[nrmvars++] = k;
3259 break;
3260
3261 case GOMP_MAP_DETACH:
3262 break;
3263 default:
3264 gomp_mutex_unlock (&devicep->lock);
3265 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3266 kind);
3267 }
3268 }
3269
3270 for (int i = 0; i < nrmvars; i++)
3271 gomp_remove_var (devicep, remove_vars[i]);
3272
3273 gomp_mutex_unlock (&devicep->lock);
3274 }
3275
3276 void
3277 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3278 size_t *sizes, unsigned short *kinds,
3279 unsigned int flags, void **depend)
3280 {
3281 struct gomp_device_descr *devicep = resolve_device (device);
3282
3283 /* If there are depend clauses, but nowait is not present,
3284 block the parent task until the dependencies are resolved
3285 and then just continue with the rest of the function as if it
3286 is a merged task. Until we are able to schedule tasks during
3287 variable mapping or unmapping, ignore nowait if depend clauses
3288 are not present. */
3289 if (depend != NULL)
3290 {
3291 struct gomp_thread *thr = gomp_thread ();
3292 if (thr->task && thr->task->depend_hash)
3293 {
3294 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3295 && thr->ts.team
3296 && !thr->task->final_task)
3297 {
3298 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3299 mapnum, hostaddrs, sizes, kinds,
3300 flags, depend, NULL,
3301 GOMP_TARGET_TASK_DATA))
3302 return;
3303 }
3304 else
3305 {
3306 struct gomp_team *team = thr->ts.team;
3307 /* If parallel or taskgroup has been cancelled, don't start new
3308 tasks. */
3309 if (__builtin_expect (gomp_cancel_var, 0) && team)
3310 {
3311 if (gomp_team_barrier_cancelled (&team->barrier))
3312 return;
3313 if (thr->task->taskgroup)
3314 {
3315 if (thr->task->taskgroup->cancelled)
3316 return;
3317 if (thr->task->taskgroup->workshare
3318 && thr->task->taskgroup->prev
3319 && thr->task->taskgroup->prev->cancelled)
3320 return;
3321 }
3322 }
3323
3324 gomp_task_maybe_wait_for_dependencies (depend);
3325 }
3326 }
3327 }
3328
3329 if (devicep == NULL
3330 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3331 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3332 return;
3333
3334 struct gomp_thread *thr = gomp_thread ();
3335 struct gomp_team *team = thr->ts.team;
3336 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3337 if (__builtin_expect (gomp_cancel_var, 0) && team)
3338 {
3339 if (gomp_team_barrier_cancelled (&team->barrier))
3340 return;
3341 if (thr->task->taskgroup)
3342 {
3343 if (thr->task->taskgroup->cancelled)
3344 return;
3345 if (thr->task->taskgroup->workshare
3346 && thr->task->taskgroup->prev
3347 && thr->task->taskgroup->prev->cancelled)
3348 return;
3349 }
3350 }
3351
3352 htab_t refcount_set = htab_create (mapnum);
3353
3354 /* The variables are mapped separately such that they can be released
3355 independently. */
3356 size_t i, j;
3357 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3358 for (i = 0; i < mapnum; i++)
3359 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3360 {
3361 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3362 &kinds[i], true, &refcount_set,
3363 GOMP_MAP_VARS_ENTER_DATA);
3364 i += sizes[i];
3365 }
3366 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3367 {
3368 for (j = i + 1; j < mapnum; j++)
3369 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3370 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3371 break;
3372 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3373 &kinds[i], true, &refcount_set,
3374 GOMP_MAP_VARS_ENTER_DATA);
3375 i += j - i - 1;
3376 }
3377 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3378 {
3379 /* An attach operation must be processed together with the mapped
3380 base-pointer list item. */
3381 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3382 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3383 i += 1;
3384 }
3385 else
3386 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3387 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3388 else
3389 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3390 htab_free (refcount_set);
3391 }
3392
3393 bool
3394 gomp_target_task_fn (void *data)
3395 {
3396 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3397 struct gomp_device_descr *devicep = ttask->devicep;
3398
3399 if (ttask->fn != NULL)
3400 {
3401 void *fn_addr;
3402 if (devicep == NULL
3403 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3404 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3405 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3406 {
3407 ttask->state = GOMP_TARGET_TASK_FALLBACK;
3408 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3409 ttask->args);
3410 return false;
3411 }
3412
3413 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3414 {
3415 if (ttask->tgt)
3416 gomp_unmap_vars (ttask->tgt, true, NULL);
3417 return false;
3418 }
3419
3420 void *actual_arguments;
3421 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3422 {
3423 ttask->tgt = NULL;
3424 actual_arguments = ttask->hostaddrs;
3425 }
3426 else
3427 {
3428 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3429 NULL, ttask->sizes, ttask->kinds, true,
3430 NULL, GOMP_MAP_VARS_TARGET);
3431 actual_arguments = (void *) ttask->tgt->tgt_start;
3432 }
3433 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3434
3435 assert (devicep->async_run_func);
3436 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3437 ttask->args, (void *) ttask);
3438 return true;
3439 }
3440 else if (devicep == NULL
3441 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3442 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3443 return false;
3444
3445 size_t i;
3446 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3447 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3448 ttask->kinds, true);
3449 else
3450 {
3451 htab_t refcount_set = htab_create (ttask->mapnum);
3452 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3453 for (i = 0; i < ttask->mapnum; i++)
3454 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3455 {
3456 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3457 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3458 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3459 i += ttask->sizes[i];
3460 }
3461 else
3462 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3463 &ttask->kinds[i], true, &refcount_set,
3464 GOMP_MAP_VARS_ENTER_DATA);
3465 else
3466 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3467 ttask->kinds, &refcount_set);
3468 htab_free (refcount_set);
3469 }
3470 return false;
3471 }
3472
3473 void
3474 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3475 {
3476 if (thread_limit)
3477 {
3478 struct gomp_task_icv *icv = gomp_icv (true);
3479 icv->thread_limit_var
3480 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3481 }
3482 (void) num_teams;
3483 }
3484
3485 bool
3486 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3487 unsigned int thread_limit, bool first)
3488 {
3489 struct gomp_thread *thr = gomp_thread ();
3490 if (first)
3491 {
3492 if (thread_limit)
3493 {
3494 struct gomp_task_icv *icv = gomp_icv (true);
3495 icv->thread_limit_var
3496 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3497 }
3498 (void) num_teams_high;
3499 if (num_teams_low == 0)
3500 num_teams_low = 1;
3501 thr->num_teams = num_teams_low - 1;
3502 thr->team_num = 0;
3503 }
3504 else if (thr->team_num == thr->num_teams)
3505 return false;
3506 else
3507 ++thr->team_num;
3508 return true;
3509 }
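
/* One plausible host-fallback calling pattern, consistent with the logic
   above (illustrative only; LO, HI and TL stand for the caller's num_teams
   bounds and thread limit):

     if (GOMP_teams4 (LO, HI, TL, true))
       do
         ... body of the teams region, with thr->team_num identifying the
             current team ...
       while (GOMP_teams4 (LO, HI, TL, false));

   so on the host the region body runs num_teams_low times sequentially.  */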
3510
3511 void *
3512 omp_target_alloc (size_t size, int device_num)
3513 {
3514 if (device_num == gomp_get_num_devices ())
3515 return malloc (size);
3516
3517 if (device_num < 0)
3518 return NULL;
3519
3520 struct gomp_device_descr *devicep = resolve_device (device_num);
3521 if (devicep == NULL)
3522 return NULL;
3523
3524 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3525 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3526 return malloc (size);
3527
3528 gomp_mutex_lock (&devicep->lock);
3529 void *ret = devicep->alloc_func (devicep->target_id, size);
3530 gomp_mutex_unlock (&devicep->lock);
3531 return ret;
3532 }
3533
3534 void
3535 omp_target_free (void *device_ptr, int device_num)
3536 {
3537 if (device_ptr == NULL)
3538 return;
3539
3540 if (device_num == gomp_get_num_devices ())
3541 {
3542 free (device_ptr);
3543 return;
3544 }
3545
3546 if (device_num < 0)
3547 return;
3548
3549 struct gomp_device_descr *devicep = resolve_device (device_num);
3550 if (devicep == NULL)
3551 return;
3552
3553 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3554 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3555 {
3556 free (device_ptr);
3557 return;
3558 }
3559
3560 gomp_mutex_lock (&devicep->lock);
3561 gomp_free_device_memory (devicep, device_ptr);
3562 gomp_mutex_unlock (&devicep->lock);
3563 }
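
/* Usage sketch (illustrative only; the device number, buffer size and
   variable names are assumptions): allocate device memory, use it from a
   target region via is_device_ptr, then release it.

     int dev = omp_get_default_device ();
     double *dp = (double *) omp_target_alloc (1024 * sizeof (double), dev);
     if (dp != NULL)
       {
         #pragma omp target is_device_ptr (dp) device (dev)
         for (int i = 0; i < 1024; i++)
           dp[i] = i;
         omp_target_free (dp, dev);
       }  */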
3564
3565 void *
3566 gomp_usm_alloc (size_t size, int device_num)
3567 {
3568 if (device_num == gomp_get_num_devices ())
3569 return malloc (size);
3570
3571 struct gomp_device_descr *devicep = resolve_device (device_num);
3572 if (devicep == NULL)
3573 return NULL;
3574
3575 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3576 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3577 return malloc (size);
3578
3579 void *ret = NULL;
3580 gomp_mutex_lock (&devicep->lock);
3581 if (devicep->usm_alloc_func)
3582 ret = devicep->usm_alloc_func (devicep->target_id, size);
3583 gomp_mutex_unlock (&devicep->lock);
3584 return ret;
3585 }
3586
3587 void
3588 gomp_usm_free (void *device_ptr, int device_num)
3589 {
3590 if (device_ptr == NULL)
3591 return;
3592
3593 if (device_num == gomp_get_num_devices ())
3594 {
3595 free (device_ptr);
3596 return;
3597 }
3598
3599 struct gomp_device_descr *devicep = resolve_device (device_num);
3600 if (devicep == NULL)
3601 return;
3602
3603 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3604 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3605 {
3606 free (device_ptr);
3607 return;
3608 }
3609
3610 gomp_mutex_lock (&devicep->lock);
3611 if (devicep->usm_free_func
3612 && !devicep->usm_free_func (devicep->target_id, device_ptr))
3613 {
3614 gomp_mutex_unlock (&devicep->lock);
3615 gomp_fatal ("error in freeing device memory block at %p", device_ptr);
3616 }
3617 gomp_mutex_unlock (&devicep->lock);
3618 }
3619
3620 int
3621 omp_target_is_present (const void *ptr, int device_num)
3622 {
3623 if (ptr == NULL)
3624 return 1;
3625
3626 if (device_num == gomp_get_num_devices ())
3627 return 1;
3628
3629 if (device_num < 0)
3630 return 0;
3631
3632 struct gomp_device_descr *devicep = resolve_device (device_num);
3633 if (devicep == NULL)
3634 return 0;
3635
3636 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3637 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3638 return 1;
3639
3640 gomp_mutex_lock (&devicep->lock);
3641 struct splay_tree_s *mem_map = &devicep->mem_map;
3642 struct splay_tree_key_s cur_node;
3643
3644 cur_node.host_start = (uintptr_t) ptr;
3645 cur_node.host_end = cur_node.host_start;
3646 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3647 int ret = n != NULL;
3648 gomp_mutex_unlock (&devicep->lock);
3649 return ret;
3650 }
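
/* Usage sketch (illustrative only; 'x' and 'dev' are assumptions): once a
   host object has been mapped, the runtime reports it as present on that
   device, so the call below is expected to return nonzero.

     int x[64];
     int dev = omp_get_default_device ();
     #pragma omp target enter data map (to: x) device (dev)
     int present = omp_target_is_present (x, dev);
     #pragma omp target exit data map (delete: x) device (dev)  */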
3651
3652 int
3653 omp_target_memcpy (void *dst, const void *src, size_t length,
3654 size_t dst_offset, size_t src_offset, int dst_device_num,
3655 int src_device_num)
3656 {
3657 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3658 bool ret;
3659
3660 if (dst_device_num != gomp_get_num_devices ())
3661 {
3662 if (dst_device_num < 0)
3663 return EINVAL;
3664
3665 dst_devicep = resolve_device (dst_device_num);
3666 if (dst_devicep == NULL)
3667 return EINVAL;
3668
3669 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3670 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3671 dst_devicep = NULL;
3672 }
3673 if (src_device_num != num_devices_openmp)
3674 {
3675 if (src_device_num < 0)
3676 return EINVAL;
3677
3678 src_devicep = resolve_device (src_device_num);
3679 if (src_devicep == NULL)
3680 return EINVAL;
3681
3682 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3683 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3684 src_devicep = NULL;
3685 }
3686 if (src_devicep == NULL && dst_devicep == NULL)
3687 {
3688 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3689 return 0;
3690 }
3691 if (src_devicep == NULL)
3692 {
3693 gomp_mutex_lock (&dst_devicep->lock);
3694 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3695 (char *) dst + dst_offset,
3696 (char *) src + src_offset, length);
3697 gomp_mutex_unlock (&dst_devicep->lock);
3698 return (ret ? 0 : EINVAL);
3699 }
3700 if (dst_devicep == NULL)
3701 {
3702 gomp_mutex_lock (&src_devicep->lock);
3703 ret = src_devicep->dev2host_func (src_devicep->target_id,
3704 (char *) dst + dst_offset,
3705 (char *) src + src_offset, length);
3706 gomp_mutex_unlock (&src_devicep->lock);
3707 return (ret ? 0 : EINVAL);
3708 }
3709 if (src_devicep == dst_devicep)
3710 {
3711 gomp_mutex_lock (&src_devicep->lock);
3712 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3713 (char *) dst + dst_offset,
3714 (char *) src + src_offset, length);
3715 gomp_mutex_unlock (&src_devicep->lock);
3716 return (ret ? 0 : EINVAL);
3717 }
3718 return EINVAL;
3719 }
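
/* Usage sketch (illustrative only; names and sizes are assumptions): copy
   a host buffer into device memory obtained from omp_target_alloc.  The
   two zero arguments are the destination and source offsets; the return
   value is 0 on success and EINVAL on failure in this implementation.

     int host_dev = omp_get_initial_device ();
     int dev = omp_get_default_device ();
     double src[256] = { 0 };
     double *dst = (double *) omp_target_alloc (sizeof (src), dev);
     int err = omp_target_memcpy (dst, src, sizeof (src), 0, 0,
                                  dev, host_dev);  */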
3720
3721 static int
3722 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3723 int num_dims, const size_t *volume,
3724 const size_t *dst_offsets,
3725 const size_t *src_offsets,
3726 const size_t *dst_dimensions,
3727 const size_t *src_dimensions,
3728 struct gomp_device_descr *dst_devicep,
3729 struct gomp_device_descr *src_devicep)
3730 {
3731 size_t dst_slice = element_size;
3732 size_t src_slice = element_size;
3733 size_t j, dst_off, src_off, length;
3734 int i, ret;
3735
3736 if (num_dims == 1)
3737 {
3738 if (__builtin_mul_overflow (element_size, volume[0], &length)
3739 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3740 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3741 return EINVAL;
3742 if (dst_devicep == NULL && src_devicep == NULL)
3743 {
3744 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3745 length);
3746 ret = 1;
3747 }
3748 else if (src_devicep == NULL)
3749 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3750 (char *) dst + dst_off,
3751 (const char *) src + src_off,
3752 length);
3753 else if (dst_devicep == NULL)
3754 ret = src_devicep->dev2host_func (src_devicep->target_id,
3755 (char *) dst + dst_off,
3756 (const char *) src + src_off,
3757 length);
3758 else if (src_devicep == dst_devicep)
3759 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3760 (char *) dst + dst_off,
3761 (const char *) src + src_off,
3762 length);
3763 else
3764 ret = 0;
3765 return ret ? 0 : EINVAL;
3766 }
3767
3768 /* FIXME: it would be nice to have some plugin function to handle
3769 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3770 be handled in the generic recursion below, and for host-host it
3771 should be used even for any num_dims >= 2. */
3772
3773 for (i = 1; i < num_dims; i++)
3774 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3775 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3776 return EINVAL;
3777 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3778 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3779 return EINVAL;
3780 for (j = 0; j < volume[0]; j++)
3781 {
3782 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3783 (const char *) src + src_off,
3784 element_size, num_dims - 1,
3785 volume + 1, dst_offsets + 1,
3786 src_offsets + 1, dst_dimensions + 1,
3787 src_dimensions + 1, dst_devicep,
3788 src_devicep);
3789 if (ret)
3790 return ret;
3791 dst_off += dst_slice;
3792 src_off += src_slice;
3793 }
3794 return 0;
3795 }
3796
3797 int
3798 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3799 int num_dims, const size_t *volume,
3800 const size_t *dst_offsets,
3801 const size_t *src_offsets,
3802 const size_t *dst_dimensions,
3803 const size_t *src_dimensions,
3804 int dst_device_num, int src_device_num)
3805 {
3806 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3807
3808 if (!dst && !src)
3809 return INT_MAX;
3810
3811 if (dst_device_num != gomp_get_num_devices ())
3812 {
3813 if (dst_device_num < 0)
3814 return EINVAL;
3815
3816 dst_devicep = resolve_device (dst_device_num);
3817 if (dst_devicep == NULL)
3818 return EINVAL;
3819
3820 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3821 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3822 dst_devicep = NULL;
3823 }
3824 if (src_device_num != num_devices_openmp)
3825 {
3826 if (src_device_num < 0)
3827 return EINVAL;
3828
3829 src_devicep = resolve_device (src_device_num);
3830 if (src_devicep == NULL)
3831 return EINVAL;
3832
3833 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3834 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3835 src_devicep = NULL;
3836 }
3837
3838 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3839 return EINVAL;
3840
3841 if (src_devicep)
3842 gomp_mutex_lock (&src_devicep->lock);
3843 else if (dst_devicep)
3844 gomp_mutex_lock (&dst_devicep->lock);
3845 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3846 volume, dst_offsets, src_offsets,
3847 dst_dimensions, src_dimensions,
3848 dst_devicep, src_devicep);
3849 if (src_devicep)
3850 gomp_mutex_unlock (&src_devicep->lock);
3851 else if (dst_devicep)
3852 gomp_mutex_unlock (&dst_devicep->lock);
3853 return ret;
3854 }
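
/* Usage sketch (illustrative only; all names and extents are assumptions):
   copy a 2x3 sub-block starting at src[1][2] of a 4x5 host matrix into the
   top-left corner of an equally shaped device buffer.  Calling the routine
   with both DST and SRC set to NULL instead reports the maximum supported
   number of dimensions.

     int host_dev = omp_get_initial_device ();
     int dev = omp_get_default_device ();
     double src[4][5] = { { 0 } };
     double *dst = (double *) omp_target_alloc (sizeof (src), dev);
     size_t volume[2] = { 2, 3 };
     size_t src_off[2] = { 1, 2 };
     size_t dst_off[2] = { 0, 0 };
     size_t dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                                       dst_off, src_off, dims, dims,
                                       dev, host_dev);  */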
3855
3856 int
3857 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3858 size_t size, size_t device_offset, int device_num)
3859 {
3860 if (device_num == gomp_get_num_devices ())
3861 return EINVAL;
3862
3863 if (device_num < 0)
3864 return EINVAL;
3865
3866 struct gomp_device_descr *devicep = resolve_device (device_num);
3867 if (devicep == NULL)
3868 return EINVAL;
3869
3870 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3871 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3872 return EINVAL;
3873
3874 gomp_mutex_lock (&devicep->lock);
3875
3876 struct splay_tree_s *mem_map = &devicep->mem_map;
3877 struct splay_tree_key_s cur_node;
3878 int ret = EINVAL;
3879
3880 cur_node.host_start = (uintptr_t) host_ptr;
3881 cur_node.host_end = cur_node.host_start + size;
3882 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3883 if (n)
3884 {
3885 if (n->tgt->tgt_start + n->tgt_offset
3886 == (uintptr_t) device_ptr + device_offset
3887 && n->host_start <= cur_node.host_start
3888 && n->host_end >= cur_node.host_end)
3889 ret = 0;
3890 }
3891 else
3892 {
3893 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3894 tgt->array = gomp_malloc (sizeof (*tgt->array));
3895 tgt->refcount = 1;
3896 tgt->tgt_start = 0;
3897 tgt->tgt_end = 0;
3898 tgt->to_free = NULL;
3899 tgt->prev = NULL;
3900 tgt->list_count = 0;
3901 tgt->device_descr = devicep;
3902 splay_tree_node array = tgt->array;
3903 splay_tree_key k = &array->key;
3904 k->host_start = cur_node.host_start;
3905 k->host_end = cur_node.host_end;
3906 k->tgt = tgt;
3907 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3908 k->refcount = REFCOUNT_INFINITY;
3909 k->dynamic_refcount = 0;
3910 k->aux = NULL;
3911 array->left = NULL;
3912 array->right = NULL;
3913 splay_tree_insert (&devicep->mem_map, array);
3914 ret = 0;
3915 }
3916 gomp_mutex_unlock (&devicep->lock);
3917 return ret;
3918 }
3919
3920 int
3921 omp_target_disassociate_ptr (const void *ptr, int device_num)
3922 {
3923 if (device_num == gomp_get_num_devices ())
3924 return EINVAL;
3925
3926 if (device_num < 0)
3927 return EINVAL;
3928
3929 struct gomp_device_descr *devicep = resolve_device (device_num);
3930 if (devicep == NULL)
3931 return EINVAL;
3932
3933 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3934 return EINVAL;
3935
3936 gomp_mutex_lock (&devicep->lock);
3937
3938 struct splay_tree_s *mem_map = &devicep->mem_map;
3939 struct splay_tree_key_s cur_node;
3940 int ret = EINVAL;
3941
3942 cur_node.host_start = (uintptr_t) ptr;
3943 cur_node.host_end = cur_node.host_start;
3944 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3945 if (n
3946 && n->host_start == cur_node.host_start
3947 && n->refcount == REFCOUNT_INFINITY
3948 && n->tgt->tgt_start == 0
3949 && n->tgt->to_free == NULL
3950 && n->tgt->refcount == 1
3951 && n->tgt->list_count == 0)
3952 {
3953 splay_tree_remove (&devicep->mem_map, n);
3954 gomp_unmap_tgt (n->tgt);
3955 ret = 0;
3956 }
3957
3958 gomp_mutex_unlock (&devicep->lock);
3959 return ret;
3960 }
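
/* Usage sketch (illustrative only; names are assumptions): pair a host
   buffer with device storage so that later map clauses referring to 'buf'
   on that device reuse 'dbuf' instead of allocating, then undo the
   association before freeing the device memory.

     int dev = omp_get_default_device ();
     int host_dev = omp_get_initial_device ();
     int buf[128] = { 0 };
     int *dbuf = (int *) omp_target_alloc (sizeof (buf), dev);
     if (omp_target_associate_ptr (buf, dbuf, sizeof (buf), 0, dev) == 0)
       {
         omp_target_memcpy (dbuf, buf, sizeof (buf), 0, 0, dev, host_dev);
         omp_target_disassociate_ptr (buf, dev);
       }
     omp_target_free (dbuf, dev);  */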
3961
3962 void *
3963 omp_get_mapped_ptr (const void *ptr, int device_num)
3964 {
3965 if (device_num < 0 || device_num > gomp_get_num_devices ())
3966 return NULL;
3967
3968 if (device_num == omp_get_initial_device ())
3969 return (void *) ptr;
3970
3971 struct gomp_device_descr *devicep = resolve_device (device_num);
3972 if (devicep == NULL)
3973 return NULL;
3974
3975 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3976 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3977 return (void *) ptr;
3978
3979 gomp_mutex_lock (&devicep->lock);
3980
3981 struct splay_tree_s *mem_map = &devicep->mem_map;
3982 struct splay_tree_key_s cur_node;
3983 void *ret = NULL;
3984
3985 cur_node.host_start = (uintptr_t) ptr;
3986 cur_node.host_end = cur_node.host_start;
3987 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3988
3989 if (n)
3990 {
3991 uintptr_t offset = cur_node.host_start - n->host_start;
3992 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
3993 }
3994
3995 gomp_mutex_unlock (&devicep->lock);
3996
3997 return ret;
3998 }
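
/* Usage sketch (illustrative only; 'a' and 'dev' are assumptions): query
   the device address backing a mapped host object; the result is NULL if
   the object is not mapped on that device.

     float a[32];
     int dev = omp_get_default_device ();
     #pragma omp target enter data map (alloc: a) device (dev)
     void *dev_a = omp_get_mapped_ptr (a, dev);
     #pragma omp target exit data map (delete: a) device (dev)  */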
3999
4000 int
4001 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4002 {
4003 if (device_num < 0 || device_num > gomp_get_num_devices ())
4004 return false;
4005
4006 if (device_num == gomp_get_num_devices ())
4007 return true;
4008
4009 struct gomp_device_descr *devicep = resolve_device (device_num);
4010 if (devicep == NULL)
4011 return false;
4012
4013 /* TODO: Unified shared memory must be handled when available. */
4014
4015 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4016 }
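
/* Usage sketch (illustrative only; 'x' and 'dev' are assumptions): with
   the current implementation the result is nonzero only when DEV shares
   memory with the host or designates the host itself, since, as the TODO
   above notes, unified shared memory is not yet considered.

     int x = 5;
     int dev = omp_get_default_device ();
     int ok = omp_target_is_accessible (&x, sizeof (x), dev);  */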
4017
4018 int
4019 omp_pause_resource (omp_pause_resource_t kind, int device_num)
4020 {
4021 (void) kind;
4022 if (device_num == gomp_get_num_devices ())
4023 return gomp_pause_host ();
4024 if (device_num < 0 || device_num >= num_devices_openmp)
4025 return -1;
4026 /* Do nothing for target devices for now. */
4027 return 0;
4028 }
4029
4030 int
4031 omp_pause_resource_all (omp_pause_resource_t kind)
4032 {
4033 (void) kind;
4034 if (gomp_pause_host ())
4035 return -1;
4036 /* Do nothing for target devices for now. */
4037 return 0;
4038 }
4039
4040 ialias (omp_pause_resource)
4041 ialias (omp_pause_resource_all)
4042
4043 bool
4044 GOMP_evaluate_target_device (int device_num, const char *kind,
4045 const char *arch, const char *isa)
4046 {
4047 bool result = true;
4048
4049 if (device_num < 0)
4050 device_num = omp_get_default_device ();
4051
4052 if (kind && strcmp (kind, "any") == 0)
4053 kind = NULL;
4054
4055 gomp_debug (1, "%s: device_num = %u, kind=%s, arch=%s, isa=%s",
4056 __FUNCTION__, device_num, kind, arch, isa);
4057
4058 if (omp_get_device_num () == device_num)
4059 result = GOMP_evaluate_current_device (kind, arch, isa);
4060 else
4061 {
4062 if (!omp_is_initial_device ())
4063 /* Accelerators are not expected to know about other devices. */
4064 result = false;
4065 else
4066 {
4067 struct gomp_device_descr *device = resolve_device (device_num);
4068 if (device == NULL)
4069 result = false;
4070 else if (device->evaluate_device_func)
4071 result = device->evaluate_device_func (device_num, kind, arch,
4072 isa);
4073 }
4074 }
4075
4076 gomp_debug (1, " -> %s\n", result ? "true" : "false");
4077 return result;
4078 }
4079
4080 #ifdef PLUGIN_SUPPORT
4081
4082 /* This function tries to load a plugin for DEVICE.  The name of the
4083 plugin is passed in PLUGIN_NAME.

4084 The handles of the found functions are stored in the corresponding fields
4085 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4086
4087 static bool
4088 gomp_load_plugin_for_device (struct gomp_device_descr *device,
4089 const char *plugin_name)
4090 {
4091 const char *err = NULL, *last_missing = NULL;
4092
4093 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
4094 if (!plugin_handle)
4095 #if OFFLOAD_DEFAULTED
4096 return 0;
4097 #else
4098 goto dl_fail;
4099 #endif
4100
4101 /* Check if all required functions are available in the plugin and store
4102 their handles.  None of the symbols can legitimately be NULL,
4103 so we don't need to check dlerror all the time. */
4104 #define DLSYM(f) \
4105 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4106 goto dl_fail
4107 /* Similar, but missing functions are not an error. Return false if
4108 failed, true otherwise. */
4109 #define DLSYM_OPT(f, n) \
4110 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4111 || (last_missing = #n, 0))
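
/* For example, DLSYM (alloc) looks up the plugin symbol "GOMP_OFFLOAD_alloc",
   stores it in device->alloc_func and jumps to dl_fail if it is missing,
   whereas DLSYM_OPT (usm_alloc, usm_alloc) evaluates to false and records
   the missing name in last_missing instead.  */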
4112
4113 DLSYM (version);
4114 if (device->version_func () != GOMP_VERSION)
4115 {
4116 err = "plugin version mismatch";
4117 goto fail;
4118 }
4119
4120 DLSYM (get_name);
4121 DLSYM (get_caps);
4122 DLSYM (get_type);
4123 DLSYM (get_num_devices);
4124 DLSYM (init_device);
4125 DLSYM (fini_device);
4126 DLSYM (supported_features);
4127 DLSYM (load_image);
4128 DLSYM (unload_image);
4129 DLSYM (alloc);
4130 DLSYM (free);
4131 DLSYM_OPT (usm_alloc, usm_alloc);
4132 DLSYM_OPT (usm_free, usm_free);
4133 DLSYM_OPT (is_usm_ptr, is_usm_ptr);
4134 DLSYM (dev2host);
4135 DLSYM (host2dev);
4136 DLSYM (evaluate_device);
4137 device->capabilities = device->get_caps_func ();
4138 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4139 {
4140 DLSYM (run);
4141 DLSYM_OPT (async_run, async_run);
4142 DLSYM_OPT (can_run, can_run);
4143 DLSYM (dev2dev);
4144 }
4145 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4146 {
4147 if (!DLSYM_OPT (openacc.exec, openacc_exec)
4148 || !DLSYM_OPT (openacc.create_thread_data,
4149 openacc_create_thread_data)
4150 || !DLSYM_OPT (openacc.destroy_thread_data,
4151 openacc_destroy_thread_data)
4152 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
4153 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
4154 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
4155 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
4156 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
4157 || !DLSYM_OPT (openacc.async.queue_callback,
4158 openacc_async_queue_callback)
4159 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
4160 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
4161 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
4162 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
4163 {
4164 /* Require all the OpenACC handlers if we have
4165 GOMP_OFFLOAD_CAP_OPENACC_200. */
4166 err = "plugin missing OpenACC handler function";
4167 goto fail;
4168 }
4169
4170 unsigned cuda = 0;
4171 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
4172 openacc_cuda_get_current_device);
4173 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
4174 openacc_cuda_get_current_context);
4175 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
4176 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
4177 if (cuda && cuda != 4)
4178 {
4179 /* Make sure all the CUDA functions are there if any of them are. */
4180 err = "plugin missing OpenACC CUDA handler function";
4181 goto fail;
4182 }
4183 }
4184 #undef DLSYM
4185 #undef DLSYM_OPT
4186
4187 return 1;
4188
4189 dl_fail:
4190 err = dlerror ();
4191 fail:
4192 gomp_error ("while loading %s: %s", plugin_name, err);
4193 if (last_missing)
4194 gomp_error ("missing function was %s", last_missing);
4195 if (plugin_handle)
4196 dlclose (plugin_handle);
4197
4198 return 0;
4199 }
4200
4201 /* This function finalizes all initialized devices. */
4202
4203 static void
4204 gomp_target_fini (void)
4205 {
4206 int i;
4207 for (i = 0; i < num_devices; i++)
4208 {
4209 bool ret = true;
4210 struct gomp_device_descr *devicep = &devices[i];
4211 gomp_mutex_lock (&devicep->lock);
4212 if (devicep->state == GOMP_DEVICE_INITIALIZED)
4213 ret = gomp_fini_device (devicep);
4214 gomp_mutex_unlock (&devicep->lock);
4215 if (!ret)
4216 gomp_fatal ("device finalization failed");
4217 }
4218 }
4219
4220 /* This function initializes the runtime for offloading.
4221 It parses the list of offload plugins, and tries to load these.
4222 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4223 will be set, and the array DEVICES initialized, containing descriptors for
4224 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
4225 by the others. */
4226
4227 static void
4228 gomp_target_init (void)
4229 {
4230 const char *prefix = "libgomp-plugin-";
4231 const char *suffix = SONAME_SUFFIX (1);
4232 const char *cur, *next;
4233 char *plugin_name;
4234 int i, new_num_devs;
4235 int num_devs = 0, num_devs_openmp;
4236 struct gomp_device_descr *devs = NULL;
4237
4238 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
4239 return;
4240
4241 gomp_requires_mask = 0;
4242 const unsigned int *mask_ptr = __requires_mask_table;
4243 bool error_emitted = false;
4244 while (mask_ptr != __requires_mask_table_end)
4245 {
4246 if (gomp_requires_mask == 0)
4247 gomp_requires_mask = *mask_ptr;
4248 else if (gomp_requires_mask != *mask_ptr)
4249 {
4250 if (!error_emitted)
4251 {
4252 gomp_error ("requires-directive clause inconsistency between "
4253 "compilation units detected");
4254 error_emitted = true;
4255 }
4256 /* This is inconsistent, but still merge to query for all features
4257 later. */
4258 gomp_requires_mask |= *mask_ptr;
4259 }
4260 mask_ptr++;
4261 }
4262
4263 cur = OFFLOAD_PLUGINS;
4264 if (*cur)
4265 do
4266 {
4267 struct gomp_device_descr current_device;
4268 size_t prefix_len, suffix_len, cur_len;
4269
4270 next = strchr (cur, ',');
4271
4272 prefix_len = strlen (prefix);
4273 cur_len = next ? next - cur : strlen (cur);
4274 suffix_len = strlen (suffix);
4275
4276 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
4277 if (!plugin_name)
4278 {
4279 num_devs = 0;
4280 break;
4281 }
4282
4283 memcpy (plugin_name, prefix, prefix_len);
4284 memcpy (plugin_name + prefix_len, cur, cur_len);
4285 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
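
/* For example, with an OFFLOAD_PLUGINS entry of "nvptx" (a configure-time
   value assumed here for illustration) and the ELF SONAME_SUFFIX, this
   builds the plugin name "libgomp-plugin-nvptx.so.1".  */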
4286
4287 if (gomp_load_plugin_for_device (&current_device, plugin_name))
4288 {
4289 new_num_devs = current_device.get_num_devices_func ();
4290 if (new_num_devs >= 1)
4291 {
4292 /* Augment DEVICES and NUM_DEVICES. */
4293
4294 devs = realloc (devs, (num_devs + new_num_devs)
4295 * sizeof (struct gomp_device_descr));
4296 if (!devs)
4297 {
4298 num_devs = 0;
4299 free (plugin_name);
4300 break;
4301 }
4302
4303 current_device.name = current_device.get_name_func ();
4304 /* current_device.capabilities has already been set. */
4305 current_device.type = current_device.get_type_func ();
4306 current_device.mem_map.root = NULL;
4307 current_device.state = GOMP_DEVICE_UNINITIALIZED;
4308 for (i = 0; i < new_num_devs; i++)
4309 {
4310 current_device.target_id = i;
4311 devs[num_devs] = current_device;
4312 gomp_mutex_init (&devs[num_devs].lock);
4313 num_devs++;
4314 }
4315 }
4316 }
4317
4318 free (plugin_name);
4319 cur = next + 1;
4320 }
4321 while (next);
4322
4323 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
4324 NUM_DEVICES_OPENMP. */
4325 struct gomp_device_descr *devs_s
4326 = malloc (num_devs * sizeof (struct gomp_device_descr));
4327 if (!devs_s)
4328 {
4329 num_devs = 0;
4330 free (devs);
4331 devs = NULL;
4332 }
4333 num_devs_openmp = 0;
4334 for (i = 0; i < num_devs; i++)
4335 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4336 devs_s[num_devs_openmp++] = devs[i];
4337 int num_devs_after_openmp = num_devs_openmp;
4338 for (i = 0; i < num_devs; i++)
4339 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4340 devs_s[num_devs_after_openmp++] = devs[i];
4341 free (devs);
4342 devs = devs_s;
4343
4344 for (i = 0; i < num_devs; i++)
4345 {
4346 /* The 'devices' array can be moved (by the realloc call) until we have
4347 found all the plugins, so registering with the OpenACC runtime (which
4348 takes a copy of the pointer argument) must be delayed until now. */
4349 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4350 goacc_register (&devs[i]);
4351 }
4352
4353 num_devices = num_devs;
4354 num_devices_openmp = num_devs_openmp;
4355 devices = devs;
4356 if (atexit (gomp_target_fini) != 0)
4357 gomp_fatal ("atexit failed");
4358 }
4359
4360 #else /* PLUGIN_SUPPORT */
4361 /* If dlfcn.h is unavailable, we always fall back to host execution.
4362 GOMP_target* routines are just stubs for this case. */
4363 static void
4364 gomp_target_init (void)
4365 {
4366 }
4367 #endif /* PLUGIN_SUPPORT */