1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "oacc-plugin.h"
32 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
45 #include "plugin-suffix.h"
48 static void gomp_target_init (void);
50 /* The whole initialization code for offloading plugins is only run once. */
51 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
53 /* Mutex for offload image registration. */
54 static gomp_mutex_t register_lock
;
56 /* This structure describes an offload image.
57 It contains type of the target device, pointer to host table descriptor, and
58 pointer to target data. */
59 struct offload_image_descr
{
61 enum offload_target_type type
;
62 const void *host_table
;
63 const void *target_data
;
66 /* Array of descriptors of offload images. */
67 static struct offload_image_descr
*offload_images
;
69 /* Total number of offload images. */
70 static int num_offload_images
;
72 /* Array of descriptors for all available devices. */
73 static struct gomp_device_descr
*devices
;
75 /* Total number of available devices. */
76 static int num_devices
;
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79 static int num_devices_openmp
;
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
84 gomp_realloc_unlock (void *old
, size_t size
)
86 void *ret
= realloc (old
, size
);
89 gomp_mutex_unlock (®ister_lock
);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
96 gomp_init_targets_once (void)
98 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
102 gomp_get_num_devices (void)
104 gomp_init_targets_once ();
105 return num_devices_openmp
;
108 static struct gomp_device_descr
*
109 resolve_device (int device_id
)
111 if (device_id
== GOMP_DEVICE_ICV
)
113 struct gomp_task_icv
*icv
= gomp_icv (false);
114 device_id
= icv
->default_device_var
;
117 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
120 gomp_mutex_lock (&devices
[device_id
].lock
);
121 if (!devices
[device_id
].is_initialized
)
122 gomp_init_device (&devices
[device_id
]);
123 gomp_mutex_unlock (&devices
[device_id
].lock
);
125 return &devices
[device_id
];
129 static inline splay_tree_key
130 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
132 if (key
->host_start
!= key
->host_end
)
133 return splay_tree_lookup (mem_map
, key
);
136 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
141 n
= splay_tree_lookup (mem_map
, key
);
145 return splay_tree_lookup (mem_map
, key
);
148 static inline splay_tree_key
149 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
151 if (key
->host_start
!= key
->host_end
)
152 return splay_tree_lookup (mem_map
, key
);
155 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
160 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
161 gomp_map_0len_lookup found oldn for newn.
162 Helper function of gomp_map_vars. */
165 gomp_map_vars_existing (struct gomp_device_descr
*devicep
, splay_tree_key oldn
,
166 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
170 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
171 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
172 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
173 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
175 if ((kind
& GOMP_MAP_FLAG_FORCE
)
176 || oldn
->host_start
> newn
->host_start
177 || oldn
->host_end
< newn
->host_end
)
179 gomp_mutex_unlock (&devicep
->lock
);
180 gomp_fatal ("Trying to map into device [%p..%p) object when "
181 "[%p..%p) is already mapped",
182 (void *) newn
->host_start
, (void *) newn
->host_end
,
183 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
186 if (GOMP_MAP_ALWAYS_TO_P (kind
))
187 devicep
->host2dev_func (devicep
->target_id
,
188 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
189 + newn
->host_start
- oldn
->host_start
),
190 (void *) newn
->host_start
,
191 newn
->host_end
- newn
->host_start
);
192 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
197 get_kind (bool short_mapkind
, void *kinds
, int idx
)
199 return short_mapkind
? ((unsigned short *) kinds
)[idx
]
200 : ((unsigned char *) kinds
)[idx
];
204 gomp_map_pointer (struct target_mem_desc
*tgt
, uintptr_t host_ptr
,
205 uintptr_t target_offset
, uintptr_t bias
)
207 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
208 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
209 struct splay_tree_key_s cur_node
;
211 cur_node
.host_start
= host_ptr
;
212 if (cur_node
.host_start
== (uintptr_t) NULL
)
214 cur_node
.tgt_offset
= (uintptr_t) NULL
;
215 /* FIXME: see comment about coalescing host/dev transfers below. */
216 devicep
->host2dev_func (devicep
->target_id
,
217 (void *) (tgt
->tgt_start
+ target_offset
),
218 (void *) &cur_node
.tgt_offset
,
222 /* Add bias to the pointer value. */
223 cur_node
.host_start
+= bias
;
224 cur_node
.host_end
= cur_node
.host_start
;
225 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
228 gomp_mutex_unlock (&devicep
->lock
);
229 gomp_fatal ("Pointer target of array section wasn't mapped");
231 cur_node
.host_start
-= n
->host_start
;
233 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
234 /* At this point tgt_offset is target address of the
235 array section. Now subtract bias to get what we want
236 to initialize the pointer with. */
237 cur_node
.tgt_offset
-= bias
;
238 /* FIXME: see comment about coalescing host/dev transfers below. */
239 devicep
->host2dev_func (devicep
->target_id
,
240 (void *) (tgt
->tgt_start
+ target_offset
),
241 (void *) &cur_node
.tgt_offset
,
246 gomp_map_fields_existing (struct target_mem_desc
*tgt
, splay_tree_key n
,
247 size_t first
, size_t i
, void **hostaddrs
,
248 size_t *sizes
, void *kinds
)
250 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
251 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
252 struct splay_tree_key_s cur_node
;
254 const bool short_mapkind
= true;
255 const int typemask
= short_mapkind
? 0xff : 0x7;
257 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
258 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
259 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
260 kind
= get_kind (short_mapkind
, kinds
, i
);
263 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
265 gomp_map_vars_existing (devicep
, n2
, &cur_node
,
266 &tgt
->list
[i
], kind
& typemask
);
271 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
273 cur_node
.host_start
--;
274 n2
= splay_tree_lookup (mem_map
, &cur_node
);
275 cur_node
.host_start
++;
278 && n2
->host_start
- n
->host_start
279 == n2
->tgt_offset
- n
->tgt_offset
)
281 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
287 n2
= splay_tree_lookup (mem_map
, &cur_node
);
291 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
293 gomp_map_vars_existing (devicep
, n2
, &cur_node
, &tgt
->list
[i
],
298 gomp_mutex_unlock (&devicep
->lock
);
299 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
300 "other mapped elements from the same structure weren't mapped "
301 "together with it", (void *) cur_node
.host_start
,
302 (void *) cur_node
.host_end
);
305 static inline uintptr_t
306 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
308 if (tgt
->list
[i
].key
!= NULL
)
309 return tgt
->list
[i
].key
->tgt
->tgt_start
310 + tgt
->list
[i
].key
->tgt_offset
311 + tgt
->list
[i
].offset
;
312 if (tgt
->list
[i
].offset
== ~(uintptr_t) 0)
313 return (uintptr_t) hostaddrs
[i
];
314 if (tgt
->list
[i
].offset
== ~(uintptr_t) 1)
316 if (tgt
->list
[i
].offset
== ~(uintptr_t) 2)
317 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
318 + tgt
->list
[i
+ 1].key
->tgt_offset
319 + tgt
->list
[i
+ 1].offset
320 + (uintptr_t) hostaddrs
[i
]
321 - (uintptr_t) hostaddrs
[i
+ 1];
322 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
325 attribute_hidden
struct target_mem_desc
*
326 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
327 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
328 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
330 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
331 bool has_firstprivate
= false;
332 const int rshift
= short_mapkind
? 8 : 3;
333 const int typemask
= short_mapkind
? 0xff : 0x7;
334 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
335 struct splay_tree_key_s cur_node
;
336 struct target_mem_desc
*tgt
337 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
338 tgt
->list_count
= mapnum
;
339 tgt
->refcount
= pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
? 0 : 1;
340 tgt
->device_descr
= devicep
;
349 tgt_align
= sizeof (void *);
351 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
353 size_t align
= 4 * sizeof (void *);
355 tgt_size
= mapnum
* sizeof (void *);
358 gomp_mutex_lock (&devicep
->lock
);
360 for (i
= 0; i
< mapnum
; i
++)
362 int kind
= get_kind (short_mapkind
, kinds
, i
);
363 if (hostaddrs
[i
] == NULL
364 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
366 tgt
->list
[i
].key
= NULL
;
367 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
370 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
372 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
373 cur_node
.host_end
= cur_node
.host_start
;
374 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
377 gomp_mutex_unlock (&devicep
->lock
);
378 gomp_fatal ("use_device_ptr pointer wasn't mapped");
380 cur_node
.host_start
-= n
->host_start
;
382 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
383 + cur_node
.host_start
);
384 tgt
->list
[i
].key
= NULL
;
385 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
388 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
390 size_t first
= i
+ 1;
391 size_t last
= i
+ sizes
[i
];
392 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
393 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
395 tgt
->list
[i
].key
= NULL
;
396 tgt
->list
[i
].offset
= ~(uintptr_t) 2;
397 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
400 size_t align
= (size_t) 1 << (kind
>> rshift
);
401 if (tgt_align
< align
)
403 tgt_size
-= (uintptr_t) hostaddrs
[first
]
404 - (uintptr_t) hostaddrs
[i
];
405 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
406 tgt_size
+= cur_node
.host_end
- (uintptr_t) hostaddrs
[i
];
407 not_found_cnt
+= last
- i
;
408 for (i
= first
; i
<= last
; i
++)
409 tgt
->list
[i
].key
= NULL
;
413 for (i
= first
; i
<= last
; i
++)
414 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
419 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
421 tgt
->list
[i
].key
= NULL
;
422 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
423 has_firstprivate
= true;
426 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
427 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
428 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
430 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
431 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
433 tgt
->list
[i
].key
= NULL
;
435 size_t align
= (size_t) 1 << (kind
>> rshift
);
436 if (tgt_align
< align
)
438 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
439 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
440 has_firstprivate
= true;
444 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
446 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
449 tgt
->list
[i
].key
= NULL
;
450 tgt
->list
[i
].offset
= ~(uintptr_t) 1;
455 n
= splay_tree_lookup (mem_map
, &cur_node
);
457 gomp_map_vars_existing (devicep
, n
, &cur_node
, &tgt
->list
[i
],
461 tgt
->list
[i
].key
= NULL
;
463 size_t align
= (size_t) 1 << (kind
>> rshift
);
465 if (tgt_align
< align
)
467 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
468 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
469 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
472 for (j
= i
+ 1; j
< mapnum
; j
++)
473 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
476 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
477 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
478 > cur_node
.host_end
))
482 tgt
->list
[j
].key
= NULL
;
493 gomp_mutex_unlock (&devicep
->lock
);
494 gomp_fatal ("unexpected aggregation");
496 tgt
->to_free
= devaddrs
[0];
497 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
498 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
500 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
502 /* Allocate tgt_align aligned tgt_size block of memory. */
503 /* FIXME: Perhaps change interface to allocate properly aligned
505 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
506 tgt_size
+ tgt_align
- 1);
507 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
508 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
509 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
519 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
520 tgt_size
= mapnum
* sizeof (void *);
523 if (not_found_cnt
|| has_firstprivate
)
526 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
527 splay_tree_node array
= tgt
->array
;
528 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
529 uintptr_t field_tgt_base
= 0;
531 for (i
= 0; i
< mapnum
; i
++)
532 if (tgt
->list
[i
].key
== NULL
)
534 int kind
= get_kind (short_mapkind
, kinds
, i
);
535 if (hostaddrs
[i
] == NULL
)
537 switch (kind
& typemask
)
539 size_t align
, len
, first
, last
;
541 case GOMP_MAP_FIRSTPRIVATE
:
542 align
= (size_t) 1 << (kind
>> rshift
);
543 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
544 tgt
->list
[i
].offset
= tgt_size
;
546 devicep
->host2dev_func (devicep
->target_id
,
547 (void *) (tgt
->tgt_start
+ tgt_size
),
548 (void *) hostaddrs
[i
], len
);
551 case GOMP_MAP_FIRSTPRIVATE_INT
:
552 case GOMP_MAP_USE_DEVICE_PTR
:
553 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
555 case GOMP_MAP_STRUCT
:
558 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
559 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
561 if (tgt
->list
[first
].key
!= NULL
)
563 n
= splay_tree_lookup (mem_map
, &cur_node
);
566 size_t align
= (size_t) 1 << (kind
>> rshift
);
567 tgt_size
-= (uintptr_t) hostaddrs
[first
]
568 - (uintptr_t) hostaddrs
[i
];
569 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
570 tgt_size
+= (uintptr_t) hostaddrs
[first
]
571 - (uintptr_t) hostaddrs
[i
];
572 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
573 field_tgt_offset
= tgt_size
;
574 field_tgt_clear
= last
;
575 tgt_size
+= cur_node
.host_end
576 - (uintptr_t) hostaddrs
[first
];
579 for (i
= first
; i
<= last
; i
++)
580 gomp_map_fields_existing (tgt
, n
, first
, i
, hostaddrs
,
584 case GOMP_MAP_ALWAYS_POINTER
:
585 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
586 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
587 n
= splay_tree_lookup (mem_map
, &cur_node
);
589 || n
->host_start
> cur_node
.host_start
590 || n
->host_end
< cur_node
.host_end
)
592 gomp_mutex_unlock (&devicep
->lock
);
593 gomp_fatal ("always pointer not mapped");
595 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
596 != GOMP_MAP_ALWAYS_POINTER
)
597 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
598 if (cur_node
.tgt_offset
)
599 cur_node
.tgt_offset
-= sizes
[i
];
600 devicep
->host2dev_func (devicep
->target_id
,
601 (void *) (n
->tgt
->tgt_start
603 + cur_node
.host_start
605 (void *) &cur_node
.tgt_offset
,
607 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
608 + cur_node
.host_start
- n
->host_start
;
613 splay_tree_key k
= &array
->key
;
614 k
->host_start
= (uintptr_t) hostaddrs
[i
];
615 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
616 k
->host_end
= k
->host_start
+ sizes
[i
];
618 k
->host_end
= k
->host_start
+ sizeof (void *);
619 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
621 gomp_map_vars_existing (devicep
, n
, k
, &tgt
->list
[i
],
625 size_t align
= (size_t) 1 << (kind
>> rshift
);
626 tgt
->list
[i
].key
= k
;
628 if (field_tgt_clear
!= ~(size_t) 0)
630 k
->tgt_offset
= k
->host_start
- field_tgt_base
632 if (i
== field_tgt_clear
)
633 field_tgt_clear
= ~(size_t) 0;
637 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
638 k
->tgt_offset
= tgt_size
;
639 tgt_size
+= k
->host_end
- k
->host_start
;
641 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
642 tgt
->list
[i
].always_copy_from
643 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
644 tgt
->list
[i
].offset
= 0;
645 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
647 k
->async_refcount
= 0;
651 splay_tree_insert (mem_map
, array
);
652 switch (kind
& typemask
)
656 case GOMP_MAP_FORCE_ALLOC
:
657 case GOMP_MAP_FORCE_FROM
:
658 case GOMP_MAP_ALWAYS_FROM
:
661 case GOMP_MAP_TOFROM
:
662 case GOMP_MAP_FORCE_TO
:
663 case GOMP_MAP_FORCE_TOFROM
:
664 case GOMP_MAP_ALWAYS_TO
:
665 case GOMP_MAP_ALWAYS_TOFROM
:
666 /* FIXME: Perhaps add some smarts, like if copying
667 several adjacent fields from host to target, use some
668 host buffer to avoid sending each var individually. */
669 devicep
->host2dev_func (devicep
->target_id
,
670 (void *) (tgt
->tgt_start
672 (void *) k
->host_start
,
673 k
->host_end
- k
->host_start
);
675 case GOMP_MAP_POINTER
:
676 gomp_map_pointer (tgt
, (uintptr_t) *(void **) k
->host_start
,
677 k
->tgt_offset
, sizes
[i
]);
679 case GOMP_MAP_TO_PSET
:
680 /* FIXME: see above FIXME comment. */
681 devicep
->host2dev_func (devicep
->target_id
,
682 (void *) (tgt
->tgt_start
684 (void *) k
->host_start
,
685 k
->host_end
- k
->host_start
);
687 for (j
= i
+ 1; j
< mapnum
; j
++)
688 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
692 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
693 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
698 tgt
->list
[j
].key
= k
;
699 tgt
->list
[j
].copy_from
= false;
700 tgt
->list
[j
].always_copy_from
= false;
701 if (k
->refcount
!= REFCOUNT_INFINITY
)
703 gomp_map_pointer (tgt
,
704 (uintptr_t) *(void **) hostaddrs
[j
],
706 + ((uintptr_t) hostaddrs
[j
]
712 case GOMP_MAP_FORCE_PRESENT
:
714 /* We already looked up the memory region above and it
716 size_t size
= k
->host_end
- k
->host_start
;
717 gomp_mutex_unlock (&devicep
->lock
);
718 #ifdef HAVE_INTTYPES_H
719 gomp_fatal ("present clause: !acc_is_present (%p, "
720 "%"PRIu64
" (0x%"PRIx64
"))",
721 (void *) k
->host_start
,
722 (uint64_t) size
, (uint64_t) size
);
724 gomp_fatal ("present clause: !acc_is_present (%p, "
725 "%lu (0x%lx))", (void *) k
->host_start
,
726 (unsigned long) size
, (unsigned long) size
);
730 case GOMP_MAP_FORCE_DEVICEPTR
:
731 assert (k
->host_end
- k
->host_start
== sizeof (void *));
733 devicep
->host2dev_func (devicep
->target_id
,
734 (void *) (tgt
->tgt_start
736 (void *) k
->host_start
,
740 gomp_mutex_unlock (&devicep
->lock
);
741 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
749 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
751 for (i
= 0; i
< mapnum
; i
++)
753 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
754 /* FIXME: see above FIXME comment. */
755 devicep
->host2dev_func (devicep
->target_id
,
756 (void *) (tgt
->tgt_start
757 + i
* sizeof (void *)),
758 (void *) &cur_node
.tgt_offset
,
763 /* If the variable from "omp target enter data" map-list was already mapped,
764 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
766 if (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
&& tgt
->refcount
== 0)
772 gomp_mutex_unlock (&devicep
->lock
);
777 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
779 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
781 tgt
->device_descr
->free_func (tgt
->device_descr
->target_id
, tgt
->to_free
);
787 /* Decrease the refcount for a set of mapped variables, and queue asynchronous
788 copies from the device back to the host after any work that has been issued.
789 Because the regions are still "live", increment an asynchronous reference
790 count to indicate that they should not be unmapped from host-side data
791 structures until the asynchronous copy has completed. */
793 attribute_hidden
void
794 gomp_copy_from_async (struct target_mem_desc
*tgt
)
796 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
799 gomp_mutex_lock (&devicep
->lock
);
801 for (i
= 0; i
< tgt
->list_count
; i
++)
802 if (tgt
->list
[i
].key
== NULL
)
804 else if (tgt
->list
[i
].key
->refcount
> 1)
806 tgt
->list
[i
].key
->refcount
--;
807 tgt
->list
[i
].key
->async_refcount
++;
811 splay_tree_key k
= tgt
->list
[i
].key
;
812 if (tgt
->list
[i
].copy_from
)
813 devicep
->dev2host_func (devicep
->target_id
, (void *) k
->host_start
,
814 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
),
815 k
->host_end
- k
->host_start
);
818 gomp_mutex_unlock (&devicep
->lock
);
821 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
822 variables back from device to host: if it is false, it is assumed that this
823 has been done already, i.e. by gomp_copy_from_async above. */
825 attribute_hidden
void
826 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
828 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
830 if (tgt
->list_count
== 0)
836 gomp_mutex_lock (&devicep
->lock
);
839 for (i
= 0; i
< tgt
->list_count
; i
++)
841 splay_tree_key k
= tgt
->list
[i
].key
;
845 bool do_unmap
= false;
846 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
848 else if (k
->refcount
== 1)
850 if (k
->async_refcount
> 0)
859 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
860 || tgt
->list
[i
].always_copy_from
)
861 devicep
->dev2host_func (devicep
->target_id
,
862 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
863 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
864 + tgt
->list
[i
].offset
),
865 tgt
->list
[i
].length
);
868 splay_tree_remove (&devicep
->mem_map
, k
);
869 if (k
->tgt
->refcount
> 1)
872 gomp_unmap_tgt (k
->tgt
);
876 if (tgt
->refcount
> 1)
879 gomp_unmap_tgt (tgt
);
881 gomp_mutex_unlock (&devicep
->lock
);
885 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
886 size_t *sizes
, void *kinds
, bool short_mapkind
)
889 struct splay_tree_key_s cur_node
;
890 const int typemask
= short_mapkind
? 0xff : 0x7;
898 gomp_mutex_lock (&devicep
->lock
);
899 for (i
= 0; i
< mapnum
; i
++)
902 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
903 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
904 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
907 int kind
= get_kind (short_mapkind
, kinds
, i
);
908 if (n
->host_start
> cur_node
.host_start
909 || n
->host_end
< cur_node
.host_end
)
911 gomp_mutex_unlock (&devicep
->lock
);
912 gomp_fatal ("Trying to update [%p..%p) object when "
913 "only [%p..%p) is mapped",
914 (void *) cur_node
.host_start
,
915 (void *) cur_node
.host_end
,
916 (void *) n
->host_start
,
917 (void *) n
->host_end
);
919 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
920 devicep
->host2dev_func (devicep
->target_id
,
921 (void *) (n
->tgt
->tgt_start
923 + cur_node
.host_start
925 (void *) cur_node
.host_start
,
926 cur_node
.host_end
- cur_node
.host_start
);
927 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
928 devicep
->dev2host_func (devicep
->target_id
,
929 (void *) cur_node
.host_start
,
930 (void *) (n
->tgt
->tgt_start
932 + cur_node
.host_start
934 cur_node
.host_end
- cur_node
.host_start
);
937 gomp_mutex_unlock (&devicep
->lock
);
940 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
941 And insert to splay tree the mapping between addresses from HOST_TABLE and
942 from loaded target image. We rely on the host and device compiler
943 emitting variable and functions in the same order. */
946 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
947 const void *host_table
, const void *target_data
,
948 bool is_register_lock
)
950 void **host_func_table
= ((void ***) host_table
)[0];
951 void **host_funcs_end
= ((void ***) host_table
)[1];
952 void **host_var_table
= ((void ***) host_table
)[2];
953 void **host_vars_end
= ((void ***) host_table
)[3];
955 /* The func table contains only addresses, the var table contains addresses
956 and corresponding sizes. */
957 int num_funcs
= host_funcs_end
- host_func_table
;
958 int num_vars
= (host_vars_end
- host_var_table
) / 2;
960 /* Load image to device and get target addresses for the image. */
961 struct addr_pair
*target_table
= NULL
;
962 int i
, num_target_entries
;
965 = devicep
->load_image_func (devicep
->target_id
, version
,
966 target_data
, &target_table
);
968 if (num_target_entries
!= num_funcs
+ num_vars
)
970 gomp_mutex_unlock (&devicep
->lock
);
971 if (is_register_lock
)
972 gomp_mutex_unlock (®ister_lock
);
973 gomp_fatal ("Cannot map target functions or variables"
974 " (expected %u, have %u)", num_funcs
+ num_vars
,
978 /* Insert host-target address mapping into splay tree. */
979 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
980 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
981 tgt
->refcount
= REFCOUNT_INFINITY
;
987 tgt
->device_descr
= devicep
;
988 splay_tree_node array
= tgt
->array
;
990 for (i
= 0; i
< num_funcs
; i
++)
992 splay_tree_key k
= &array
->key
;
993 k
->host_start
= (uintptr_t) host_func_table
[i
];
994 k
->host_end
= k
->host_start
+ 1;
996 k
->tgt_offset
= target_table
[i
].start
;
997 k
->refcount
= REFCOUNT_INFINITY
;
998 k
->async_refcount
= 0;
1000 array
->right
= NULL
;
1001 splay_tree_insert (&devicep
->mem_map
, array
);
1005 for (i
= 0; i
< num_vars
; i
++)
1007 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1008 if (target_var
->end
- target_var
->start
1009 != (uintptr_t) host_var_table
[i
* 2 + 1])
1011 gomp_mutex_unlock (&devicep
->lock
);
1012 if (is_register_lock
)
1013 gomp_mutex_unlock (®ister_lock
);
1014 gomp_fatal ("Can't map target variables (size mismatch)");
1017 splay_tree_key k
= &array
->key
;
1018 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1019 k
->host_end
= k
->host_start
+ (uintptr_t) host_var_table
[i
* 2 + 1];
1021 k
->tgt_offset
= target_var
->start
;
1022 k
->refcount
= REFCOUNT_INFINITY
;
1023 k
->async_refcount
= 0;
1025 array
->right
= NULL
;
1026 splay_tree_insert (&devicep
->mem_map
, array
);
1030 free (target_table
);
1033 /* Unload the mappings described by target_data from device DEVICE_P.
1034 The device must be locked. */
1037 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1039 const void *host_table
, const void *target_data
)
1041 void **host_func_table
= ((void ***) host_table
)[0];
1042 void **host_funcs_end
= ((void ***) host_table
)[1];
1043 void **host_var_table
= ((void ***) host_table
)[2];
1044 void **host_vars_end
= ((void ***) host_table
)[3];
1046 /* The func table contains only addresses, the var table contains addresses
1047 and corresponding sizes. */
1048 int num_funcs
= host_funcs_end
- host_func_table
;
1049 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1052 struct splay_tree_key_s k
;
1053 splay_tree_key node
= NULL
;
1055 /* Find mapping at start of node array */
1056 if (num_funcs
|| num_vars
)
1058 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1059 : (uintptr_t) host_var_table
[0]);
1060 k
.host_end
= k
.host_start
+ 1;
1061 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1064 devicep
->unload_image_func (devicep
->target_id
, version
, target_data
);
1066 /* Remove mappings from splay tree. */
1067 for (j
= 0; j
< num_funcs
; j
++)
1069 k
.host_start
= (uintptr_t) host_func_table
[j
];
1070 k
.host_end
= k
.host_start
+ 1;
1071 splay_tree_remove (&devicep
->mem_map
, &k
);
1074 for (j
= 0; j
< num_vars
; j
++)
1076 k
.host_start
= (uintptr_t) host_var_table
[j
* 2];
1077 k
.host_end
= k
.host_start
+ (uintptr_t) host_var_table
[j
* 2 + 1];
1078 splay_tree_remove (&devicep
->mem_map
, &k
);
1088 /* This function should be called from every offload image while loading.
1089 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1090 the target, and TARGET_DATA needed by target plugin. */
1093 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1094 int target_type
, const void *target_data
)
1098 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1099 gomp_fatal ("Library too old for offload (version %u < %u)",
1100 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1102 gomp_mutex_lock (®ister_lock
);
1104 /* Load image to all initialized devices. */
1105 for (i
= 0; i
< num_devices
; i
++)
1107 struct gomp_device_descr
*devicep
= &devices
[i
];
1108 gomp_mutex_lock (&devicep
->lock
);
1109 if (devicep
->type
== target_type
&& devicep
->is_initialized
)
1110 gomp_load_image_to_device (devicep
, version
,
1111 host_table
, target_data
, true);
1112 gomp_mutex_unlock (&devicep
->lock
);
1115 /* Insert image to array of pending images. */
1117 = gomp_realloc_unlock (offload_images
,
1118 (num_offload_images
+ 1)
1119 * sizeof (struct offload_image_descr
));
1120 offload_images
[num_offload_images
].version
= version
;
1121 offload_images
[num_offload_images
].type
= target_type
;
1122 offload_images
[num_offload_images
].host_table
= host_table
;
1123 offload_images
[num_offload_images
].target_data
= target_data
;
1125 num_offload_images
++;
1126 gomp_mutex_unlock (®ister_lock
);
1130 GOMP_offload_register (const void *host_table
, int target_type
,
1131 const void *target_data
)
1133 GOMP_offload_register_ver (0, host_table
, target_type
, target_data
);
1136 /* This function should be called from every offload image while unloading.
1137 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1138 the target, and TARGET_DATA needed by target plugin. */
1141 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1142 int target_type
, const void *target_data
)
1146 gomp_mutex_lock (®ister_lock
);
1148 /* Unload image from all initialized devices. */
1149 for (i
= 0; i
< num_devices
; i
++)
1151 struct gomp_device_descr
*devicep
= &devices
[i
];
1152 gomp_mutex_lock (&devicep
->lock
);
1153 if (devicep
->type
== target_type
&& devicep
->is_initialized
)
1154 gomp_unload_image_from_device (devicep
, version
,
1155 host_table
, target_data
);
1156 gomp_mutex_unlock (&devicep
->lock
);
1159 /* Remove image from array of pending images. */
1160 for (i
= 0; i
< num_offload_images
; i
++)
1161 if (offload_images
[i
].target_data
== target_data
)
1163 offload_images
[i
] = offload_images
[--num_offload_images
];
1167 gomp_mutex_unlock (®ister_lock
);
1171 GOMP_offload_unregister (const void *host_table
, int target_type
,
1172 const void *target_data
)
1174 GOMP_offload_unregister_ver (0, host_table
, target_type
, target_data
);
1177 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1178 must be locked on entry, and remains locked on return. */
1180 attribute_hidden
void
1181 gomp_init_device (struct gomp_device_descr
*devicep
)
1184 devicep
->init_device_func (devicep
->target_id
);
1186 /* Load to device all images registered by the moment. */
1187 for (i
= 0; i
< num_offload_images
; i
++)
1189 struct offload_image_descr
*image
= &offload_images
[i
];
1190 if (image
->type
== devicep
->type
)
1191 gomp_load_image_to_device (devicep
, image
->version
,
1192 image
->host_table
, image
->target_data
,
1196 devicep
->is_initialized
= true;
1199 attribute_hidden
void
1200 gomp_unload_device (struct gomp_device_descr
*devicep
)
1202 if (devicep
->is_initialized
)
1206 /* Unload from device all images registered at the moment. */
1207 for (i
= 0; i
< num_offload_images
; i
++)
1209 struct offload_image_descr
*image
= &offload_images
[i
];
1210 if (image
->type
== devicep
->type
)
1211 gomp_unload_image_from_device (devicep
, image
->version
,
1213 image
->target_data
);
1218 /* Free address mapping tables. MM must be locked on entry, and remains locked
1221 attribute_hidden
void
1222 gomp_free_memmap (struct splay_tree_s
*mem_map
)
1224 while (mem_map
->root
)
1226 struct target_mem_desc
*tgt
= mem_map
->root
->key
.tgt
;
1228 splay_tree_remove (mem_map
, &mem_map
->root
->key
);
1234 /* This function de-initializes the target device, specified by DEVICEP.
1235 DEVICEP must be locked on entry, and remains locked on return. */
1237 attribute_hidden
void
1238 gomp_fini_device (struct gomp_device_descr
*devicep
)
1240 if (devicep
->is_initialized
)
1241 devicep
->fini_device_func (devicep
->target_id
);
1243 devicep
->is_initialized
= false;
1246 /* Host fallback for GOMP_target{,_ext} routines. */
1249 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1251 struct gomp_thread old_thr
, *thr
= gomp_thread ();
1253 memset (thr
, '\0', sizeof (*thr
));
1254 if (gomp_places_list
)
1256 thr
->place
= old_thr
.place
;
1257 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1260 gomp_free_thread (thr
);
1264 /* Host fallback with firstprivate map-type handling. */
1267 gomp_target_fallback_firstprivate (void (*fn
) (void *), size_t mapnum
,
1268 void **hostaddrs
, size_t *sizes
,
1269 unsigned short *kinds
)
1271 size_t i
, tgt_align
= 0, tgt_size
= 0;
1273 for (i
= 0; i
< mapnum
; i
++)
1274 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1276 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1277 if (tgt_align
< align
)
1279 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1280 tgt_size
+= sizes
[i
];
1284 tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1285 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1287 tgt
+= tgt_align
- al
;
1289 for (i
= 0; i
< mapnum
; i
++)
1290 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1292 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1293 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1294 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
1295 hostaddrs
[i
] = tgt
+ tgt_size
;
1296 tgt_size
= tgt_size
+ sizes
[i
];
1299 gomp_target_fallback (fn
, hostaddrs
);
1302 /* Helper function of GOMP_target{,_ext} routines. */
1305 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1306 void (*host_fn
) (void *))
1308 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1309 return (void *) host_fn
;
1312 gomp_mutex_lock (&devicep
->lock
);
1313 struct splay_tree_key_s k
;
1314 k
.host_start
= (uintptr_t) host_fn
;
1315 k
.host_end
= k
.host_start
+ 1;
1316 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1317 gomp_mutex_unlock (&devicep
->lock
);
1319 gomp_fatal ("Target function wasn't mapped");
1321 return (void *) tgt_fn
->tgt_offset
;
1325 /* Called when encountering a target directive. If DEVICE
1326 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1327 GOMP_DEVICE_HOST_FALLBACK (or any value
1328 larger than last available hw device), use host fallback.
1329 FN is address of host code, UNUSED is part of the current ABI, but
1330 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1331 with MAPNUM entries, with addresses of the host objects,
1332 sizes of the host objects (resp. for pointer kind pointer bias
1333 and assumed sizeof (void *) size) and kinds. */
1336 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1337 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1338 unsigned char *kinds
)
1340 struct gomp_device_descr
*devicep
= resolve_device (device
);
1343 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1344 return gomp_target_fallback (fn
, hostaddrs
);
1346 void *fn_addr
= gomp_get_target_fn_addr (devicep
, fn
);
1348 struct target_mem_desc
*tgt_vars
1349 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1350 GOMP_MAP_VARS_TARGET
);
1351 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
);
1352 gomp_unmap_vars (tgt_vars
, true);
1355 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1356 and several arguments have been added:
1357 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1358 DEPEND is array of dependencies, see GOMP_task for details.
1359 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1360 that value, or 1 if teams construct is not present, or 0, if
1361 teams construct does not have num_teams clause and so the choice is
1362 implementation defined, and -1 if it can't be determined on the host
1363 what value will GOMP_teams have on the device.
1364 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1365 body with that value, or 0, if teams construct does not have thread_limit
1366 clause or the teams construct is not present, or -1 if it can't be
1367 determined on the host what value will GOMP_teams have on the device. */
1370 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
1371 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1372 unsigned int flags
, void **depend
, int num_teams
,
1375 struct gomp_device_descr
*devicep
= resolve_device (device
);
1378 (void) thread_limit
;
1380 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
1382 struct gomp_thread
*thr
= gomp_thread ();
1383 /* Create a team if we don't have any around, as nowait
1384 target tasks make sense to run asynchronously even when
1385 outside of any parallel. */
1386 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
1388 struct gomp_team
*team
= gomp_new_team (1);
1389 struct gomp_task
*task
= thr
->task
;
1390 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
1391 team
->prev_ts
= thr
->ts
;
1392 thr
->ts
.team
= team
;
1393 thr
->ts
.team_id
= 0;
1394 thr
->ts
.work_share
= &team
->work_shares
[0];
1395 thr
->ts
.last_work_share
= NULL
;
1396 #ifdef HAVE_SYNC_BUILTINS
1397 thr
->ts
.single_count
= 0;
1399 thr
->ts
.static_trip
= 0;
1400 thr
->task
= &team
->implicit_task
[0];
1401 gomp_init_task (thr
->task
, NULL
, icv
);
1407 thr
->task
= &team
->implicit_task
[0];
1410 pthread_setspecific (gomp_thread_destructor
, thr
);
1413 && !thr
->task
->final_task
)
1415 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
1416 sizes
, kinds
, flags
, depend
,
1417 GOMP_TARGET_TASK_BEFORE_MAP
);
1422 /* If there are depend clauses, but nowait is not present
1423 (or we are in a final task), block the parent task until the
1424 dependencies are resolved and then just continue with the rest
1425 of the function as if it is a merged task. */
1428 struct gomp_thread
*thr
= gomp_thread ();
1429 if (thr
->task
&& thr
->task
->depend_hash
)
1430 gomp_task_maybe_wait_for_dependencies (depend
);
1434 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1436 gomp_target_fallback_firstprivate (fn
, mapnum
, hostaddrs
, sizes
, kinds
);
1440 void *fn_addr
= gomp_get_target_fn_addr (devicep
, fn
);
1442 struct target_mem_desc
*tgt_vars
1443 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1444 GOMP_MAP_VARS_TARGET
);
1445 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
);
1446 gomp_unmap_vars (tgt_vars
, true);
1449 /* Host fallback for GOMP_target_data{,_ext} routines. */
1452 gomp_target_data_fallback (void)
1454 struct gomp_task_icv
*icv
= gomp_icv (false);
1455 if (icv
->target_data
)
1457 /* Even when doing a host fallback, if there are any active
1458 #pragma omp target data constructs, need to remember the
1459 new #pragma omp target data, otherwise GOMP_target_end_data
1460 would get out of sync. */
1461 struct target_mem_desc
*tgt
1462 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1463 GOMP_MAP_VARS_DATA
);
1464 tgt
->prev
= icv
->target_data
;
1465 icv
->target_data
= tgt
;
1470 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1471 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1473 struct gomp_device_descr
*devicep
= resolve_device (device
);
1476 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1477 return gomp_target_data_fallback ();
1479 struct target_mem_desc
*tgt
1480 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1481 GOMP_MAP_VARS_DATA
);
1482 struct gomp_task_icv
*icv
= gomp_icv (true);
1483 tgt
->prev
= icv
->target_data
;
1484 icv
->target_data
= tgt
;
1488 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
1489 size_t *sizes
, unsigned short *kinds
)
1491 struct gomp_device_descr
*devicep
= resolve_device (device
);
1494 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1495 return gomp_target_data_fallback ();
1497 struct target_mem_desc
*tgt
1498 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
1499 GOMP_MAP_VARS_DATA
);
1500 struct gomp_task_icv
*icv
= gomp_icv (true);
1501 tgt
->prev
= icv
->target_data
;
1502 icv
->target_data
= tgt
;
1506 GOMP_target_end_data (void)
1508 struct gomp_task_icv
*icv
= gomp_icv (false);
1509 if (icv
->target_data
)
1511 struct target_mem_desc
*tgt
= icv
->target_data
;
1512 icv
->target_data
= tgt
->prev
;
1513 gomp_unmap_vars (tgt
, true);
1518 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
1519 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1521 struct gomp_device_descr
*devicep
= resolve_device (device
);
1524 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1527 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
1531 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
1532 size_t *sizes
, unsigned short *kinds
,
1533 unsigned int flags
, void **depend
)
1535 struct gomp_device_descr
*devicep
= resolve_device (device
);
1537 /* If there are depend clauses, but nowait is not present,
1538 block the parent task until the dependencies are resolved
1539 and then just continue with the rest of the function as if it
1540 is a merged task. Until we are able to schedule task during
1541 variable mapping or unmapping, ignore nowait if depend clauses
1545 struct gomp_thread
*thr
= gomp_thread ();
1546 if (thr
->task
&& thr
->task
->depend_hash
)
1548 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1550 && !thr
->task
->final_task
)
1552 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1553 mapnum
, hostaddrs
, sizes
, kinds
,
1554 flags
| GOMP_TARGET_FLAG_UPDATE
,
1555 depend
, GOMP_TARGET_TASK_DATA
))
1560 struct gomp_team
*team
= thr
->ts
.team
;
1561 /* If parallel or taskgroup has been cancelled, don't start new
1564 && (gomp_team_barrier_cancelled (&team
->barrier
)
1565 || (thr
->task
->taskgroup
1566 && thr
->task
->taskgroup
->cancelled
)))
1569 gomp_task_maybe_wait_for_dependencies (depend
);
1575 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1578 struct gomp_thread
*thr
= gomp_thread ();
1579 struct gomp_team
*team
= thr
->ts
.team
;
1580 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1582 && (gomp_team_barrier_cancelled (&team
->barrier
)
1583 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1586 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
1590 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
1591 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1593 const int typemask
= 0xff;
1595 gomp_mutex_lock (&devicep
->lock
);
1596 for (i
= 0; i
< mapnum
; i
++)
1598 struct splay_tree_key_s cur_node
;
1599 unsigned char kind
= kinds
[i
] & typemask
;
1603 case GOMP_MAP_ALWAYS_FROM
:
1604 case GOMP_MAP_DELETE
:
1605 case GOMP_MAP_RELEASE
:
1606 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1607 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
1608 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1609 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1610 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1611 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1612 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
1613 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1617 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
1619 if ((kind
== GOMP_MAP_DELETE
1620 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
1621 && k
->refcount
!= REFCOUNT_INFINITY
)
1624 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
1625 || kind
== GOMP_MAP_ALWAYS_FROM
)
1626 devicep
->dev2host_func (devicep
->target_id
,
1627 (void *) cur_node
.host_start
,
1628 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1629 + cur_node
.host_start
1631 cur_node
.host_end
- cur_node
.host_start
);
1632 if (k
->refcount
== 0)
1634 splay_tree_remove (&devicep
->mem_map
, k
);
1635 if (k
->tgt
->refcount
> 1)
1638 gomp_unmap_tgt (k
->tgt
);
1643 gomp_mutex_unlock (&devicep
->lock
);
1644 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1649 gomp_mutex_unlock (&devicep
->lock
);
1653 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
1654 size_t *sizes
, unsigned short *kinds
,
1655 unsigned int flags
, void **depend
)
1657 struct gomp_device_descr
*devicep
= resolve_device (device
);
1659 /* If there are depend clauses, but nowait is not present,
1660 block the parent task until the dependencies are resolved
1661 and then just continue with the rest of the function as if it
1662 is a merged task. Until we are able to schedule task during
1663 variable mapping or unmapping, ignore nowait if depend clauses
1667 struct gomp_thread
*thr
= gomp_thread ();
1668 if (thr
->task
&& thr
->task
->depend_hash
)
1670 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
1672 && !thr
->task
->final_task
)
1674 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
1675 mapnum
, hostaddrs
, sizes
, kinds
,
1677 GOMP_TARGET_TASK_DATA
))
1682 struct gomp_team
*team
= thr
->ts
.team
;
1683 /* If parallel or taskgroup has been cancelled, don't start new
1686 && (gomp_team_barrier_cancelled (&team
->barrier
)
1687 || (thr
->task
->taskgroup
1688 && thr
->task
->taskgroup
->cancelled
)))
1691 gomp_task_maybe_wait_for_dependencies (depend
);
1697 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1700 struct gomp_thread
*thr
= gomp_thread ();
1701 struct gomp_team
*team
= thr
->ts
.team
;
1702 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1704 && (gomp_team_barrier_cancelled (&team
->barrier
)
1705 || (thr
->task
->taskgroup
&& thr
->task
->taskgroup
->cancelled
)))
1709 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1710 for (i
= 0; i
< mapnum
; i
++)
1711 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1713 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
1714 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1718 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
1719 true, GOMP_MAP_VARS_ENTER_DATA
);
1721 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
1725 gomp_target_task_fn (void *data
)
1727 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
1728 struct gomp_device_descr
*devicep
= ttask
->devicep
;
1730 if (ttask
->fn
!= NULL
)
1733 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1735 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
1736 gomp_target_fallback_firstprivate (ttask
->fn
, ttask
->mapnum
,
1737 ttask
->hostaddrs
, ttask
->sizes
,
1742 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1744 gomp_unmap_vars (ttask
->tgt
, true);
1748 void *fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
);
1750 = gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, NULL
,
1751 ttask
->sizes
, ttask
->kinds
, true,
1752 GOMP_MAP_VARS_TARGET
);
1753 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
1755 devicep
->async_run_func (devicep
->target_id
, fn_addr
,
1756 (void *) ttask
->tgt
->tgt_start
, (void *) ttask
);
1759 else if (devicep
== NULL
1760 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1764 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
1765 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1766 ttask
->kinds
, true);
1767 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
1768 for (i
= 0; i
< ttask
->mapnum
; i
++)
1769 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
1771 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
1772 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
1773 GOMP_MAP_VARS_ENTER_DATA
);
1774 i
+= ttask
->sizes
[i
];
1777 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
1778 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
1780 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
1786 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
1790 struct gomp_task_icv
*icv
= gomp_icv (true);
1791 icv
->thread_limit_var
1792 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
1798 omp_target_alloc (size_t size
, int device_num
)
1800 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1801 return malloc (size
);
1806 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1807 if (devicep
== NULL
)
1810 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1811 return malloc (size
);
1813 gomp_mutex_lock (&devicep
->lock
);
1814 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
1815 gomp_mutex_unlock (&devicep
->lock
);
1820 omp_target_free (void *device_ptr
, int device_num
)
1822 if (device_ptr
== NULL
)
1825 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1834 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1835 if (devicep
== NULL
)
1838 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1844 gomp_mutex_lock (&devicep
->lock
);
1845 devicep
->free_func (devicep
->target_id
, device_ptr
);
1846 gomp_mutex_unlock (&devicep
->lock
);
1850 omp_target_is_present (void *ptr
, int device_num
)
1855 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
1861 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
1862 if (devicep
== NULL
)
1865 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1868 gomp_mutex_lock (&devicep
->lock
);
1869 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
1870 struct splay_tree_key_s cur_node
;
1872 cur_node
.host_start
= (uintptr_t) ptr
;
1873 cur_node
.host_end
= cur_node
.host_start
;
1874 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1875 int ret
= n
!= NULL
;
1876 gomp_mutex_unlock (&devicep
->lock
);
1881 omp_target_memcpy (void *dst
, void *src
, size_t length
, size_t dst_offset
,
1882 size_t src_offset
, int dst_device_num
, int src_device_num
)
1884 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
1886 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
1888 if (dst_device_num
< 0)
1891 dst_devicep
= resolve_device (dst_device_num
);
1892 if (dst_devicep
== NULL
)
1895 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1898 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
1900 if (src_device_num
< 0)
1903 src_devicep
= resolve_device (src_device_num
);
1904 if (src_devicep
== NULL
)
1907 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1910 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
1912 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
1915 if (src_devicep
== NULL
)
1917 gomp_mutex_lock (&dst_devicep
->lock
);
1918 dst_devicep
->host2dev_func (dst_devicep
->target_id
,
1919 (char *) dst
+ dst_offset
,
1920 (char *) src
+ src_offset
, length
);
1921 gomp_mutex_unlock (&dst_devicep
->lock
);
1924 if (dst_devicep
== NULL
)
1926 gomp_mutex_lock (&src_devicep
->lock
);
1927 src_devicep
->dev2host_func (src_devicep
->target_id
,
1928 (char *) dst
+ dst_offset
,
1929 (char *) src
+ src_offset
, length
);
1930 gomp_mutex_unlock (&src_devicep
->lock
);
1933 if (src_devicep
== dst_devicep
)
1935 gomp_mutex_lock (&src_devicep
->lock
);
1936 src_devicep
->dev2dev_func (src_devicep
->target_id
,
1937 (char *) dst
+ dst_offset
,
1938 (char *) src
+ src_offset
, length
);
1939 gomp_mutex_unlock (&src_devicep
->lock
);
1946 omp_target_memcpy_rect_worker (void *dst
, void *src
, size_t element_size
,
1947 int num_dims
, const size_t *volume
,
1948 const size_t *dst_offsets
,
1949 const size_t *src_offsets
,
1950 const size_t *dst_dimensions
,
1951 const size_t *src_dimensions
,
1952 struct gomp_device_descr
*dst_devicep
,
1953 struct gomp_device_descr
*src_devicep
)
1955 size_t dst_slice
= element_size
;
1956 size_t src_slice
= element_size
;
1957 size_t j
, dst_off
, src_off
, length
;
1962 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
1963 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
1964 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
1966 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
1967 memcpy ((char *) dst
+ dst_off
, (char *) src
+ src_off
, length
);
1968 else if (src_devicep
== NULL
)
1969 dst_devicep
->host2dev_func (dst_devicep
->target_id
,
1970 (char *) dst
+ dst_off
,
1971 (char *) src
+ src_off
, length
);
1972 else if (dst_devicep
== NULL
)
1973 src_devicep
->dev2host_func (src_devicep
->target_id
,
1974 (char *) dst
+ dst_off
,
1975 (char *) src
+ src_off
, length
);
1976 else if (src_devicep
== dst_devicep
)
1977 src_devicep
->dev2dev_func (src_devicep
->target_id
,
1978 (char *) dst
+ dst_off
,
1979 (char *) src
+ src_off
, length
);
1985 /* FIXME: it would be nice to have some plugin function to handle
1986 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
1987 be handled in the generic recursion below, and for host-host it
1988 should be used even for any num_dims >= 2. */
1990 for (i
= 1; i
< num_dims
; i
++)
1991 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
1992 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
1994 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
1995 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
1997 for (j
= 0; j
< volume
[0]; j
++)
1999 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2000 (char *) src
+ src_off
,
2001 element_size
, num_dims
- 1,
2002 volume
+ 1, dst_offsets
+ 1,
2003 src_offsets
+ 1, dst_dimensions
+ 1,
2004 src_dimensions
+ 1, dst_devicep
,
2008 dst_off
+= dst_slice
;
2009 src_off
+= src_slice
;
2015 omp_target_memcpy_rect (void *dst
, void *src
, size_t element_size
,
2016 int num_dims
, const size_t *volume
,
2017 const size_t *dst_offsets
,
2018 const size_t *src_offsets
,
2019 const size_t *dst_dimensions
,
2020 const size_t *src_dimensions
,
2021 int dst_device_num
, int src_device_num
)
2023 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2028 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2030 if (dst_device_num
< 0)
2033 dst_devicep
= resolve_device (dst_device_num
);
2034 if (dst_devicep
== NULL
)
2037 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2040 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2042 if (src_device_num
< 0)
2045 src_devicep
= resolve_device (src_device_num
);
2046 if (src_devicep
== NULL
)
2049 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2053 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2057 gomp_mutex_lock (&src_devicep
->lock
);
2058 else if (dst_devicep
)
2059 gomp_mutex_lock (&dst_devicep
->lock
);
2060 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2061 volume
, dst_offsets
, src_offsets
,
2062 dst_dimensions
, src_dimensions
,
2063 dst_devicep
, src_devicep
);
2065 gomp_mutex_unlock (&src_devicep
->lock
);
2066 else if (dst_devicep
)
2067 gomp_mutex_unlock (&dst_devicep
->lock
);
2072 omp_target_associate_ptr (void *host_ptr
, void *device_ptr
, size_t size
,
2073 size_t device_offset
, int device_num
)
2075 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2081 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2082 if (devicep
== NULL
)
2085 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2088 gomp_mutex_lock (&devicep
->lock
);
2090 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2091 struct splay_tree_key_s cur_node
;
2094 cur_node
.host_start
= (uintptr_t) host_ptr
;
2095 cur_node
.host_end
= cur_node
.host_start
+ size
;
2096 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2099 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2100 == (uintptr_t) device_ptr
+ device_offset
2101 && n
->host_start
<= cur_node
.host_start
2102 && n
->host_end
>= cur_node
.host_end
)
2107 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2108 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2112 tgt
->to_free
= NULL
;
2114 tgt
->list_count
= 0;
2115 tgt
->device_descr
= devicep
;
2116 splay_tree_node array
= tgt
->array
;
2117 splay_tree_key k
= &array
->key
;
2118 k
->host_start
= cur_node
.host_start
;
2119 k
->host_end
= cur_node
.host_end
;
2121 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2122 k
->refcount
= REFCOUNT_INFINITY
;
2123 k
->async_refcount
= 0;
2125 array
->right
= NULL
;
2126 splay_tree_insert (&devicep
->mem_map
, array
);
2129 gomp_mutex_unlock (&devicep
->lock
);
2134 omp_target_disassociate_ptr (void *ptr
, int device_num
)
2136 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2142 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2143 if (devicep
== NULL
)
2146 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2149 gomp_mutex_lock (&devicep
->lock
);
2151 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2152 struct splay_tree_key_s cur_node
;
2155 cur_node
.host_start
= (uintptr_t) ptr
;
2156 cur_node
.host_end
= cur_node
.host_start
;
2157 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2159 && n
->host_start
== cur_node
.host_start
2160 && n
->refcount
== REFCOUNT_INFINITY
2161 && n
->tgt
->tgt_start
== 0
2162 && n
->tgt
->to_free
== NULL
2163 && n
->tgt
->refcount
== 1
2164 && n
->tgt
->list_count
== 0)
2166 splay_tree_remove (&devicep
->mem_map
, n
);
2167 gomp_unmap_tgt (n
->tgt
);
2171 gomp_mutex_unlock (&devicep
->lock
);
2175 #ifdef PLUGIN_SUPPORT
2177 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2179 The handles of the found functions are stored in the corresponding fields
2180 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2183 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2184 const char *plugin_name
)
2186 const char *err
= NULL
, *last_missing
= NULL
;
2188 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2192 /* Check if all required functions are available in the plugin and store
2193 their handlers. None of the symbols can legitimately be NULL,
2194 so we don't need to check dlerror all the time. */
2196 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2198 /* Similar, but missing functions are not an error. Return false if
2199 failed, true otherwise. */
2200 #define DLSYM_OPT(f, n) \
2201 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2202 || (last_missing = #n, 0))
2205 if (device
->version_func () != GOMP_VERSION
)
2207 err
= "plugin version mismatch";
2214 DLSYM (get_num_devices
);
2215 DLSYM (init_device
);
2216 DLSYM (fini_device
);
2218 DLSYM (unload_image
);
2223 device
->capabilities
= device
->get_caps_func ();
2224 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2230 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2232 if (!DLSYM_OPT (openacc
.exec
, openacc_parallel
)
2233 || !DLSYM_OPT (openacc
.register_async_cleanup
,
2234 openacc_register_async_cleanup
)
2235 || !DLSYM_OPT (openacc
.async_test
, openacc_async_test
)
2236 || !DLSYM_OPT (openacc
.async_test_all
, openacc_async_test_all
)
2237 || !DLSYM_OPT (openacc
.async_wait
, openacc_async_wait
)
2238 || !DLSYM_OPT (openacc
.async_wait_async
, openacc_async_wait_async
)
2239 || !DLSYM_OPT (openacc
.async_wait_all
, openacc_async_wait_all
)
2240 || !DLSYM_OPT (openacc
.async_wait_all_async
,
2241 openacc_async_wait_all_async
)
2242 || !DLSYM_OPT (openacc
.async_set_async
, openacc_async_set_async
)
2243 || !DLSYM_OPT (openacc
.create_thread_data
,
2244 openacc_create_thread_data
)
2245 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2246 openacc_destroy_thread_data
))
2248 /* Require all the OpenACC handlers if we have
2249 GOMP_OFFLOAD_CAP_OPENACC_200. */
2250 err
= "plugin missing OpenACC handler function";
2255 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2256 openacc_get_current_cuda_device
);
2257 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2258 openacc_get_current_cuda_context
);
2259 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_get_cuda_stream
);
2260 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_set_cuda_stream
);
2261 if (cuda
&& cuda
!= 4)
2263 /* Make sure all the CUDA functions are there if any of them are. */
2264 err
= "plugin missing OpenACC CUDA handler function";
2276 gomp_error ("while loading %s: %s", plugin_name
, err
);
2278 gomp_error ("missing function was %s", last_missing
);
2280 dlclose (plugin_handle
);
2285 /* This function initializes the runtime needed for offloading.
2286 It parses the list of offload targets and tries to load the plugins for
2287 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2288 will be set, and the array DEVICES initialized, containing descriptors for
2289 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2293 gomp_target_init (void)
2295 const char *prefix
="libgomp-plugin-";
2296 const char *suffix
= SONAME_SUFFIX (1);
2297 const char *cur
, *next
;
2299 int i
, new_num_devices
;
2304 cur
= OFFLOAD_TARGETS
;
2308 struct gomp_device_descr current_device
;
2310 next
= strchr (cur
, ',');
2312 plugin_name
= (char *) malloc (1 + (next
? next
- cur
: strlen (cur
))
2313 + strlen (prefix
) + strlen (suffix
));
2320 strcpy (plugin_name
, prefix
);
2321 strncat (plugin_name
, cur
, next
? next
- cur
: strlen (cur
));
2322 strcat (plugin_name
, suffix
);
2324 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2326 new_num_devices
= current_device
.get_num_devices_func ();
2327 if (new_num_devices
>= 1)
2329 /* Augment DEVICES and NUM_DEVICES. */
2331 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2332 * sizeof (struct gomp_device_descr
));
2340 current_device
.name
= current_device
.get_name_func ();
2341 /* current_device.capabilities has already been set. */
2342 current_device
.type
= current_device
.get_type_func ();
2343 current_device
.mem_map
.root
= NULL
;
2344 current_device
.is_initialized
= false;
2345 current_device
.openacc
.data_environ
= NULL
;
2346 for (i
= 0; i
< new_num_devices
; i
++)
2348 current_device
.target_id
= i
;
2349 devices
[num_devices
] = current_device
;
2350 gomp_mutex_init (&devices
[num_devices
].lock
);
2361 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2362 NUM_DEVICES_OPENMP. */
2363 struct gomp_device_descr
*devices_s
2364 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
2371 num_devices_openmp
= 0;
2372 for (i
= 0; i
< num_devices
; i
++)
2373 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2374 devices_s
[num_devices_openmp
++] = devices
[i
];
2375 int num_devices_after_openmp
= num_devices_openmp
;
2376 for (i
= 0; i
< num_devices
; i
++)
2377 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2378 devices_s
[num_devices_after_openmp
++] = devices
[i
];
2380 devices
= devices_s
;
2382 for (i
= 0; i
< num_devices
; i
++)
2384 /* The 'devices' array can be moved (by the realloc call) until we have
2385 found all the plugins, so registering with the OpenACC runtime (which
2386 takes a copy of the pointer argument) must be delayed until now. */
2387 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2388 goacc_register (&devices
[i
]);
2392 #else /* PLUGIN_SUPPORT */
2393 /* If dlfcn.h is unavailable we always fallback to host execution.
2394 GOMP_target* routines are just stubs for this case. */
2396 gomp_target_init (void)
2399 #endif /* PLUGIN_SUPPORT */