]>
Commit | Line | Data |
---|---|---|
41dbbb37 TS |
1 | /* OpenACC Runtime initialization routines |
2 | ||
8d9254fc | 3 | Copyright (C) 2013-2020 Free Software Foundation, Inc. |
41dbbb37 TS |
4 | |
5 | Contributed by Mentor Embedded. | |
6 | ||
7 | This file is part of the GNU Offloading and Multi Processing Library | |
8 | (libgomp). | |
9 | ||
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
29 | #include "openacc.h" | |
41dbbb37 TS |
30 | #include "libgomp.h" |
31 | #include "gomp-constants.h" | |
32 | #include "oacc-int.h" | |
e46c7770 | 33 | #include <string.h> |
41dbbb37 TS |
34 | #include <assert.h> |
35 | ||
e38fdba4 JB |
36 | /* Return block containing [H->S), or NULL if not contained. The device lock |
37 | for DEV must be locked on entry, and remains locked on exit. */ | |
41dbbb37 TS |
38 | |
39 | static splay_tree_key | |
a51df54e | 40 | lookup_host (struct gomp_device_descr *dev, void *h, size_t s) |
41dbbb37 TS |
41 | { |
42 | struct splay_tree_key_s node; | |
43 | splay_tree_key key; | |
44 | ||
45 | node.host_start = (uintptr_t) h; | |
46 | node.host_end = (uintptr_t) h + s; | |
47 | ||
a51df54e | 48 | key = splay_tree_lookup (&dev->mem_map, &node); |
41dbbb37 TS |
49 | |
50 | return key; | |
51 | } | |
52 | ||
47afc7b4 | 53 | /* Helper for lookup_dev. Iterate over splay tree. */ |
41dbbb37 TS |
54 | |
55 | static splay_tree_key | |
47afc7b4 | 56 | lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s) |
41dbbb37 | 57 | { |
47afc7b4 TS |
58 | splay_tree_key key = &node->key; |
59 | if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end) | |
60 | return key; | |
41dbbb37 | 61 | |
47afc7b4 TS |
62 | key = NULL; |
63 | if (node->left) | |
64 | key = lookup_dev_1 (node->left, d, s); | |
65 | if (!key && node->right) | |
66 | key = lookup_dev_1 (node->right, d, s); | |
41dbbb37 | 67 | |
47afc7b4 TS |
68 | return key; |
69 | } | |
41dbbb37 | 70 | |
47afc7b4 | 71 | /* Return block containing [D->S), or NULL if not contained. |
41dbbb37 | 72 | |
47afc7b4 TS |
73 | This iterates over the splay tree. This is not expected to be a common |
74 | operation. | |
41dbbb37 | 75 | |
47afc7b4 TS |
76 | The device lock associated with MEM_MAP must be locked on entry, and remains |
77 | locked on exit. */ | |
41dbbb37 | 78 | |
47afc7b4 TS |
79 | static splay_tree_key |
80 | lookup_dev (splay_tree mem_map, void *d, size_t s) | |
81 | { | |
82 | if (!mem_map || !mem_map->root) | |
83 | return NULL; | |
41dbbb37 | 84 | |
47afc7b4 | 85 | return lookup_dev_1 (mem_map->root, (uintptr_t) d, s); |
41dbbb37 TS |
86 | } |
87 | ||
47afc7b4 | 88 | |
41dbbb37 TS |
89 | /* OpenACC is silent on how memory exhaustion is indicated. We return |
90 | NULL. */ | |
91 | ||
92 | void * | |
93 | acc_malloc (size_t s) | |
94 | { | |
95 | if (!s) | |
96 | return NULL; | |
97 | ||
98 | goacc_lazy_initialize (); | |
99 | ||
100 | struct goacc_thread *thr = goacc_thread (); | |
101 | ||
d93bdab5 JB |
102 | assert (thr->dev); |
103 | ||
e46c7770 CP |
104 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
105 | return malloc (s); | |
106 | ||
5fae049d TS |
107 | acc_prof_info prof_info; |
108 | acc_api_info api_info; | |
109 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
110 | ||
111 | void *res = thr->dev->alloc_func (thr->dev->target_id, s); | |
112 | ||
113 | if (profiling_p) | |
114 | { | |
115 | thr->prof_info = NULL; | |
116 | thr->api_info = NULL; | |
117 | } | |
118 | ||
119 | return res; | |
41dbbb37 TS |
120 | } |
121 | ||
41dbbb37 TS |
122 | void |
123 | acc_free (void *d) | |
124 | { | |
125 | splay_tree_key k; | |
41dbbb37 TS |
126 | |
127 | if (!d) | |
128 | return; | |
129 | ||
e38fdba4 JB |
130 | struct goacc_thread *thr = goacc_thread (); |
131 | ||
d93bdab5 JB |
132 | assert (thr && thr->dev); |
133 | ||
e38fdba4 JB |
134 | struct gomp_device_descr *acc_dev = thr->dev; |
135 | ||
e46c7770 CP |
136 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
137 | return free (d); | |
138 | ||
5fae049d TS |
139 | acc_prof_info prof_info; |
140 | acc_api_info api_info; | |
141 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
142 | ||
e38fdba4 JB |
143 | gomp_mutex_lock (&acc_dev->lock); |
144 | ||
41dbbb37 TS |
145 | /* We don't have to call lazy open here, as the ptr value must have |
146 | been returned by acc_malloc. It's not permitted to pass NULL in | |
147 | (unless you got that null from acc_malloc). */ | |
47afc7b4 | 148 | if ((k = lookup_dev (&acc_dev->mem_map, d, 1))) |
e38fdba4 | 149 | { |
cec41816 TS |
150 | void *offset = d - k->tgt->tgt_start + k->tgt_offset; |
151 | void *h = k->host_start + offset; | |
152 | size_t h_size = k->host_end - k->host_start; | |
e38fdba4 | 153 | gomp_mutex_unlock (&acc_dev->lock); |
cec41816 TS |
154 | /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still |
155 | used in a mapping". */ | |
156 | gomp_fatal ("refusing to free device memory space at %p that is still" | |
157 | " mapped at [%p,+%d]", | |
158 | d, h, (int) h_size); | |
e38fdba4 JB |
159 | } |
160 | else | |
161 | gomp_mutex_unlock (&acc_dev->lock); | |
162 | ||
6ce13072 CLT |
163 | if (!acc_dev->free_func (acc_dev->target_id, d)) |
164 | gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); | |
5fae049d TS |
165 | |
166 | if (profiling_p) | |
167 | { | |
168 | thr->prof_info = NULL; | |
169 | thr->api_info = NULL; | |
170 | } | |
41dbbb37 TS |
171 | } |
172 | ||
58168bbf CLT |
173 | static void |
174 | memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, | |
175 | const char *libfnname) | |
41dbbb37 TS |
176 | { |
177 | /* No need to call lazy open here, as the device pointer must have | |
178 | been obtained from a routine that did that. */ | |
179 | struct goacc_thread *thr = goacc_thread (); | |
180 | ||
d93bdab5 JB |
181 | assert (thr && thr->dev); |
182 | ||
e46c7770 CP |
183 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
184 | { | |
58168bbf CLT |
185 | if (from) |
186 | memmove (h, d, s); | |
187 | else | |
188 | memmove (d, h, s); | |
e46c7770 CP |
189 | return; |
190 | } | |
191 | ||
5fae049d TS |
192 | acc_prof_info prof_info; |
193 | acc_api_info api_info; | |
194 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
195 | if (profiling_p) | |
196 | { | |
197 | prof_info.async = async; | |
198 | prof_info.async_queue = prof_info.async; | |
199 | } | |
200 | ||
1f4c5b9b CLT |
201 | goacc_aq aq = get_goacc_asyncqueue (async); |
202 | if (from) | |
203 | gomp_copy_dev2host (thr->dev, aq, h, d, s); | |
204 | else | |
205 | gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); | |
5fae049d TS |
206 | |
207 | if (profiling_p) | |
208 | { | |
209 | thr->prof_info = NULL; | |
210 | thr->api_info = NULL; | |
211 | } | |
41dbbb37 TS |
212 | } |
213 | ||
214 | void | |
58168bbf | 215 | acc_memcpy_to_device (void *d, void *h, size_t s) |
41dbbb37 | 216 | { |
58168bbf CLT |
217 | memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); |
218 | } | |
41dbbb37 | 219 | |
/* Copy S bytes from host address H to device address D on async queue
   ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  const bool from_device = false;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
d93bdab5 | 225 | |
58168bbf CLT |
226 | void |
227 | acc_memcpy_from_device (void *h, void *d, size_t s) | |
228 | { | |
229 | memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); | |
230 | } | |
e46c7770 | 231 | |
/* Copy S bytes from device address D to host address H on async queue
   ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  const bool from_device = true;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
237 | ||
238 | /* Return the device pointer that corresponds to host data H. Or NULL | |
239 | if no mapping. */ | |
240 | ||
241 | void * | |
242 | acc_deviceptr (void *h) | |
243 | { | |
244 | splay_tree_key n; | |
245 | void *d; | |
246 | void *offset; | |
247 | ||
248 | goacc_lazy_initialize (); | |
249 | ||
250 | struct goacc_thread *thr = goacc_thread (); | |
e38fdba4 JB |
251 | struct gomp_device_descr *dev = thr->dev; |
252 | ||
e46c7770 CP |
253 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
254 | return h; | |
255 | ||
5fae049d TS |
256 | /* In the following, no OpenACC Profiling Interface events can possibly be |
257 | generated. */ | |
258 | ||
e38fdba4 | 259 | gomp_mutex_lock (&dev->lock); |
41dbbb37 | 260 | |
e38fdba4 | 261 | n = lookup_host (dev, h, 1); |
41dbbb37 TS |
262 | |
263 | if (!n) | |
e38fdba4 JB |
264 | { |
265 | gomp_mutex_unlock (&dev->lock); | |
266 | return NULL; | |
267 | } | |
41dbbb37 TS |
268 | |
269 | offset = h - n->host_start; | |
270 | ||
271 | d = n->tgt->tgt_start + n->tgt_offset + offset; | |
272 | ||
e38fdba4 JB |
273 | gomp_mutex_unlock (&dev->lock); |
274 | ||
41dbbb37 TS |
275 | return d; |
276 | } | |
277 | ||
278 | /* Return the host pointer that corresponds to device data D. Or NULL | |
279 | if no mapping. */ | |
280 | ||
281 | void * | |
282 | acc_hostptr (void *d) | |
283 | { | |
284 | splay_tree_key n; | |
285 | void *h; | |
286 | void *offset; | |
287 | ||
288 | goacc_lazy_initialize (); | |
289 | ||
290 | struct goacc_thread *thr = goacc_thread (); | |
e38fdba4 | 291 | struct gomp_device_descr *acc_dev = thr->dev; |
41dbbb37 | 292 | |
e46c7770 CP |
293 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
294 | return d; | |
295 | ||
5fae049d TS |
296 | /* In the following, no OpenACC Profiling Interface events can possibly be |
297 | generated. */ | |
298 | ||
e38fdba4 JB |
299 | gomp_mutex_lock (&acc_dev->lock); |
300 | ||
47afc7b4 | 301 | n = lookup_dev (&acc_dev->mem_map, d, 1); |
41dbbb37 TS |
302 | |
303 | if (!n) | |
e38fdba4 JB |
304 | { |
305 | gomp_mutex_unlock (&acc_dev->lock); | |
306 | return NULL; | |
307 | } | |
41dbbb37 TS |
308 | |
309 | offset = d - n->tgt->tgt_start + n->tgt_offset; | |
310 | ||
311 | h = n->host_start + offset; | |
312 | ||
e38fdba4 JB |
313 | gomp_mutex_unlock (&acc_dev->lock); |
314 | ||
41dbbb37 TS |
315 | return h; |
316 | } | |
317 | ||
318 | /* Return 1 if host data [H,+S] is present on the device. */ | |
319 | ||
320 | int | |
321 | acc_is_present (void *h, size_t s) | |
322 | { | |
323 | splay_tree_key n; | |
324 | ||
325 | if (!s || !h) | |
326 | return 0; | |
327 | ||
328 | goacc_lazy_initialize (); | |
329 | ||
330 | struct goacc_thread *thr = goacc_thread (); | |
331 | struct gomp_device_descr *acc_dev = thr->dev; | |
332 | ||
e46c7770 CP |
333 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
334 | return h != NULL; | |
335 | ||
5fae049d TS |
336 | /* In the following, no OpenACC Profiling Interface events can possibly be |
337 | generated. */ | |
338 | ||
e38fdba4 JB |
339 | gomp_mutex_lock (&acc_dev->lock); |
340 | ||
a51df54e | 341 | n = lookup_host (acc_dev, h, s); |
41dbbb37 TS |
342 | |
343 | if (n && ((uintptr_t)h < n->host_start | |
344 | || (uintptr_t)h + s > n->host_end | |
345 | || s > n->host_end - n->host_start)) | |
346 | n = NULL; | |
347 | ||
e38fdba4 JB |
348 | gomp_mutex_unlock (&acc_dev->lock); |
349 | ||
41dbbb37 TS |
350 | return n != NULL; |
351 | } | |
352 | ||
353 | /* Create a mapping for host [H,+S] -> device [D,+S] */ | |
354 | ||
355 | void | |
356 | acc_map_data (void *h, void *d, size_t s) | |
357 | { | |
41dbbb37 TS |
358 | size_t mapnum = 1; |
359 | void *hostaddrs = h; | |
360 | void *devaddrs = d; | |
361 | size_t sizes = s; | |
362 | unsigned short kinds = GOMP_MAP_ALLOC; | |
363 | ||
364 | goacc_lazy_initialize (); | |
365 | ||
366 | struct goacc_thread *thr = goacc_thread (); | |
367 | struct gomp_device_descr *acc_dev = thr->dev; | |
368 | ||
369 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
370 | { | |
371 | if (d != h) | |
372 | gomp_fatal ("cannot map data on shared-memory system"); | |
41dbbb37 TS |
373 | } |
374 | else | |
375 | { | |
376 | struct goacc_thread *thr = goacc_thread (); | |
377 | ||
378 | if (!d || !h || !s) | |
379 | gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", | |
380 | (void *)h, (int)s, (void *)d, (int)s); | |
381 | ||
5fae049d TS |
382 | acc_prof_info prof_info; |
383 | acc_api_info api_info; | |
384 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
385 | ||
e38fdba4 JB |
386 | gomp_mutex_lock (&acc_dev->lock); |
387 | ||
a51df54e | 388 | if (lookup_host (acc_dev, h, s)) |
e38fdba4 JB |
389 | { |
390 | gomp_mutex_unlock (&acc_dev->lock); | |
391 | gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, | |
392 | (int)s); | |
393 | } | |
41dbbb37 | 394 | |
47afc7b4 | 395 | if (lookup_dev (&thr->dev->mem_map, d, s)) |
e38fdba4 JB |
396 | { |
397 | gomp_mutex_unlock (&acc_dev->lock); | |
398 | gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, | |
399 | (int)s); | |
400 | } | |
401 | ||
402 | gomp_mutex_unlock (&acc_dev->lock); | |
41dbbb37 | 403 | |
f233418c TS |
404 | struct target_mem_desc *tgt |
405 | = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, | |
9e628024 CLT |
406 | &kinds, true, |
407 | GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA); | |
ba40277f | 408 | assert (tgt); |
f233418c | 409 | assert (tgt->list_count == 1); |
e103542b | 410 | splay_tree_key n = tgt->list[0].key; |
f233418c | 411 | assert (n); |
e103542b | 412 | assert (n->refcount == 1); |
6f5b4b64 | 413 | assert (n->dynamic_refcount == 0); |
e103542b TS |
414 | /* Special reference counting behavior. */ |
415 | n->refcount = REFCOUNT_INFINITY; | |
5fae049d TS |
416 | |
417 | if (profiling_p) | |
418 | { | |
419 | thr->prof_info = NULL; | |
420 | thr->api_info = NULL; | |
421 | } | |
41dbbb37 | 422 | } |
41dbbb37 TS |
423 | } |
424 | ||
425 | void | |
426 | acc_unmap_data (void *h) | |
427 | { | |
428 | struct goacc_thread *thr = goacc_thread (); | |
429 | struct gomp_device_descr *acc_dev = thr->dev; | |
430 | ||
431 | /* No need to call lazy open, as the address must have been mapped. */ | |
432 | ||
e46c7770 CP |
433 | /* This is a no-op on shared-memory targets. */ |
434 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
435 | return; | |
436 | ||
5fae049d TS |
437 | acc_prof_info prof_info; |
438 | acc_api_info api_info; | |
439 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
440 | ||
e38fdba4 JB |
441 | gomp_mutex_lock (&acc_dev->lock); |
442 | ||
a51df54e | 443 | splay_tree_key n = lookup_host (acc_dev, h, 1); |
41dbbb37 TS |
444 | |
445 | if (!n) | |
e38fdba4 JB |
446 | { |
447 | gomp_mutex_unlock (&acc_dev->lock); | |
448 | gomp_fatal ("%p is not a mapped block", (void *)h); | |
449 | } | |
41dbbb37 | 450 | |
378da98f | 451 | size_t host_size = n->host_end - n->host_start; |
41dbbb37 TS |
452 | |
453 | if (n->host_start != (uintptr_t) h) | |
e38fdba4 JB |
454 | { |
455 | gomp_mutex_unlock (&acc_dev->lock); | |
456 | gomp_fatal ("[%p,%d] surrounds %p", | |
457 | (void *) n->host_start, (int) host_size, (void *) h); | |
458 | } | |
e103542b | 459 | /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from |
6f5b4b64 | 460 | 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating |
e103542b TS |
461 | the different 'REFCOUNT_INFINITY' cases, or simply separate |
462 | 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' | |
463 | etc.)? */ | |
464 | else if (n->refcount != REFCOUNT_INFINITY) | |
465 | { | |
466 | gomp_mutex_unlock (&acc_dev->lock); | |
467 | gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped" | |
468 | " by 'acc_map_data'", | |
469 | (void *) h, (int) host_size); | |
470 | } | |
41dbbb37 | 471 | |
378da98f | 472 | struct target_mem_desc *tgt = n->tgt; |
41dbbb37 | 473 | |
378da98f | 474 | if (tgt->refcount == REFCOUNT_INFINITY) |
41dbbb37 | 475 | { |
378da98f JB |
476 | gomp_mutex_unlock (&acc_dev->lock); |
477 | gomp_fatal ("cannot unmap target block"); | |
478 | } | |
4662f7fe TS |
479 | |
480 | /* Above, we've verified that the mapping must have been set up by | |
481 | 'acc_map_data'. */ | |
482 | assert (tgt->refcount == 1); | |
483 | ||
2112d324 TS |
484 | /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var' |
485 | from freeing the target memory. */ | |
486 | tgt->tgt_end = 0; | |
487 | tgt->to_free = NULL; | |
488 | ||
06ec6172 TS |
489 | bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); |
490 | assert (is_tgt_unmapped); | |
e38fdba4 | 491 | |
ba40277f | 492 | gomp_mutex_unlock (&acc_dev->lock); |
5fae049d TS |
493 | |
494 | if (profiling_p) | |
495 | { | |
496 | thr->prof_info = NULL; | |
497 | thr->api_info = NULL; | |
498 | } | |
41dbbb37 TS |
499 | } |
500 | ||
aaf0e9d7 | 501 | |
cb7effde JB |
502 | /* Helper function to map a single dynamic data item, represented by a single |
503 | mapping. The acc_dev->lock should be held on entry, and remains locked on | |
504 | exit. */ | |
505 | ||
506 | static void * | |
507 | goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr, | |
508 | size_t size, splay_tree_key n) | |
509 | { | |
510 | assert (n); | |
511 | ||
512 | /* Present. */ | |
513 | void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr | |
514 | - n->host_start); | |
515 | ||
516 | if (hostaddr + size > (void *) n->host_end) | |
517 | { | |
518 | gomp_mutex_unlock (&acc_dev->lock); | |
519 | gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size); | |
520 | } | |
521 | ||
522 | assert (n->refcount != REFCOUNT_LINK); | |
523 | if (n->refcount != REFCOUNT_INFINITY) | |
6f5b4b64 JB |
524 | n->refcount++; |
525 | n->dynamic_refcount++; | |
cb7effde JB |
526 | |
527 | return d; | |
528 | } | |
529 | ||
378da98f | 530 | /* Enter dynamic mapping for a single datum. Return the device pointer. */ |
41dbbb37 TS |
531 | |
532 | static void * | |
378da98f | 533 | goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) |
41dbbb37 TS |
534 | { |
535 | void *d; | |
536 | splay_tree_key n; | |
537 | ||
378da98f | 538 | if (!hostaddrs[0] || !sizes[0]) |
9444a299 | 539 | gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]); |
41dbbb37 TS |
540 | |
541 | goacc_lazy_initialize (); | |
542 | ||
543 | struct goacc_thread *thr = goacc_thread (); | |
544 | struct gomp_device_descr *acc_dev = thr->dev; | |
545 | ||
e46c7770 | 546 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
378da98f | 547 | return hostaddrs[0]; |
e46c7770 | 548 | |
5fae049d TS |
549 | acc_prof_info prof_info; |
550 | acc_api_info api_info; | |
551 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
552 | if (profiling_p) | |
553 | { | |
554 | prof_info.async = async; | |
555 | prof_info.async_queue = prof_info.async; | |
556 | } | |
557 | ||
e38fdba4 JB |
558 | gomp_mutex_lock (&acc_dev->lock); |
559 | ||
9444a299 | 560 | n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); |
378da98f | 561 | if (n) |
41dbbb37 | 562 | { |
cb7effde | 563 | d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n); |
77ce5555 | 564 | gomp_mutex_unlock (&acc_dev->lock); |
77ce5555 | 565 | } |
41dbbb37 TS |
566 | else |
567 | { | |
378da98f JB |
568 | const size_t mapnum = 1; |
569 | ||
e38fdba4 JB |
570 | gomp_mutex_unlock (&acc_dev->lock); |
571 | ||
1f4c5b9b | 572 | goacc_aq aq = get_goacc_asyncqueue (async); |
58168bbf | 573 | |
f233418c TS |
574 | struct target_mem_desc *tgt |
575 | = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, | |
9e628024 CLT |
576 | kinds, true, (GOMP_MAP_VARS_OPENACC |
577 | | GOMP_MAP_VARS_ENTER_DATA)); | |
f233418c TS |
578 | assert (tgt); |
579 | assert (tgt->list_count == 1); | |
580 | n = tgt->list[0].key; | |
581 | assert (n); | |
582 | assert (n->refcount == 1); | |
6f5b4b64 JB |
583 | assert (n->dynamic_refcount == 0); |
584 | n->dynamic_refcount++; | |
41dbbb37 | 585 | |
f233418c | 586 | d = (void *) tgt->tgt_start; |
41dbbb37 TS |
587 | } |
588 | ||
5fae049d TS |
589 | if (profiling_p) |
590 | { | |
591 | thr->prof_info = NULL; | |
592 | thr->api_info = NULL; | |
593 | } | |
594 | ||
41dbbb37 TS |
595 | return d; |
596 | } | |
597 | ||
598 | void * | |
599 | acc_create (void *h, size_t s) | |
600 | { | |
9444a299 | 601 | unsigned short kinds[1] = { GOMP_MAP_ALLOC }; |
378da98f | 602 | return goacc_enter_datum (&h, &s, &kinds, acc_async_sync); |
58168bbf CLT |
603 | } |
604 | ||
605 | void | |
606 | acc_create_async (void *h, size_t s, int async) | |
607 | { | |
9444a299 | 608 | unsigned short kinds[1] = { GOMP_MAP_ALLOC }; |
378da98f | 609 | goacc_enter_datum (&h, &s, &kinds, async); |
41dbbb37 TS |
610 | } |
611 | ||
/* Legacy names: acc_present_or_create used to be what acc_create is now,
   and acc_pcreate is acc_present_or_create by a different name.  Without
   HAVE_ATTRIBUTE_ALIAS they are thin forwarding functions; with it they are
   declared through strong_alias on acc_create.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#else
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#endif
630 | ||
41dbbb37 | 631 | void * |
c759830b | 632 | acc_copyin (void *h, size_t s) |
41dbbb37 | 633 | { |
9444a299 | 634 | unsigned short kinds[1] = { GOMP_MAP_TO }; |
378da98f | 635 | return goacc_enter_datum (&h, &s, &kinds, acc_async_sync); |
41dbbb37 TS |
636 | } |
637 | ||
c759830b TS |
638 | void |
639 | acc_copyin_async (void *h, size_t s, int async) | |
640 | { | |
9444a299 | 641 | unsigned short kinds[1] = { GOMP_MAP_TO }; |
378da98f | 642 | goacc_enter_datum (&h, &s, &kinds, async); |
c759830b TS |
643 | } |
644 | ||
/* Legacy names: acc_present_or_copyin used to be what acc_copyin is now,
   and acc_pcopyin is acc_present_or_copyin by a different name.  Without
   HAVE_ATTRIBUTE_ALIAS they are thin forwarding functions; with it they are
   declared through strong_alias on acc_copyin.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#else
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#endif
663 | ||
34cfe31e | 664 | |
cb7effde JB |
665 | /* Helper function to unmap a single data item. Device lock should be held on |
666 | entry, and remains locked on exit. */ | |
41dbbb37 TS |
667 | |
668 | static void | |
cb7effde JB |
669 | goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s, |
670 | unsigned short kind, splay_tree_key n, goacc_aq aq) | |
41dbbb37 | 671 | { |
bc4ed079 JB |
672 | assert (kind != GOMP_MAP_DETACH |
673 | && kind != GOMP_MAP_FORCE_DETACH); | |
674 | ||
e307b05f | 675 | if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end) |
e38fdba4 | 676 | { |
e307b05f | 677 | size_t host_size = n->host_end - n->host_start; |
e38fdba4 | 678 | gomp_mutex_unlock (&acc_dev->lock); |
e307b05f JB |
679 | gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]", |
680 | (void *) h, (int) s, (void *) n->host_start, (int) host_size); | |
e38fdba4 JB |
681 | } |
682 | ||
6f5b4b64 | 683 | bool finalize = (kind == GOMP_MAP_FORCE_FROM |
bc4ed079 | 684 | || kind == GOMP_MAP_DELETE); |
6f5b4b64 JB |
685 | |
686 | assert (n->refcount != REFCOUNT_LINK); | |
687 | if (n->refcount != REFCOUNT_INFINITY | |
688 | && n->refcount < n->dynamic_refcount) | |
689 | { | |
690 | gomp_mutex_unlock (&acc_dev->lock); | |
691 | gomp_fatal ("Dynamic reference counting assert fail\n"); | |
692 | } | |
cb7effde | 693 | |
34cfe31e | 694 | if (finalize) |
829c6349 | 695 | { |
d6e8c01c | 696 | if (n->refcount != REFCOUNT_INFINITY) |
6f5b4b64 JB |
697 | n->refcount -= n->dynamic_refcount; |
698 | n->dynamic_refcount = 0; | |
829c6349 | 699 | } |
6f5b4b64 | 700 | else if (n->dynamic_refcount) |
829c6349 | 701 | { |
d6e8c01c TS |
702 | if (n->refcount != REFCOUNT_INFINITY) |
703 | n->refcount--; | |
6f5b4b64 | 704 | n->dynamic_refcount--; |
829c6349 | 705 | } |
41dbbb37 | 706 | |
829c6349 CLT |
707 | if (n->refcount == 0) |
708 | { | |
34cfe31e TS |
709 | bool copyout = (kind == GOMP_MAP_FROM |
710 | || kind == GOMP_MAP_FORCE_FROM); | |
711 | if (copyout) | |
58168bbf | 712 | { |
1cbd94e8 JB |
713 | void *d = (void *) (n->tgt->tgt_start + n->tgt_offset |
714 | + (uintptr_t) h - n->host_start); | |
1f4c5b9b | 715 | gomp_copy_dev2host (acc_dev, aq, h, d, s); |
58168bbf | 716 | } |
ba40277f TS |
717 | |
718 | if (aq) | |
719 | /* TODO We can't do the 'is_tgt_unmapped' checking -- see the | |
720 | 'gomp_unref_tgt' comment in | |
721 | <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; | |
722 | PR92881. */ | |
723 | gomp_remove_var_async (acc_dev, n, aq); | |
724 | else | |
725 | { | |
06ec6172 TS |
726 | size_t num_mappings = 0; |
727 | /* If the target_mem_desc represents a single data mapping, we can | |
728 | check that it is freed when this splay tree key's refcount reaches | |
729 | zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with | |
730 | multiple members), fall back to skipping the test. */ | |
731 | for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i) | |
bc4ed079 JB |
732 | if (n->tgt->list[l_i].key |
733 | && !n->tgt->list[l_i].is_attach) | |
06ec6172 | 734 | ++num_mappings; |
ba40277f | 735 | bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); |
06ec6172 | 736 | assert (is_tgt_unmapped || num_mappings > 1); |
ba40277f | 737 | } |
829c6349 | 738 | } |
cb7effde JB |
739 | } |
740 | ||
741 | ||
742 | /* Exit a dynamic mapping for a single variable. */ | |
743 | ||
744 | static void | |
745 | goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) | |
746 | { | |
747 | /* No need to call lazy open, as the data must already have been | |
748 | mapped. */ | |
749 | ||
750 | kind &= 0xff; | |
751 | ||
752 | struct goacc_thread *thr = goacc_thread (); | |
753 | struct gomp_device_descr *acc_dev = thr->dev; | |
754 | ||
755 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
756 | return; | |
757 | ||
758 | acc_prof_info prof_info; | |
759 | acc_api_info api_info; | |
760 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
761 | if (profiling_p) | |
762 | { | |
763 | prof_info.async = async; | |
764 | prof_info.async_queue = prof_info.async; | |
765 | } | |
766 | ||
767 | gomp_mutex_lock (&acc_dev->lock); | |
768 | ||
769 | splay_tree_key n = lookup_host (acc_dev, h, s); | |
770 | /* Non-present data is a no-op: PR92726, RP92970, PR92984. */ | |
771 | if (n) | |
772 | { | |
773 | goacc_aq aq = get_goacc_asyncqueue (async); | |
774 | goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq); | |
775 | } | |
829c6349 CLT |
776 | |
777 | gomp_mutex_unlock (&acc_dev->lock); | |
5fae049d TS |
778 | |
779 | if (profiling_p) | |
780 | { | |
781 | thr->prof_info = NULL; | |
782 | thr->api_info = NULL; | |
783 | } | |
41dbbb37 TS |
784 | } |
785 | ||
786 | void | |
787 | acc_delete (void *h , size_t s) | |
788 | { | |
378da98f | 789 | goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync); |
58168bbf CLT |
790 | } |
791 | ||
792 | void | |
793 | acc_delete_async (void *h , size_t s, int async) | |
794 | { | |
378da98f | 795 | goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async); |
41dbbb37 TS |
796 | } |
797 | ||
829c6349 CLT |
798 | void |
799 | acc_delete_finalize (void *h , size_t s) | |
800 | { | |
378da98f | 801 | goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync); |
829c6349 CLT |
802 | } |
803 | ||
804 | void | |
805 | acc_delete_finalize_async (void *h , size_t s, int async) | |
806 | { | |
378da98f | 807 | goacc_exit_datum (h, s, GOMP_MAP_DELETE, async); |
829c6349 CLT |
808 | } |
809 | ||
6ce13072 CLT |
810 | void |
811 | acc_copyout (void *h, size_t s) | |
41dbbb37 | 812 | { |
378da98f | 813 | goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync); |
58168bbf CLT |
814 | } |
815 | ||
816 | void | |
817 | acc_copyout_async (void *h, size_t s, int async) | |
818 | { | |
378da98f | 819 | goacc_exit_datum (h, s, GOMP_MAP_FROM, async); |
41dbbb37 TS |
820 | } |
821 | ||
829c6349 CLT |
822 | void |
823 | acc_copyout_finalize (void *h, size_t s) | |
824 | { | |
378da98f | 825 | goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync); |
829c6349 CLT |
826 | } |
827 | ||
828 | void | |
829 | acc_copyout_finalize_async (void *h, size_t s, int async) | |
830 | { | |
378da98f | 831 | goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async); |
829c6349 CLT |
832 | } |
833 | ||
41dbbb37 | 834 | static void |
58168bbf | 835 | update_dev_host (int is_dev, void *h, size_t s, int async) |
41dbbb37 TS |
836 | { |
837 | splay_tree_key n; | |
838 | void *d; | |
8baa7864 TS |
839 | |
840 | goacc_lazy_initialize (); | |
841 | ||
41dbbb37 TS |
842 | struct goacc_thread *thr = goacc_thread (); |
843 | struct gomp_device_descr *acc_dev = thr->dev; | |
844 | ||
e46c7770 CP |
845 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
846 | return; | |
847 | ||
6e4d01d6 TB |
848 | /* Fortran optional arguments that are non-present result in a |
849 | NULL host address here. This can safely be ignored as it is | |
850 | not possible to 'update' a non-present optional argument. */ | |
851 | if (h == NULL) | |
852 | return; | |
853 | ||
5fae049d TS |
854 | acc_prof_info prof_info; |
855 | acc_api_info api_info; | |
856 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
857 | if (profiling_p) | |
858 | { | |
859 | prof_info.async = async; | |
860 | prof_info.async_queue = prof_info.async; | |
861 | } | |
862 | ||
e38fdba4 JB |
863 | gomp_mutex_lock (&acc_dev->lock); |
864 | ||
a51df54e | 865 | n = lookup_host (acc_dev, h, s); |
41dbbb37 | 866 | |
41dbbb37 | 867 | if (!n) |
e38fdba4 JB |
868 | { |
869 | gomp_mutex_unlock (&acc_dev->lock); | |
870 | gomp_fatal ("[%p,%d] is not mapped", h, (int)s); | |
871 | } | |
41dbbb37 | 872 | |
b6d1f2b5 JN |
873 | d = (void *) (n->tgt->tgt_start + n->tgt_offset |
874 | + (uintptr_t) h - n->host_start); | |
41dbbb37 | 875 | |
1f4c5b9b | 876 | goacc_aq aq = get_goacc_asyncqueue (async); |
58168bbf | 877 | |
41dbbb37 | 878 | if (is_dev) |
1f4c5b9b | 879 | gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); |
41dbbb37 | 880 | else |
1f4c5b9b | 881 | gomp_copy_dev2host (acc_dev, aq, h, d, s); |
58168bbf | 882 | |
6ce13072 | 883 | gomp_mutex_unlock (&acc_dev->lock); |
5fae049d TS |
884 | |
885 | if (profiling_p) | |
886 | { | |
887 | thr->prof_info = NULL; | |
888 | thr->api_info = NULL; | |
889 | } | |
41dbbb37 TS |
890 | } |
891 | ||
892 | void | |
893 | acc_update_device (void *h, size_t s) | |
894 | { | |
58168bbf CLT |
895 | update_dev_host (1, h, s, acc_async_sync); |
896 | } | |
897 | ||
/* Copy host range [H,+S] to its mapped device counterpart on async queue
   ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s, async);
}
903 | ||
904 | void | |
905 | acc_update_self (void *h, size_t s) | |
906 | { | |
58168bbf CLT |
907 | update_dev_host (0, h, s, acc_async_sync); |
908 | } | |
909 | ||
/* Like acc_update_self, but the caller supplies the ASYNC argument that is
   handed on to update_dev_host instead of acc_async_sync.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s, async);
}
915 | ||
5d5be7bf JB |
916 | void |
917 | acc_attach_async (void **hostaddr, int async) | |
918 | { | |
919 | struct goacc_thread *thr = goacc_thread (); | |
920 | struct gomp_device_descr *acc_dev = thr->dev; | |
921 | goacc_aq aq = get_goacc_asyncqueue (async); | |
922 | ||
923 | struct splay_tree_key_s cur_node; | |
924 | splay_tree_key n; | |
925 | ||
926 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
927 | return; | |
928 | ||
929 | gomp_mutex_lock (&acc_dev->lock); | |
930 | ||
931 | cur_node.host_start = (uintptr_t) hostaddr; | |
932 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
933 | n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
934 | ||
935 | if (n == NULL) | |
dc954154 JB |
936 | { |
937 | gomp_mutex_unlock (&acc_dev->lock); | |
938 | gomp_fatal ("struct not mapped for acc_attach"); | |
939 | } | |
5d5be7bf JB |
940 | |
941 | gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, | |
942 | 0, NULL); | |
943 | ||
944 | gomp_mutex_unlock (&acc_dev->lock); | |
945 | } | |
946 | ||
947 | void | |
948 | acc_attach (void **hostaddr) | |
949 | { | |
950 | acc_attach_async (hostaddr, acc_async_sync); | |
951 | } | |
952 | ||
953 | static void | |
954 | goacc_detach_internal (void **hostaddr, int async, bool finalize) | |
955 | { | |
956 | struct goacc_thread *thr = goacc_thread (); | |
957 | struct gomp_device_descr *acc_dev = thr->dev; | |
958 | struct splay_tree_key_s cur_node; | |
959 | splay_tree_key n; | |
960 | struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async); | |
961 | ||
962 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
963 | return; | |
964 | ||
965 | gomp_mutex_lock (&acc_dev->lock); | |
966 | ||
967 | cur_node.host_start = (uintptr_t) hostaddr; | |
968 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
969 | n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
970 | ||
971 | if (n == NULL) | |
dc954154 JB |
972 | { |
973 | gomp_mutex_unlock (&acc_dev->lock); | |
974 | gomp_fatal ("struct not mapped for acc_detach"); | |
975 | } | |
5d5be7bf JB |
976 | |
977 | gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL); | |
978 | ||
979 | gomp_mutex_unlock (&acc_dev->lock); | |
980 | } | |
981 | ||
982 | void | |
983 | acc_detach (void **hostaddr) | |
984 | { | |
985 | goacc_detach_internal (hostaddr, acc_async_sync, false); | |
986 | } | |
987 | ||
/* Detach the pointer at HOSTADDR with the caller-supplied ASYNC argument,
   without the finalize flag.  */

void
acc_detach_async (void **hostaddr, int async)
{
  const bool finalize = false;

  goacc_detach_internal (hostaddr, async, finalize);
}
993 | ||
994 | void | |
995 | acc_detach_finalize (void **hostaddr) | |
996 | { | |
997 | goacc_detach_internal (hostaddr, acc_async_sync, true); | |
998 | } | |
999 | ||
/* Detach the pointer at HOSTADDR with the caller-supplied ASYNC argument and
   the finalize flag set.  */

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  const bool finalize = true;

  goacc_detach_internal (hostaddr, async, finalize);
}
1005 | ||
378da98f JB |
1006 | /* Some types of (pointer) variables use several consecutive mappings, which |
1007 | must be treated as a group for enter/exit data directives. This function | |
1008 | returns the last mapping in such a group (inclusive), or POS for singleton | |
1009 | mappings. */ | |
57963e39 | 1010 | |
378da98f | 1011 | static int |
8e7e71ff | 1012 | find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds) |
378da98f JB |
1013 | { |
1014 | unsigned char kind0 = kinds[pos] & 0xff; | |
8e7e71ff | 1015 | int first_pos = pos; |
57963e39 | 1016 | |
8e7e71ff | 1017 | switch (kind0) |
378da98f | 1018 | { |
8e7e71ff | 1019 | case GOMP_MAP_TO_PSET: |
8d2e5026 JB |
1020 | if (pos + 1 < mapnum |
1021 | && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH) | |
1022 | return pos + 1; | |
1023 | ||
1024 | while (pos + 1 < mapnum | |
1025 | && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) | |
8e7e71ff | 1026 | pos++; |
8d2e5026 JB |
1027 | /* We expect at least one GOMP_MAP_POINTER (if not a single |
1028 | GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET. */ | |
8e7e71ff JB |
1029 | assert (pos > first_pos); |
1030 | break; | |
1031 | ||
1032 | case GOMP_MAP_STRUCT: | |
1033 | pos += sizes[pos]; | |
1034 | break; | |
1035 | ||
1036 | case GOMP_MAP_POINTER: | |
1037 | case GOMP_MAP_ALWAYS_POINTER: | |
1038 | /* These mappings are only expected after some other mapping. If we | |
1039 | see one by itself, something has gone wrong. */ | |
1040 | gomp_fatal ("unexpected mapping"); | |
1041 | break; | |
1042 | ||
8d2e5026 JB |
1043 | case GOMP_MAP_ATTACH: |
1044 | break; | |
1045 | ||
8e7e71ff | 1046 | default: |
378da98f JB |
1047 | /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other |
1048 | mapping. */ | |
8e7e71ff JB |
1049 | if (pos + 1 < mapnum) |
1050 | { | |
1051 | unsigned char kind1 = kinds[pos + 1] & 0xff; | |
1052 | if (kind1 == GOMP_MAP_ALWAYS_POINTER) | |
1053 | return pos + 1; | |
1054 | } | |
378da98f | 1055 | |
8d2e5026 JB |
1056 | /* We can have a single GOMP_MAP_ATTACH mapping after a to/from |
1057 | mapping. */ | |
1058 | if (pos + 1 < mapnum | |
1059 | && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH) | |
1060 | return pos + 1; | |
1061 | ||
8e7e71ff | 1062 | /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from |
378da98f | 1063 | (etc.) mapping. */ |
8d2e5026 JB |
1064 | while (pos + 1 < mapnum |
1065 | && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) | |
8e7e71ff | 1066 | pos++; |
378da98f | 1067 | } |
57963e39 | 1068 | |
8e7e71ff | 1069 | return pos; |
378da98f JB |
1070 | } |
1071 | ||
1072 | /* Map variables for OpenACC "enter data". We can't just call | |
1073 | gomp_map_vars_async once, because individual mapped variables might have | |
1074 | "exit data" called for them at different times. */ | |
57963e39 | 1075 | |
57963e39 | 1076 | static void |
378da98f JB |
1077 | goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, |
1078 | void **hostaddrs, size_t *sizes, | |
1079 | unsigned short *kinds, goacc_aq aq) | |
41dbbb37 | 1080 | { |
6f5b4b64 JB |
1081 | gomp_mutex_lock (&acc_dev->lock); |
1082 | ||
378da98f JB |
1083 | for (size_t i = 0; i < mapnum; i++) |
1084 | { | |
6f5b4b64 JB |
1085 | splay_tree_key n; |
1086 | size_t group_last = find_group_last (i, mapnum, sizes, kinds); | |
1087 | bool struct_p = false; | |
1088 | size_t size, groupnum = (group_last - i) + 1; | |
1089 | ||
1090 | switch (kinds[i] & 0xff) | |
1091 | { | |
1092 | case GOMP_MAP_STRUCT: | |
1093 | { | |
1094 | size = (uintptr_t) hostaddrs[group_last] + sizes[group_last] | |
1095 | - (uintptr_t) hostaddrs[i]; | |
1096 | struct_p = true; | |
1097 | } | |
1098 | break; | |
1099 | ||
1100 | case GOMP_MAP_ATTACH: | |
1101 | size = sizeof (void *); | |
1102 | break; | |
1103 | ||
1104 | default: | |
1105 | size = sizes[i]; | |
1106 | } | |
1107 | ||
1108 | n = lookup_host (acc_dev, hostaddrs[i], size); | |
1109 | ||
1110 | if (n && struct_p) | |
1111 | { | |
1112 | for (size_t j = i + 1; j <= group_last; j++) | |
1113 | { | |
1114 | struct splay_tree_key_s cur_node; | |
1115 | cur_node.host_start = (uintptr_t) hostaddrs[j]; | |
1116 | cur_node.host_end = cur_node.host_start + sizes[j]; | |
1117 | splay_tree_key n2 | |
1118 | = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
1119 | if (!n2 | |
1120 | || n2->tgt != n->tgt | |
1121 | || n2->host_start - n->host_start | |
1122 | != n2->tgt_offset - n->tgt_offset) | |
1123 | { | |
1124 | gomp_mutex_unlock (&acc_dev->lock); | |
1125 | gomp_fatal ("Trying to map into device [%p..%p) structure " | |
1126 | "element when other mapped elements from the " | |
1127 | "same structure weren't mapped together with " | |
1128 | "it", (void *) cur_node.host_start, | |
1129 | (void *) cur_node.host_end); | |
1130 | } | |
1131 | } | |
1132 | /* This is a special case because we must increment the refcount by | |
1133 | the number of mapped struct elements, rather than by one. */ | |
1134 | if (n->refcount != REFCOUNT_INFINITY) | |
1135 | n->refcount += groupnum - 1; | |
1136 | n->dynamic_refcount += groupnum - 1; | |
1137 | } | |
1138 | else if (n && groupnum == 1) | |
1139 | { | |
1140 | void *h = hostaddrs[i]; | |
1141 | size_t s = sizes[i]; | |
1142 | ||
6f5b4b64 | 1143 | if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) |
bc4ed079 JB |
1144 | { |
1145 | gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, | |
1146 | (uintptr_t) h, s, NULL); | |
1147 | /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic | |
1148 | reference counts ('n->refcount', 'n->dynamic_refcount'). */ | |
1149 | } | |
1150 | else | |
1151 | goacc_map_var_existing (acc_dev, h, s, n); | |
6f5b4b64 JB |
1152 | } |
1153 | else if (n && groupnum > 1) | |
1154 | { | |
1155 | assert (n->refcount != REFCOUNT_INFINITY | |
1156 | && n->refcount != REFCOUNT_LINK); | |
1157 | ||
1158 | for (size_t j = i + 1; j <= group_last; j++) | |
1159 | if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH) | |
1160 | { | |
1161 | splay_tree_key m | |
1162 | = lookup_host (acc_dev, hostaddrs[j], sizeof (void *)); | |
1163 | gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m, | |
1164 | (uintptr_t) hostaddrs[j], sizes[j], NULL); | |
1165 | } | |
1166 | ||
1167 | bool processed = false; | |
1168 | ||
1169 | struct target_mem_desc *tgt = n->tgt; | |
1170 | for (size_t j = 0; j < tgt->list_count; j++) | |
1171 | if (tgt->list[j].key == n) | |
1172 | { | |
1173 | /* We are processing a group of mappings (e.g. | |
1174 | [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]). | |
1175 | Find the right group in the target_mem_desc's variable | |
1176 | list, and increment the refcounts for each item in that | |
1177 | group. */ | |
1178 | for (size_t k = 0; k < groupnum; k++) | |
bc4ed079 JB |
1179 | if (j + k < tgt->list_count |
1180 | && tgt->list[j + k].key | |
1181 | && !tgt->list[j + k].is_attach) | |
6f5b4b64 JB |
1182 | { |
1183 | tgt->list[j + k].key->refcount++; | |
1184 | tgt->list[j + k].key->dynamic_refcount++; | |
1185 | } | |
1186 | processed = true; | |
1187 | break; | |
1188 | } | |
e38fdba4 | 1189 | |
6f5b4b64 JB |
1190 | if (!processed) |
1191 | { | |
1192 | gomp_mutex_unlock (&acc_dev->lock); | |
1193 | gomp_fatal ("dynamic refcount incrementing failed for " | |
1194 | "pointer/pset"); | |
1195 | } | |
1196 | } | |
1197 | else if (hostaddrs[i]) | |
1198 | { | |
1199 | /* The data is not mapped already. Map it now, unless the first | |
1200 | member in the group has a NULL pointer (e.g. a non-present | |
1201 | optional parameter). */ | |
1202 | gomp_mutex_unlock (&acc_dev->lock); | |
1203 | ||
1204 | struct target_mem_desc *tgt | |
1205 | = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, | |
1206 | &sizes[i], &kinds[i], true, | |
9e628024 CLT |
1207 | (GOMP_MAP_VARS_OPENACC |
1208 | | GOMP_MAP_VARS_ENTER_DATA)); | |
6f5b4b64 JB |
1209 | assert (tgt); |
1210 | ||
1211 | gomp_mutex_lock (&acc_dev->lock); | |
1212 | ||
1213 | for (size_t j = 0; j < tgt->list_count; j++) | |
1214 | { | |
1215 | n = tgt->list[j].key; | |
bc4ed079 | 1216 | if (n && !tgt->list[j].is_attach) |
6f5b4b64 JB |
1217 | n->dynamic_refcount++; |
1218 | } | |
1219 | } | |
41dbbb37 | 1220 | |
378da98f | 1221 | i = group_last; |
e38fdba4 | 1222 | } |
6f5b4b64 JB |
1223 | |
1224 | gomp_mutex_unlock (&acc_dev->lock); | |
378da98f | 1225 | } |
41dbbb37 | 1226 | |
378da98f | 1227 | /* Unmap variables for OpenACC "exit data". */ |
41dbbb37 | 1228 | |
378da98f JB |
1229 | static void |
1230 | goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, | |
1231 | void **hostaddrs, size_t *sizes, | |
1232 | unsigned short *kinds, goacc_aq aq) | |
1233 | { | |
1234 | gomp_mutex_lock (&acc_dev->lock); | |
41dbbb37 | 1235 | |
8e7e71ff JB |
1236 | /* Handle "detach" before copyback/deletion of mapped data. */ |
1237 | for (size_t i = 0; i < mapnum; ++i) | |
1238 | { | |
1239 | unsigned char kind = kinds[i] & 0xff; | |
1240 | bool finalize = false; | |
1241 | switch (kind) | |
1242 | { | |
1243 | case GOMP_MAP_FORCE_DETACH: | |
1244 | finalize = true; | |
1245 | /* Fallthrough. */ | |
1246 | ||
1247 | case GOMP_MAP_DETACH: | |
1248 | { | |
1249 | struct splay_tree_key_s cur_node; | |
1250 | uintptr_t hostaddr = (uintptr_t) hostaddrs[i]; | |
1251 | cur_node.host_start = hostaddr; | |
1252 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
1253 | splay_tree_key n | |
1254 | = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
1255 | ||
1256 | if (n == NULL) | |
2e24d457 TS |
1257 | { |
1258 | gomp_mutex_unlock (&acc_dev->lock); | |
1259 | gomp_fatal ("struct not mapped for detach operation"); | |
1260 | } | |
8e7e71ff JB |
1261 | |
1262 | gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL); | |
1263 | } | |
1264 | break; | |
1265 | default: | |
1266 | ; | |
1267 | } | |
1268 | } | |
1269 | ||
378da98f | 1270 | for (size_t i = 0; i < mapnum; ++i) |
829c6349 | 1271 | { |
378da98f | 1272 | unsigned char kind = kinds[i] & 0xff; |
41dbbb37 | 1273 | |
378da98f | 1274 | switch (kind) |
ba40277f | 1275 | { |
378da98f JB |
1276 | case GOMP_MAP_FROM: |
1277 | case GOMP_MAP_FORCE_FROM: | |
378da98f JB |
1278 | case GOMP_MAP_TO_PSET: |
1279 | case GOMP_MAP_POINTER: | |
1280 | case GOMP_MAP_DELETE: | |
1281 | case GOMP_MAP_RELEASE: | |
1282 | { | |
1283 | struct splay_tree_key_s cur_node; | |
1284 | size_t size; | |
bc4ed079 | 1285 | if (kind == GOMP_MAP_POINTER) |
378da98f JB |
1286 | size = sizeof (void *); |
1287 | else | |
1288 | size = sizes[i]; | |
1289 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1290 | cur_node.host_end = cur_node.host_start + size; | |
1291 | splay_tree_key n | |
1292 | = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
1293 | ||
1294 | if (n == NULL) | |
1295 | continue; | |
1296 | ||
6f5b4b64 | 1297 | goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq); |
378da98f JB |
1298 | } |
1299 | break; | |
8e7e71ff JB |
1300 | |
1301 | case GOMP_MAP_STRUCT: | |
1809628f | 1302 | /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing |
1afc4672 TS |
1303 | for all its entries. This special handling exists for GCC 10.1 |
1304 | compatibility; afterwards, we're not generating these no-op | |
1305 | 'GOMP_MAP_STRUCT's anymore. */ | |
8e7e71ff JB |
1306 | break; |
1307 | ||
bc4ed079 JB |
1308 | case GOMP_MAP_DETACH: |
1309 | case GOMP_MAP_FORCE_DETACH: | |
1310 | /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic | |
1311 | reference counts ('n->refcount', 'n->dynamic_refcount'). */ | |
1312 | break; | |
1313 | ||
378da98f JB |
1314 | default: |
1315 | gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", | |
1316 | kind); | |
1f4c5b9b | 1317 | } |
829c6349 | 1318 | } |
41dbbb37 | 1319 | |
829c6349 | 1320 | gomp_mutex_unlock (&acc_dev->lock); |
57963e39 TS |
1321 | } |
1322 | ||
1323 | void | |
1324 | GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, | |
1325 | size_t *sizes, unsigned short *kinds, int async, | |
1326 | int num_waits, ...) | |
1327 | { | |
1328 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); | |
1329 | ||
1330 | struct goacc_thread *thr; | |
1331 | struct gomp_device_descr *acc_dev; | |
1332 | bool data_enter = false; | |
1333 | size_t i; | |
1334 | ||
1335 | goacc_lazy_initialize (); | |
1336 | ||
1337 | thr = goacc_thread (); | |
1338 | acc_dev = thr->dev; | |
1339 | ||
57963e39 TS |
1340 | /* Determine if this is an "acc enter data". */ |
1341 | for (i = 0; i < mapnum; ++i) | |
1342 | { | |
1343 | unsigned char kind = kinds[i] & 0xff; | |
1344 | ||
8e7e71ff JB |
1345 | if (kind == GOMP_MAP_POINTER |
1346 | || kind == GOMP_MAP_TO_PSET | |
1347 | || kind == GOMP_MAP_STRUCT) | |
57963e39 TS |
1348 | continue; |
1349 | ||
1350 | if (kind == GOMP_MAP_FORCE_ALLOC | |
1351 | || kind == GOMP_MAP_FORCE_PRESENT | |
8e7e71ff | 1352 | || kind == GOMP_MAP_ATTACH |
57963e39 TS |
1353 | || kind == GOMP_MAP_FORCE_TO |
1354 | || kind == GOMP_MAP_TO | |
1355 | || kind == GOMP_MAP_ALLOC) | |
1356 | { | |
1357 | data_enter = true; | |
1358 | break; | |
1359 | } | |
1360 | ||
1361 | if (kind == GOMP_MAP_RELEASE | |
1362 | || kind == GOMP_MAP_DELETE | |
8e7e71ff JB |
1363 | || kind == GOMP_MAP_DETACH |
1364 | || kind == GOMP_MAP_FORCE_DETACH | |
57963e39 TS |
1365 | || kind == GOMP_MAP_FROM |
1366 | || kind == GOMP_MAP_FORCE_FROM) | |
1367 | break; | |
1368 | ||
1369 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
1370 | kind); | |
1371 | } | |
1372 | ||
1373 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
1374 | ||
1375 | acc_prof_info prof_info; | |
1376 | if (profiling_p) | |
1377 | { | |
1378 | thr->prof_info = &prof_info; | |
1379 | ||
1380 | prof_info.event_type | |
1381 | = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start; | |
1382 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
1383 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
1384 | prof_info.device_type = acc_device_type (acc_dev->type); | |
1385 | prof_info.device_number = acc_dev->target_id; | |
1386 | prof_info.thread_id = -1; | |
1387 | prof_info.async = async; | |
1388 | prof_info.async_queue = prof_info.async; | |
1389 | prof_info.src_file = NULL; | |
1390 | prof_info.func_name = NULL; | |
1391 | prof_info.line_no = -1; | |
1392 | prof_info.end_line_no = -1; | |
1393 | prof_info.func_line_no = -1; | |
1394 | prof_info.func_end_line_no = -1; | |
1395 | } | |
1396 | acc_event_info enter_exit_data_event_info; | |
1397 | if (profiling_p) | |
1398 | { | |
1399 | enter_exit_data_event_info.other_event.event_type | |
1400 | = prof_info.event_type; | |
1401 | enter_exit_data_event_info.other_event.valid_bytes | |
1402 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
1403 | enter_exit_data_event_info.other_event.parent_construct | |
1404 | = data_enter ? acc_construct_enter_data : acc_construct_exit_data; | |
1405 | enter_exit_data_event_info.other_event.implicit = 0; | |
1406 | enter_exit_data_event_info.other_event.tool_info = NULL; | |
1407 | } | |
1408 | acc_api_info api_info; | |
1409 | if (profiling_p) | |
1410 | { | |
1411 | thr->api_info = &api_info; | |
1412 | ||
1413 | api_info.device_api = acc_device_api_none; | |
1414 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
1415 | api_info.device_type = prof_info.device_type; | |
1416 | api_info.vendor = -1; | |
1417 | api_info.device_handle = NULL; | |
1418 | api_info.context_handle = NULL; | |
1419 | api_info.async_handle = NULL; | |
1420 | } | |
1421 | ||
1422 | if (profiling_p) | |
1423 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
1424 | &api_info); | |
1425 | ||
1426 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
1427 | || (flags & GOACC_FLAG_HOST_FALLBACK)) | |
1428 | { | |
1429 | prof_info.device_type = acc_device_host; | |
1430 | api_info.device_type = prof_info.device_type; | |
1431 | ||
1432 | goto out_prof; | |
1433 | } | |
1434 | ||
1435 | if (num_waits) | |
1436 | { | |
1437 | va_list ap; | |
1438 | ||
1439 | va_start (ap, num_waits); | |
1440 | goacc_wait (async, num_waits, &ap); | |
1441 | va_end (ap); | |
1442 | } | |
1443 | ||
378da98f | 1444 | goacc_aq aq = get_goacc_asyncqueue (async); |
57963e39 TS |
1445 | |
1446 | if (data_enter) | |
378da98f | 1447 | goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); |
57963e39 | 1448 | else |
378da98f | 1449 | goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); |
57963e39 TS |
1450 | |
1451 | out_prof: | |
1452 | if (profiling_p) | |
1453 | { | |
1454 | prof_info.event_type | |
1455 | = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end; | |
1456 | enter_exit_data_event_info.other_event.event_type = prof_info.event_type; | |
1457 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
1458 | &api_info); | |
1459 | ||
1460 | thr->prof_info = NULL; | |
1461 | thr->api_info = NULL; | |
1462 | } | |
1463 | } |