]>
Commit | Line | Data |
---|---|---|
7adcbafe | 1 | /* Copyright (C) 2007-2022 Free Software Foundation, Inc. |
a68ab351 JJ |
2 | Contributed by Richard Henderson <rth@redhat.com>. |
3 | ||
f1f3453e TS |
4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
a68ab351 JJ |
6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
748086b7 JJ |
8 | under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
a68ab351 JJ |
11 | |
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
748086b7 | 14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
a68ab351 JJ |
15 | more details. |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
a68ab351 | 25 | |
93d90219 | 26 | /* This file handles the maintenance of tasks in response to task |
a68ab351 JJ |
27 | creation and termination. */ |
28 | ||
29 | #include "libgomp.h" | |
30 | #include <stdlib.h> | |
31 | #include <string.h> | |
d656bfda | 32 | #include <assert.h> |
d9a6bd32 | 33 | #include "gomp-constants.h" |
a68ab351 | 34 | |
acf0174b JJ |
35 | typedef struct gomp_task_depend_entry *hash_entry_type; |
36 | ||
/* Allocation callback for hashtab.h: route hash-table memory through
   libgomp's failure-checking allocator.  */

static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
42 | ||
/* Deallocation callback for hashtab.h; releases memory obtained from
   htab_alloc.  */

static inline void
htab_free (void *ptr)
{
  free (ptr);
}
48 | ||
49 | #include "hashtab.h" | |
50 | ||
51 | static inline hashval_t | |
52 | htab_hash (hash_entry_type element) | |
53 | { | |
54 | return hash_pointer (element->addr); | |
55 | } | |
56 | ||
57 | static inline bool | |
58 | htab_eq (hash_entry_type x, hash_entry_type y) | |
59 | { | |
60 | return x->addr == y->addr; | |
61 | } | |
a68ab351 JJ |
62 | |
63 | /* Create a new task data structure. */ | |
64 | ||
65 | void | |
66 | gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, | |
67 | struct gomp_task_icv *prev_icv) | |
68 | { | |
e4606348 JJ |
69 | /* It would seem that using memset here would be a win, but it turns |
70 | out that partially filling gomp_task allows us to keep the | |
71 | overhead of task creation low. In the nqueens-1.c test, for a | |
72 | sufficiently large N, we drop the overhead from 5-6% to 1%. | |
73 | ||
74 | Note, the nqueens-1.c test in serial mode is a good test to | |
75 | benchmark the overhead of creating tasks as there are millions of | |
76 | tiny tasks created that all run undeferred. */ | |
a68ab351 | 77 | task->parent = parent_task; |
d656bfda KCY |
78 | priority_queue_init (&task->children_queue); |
79 | task->taskgroup = NULL; | |
80 | task->dependers = NULL; | |
81 | task->depend_hash = NULL; | |
82 | task->taskwait = NULL; | |
7f78783d | 83 | task->depend_all_memory = NULL; |
d656bfda KCY |
84 | task->depend_count = 0; |
85 | task->completion_sem = NULL; | |
86 | task->deferred_p = false; | |
a68ab351 JJ |
87 | task->icv = *prev_icv; |
88 | task->kind = GOMP_TASK_IMPLICIT; | |
5f836cbb | 89 | task->in_tied_task = false; |
20906c66 | 90 | task->final_task = false; |
acf0174b | 91 | task->copy_ctors_done = false; |
0494285a | 92 | task->parent_depends_on = false; |
a68ab351 JJ |
93 | } |
94 | ||
95 | /* Clean up a task, after completing it. */ | |
96 | ||
97 | void | |
98 | gomp_end_task (void) | |
99 | { | |
100 | struct gomp_thread *thr = gomp_thread (); | |
101 | struct gomp_task *task = thr->task; | |
102 | ||
103 | gomp_finish_task (task); | |
104 | thr->task = task->parent; | |
105 | } | |
106 | ||
e4606348 | 107 | /* Clear the parent field of every task in LIST. */ |
d9a6bd32 | 108 | |
a68ab351 | 109 | static inline void |
e4606348 | 110 | gomp_clear_parent_in_list (struct priority_list *list) |
a68ab351 | 111 | { |
e4606348 JJ |
112 | struct priority_node *p = list->tasks; |
113 | if (p) | |
a68ab351 JJ |
114 | do |
115 | { | |
e4606348 JJ |
116 | priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; |
117 | p = p->next; | |
a68ab351 | 118 | } |
e4606348 | 119 | while (p != list->tasks); |
a68ab351 JJ |
120 | } |
121 | ||
e4606348 JJ |
122 | /* Splay tree version of gomp_clear_parent_in_list. |
123 | ||
124 | Clear the parent field of every task in NODE within SP, and free | |
125 | the node when done. */ | |
126 | ||
127 | static void | |
128 | gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) | |
129 | { | |
130 | if (!node) | |
131 | return; | |
132 | prio_splay_tree_node left = node->left, right = node->right; | |
133 | gomp_clear_parent_in_list (&node->key.l); | |
134 | #if _LIBGOMP_CHECKING_ | |
135 | memset (node, 0xaf, sizeof (*node)); | |
136 | #endif | |
137 | /* No need to remove the node from the tree. We're nuking | |
138 | everything, so just free the nodes and our caller can clear the | |
139 | entire splay tree. */ | |
140 | free (node); | |
141 | gomp_clear_parent_in_tree (sp, left); | |
142 | gomp_clear_parent_in_tree (sp, right); | |
143 | } | |
144 | ||
145 | /* Clear the parent field of every task in Q and remove every task | |
146 | from Q. */ | |
147 | ||
148 | static inline void | |
149 | gomp_clear_parent (struct priority_queue *q) | |
150 | { | |
151 | if (priority_queue_multi_p (q)) | |
152 | { | |
153 | gomp_clear_parent_in_tree (&q->t, q->t.root); | |
154 | /* All the nodes have been cleared in gomp_clear_parent_in_tree. | |
155 | No need to remove anything. We can just nuke everything. */ | |
156 | q->t.root = NULL; | |
157 | } | |
158 | else | |
159 | gomp_clear_parent_in_list (&q->l); | |
160 | } | |
161 | ||
/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task: either the short encoding (depend[0] != 0: total count,
   out/inout count, then addresses) or the long encoding (depend[0] == 0:
   counts per kind in depend[1..4], then addresses and omp_depend_t
   objects).  On return, task->depend[], task->depend_count and
   task->num_dependees are filled in, and this task has been appended to
   the dependers vectors of every earlier sibling it must wait for.
   Caller is expected to hold the team's task lock — TODO confirm against
   callers; all mutation here of PARENT's hash table is unsynchronized.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
                         void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t i;
  hash_entry_type ent;
  /* Set when any out/inout dependence names the NULL address, which
     encodes depend(inout: omp_all_memory).  */
  bool all_memory = false;

  if (ndepend)
    {
      /* depend[0] is total # */
      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
      /* ndepend - nout is # of in: */
      for (i = 0; i < ndepend; i++)
        {
          task->depend[i].addr = depend[2 + i];
          task->depend[i].is_in = i >= nout;
          /* NULL address in the out section == omp_all_memory.  */
          all_memory |= i < nout && depend[2 + i] == NULL;
        }
    }
  else
    {
      ndepend = (uintptr_t) depend[1]; /* total # */
      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
      /* For now we treat mutexinoutset like out, which is compliant, but
         inefficient.  */
      size_t nin = (uintptr_t) depend[4]; /* # of in: */
      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
      size_t normal = nout + nmutexinoutset + nin;
      size_t n = 0;
      bool has_in = false;
      /* First pass over the depobj entries: record all out-like ones so
         that out/inout come first in task->depend[], matching the
         ordering assumption documented below ("inout/out come first").  */
      for (i = normal; i < ndepend; i++)
        {
          void **d = (void **) (uintptr_t) depend[5 + i];
          switch ((uintptr_t) d[1])
            {
            case GOMP_DEPEND_OUT:
            case GOMP_DEPEND_INOUT:
              all_memory |= d[0] == NULL;
              break;
            case GOMP_DEPEND_MUTEXINOUTSET:
              break;
            case GOMP_DEPEND_IN:
            case GOMP_DEPEND_INOUTSET:
              has_in = true;
              continue;
            default:
              gomp_fatal ("unknown omp_depend_t dependence type %d",
                          (int) (uintptr_t) d[1]);
            }
          task->depend[n].addr = d[0];
          task->depend[n++].is_in = 0;
        }
      /* Then the directly-listed addresses, out-like before in.  */
      for (i = 0; i < normal; i++)
        {
          task->depend[n].addr = depend[5 + i];
          task->depend[n++].is_in = i >= nout + nmutexinoutset;
        }
      /* Finally the in/inoutset depobj entries.  is_in is 1 for in:,
         2 for inoutset:, so same-kind entries compare equal below.  */
      if (has_in)
        for (i = normal; i < ndepend; i++)
          {
            void **d = (void **) (uintptr_t) depend[5 + i];
            if ((uintptr_t) d[1] != GOMP_DEPEND_IN
                && (uintptr_t) d[1] != GOMP_DEPEND_INOUTSET)
              continue;
            task->depend[n].addr = d[0];
            task->depend[n++].is_in
              = 1 + ((uintptr_t) d[1] == GOMP_DEPEND_INOUTSET);
          }
    }
  task->num_dependees = 0;
  /* If an earlier sibling used depend(inout: omp_all_memory), this task
     must wait for it regardless of its own addresses.  */
  if (__builtin_expect (parent->depend_all_memory && ndepend, false))
    {
      struct gomp_task *tsk = parent->depend_all_memory;
      if (tsk->dependers == NULL)
        {
          tsk->dependers
            = gomp_malloc (sizeof (struct gomp_dependers_vec)
                           + 6 * sizeof (struct gomp_task *));
          tsk->dependers->n_elem = 1;
          tsk->dependers->allocated = 6;
          tsk->dependers->elem[0] = task;
        }
      else
        {
          if (tsk->dependers->n_elem == tsk->dependers->allocated)
            {
              tsk->dependers->allocated
                = tsk->dependers->allocated * 2 + 2;
              tsk->dependers
                = gomp_realloc (tsk->dependers,
                                sizeof (struct gomp_dependers_vec)
                                + (tsk->dependers->allocated
                                   * sizeof (struct gomp_task *)));
            }
          tsk->dependers->elem[tsk->dependers->n_elem++] = task;
        }
      task->num_dependees++;
    }
  if (__builtin_expect (all_memory, false))
    {
      /* A task with depend(inout: omp_all_memory) depends on all previous
         sibling tasks which have any dependencies and all later sibling
         tasks which have any dependencies depend on it.  */
      task->depend_count = 1;
      task->depend[0].addr = NULL;
      task->depend[0].next = NULL;
      task->depend[0].prev = NULL;
      task->depend[0].task = task;
      task->depend[0].redundant = true;
      task->depend[0].redundant_out = false;
      if (parent->depend_hash)
        {
          /* Inlined htab_traverse + htab_clear.  All newer siblings can
             just depend on this task.  Add dependencies on all previous
             sibling tasks with dependencies and make them redundant and
             clear the hash table.  */
          hash_entry_type *slot = &parent->depend_hash->entries[0];
          hash_entry_type *end = slot + htab_size (parent->depend_hash);
          for (; slot != end; ++slot)
            {
              if (*slot == HTAB_EMPTY_ENTRY)
                continue;
              if (*slot != HTAB_DELETED_ENTRY)
                {
                  for (ent = *slot; ent; ent = ent->next)
                    {
                      struct gomp_task *tsk = ent->task;

                      /* Everything from the first redundant_out entry on
                         is redundant_out too; handled by the loop below.  */
                      if (ent->redundant_out)
                        break;

                      ent->redundant = true;
                      if (tsk->dependers == NULL)
                        {
                          tsk->dependers
                            = gomp_malloc (sizeof (struct gomp_dependers_vec)
                                           + 6 * sizeof (struct gomp_task *));
                          tsk->dependers->n_elem = 1;
                          tsk->dependers->allocated = 6;
                          tsk->dependers->elem[0] = task;
                          task->num_dependees++;
                          continue;
                        }
                      /* We already have some other dependency on tsk from
                         earlier depend clause.  */
                      else if (tsk->dependers->n_elem
                               && (tsk->dependers->elem[tsk->dependers->n_elem
                                                        - 1] == task))
                        continue;
                      else if (tsk->dependers->n_elem
                               == tsk->dependers->allocated)
                        {
                          tsk->dependers->allocated
                            = tsk->dependers->allocated * 2 + 2;
                          tsk->dependers
                            = gomp_realloc (tsk->dependers,
                                            sizeof (struct gomp_dependers_vec)
                                            + (tsk->dependers->allocated
                                               * sizeof (struct gomp_task *)));
                        }
                      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
                      task->num_dependees++;
                    }
                  /* Mark the redundant_out tail redundant as well; the
                     table is about to be cleared.  */
                  while (ent)
                    {
                      ent->redundant = true;
                      ent = ent->next;
                    }
                }
              *slot = HTAB_EMPTY_ENTRY;
            }
          if (htab_size (parent->depend_hash) <= 32)
            {
              parent->depend_hash->n_elements = 0;
              parent->depend_hash->n_deleted = 0;
            }
          else
            {
              /* Shrink the hash table if it would be too large.
                 We don't want to walk e.g. megabytes of empty hash
                 table for every depend(inout: omp_all_memory).  */
              free (parent->depend_hash);
              parent->depend_hash = htab_create (12);
            }
        }
      parent->depend_all_memory = task;
      return;
    }
  task->depend_count = ndepend;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
                                              &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
        {
          /* If multiple depends on the same task are the same, all but the
             first one are redundant.  As inout/out come first, if any of them
             is inout/out, it will win, which is the right semantics.  */
          if ((*slot)->task == task)
            {
              task->depend[i].redundant = true;
              continue;
            }
          for (ent = *slot; ent; ent = ent->next)
            {
              if (ent->redundant_out)
                break;

              last = ent;

              /* depend(in:...) doesn't depend on earlier depend(in:...).
                 Similarly depend(inoutset:...) doesn't depend on earlier
                 depend(inoutset:...).  */
              if (task->depend[i].is_in && task->depend[i].is_in == ent->is_in)
                continue;

              if (!ent->is_in)
                out = ent;

              struct gomp_task *tsk = ent->task;
              if (tsk->dependers == NULL)
                {
                  tsk->dependers
                    = gomp_malloc (sizeof (struct gomp_dependers_vec)
                                   + 6 * sizeof (struct gomp_task *));
                  tsk->dependers->n_elem = 1;
                  tsk->dependers->allocated = 6;
                  tsk->dependers->elem[0] = task;
                  task->num_dependees++;
                  continue;
                }
              /* We already have some other dependency on tsk from earlier
                 depend clause.  */
              else if (tsk->dependers->n_elem
                       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
                           == task))
                continue;
              else if (tsk->dependers->n_elem == tsk->dependers->allocated)
                {
                  tsk->dependers->allocated
                    = tsk->dependers->allocated * 2 + 2;
                  tsk->dependers
                    = gomp_realloc (tsk->dependers,
                                    sizeof (struct gomp_dependers_vec)
                                    + (tsk->dependers->allocated
                                       * sizeof (struct gomp_task *)));
                }
              tsk->dependers->elem[tsk->dependers->n_elem++] = task;
              task->num_dependees++;
            }
          task->depend[i].next = *slot;
          (*slot)->prev = &task->depend[i];
        }
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
         address in the hash table chain for the purpose of creation of
         deferred tasks, because each out depends on all earlier outs, thus it
         is enough to record just the last depend({,in}out:).  For depend(in:),
         we need to keep all of the previous ones not terminated yet, because
         a later depend({,in}out:) might need to depend on all of them.  So, if
         the new task's clause is depend({,in}out:), we know there is at most
         one other depend({,in}out:) clause in the list (out).  For
         non-deferred tasks we want to see all outs, so they are moved to the
         end of the chain, after first redundant_out entry all following
         entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
        {
          if (out != last)
            {
              /* Unlink OUT and re-link it right after LAST, keeping the
                 redundant_out suffix contiguous at the chain's tail.  */
              out->next->prev = out->prev;
              out->prev->next = out->next;
              out->next = last->next;
              out->prev = last;
              last->next = out;
              if (out->next)
                out->next->prev = out;
            }
          out->redundant_out = true;
        }
    }
}
0494285a | 462 | |
b4383691 JJ |
/* Body of an empty task, e.g. for taskwait nowait depend.  GOMP_task
   recognizes such tasks by comparing fn against this function.  */

static void
empty_task (void *data __attribute__((unused)))
{
}
469 | ||
470 | static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); | |
471 | static inline size_t gomp_task_run_post_handle_depend (struct gomp_task *, | |
472 | struct gomp_team *); | |
473 | ||
a68ab351 JJ |
/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     if depend[0] is non-zero, then:
        depend[0]: number of depend elements.
        depend[1]: number of depend elements of type "out/inout".
        depend[2..N+1]: address of [1..N]th depend element.
     otherwise, when depend[0] is zero, then:
        depend[1]: number of depend elements.
        depend[2]: number of depend elements of type "out/inout".
        depend[3]: number of depend elements of type "mutexinoutset".
        depend[4]: number of depend elements of type "in".
        depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
        depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
        omp_depend_t objects.  */

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
           long arg_size, long arg_align, bool if_clause, unsigned flags,
           void **depend, int priority_arg, void *detach)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  int priority = 0;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          /* Workshare taskgroups wrap an outer taskgroup; cancellation of
             the outer one cancels us too.  */
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  /* Clamp the requested priority to the max-task-priority ICV.  */
  if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
    {
      priority = priority_arg;
      if (priority > gomp_max_task_priority_var)
        priority = gomp_max_task_priority_var;
    }

  /* Undeferred path: no if-clause, no team (orphaned), final context, or
     too many tasks already queued — run FN right here and now.  */
  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;
      gomp_sem_t completion_sem;

      /* If there are depend clauses and earlier deferred sibling tasks
         with depend clauses, check if there isn't a dependency.  If there
         is, we need to wait for them.  There is no need to handle
         depend clauses for non-deferred tasks other than this, because
         the parent task is suspended until the child task finishes and thus
         it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
          && thr->task && thr->task->depend_hash)
        gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
                        || (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;

      /* Detachable task: publish the on-stack task as the event handle
         and wait on completion_sem below until omp_fulfill_event fires.  */
      if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
        {
          gomp_sem_init (&completion_sem, 0);
          task.completion_sem = &completion_sem;
          *(void **) detach = &task;
          if (data)
            *(void **) data = &task;

          gomp_debug (0, "Thread %d: new event: %p\n",
                      thr->ts.team_id, &task);
        }

      if (thr->task)
        {
          task.in_tied_task = thr->task->in_tied_task;
          task.taskgroup = thr->task->taskgroup;
        }
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
          /* Over-allocate so the argument copy can be aligned to
             arg_align inside buf.  */
          char buf[arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          cpyfn (arg, data);
          fn (arg);
        }
      else
        fn (data);

      if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
        {
          gomp_sem_wait (&completion_sem);
          gomp_sem_destroy (&completion_sem);
        }

      /* Access to "children" is normally done inside a task_lock
         mutex region, but the only way this particular task.children
         can be set is if this thread's task work function (fn)
         creates children.  So since the setter is *this* thread, we
         need no barriers here when testing for non-NULL.  We can have
         task.children set by the current thread then changed by a
         child thread, but seeing a stale non-NULL value is not a
         problem.  Once past the task_lock acquisition, this thread
         will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
        {
          gomp_mutex_lock (&team->task_lock);
          gomp_clear_parent (&task.children_queue);
          gomp_mutex_unlock (&team->task_lock);
        }
      gomp_end_task ();
    }
  else
    {
      /* Deferred path: heap-allocate the task (plus its dependence array
         and a copy of the argument block) and queue it.  */
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
        depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
                       * sizeof (struct gomp_task_depend_entry));
      /* Single allocation: task struct, then depend entries, then the
         (aligned) argument copy.  */
      task = gomp_malloc (sizeof (*task) + depend_size
                          + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
                      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      task->deferred_p = true;
      if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
        {
          task->detach_team = team;

          *(void **) detach = task;
          if (data)
            *(void **) data = task;

          gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
        }
      /* Temporarily make the new task current while cpyfn runs, so any
         constructors see the right task context.  */
      thr->task = task;
      if (cpyfn)
        {
          cpyfn (arg, data);
          task->copy_ctors_done = true;
        }
      else
        memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  Tasks whose copy-constructors already ran must still be
         executed (copy_ctors_done check).  */
      if (__builtin_expect (gomp_cancel_var, 0)
          && !task->copy_ctors_done)
        {
          if (gomp_team_barrier_cancelled (&team->barrier))
            {
            do_cancel:
              gomp_mutex_unlock (&team->task_lock);
              gomp_finish_task (task);
              free (task);
              return;
            }
          if (taskgroup)
            {
              if (taskgroup->cancelled)
                goto do_cancel;
              if (taskgroup->workshare
                  && taskgroup->prev
                  && taskgroup->prev->cancelled)
                goto do_cancel;
            }
        }
      if (taskgroup)
        taskgroup->num_children++;
      if (depend_size)
        {
          gomp_task_handle_depend (task, parent, depend);
          if (task->num_dependees)
            {
              /* Tasks that depend on other tasks are not put into the
                 various waiting queues, so we are done for now.  Said
                 tasks are instead put into the queues via
                 gomp_task_run_post_handle_dependers() after their
                 dependencies have been satisfied.  After which, they
                 can be picked up by the various scheduling
                 points.  */
              gomp_mutex_unlock (&team->task_lock);
              return;
            }
          /* Check for taskwait nowait depend which doesn't need to wait for
             anything.  */
          if (__builtin_expect (fn == empty_task, 0))
            {
              if (taskgroup)
                taskgroup->num_children--;
              gomp_task_run_post_handle_depend_hash (task);
              gomp_mutex_unlock (&team->task_lock);
              gomp_finish_task (task);
              free (task);
              return;
            }
        }

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                             task, priority,
                             PRIORITY_INSERT_BEGIN,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      if (taskgroup)
        priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                               task, priority,
                               PRIORITY_INSERT_BEGIN,
                               /*adjust_parent_depends_on=*/false,
                               task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
                             task, priority,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake a thread only if someone could actually be idle.  */
      do_wake = team->task_running_count + !parent->in_tied_task
                < team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, 1);
    }
}
737 | ||
6c420193 | 738 | ialias (GOMP_task) |
d9a6bd32 JJ |
739 | ialias (GOMP_taskgroup_start) |
740 | ialias (GOMP_taskgroup_end) | |
28567c40 | 741 | ialias (GOMP_taskgroup_reduction_register) |
d9a6bd32 JJ |
742 | |
743 | #define TYPE long | |
744 | #define UTYPE unsigned long | |
745 | #define TYPE_is_long 1 | |
746 | #include "taskloop.c" | |
747 | #undef TYPE | |
748 | #undef UTYPE | |
749 | #undef TYPE_is_long | |
750 | ||
751 | #define TYPE unsigned long long | |
752 | #define UTYPE TYPE | |
753 | #define GOMP_taskloop GOMP_taskloop_ull | |
754 | #include "taskloop.c" | |
755 | #undef TYPE | |
756 | #undef UTYPE | |
757 | #undef GOMP_taskloop | |
758 | ||
e4606348 JJ |
759 | static void inline |
760 | priority_queue_move_task_first (enum priority_queue_type type, | |
761 | struct priority_queue *head, | |
762 | struct gomp_task *task) | |
763 | { | |
764 | #if _LIBGOMP_CHECKING_ | |
765 | if (!priority_queue_task_in_queue_p (type, head, task)) | |
766 | gomp_fatal ("Attempt to move first missing task %p", task); | |
767 | #endif | |
768 | struct priority_list *list; | |
769 | if (priority_queue_multi_p (head)) | |
770 | { | |
771 | list = priority_queue_lookup_priority (head, task->priority); | |
772 | #if _LIBGOMP_CHECKING_ | |
773 | if (!list) | |
774 | gomp_fatal ("Unable to find priority %d", task->priority); | |
775 | #endif | |
776 | } | |
777 | else | |
778 | list = &head->l; | |
779 | priority_list_remove (list, task_to_priority_node (type, task), 0); | |
780 | priority_list_insert (type, list, task, task->priority, | |
781 | PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, | |
782 | task->parent_depends_on); | |
783 | } | |
784 | ||
/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.

   Requeues TASK (whose asynchronous device phase finished) as an
   ordinary WAITING task on TEAM's queues and wakes any waiters.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  /* Bump the task to the front of its parent's and taskgroup's queues
     so the unmap phase is scheduled promptly.  */
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
                                    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
                         PRIORITY_INSERT_BEGIN, false,
                         task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
        {
          /* One more task has had its dependencies met.
             Inform any waiters.  */
          parent->taskwait->in_taskwait = false;
          gomp_sem_post (&parent->taskwait->taskwait_sem);
        }
      else if (parent->taskwait->in_depend_wait)
        {
          /* One more task has had its dependencies met.
             Inform any waiters.  */
          parent->taskwait->in_depend_wait = false;
          gomp_sem_post (&parent->taskwait->taskwait_sem);
        }
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
         Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}
841 | ||
842 | /* Signal that a target task TTASK has completed the asynchronously | |
843 | running phase and should be requeued as a task to handle the | |
844 | variable unmapping. */ | |
d9a6bd32 JJ |
845 | |
846 | void | |
e4606348 JJ |
847 | GOMP_PLUGIN_target_task_completion (void *data) |
848 | { | |
849 | struct gomp_target_task *ttask = (struct gomp_target_task *) data; | |
850 | struct gomp_task *task = ttask->task; | |
851 | struct gomp_team *team = ttask->team; | |
852 | ||
853 | gomp_mutex_lock (&team->task_lock); | |
854 | if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) | |
855 | { | |
856 | ttask->state = GOMP_TARGET_TASK_FINISHED; | |
857 | gomp_mutex_unlock (&team->task_lock); | |
cb11f3cf | 858 | return; |
e4606348 JJ |
859 | } |
860 | ttask->state = GOMP_TARGET_TASK_FINISHED; | |
861 | gomp_target_task_completion (team, task); | |
862 | gomp_mutex_unlock (&team->task_lock); | |
863 | } | |
864 | ||
/* Called for nowait target tasks.  Allocates a struct gomp_task and an
   embedded struct gomp_target_task in one block (together with copies
   of the depend entries, ARGS, HOSTADDRS, SIZES, KINDS and any
   GOMP_MAP_FIRSTPRIVATE payload), then either queues it, runs its
   mapping phase immediately (async-capable devices), or cancels it.

   Returns true when the construct has been fully handled here; returns
   false only for STATE == GOMP_TARGET_TASK_DATA with no outstanding
   dependencies — presumably so the caller performs the data mapping
   inline; confirm at call sites.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return true;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return true;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return true;
	}
    }

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;
  uintptr_t args_cnt = 0;

  if (depend != NULL)
    {
      /* Number of depend entries: depend[0] if non-zero, otherwise
	 depend[1] — TODO confirm this matches the encoding consumed by
	 gomp_task_handle_depend.  */
      depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
	 firstprivate on the target task.  Compute the aligned size of
	 the firstprivate payload appended to the allocation below.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
      if (args)
	{
	  /* Count the ARGS entries including the terminating NULL;
	     GOMP_TARGET_ARG_SUBSEQUENT_PARAM ids consume one extra
	     slot.  */
	  void **cargs = args;
	  while (*cargs)
	    {
	      intptr_t id = (intptr_t) *cargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		cargs++;
	    }
	  args_cnt = cargs + 1 - args;
	}
    }

  /* One allocation holds, in order: the task, its depend array, the
     gomp_target_task, the args copy, then hostaddrs/sizes/kinds copies
     followed by the firstprivate payload.  */
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + args_cnt * sizeof (void *)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  /* The target task record lives right after the depend array.  */
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  if (args_cnt)
    {
      ttask->args = (void **) &ttask->hostaddrs[mapnum];
      memcpy (ttask->args, args, args_cnt * sizeof (void *));
      ttask->sizes = (size_t *) &ttask->args[args_cnt];
    }
  else
    {
      ttask->args = args;
      ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
    }
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      /* Copy the firstprivate data into the aligned tail of the block
	 and point hostaddrs at the copies.  */
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  /* fn == NULL marks this as a target task; fn_data carries the
     gomp_target_task record.  */
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0))
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	{
	do_cancel:
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return true;
	}
      if (taskgroup)
	{
	  if (taskgroup->cancelled)
	    goto do_cancel;
	  if (taskgroup->workshare
	      && taskgroup->prev
	      && taskgroup->prev->cancelled)
	    goto do_cancel;
	}
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  /* The task is deferred until its dependencies are met;
	     dependers handling will queue it later.  */
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      /* No pending dependencies: nothing to defer, discard the task
	 and report false so the work is done without it.  */
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  /* Non-async device: queue the task normally on all three queues.  */
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}
1097 | ||
/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

	task->children
	|	       +--- last_parent_depends_on
	|	       |
	V	       V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      /* Some parent_depends_on tasks are already at the front: unlink
	 NODE from its current position and splice it in right after
	 the last of them.  */
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      /* No parent_depends_on tasks at the front yet: unlink NODE and
	 re-link it at the head of the circular list, making it the new
	 first task.  */
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  /* Either way NODE is now the last parent_depends_on task at the
     front of LIST (if NODE was already the head, no relinking was
     needed).  */
  list->last_parent_depends_on = node;
}
1148 | ||
e4606348 JJ |
1149 | /* Given a parent_depends_on TASK in its parent's children_queue, move |
1150 | it to the front of its priority so it is run as soon as possible. | |
d9a6bd32 | 1151 | |
e4606348 | 1152 | PARENT is passed as an optimization. |
d9a6bd32 | 1153 | |
e4606348 JJ |
1154 | (This function could be defined in priority_queue.c, but we want it |
1155 | inlined, and putting it in priority_queue.h is not an option, given | |
1156 | that gomp_task has not been properly defined at that point). */ | |
d9a6bd32 | 1157 | |
e4606348 JJ |
1158 | static void inline |
1159 | priority_queue_upgrade_task (struct gomp_task *task, | |
1160 | struct gomp_task *parent) | |
d9a6bd32 | 1161 | { |
e4606348 JJ |
1162 | struct priority_queue *head = &parent->children_queue; |
1163 | struct priority_node *node = &task->pnode[PQ_CHILDREN]; | |
1164 | #if _LIBGOMP_CHECKING_ | |
1165 | if (!task->parent_depends_on) | |
1166 | gomp_fatal ("priority_queue_upgrade_task: task must be a " | |
1167 | "parent_depends_on task"); | |
1168 | if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) | |
1169 | gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); | |
1170 | #endif | |
1171 | if (priority_queue_multi_p (head)) | |
d9a6bd32 | 1172 | { |
e4606348 JJ |
1173 | struct priority_list *list |
1174 | = priority_queue_lookup_priority (head, task->priority); | |
1175 | priority_list_upgrade_task (list, node); | |
d9a6bd32 | 1176 | } |
e4606348 JJ |
1177 | else |
1178 | priority_list_upgrade_task (&head->l, node); | |
d9a6bd32 JJ |
1179 | } |
1180 | ||
/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    /* NODE is the head of the circular list: advancing the head
       pointer effectively makes NODE the last element.  */
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	/* The predecessor is still a waiting parent_depends_on task,
	   so it becomes the new last such task.  */
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}
e4606348 JJ |
1235 | |
1236 | /* Given a TASK in HEAD that is about to be executed, move it out of | |
1237 | the way so that other tasks can be considered for execution. HEAD | |
1238 | contains tasks of type TYPE. | |
1239 | ||
1240 | Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field | |
1241 | if applicable. | |
1242 | ||
1243 | (This function could be defined in priority_queue.c, but we want it | |
1244 | inlined, and putting it in priority_queue.h is not an option, given | |
1245 | that gomp_task has not been properly defined at that point). */ | |
1246 | ||
1247 | static void inline | |
1248 | priority_queue_downgrade_task (enum priority_queue_type type, | |
1249 | struct priority_queue *head, | |
1250 | struct gomp_task *task) | |
1251 | { | |
1252 | #if _LIBGOMP_CHECKING_ | |
1253 | if (!priority_queue_task_in_queue_p (type, head, task)) | |
1254 | gomp_fatal ("Attempt to downgrade missing task %p", task); | |
d9a6bd32 | 1255 | #endif |
e4606348 JJ |
1256 | if (priority_queue_multi_p (head)) |
1257 | { | |
1258 | struct priority_list *list | |
1259 | = priority_queue_lookup_priority (head, task->priority); | |
1260 | priority_list_downgrade_task (type, list, task); | |
1261 | } | |
1262 | else | |
1263 | priority_list_downgrade_task (type, &head->l, task); | |
1264 | } | |
1265 | ||
/* Setup CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  /* Unlike the child/taskgroup queues above, the task leaves the team
     queue entirely.  */
  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  /* Only tasks whose firstprivate copy constructors have not run yet
     can be dropped on cancellation — NOTE(review): presumably because
     already-run C++ copy ctors have observable effects; confirm.  */
  if (__builtin_expect (gomp_cancel_var, 0)
      && !child_task->copy_ctors_done)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return true;
      if (taskgroup)
	{
	  if (taskgroup->cancelled)
	    return true;
	  if (taskgroup->workshare
	      && taskgroup->prev
	      && taskgroup->prev->cancelled)
	    return true;
	}
    }
  return false;
}
1324 | ||
/* Remove CHILD_TASK's dependency entries from its parent's dependency
   hash table (parent->depend_hash).  Each non-redundant entry is
   unlinked from its per-address chain; when an entry is the chain head
   its hash slot is updated or cleared.  Called with team->task_lock
   held (see the call under the lock in gomp_create_target_task).  */

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  /* Drop the "depend(inout: omp_all_memory)"-style marker if this task
     owned it — TODO confirm depend_all_memory semantics where it is
     set.  */
  if (parent->depend_all_memory == child_task)
    parent->depend_all_memory = NULL;
  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    /* Entry is the head of its chain: its hash slot must point
	       at the successor, or be cleared if none.  */
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}
1354 | ||
/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs to.

   Returns the number of tasks newly made runnable (or completed, for
   bodiless tasks).  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      /* Fast path for bodiless tasks (fn == empty_task, defined
	 elsewhere in this file): no scheduling needed, complete the
	 task right here — propagate its own depend bookkeeping, wake
	 any waiters, and free it.  */
      if (__builtin_expect (task->fn == empty_task, 0))
	{
	  if (!parent)
	    task->parent = NULL;
	  else if (__builtin_expect (task->parent_depends_on, 0)
		   && --parent->taskwait->n_depend == 0
		   && parent->taskwait->in_depend_wait)
	    {
	      /* Last dependency the parent was waiting for; wake it.  */
	      parent->taskwait->in_depend_wait = false;
	      gomp_sem_post (&parent->taskwait->taskwait_sem);
	    }
	  if (gomp_task_run_post_handle_depend (task, team))
	    ++ret;
	  if (taskgroup)
	    {
	      if (taskgroup->num_children > 1)
		--taskgroup->num_children;
	      else
		{
		  /* Release store so the taskgroup-end reader outside
		     the lock sees all prior writes.  */
		  __atomic_store_n (&taskgroup->num_children, 0,
				    MEMMODEL_RELEASE);
		  if (taskgroup->in_taskgroup_wait)
		    {
		      taskgroup->in_taskgroup_wait = false;
		      gomp_sem_post (&taskgroup->taskgroup_sem);
		    }
		}
	    }
	  gomp_finish_task (task);
	  free (task);
	  continue;
	}
      /* Normal task: queue it on the parent's, taskgroup's and the
	 team's queues so a thread can pick it up.  */
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      else
	task->parent = NULL;
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
1470 | ||
1471 | static inline size_t | |
1472 | gomp_task_run_post_handle_depend (struct gomp_task *child_task, | |
1473 | struct gomp_team *team) | |
1474 | { | |
1475 | if (child_task->depend_count == 0) | |
1476 | return 0; | |
1477 | ||
1478 | /* If parent is gone already, the hash table is freed and nothing | |
1479 | will use the hash table anymore, no need to remove anything from it. */ | |
1480 | if (child_task->parent != NULL) | |
1481 | gomp_task_run_post_handle_depend_hash (child_task); | |
1482 | ||
1483 | if (child_task->dependers == NULL) | |
1484 | return 0; | |
1485 | ||
1486 | return gomp_task_run_post_handle_dependers (child_task, team); | |
1487 | } | |
1488 | ||
/* Remove CHILD_TASK from its parent.  Wakes the parent if it was
   blocked in taskwait/depend-wait and this removal unblocks it.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  /* The removal reports whether the children queue became empty; if
     so and the parent is blocked in taskwait, wake it up.  */
  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  /* Clear the queue links so stale pointers are never followed.  */
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}
1519 | ||
/* Remove CHILD_TASK from its taskgroup.  Decrements the taskgroup's
   child count and wakes a thread blocked in the taskgroup wait when
   the taskgroup queue became empty.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  /* EMPTY records whether the taskgroup queue became empty after the
     removal.  */
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  /* Clear the queue links so stale pointers are never followed.  */
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the NULL is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
1550 | ||
a68ab351 JJ |
/* Scheduler loop run by threads that reached a team barrier while tasks
   are still outstanding: repeatedly pull tasks off TEAM->task_queue and
   execute them, releasing the barrier once TEAM->task_count reaches zero.
   STATE is the barrier state token the caller obtained when entering the
   barrier.  */

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      /* Last arriver: if no tasks remain, complete the barrier and wake
	 everyone; otherwise record that the barrier waits for tasks.  */
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;

      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      /* Free any task deferred from the previous iteration before
		 jumping into the bookkeeping shared with the normal path.  */
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      else if (team->task_count == 0
	       && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	{
	  /* Queue drained and all tasks accounted for: finish the barrier.  */
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  return;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Freeing the previous child task is deferred until after the lock
	 is dropped, so the free () runs outside the critical section.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task; run it through the plugin
	     machinery, which may complete asynchronously.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	  if (child_task->detach_team)
	    {
	      /* Task with an unfulfilled allow-completion event: park it
		 as detached instead of finishing it now.  */
	      assert (child_task->detach_team == team);
	      child_task->kind = GOMP_TASK_DETACHED;
	      ++team->task_detach_count;
	      --team->task_running_count;
	      gomp_debug (0,
			  "thread %d: task with event %p finished without "
			  "completion event fulfilled in team barrier\n",
			  thr->ts.team_id, child_task);
	      child_task = NULL;
	      continue;
	    }

	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  /* A cancelled task was never counted in task_running_count.  */
	  if (!cancelled)
	    team->task_running_count--;
	  child_task = NULL;
	  if (new_tasks > 1)
	    {
	      /* Dependence resolution released more tasks; wake idle
		 threads, but no more than there are new tasks.  */
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  --team->task_count;
	}
    }
}
1692 | ||
d9a6bd32 JJ |
/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  Participates in task
   scheduling while waiting: runs ready child tasks itself rather than
   only blocking on the semaphore.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  /* Set when the task picked below came from the children queue (as
     opposed to only the team queue), so it must be removed from it.  */
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  /* All children finished: tear down the stack-allocated taskwait
	     structure (if it was ever published) and return.  */
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, are detached and waiting for the completion event to be
	     fulfilled, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the task deferred from the previous iteration outside the
	 critical section.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task handled by the plugin.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a finishing child posts the
	   semaphore.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	  if (child_task->detach_team)
	    {
	      /* Unfulfilled allow-completion event: park as detached.  */
	      assert (child_task->detach_team == team);
	      child_task->kind = GOMP_TASK_DETACHED;
	      ++team->task_detach_count;
	      gomp_debug (0,
			  "thread %d: task with event %p finished without "
			  "completion event fulfilled in taskwait\n",
			  thr->ts.team_id, child_task);
	      child_task = NULL;
	      continue;
	    }

	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake idle threads for newly released dependent tasks.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
1858 | ||
28567c40 JJ |
1859 | /* Called when encountering a taskwait directive with depend clause(s). |
1860 | Wait as if it was an mergeable included task construct with empty body. */ | |
1861 | ||
1862 | void | |
1863 | GOMP_taskwait_depend (void **depend) | |
1864 | { | |
1865 | struct gomp_thread *thr = gomp_thread (); | |
1866 | struct gomp_team *team = thr->ts.team; | |
1867 | ||
1868 | /* If parallel or taskgroup has been cancelled, return early. */ | |
1869 | if (__builtin_expect (gomp_cancel_var, 0) && team) | |
1870 | { | |
1871 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
1872 | return; | |
1873 | if (thr->task->taskgroup) | |
1874 | { | |
1875 | if (thr->task->taskgroup->cancelled) | |
1876 | return; | |
1877 | if (thr->task->taskgroup->workshare | |
1878 | && thr->task->taskgroup->prev | |
1879 | && thr->task->taskgroup->prev->cancelled) | |
1880 | return; | |
1881 | } | |
1882 | } | |
1883 | ||
1884 | if (thr->task && thr->task->depend_hash) | |
1885 | gomp_task_maybe_wait_for_dependencies (depend); | |
1886 | } | |
1887 | ||
b4383691 JJ |
/* Called when encountering a taskwait directive with nowait and depend
   clause(s).  Create a possibly deferred task construct with empty body.  */

void
GOMP_taskwait_depend_nowait (void **depend)
{
  /* Creating a deferrable empty task lets the regular dependence
     machinery order it after the DEPEND prerequisites without blocking
     the encountering thread.  */
  ialias_call (GOMP_task) (empty_task, "", NULL, 0, 1, true,
			   GOMP_TASK_FLAG_DEPEND, depend, 0, NULL);
}
1897 | ||
e4606348 JJ |
/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t orig_ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t ndepend = orig_ndepend;
  size_t normal = ndepend;
  size_t n = 2;
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  /* depend[0] == 0 selects the extended encoding: counts in
     depend[1..4], entries starting at depend[5].  */
  if (ndepend == 0)
    {
      ndepend = nout;
      nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
      normal = nout + (uintptr_t) depend[4];
      n = 5;
    }
  gomp_mutex_lock (&team->task_lock);
  /* A prior sibling with depend(omp_all_memory) must be awaited
     regardless of this construct's individual addresses.  */
  if (__builtin_expect (task->depend_all_memory && ndepend, false))
    {
      struct gomp_task *tsk = task->depend_all_memory;
      if (!tsk->parent_depends_on)
	{
	  tsk->parent_depends_on = true;
	  ++num_awaited;
	  if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
	    priority_queue_upgrade_task (tsk, task);
	}
    }
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + n];
      elem.is_in = i >= nout;
      /* Entries past NORMAL are omp_depend_t objects whose kind is
	 stored in the object itself.  */
      if (__builtin_expect (i >= normal, 0))
	{
	  void **d = (void **) elem.addr;
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_IN:
	      break;
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      elem.is_in = 0;
	      break;
	    case GOMP_DEPEND_INOUTSET:
	      elem.is_in = 2;
	      break;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  elem.addr = d[0];
	}
      if (__builtin_expect (elem.addr == NULL && !elem.is_in, false))
	{
	  /* Expand the hash table up front so the traversal below walks
	     a reasonably dense table.  */
	  size_t size = htab_size (task->depend_hash);
	  if (htab_elements (task->depend_hash) * 8 < size && size > 32)
	    htab_expand (task->depend_hash);

	  /* depend(inout: omp_all_memory) - depend on all previous
	     sibling tasks that do have dependencies.  Inlined
	     htab_traverse.  */
	  hash_entry_type *slot = &task->depend_hash->entries[0];
	  hash_entry_type *end = slot + htab_size (task->depend_hash);
	  for (; slot != end; ++slot)
	    {
	      if (*slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
		continue;
	      for (ent = *slot; ent; ent = ent->next)
		{
		  struct gomp_task *tsk = ent->task;
		  if (!tsk->parent_depends_on)
		    {
		      tsk->parent_depends_on = true;
		      ++num_awaited;
		      if (tsk->num_dependees == 0
			  && tsk->kind == GOMP_TASK_WAITING)
			priority_queue_upgrade_task (tsk, task);
		    }
		}
	    }
	  break;
	}
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	/* in-on-in (and matching inoutset) pairs do not conflict.  */
	if (elem.is_in && elem.is_in == ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  /* Every awaited dependence has completed.  */
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 chose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previously finished task outside the lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task handled by the plugin.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
20906c66 JJ |
2160 | |
/* Called when encountering a taskyield directive.

   taskyield is only a scheduling hint; this implementation does not
   currently switch tasks here, so the function is an intentional
   no-op.  */

void
GOMP_taskyield (void)
{
}
2168 | ||
28567c40 JJ |
2169 | static inline struct gomp_taskgroup * |
2170 | gomp_taskgroup_init (struct gomp_taskgroup *prev) | |
2171 | { | |
2172 | struct gomp_taskgroup *taskgroup | |
2173 | = gomp_malloc (sizeof (struct gomp_taskgroup)); | |
2174 | taskgroup->prev = prev; | |
2175 | priority_queue_init (&taskgroup->taskgroup_queue); | |
2176 | taskgroup->reductions = prev ? prev->reductions : NULL; | |
2177 | taskgroup->in_taskgroup_wait = false; | |
2178 | taskgroup->cancelled = false; | |
2179 | taskgroup->workshare = false; | |
2180 | taskgroup->num_children = 0; | |
2181 | gomp_sem_init (&taskgroup->taskgroup_sem, 0); | |
2182 | return taskgroup; | |
2183 | } | |
2184 | ||
acf0174b JJ |
2185 | void |
2186 | GOMP_taskgroup_start (void) | |
2187 | { | |
2188 | struct gomp_thread *thr = gomp_thread (); | |
2189 | struct gomp_team *team = thr->ts.team; | |
2190 | struct gomp_task *task = thr->task; | |
acf0174b JJ |
2191 | |
2192 | /* If team is NULL, all tasks are executed as | |
d9a6bd32 | 2193 | GOMP_TASK_UNDEFERRED tasks and thus all children tasks of |
acf0174b JJ |
2194 | taskgroup and their descendant tasks will be finished |
2195 | by the time GOMP_taskgroup_end is called. */ | |
2196 | if (team == NULL) | |
2197 | return; | |
28567c40 | 2198 | task->taskgroup = gomp_taskgroup_init (task->taskgroup); |
acf0174b JJ |
2199 | } |
2200 | ||
/* Called at the end of a taskgroup region: wait for all tasks created in
   the taskgroup (tracked via taskgroup->num_children), scheduling ready
   ones on this thread, then pop and free the taskgroup.  */

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      /* Descendants still outstanding: try the children queue,
		 otherwise block below.  */
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      /* Taskgroup fully drained.  */
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previously finished task outside the lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task handled by the plugin.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a finishing task posts the
	   taskgroup semaphore.  */
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	  if (child_task->detach_team)
	    {
	      /* Unfulfilled allow-completion event: park as detached.  */
	      assert (child_task->detach_team == team);
	      child_task->kind = GOMP_TASK_DETACHED;
	      ++team->task_detach_count;
	      gomp_debug (0,
			  "thread %d: task with event %p finished without "
			  "completion event fulfilled in taskgroup\n",
			  thr->ts.team_id, child_task);
	      child_task = NULL;
	      continue;
	    }

	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake idle threads for newly released dependent tasks.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
2375 | ||
28567c40 JJ |
/* Register task reduction records.  DATA is the head of a chain of
   reduction descriptor arrays (linked through d[4]); OLD is the chain
   being shadowed (NULL if none); ORIG, when non-NULL, points at records
   whose per-thread storage was already allocated by another thread
   (worksharing task reductions); NTHREADS is the number of threads that
   need private copies.  Allocates (or reuses) the per-thread arrays and
   builds a hash table over all entries, stored into d[5] of the last
   descriptor.  The descriptor word layout (counts, size, alignment,
   next pointer, htab slot, end-of-array) is described by the comment
   following this function in the file.  */

static inline __attribute__((always_inline)) void
gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
			 unsigned nthreads)
{
  size_t total_cnt = 0;
  uintptr_t *d = data;
  struct htab *old_htab = NULL, *new_htab;
  do
    {
      if (__builtin_expect (orig != NULL, 0))
	{
	  /* For worksharing task reductions, memory has been allocated
	     already by some other thread that encountered the construct
	     earlier.  */
	  d[2] = orig[2];
	  d[6] = orig[6];
	  orig = (uintptr_t *) orig[4];
	}
      else
	{
	  /* Allocate one zero-filled array of d[1]-byte slots per thread,
	     aligned as requested in d[2]; d[2]/d[6] then delimit it.  */
	  size_t sz = d[1] * nthreads;
	  /* Should use omp_alloc if d[3] is not -1.  */
	  void *ptr = gomp_aligned_alloc (d[2], sz);
	  memset (ptr, '\0', sz);
	  d[2] = (uintptr_t) ptr;
	  d[6] = d[2] + sz;
	}
      d[5] = 0;
      total_cnt += d[0];
      if (d[4] == 0)
	{
	  /* End of chain: splice the OLD chain after this descriptor.  */
	  d[4] = (uintptr_t) old;
	  break;
	}
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  if (old && old[5])
    {
      old_htab = (struct htab *) old[5];
      total_cnt += htab_elements (old_htab);
    }
  new_htab = htab_create (total_cnt);
  if (old_htab)
    {
      /* Copy old hash table, like in htab_expand.  */
      hash_entry_type *p, *olimit;
      new_htab->n_elements = htab_elements (old_htab);
      olimit = old_htab->entries + old_htab->size;
      p = old_htab->entries;
      do
	{
	  hash_entry_type x = *p;
	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
	  p++;
	}
      while (p < olimit);
    }
  d = data;
  do
    {
      size_t j;
      /* Insert each of the d[0] entries (3 words apiece, starting at
	 d + 7) into the new hash table; p[2] backlinks to the
	 descriptor.  */
      for (j = 0; j < d[0]; ++j)
	{
	  uintptr_t *p = d + 7 + j * 3;
	  p[2] = (uintptr_t) d;
	  /* Ugly hack, hash_entry_type is defined for the task dependencies,
	     which hash on the first element which is a pointer.  We need
	     to hash also on the first sizeof (uintptr_t) bytes which contain
	     a pointer.  Hide the cast from the compiler.  */
	  hash_entry_type n;
	  __asm ("" : "=g" (n) : "0" (p));
	  *htab_find_slot (&new_htab, n, INSERT) = n;
	}
      if (d[4] == (uintptr_t) old)
	break;
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  d[5] = (uintptr_t) new_htab;
}
2460 | ||
/* Create a single-thread artificial team for the current thread and make
   its implicit task current, saving the previous team state in
   team->prev_ts.  If an implicit task already existed up the parent
   chain, it is ended and freed; its slot in the chain is repointed at
   the new team's implicit task so the chain stays consistent.  */

static void
gomp_create_artificial_team (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  struct gomp_team *team = gomp_new_team (1);
  struct gomp_task *task = thr->task;
  /* Pointer-to-pointer walk so the old implicit task's slot in the
     parent chain can be overwritten in place below.  */
  struct gomp_task **implicit_task = &task;
  icv = task ? &task->icv : &gomp_global_icv;
  team->prev_ts = thr->ts;
  thr->ts.team = team;
  thr->ts.team_id = 0;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, NULL, icv);
  while (*implicit_task
	 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
    implicit_task = &(*implicit_task)->parent;
  if (*implicit_task)
    {
      /* Retire the previous implicit task: end it with thr->task
	 temporarily pointing at it, then free it.  */
      thr->task = *implicit_task;
      gomp_end_task ();
      free (*implicit_task);
      thr->task = &team->implicit_task[0];
    }
#ifdef LIBGOMP_USE_PTHREADS
  else
    /* First team for this thread: arrange for cleanup at thread exit.  */
    pthread_setspecific (gomp_thread_destructor, thr);
#endif
  if (implicit_task != &task)
    {
      /* Repoint the freed implicit task's slot in the parent chain at the
	 new implicit task and restore the original current task.  */
      *implicit_task = thr->task;
      thr->task = task;
    }
}
2501 | ||
2502 | /* The format of data is: | |
2503 | data[0] cnt | |
2504 | data[1] size | |
2505 | data[2] alignment (on output array pointer) | |
2506 | data[3] allocator (-1 if malloc allocator) | |
2507 | data[4] next pointer | |
2508 | data[5] used internally (htab pointer) | |
2509 | data[6] used internally (end of array) | |
2510 | cnt times | |
2511 | ent[0] address | |
2512 | ent[1] offset | |
2513 | ent[2] used internally (pointer to data[0]) | |
2514 | The entries are sorted by increasing offset, so that a binary | |
   search can be performed.  Normally, data[8] (the offset of the first
   entry) is 0; the exception is worksharing construct task reductions
   in a cancellable parallel, where at offset 0 there should be space
   for a pointer and an integer which are used internally.  */
2519 | ||
2520 | void | |
2521 | GOMP_taskgroup_reduction_register (uintptr_t *data) | |
2522 | { | |
2523 | struct gomp_thread *thr = gomp_thread (); | |
2524 | struct gomp_team *team = thr->ts.team; | |
2525 | struct gomp_task *task; | |
2526 | unsigned nthreads; | |
2527 | if (__builtin_expect (team == NULL, 0)) | |
2528 | { | |
2529 | /* The task reduction code needs a team and task, so for | |
2530 | orphaned taskgroups just create the implicit team. */ | |
2531 | gomp_create_artificial_team (); | |
2532 | ialias_call (GOMP_taskgroup_start) (); | |
2533 | team = thr->ts.team; | |
2534 | } | |
2535 | nthreads = team->nthreads; | |
2536 | task = thr->task; | |
2537 | gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads); | |
2538 | task->taskgroup->reductions = data; | |
2539 | } | |
2540 | ||
2541 | void | |
2542 | GOMP_taskgroup_reduction_unregister (uintptr_t *data) | |
2543 | { | |
2544 | uintptr_t *d = data; | |
2545 | htab_free ((struct htab *) data[5]); | |
2546 | do | |
2547 | { | |
2548 | gomp_aligned_free ((void *) d[2]); | |
2549 | d = (uintptr_t *) d[4]; | |
2550 | } | |
2551 | while (d && !d[5]); | |
2552 | } | |
2553 | ialias (GOMP_taskgroup_reduction_unregister) | |
2554 | ||
/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
   original list item or address of previously remapped original list
   item to address of the private copy, store that to ptrs[i].
   For i < cntorig, additionally set ptrs[cnt+i] to the address of
   the original list item.  */

void
GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  unsigned id = thr->ts.team_id;
  uintptr_t *data = task->taskgroup->reductions;
  uintptr_t *d;
  struct htab *reduction_htab = (struct htab *) data[5];
  size_t i;
  for (i = 0; i < cnt; ++i)
    {
      hash_entry_type ent, n;
      /* Hide the cast from void ** to hash_entry_type from the compiler;
	 the htab hashes on the first pointer-sized word, which here is
	 the original address stored in ptrs[i].  */
      __asm ("" : "=g" (ent) : "0" (ptrs + i));
      n = htab_find (reduction_htab, ent);
      if (n)
	{
	  /* Fast path: ptrs[i] is an original list item address found in
	     the hash table.  */
	  uintptr_t *p;
	  __asm ("" : "=g" (p) : "0" (n));
	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
	     p[1] is the offset within the allocated chunk for each
	     thread, p[2] is the array registered with
	     GOMP_taskgroup_reduction_register, d[2] is the base of the
	     allocated memory and d[1] is the size of the allocated chunk
	     for one thread.  */
	  d = (uintptr_t *) p[2];
	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
	  if (__builtin_expect (i < cntorig, 0))
	    ptrs[cnt + i] = (void *) p[0];
	  continue;
	}
      /* Slow path: ptrs[i] is the address of a previously remapped copy;
	 find the descriptor whose allocated range [d[2], d[6]) contains
	 it.  */
      d = data;
      while (d != NULL)
	{
	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
	    break;
	  d = (uintptr_t *) d[4];
	}
      if (d == NULL)
	gomp_fatal ("couldn't find matching task_reduction or reduction with "
		    "task modifier for %p", ptrs[i]);
      /* Offset of the item within one thread's chunk of size d[1].  */
      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
      ptrs[i] = (void *) (d[2] + id * d[1] + off);
      if (__builtin_expect (i < cntorig, 0))
	{
	  /* Binary search the offset-sorted entry array (3 words per
	     entry starting at d[7]) for the original list item address.
	     NOTE(review): if OFF were smaller than every entry's offset,
	     hi = m - 1 with m == 0 would wrap around (size_t underflow);
	     presumably a matching or larger entry always exists here —
	     confirm against the registration code.  */
	  size_t lo = 0, hi = d[0] - 1;
	  while (lo <= hi)
	    {
	      size_t m = (lo + hi) / 2;
	      if (d[7 + 3 * m + 1] < off)
		lo = m + 1;
	      else if (d[7 + 3 * m + 1] == off)
		{
		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
		  break;
		}
	      else
		hi = m - 1;
	    }
	  if (lo > hi)
	    gomp_fatal ("couldn't find matching task_reduction or reduction "
			"with task modifier for %p", ptrs[i]);
	}
    }
}
2626 | ||
2627 | struct gomp_taskgroup * | |
2628 | gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) | |
2629 | { | |
2630 | struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); | |
2631 | gomp_reduction_register (data, NULL, NULL, nthreads); | |
2632 | taskgroup->reductions = data; | |
2633 | return taskgroup; | |
2634 | } | |
2635 | ||
2636 | void | |
2637 | gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig) | |
2638 | { | |
2639 | struct gomp_thread *thr = gomp_thread (); | |
2640 | struct gomp_team *team = thr->ts.team; | |
2641 | struct gomp_task *task = thr->task; | |
2642 | unsigned nthreads = team->nthreads; | |
2643 | gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads); | |
2644 | task->taskgroup->reductions = data; | |
2645 | } | |
2646 | ||
2647 | void | |
2648 | gomp_workshare_taskgroup_start (void) | |
2649 | { | |
2650 | struct gomp_thread *thr = gomp_thread (); | |
2651 | struct gomp_team *team = thr->ts.team; | |
2652 | struct gomp_task *task; | |
2653 | ||
2654 | if (team == NULL) | |
2655 | { | |
2656 | gomp_create_artificial_team (); | |
2657 | team = thr->ts.team; | |
2658 | } | |
2659 | task = thr->task; | |
2660 | task->taskgroup = gomp_taskgroup_init (task->taskgroup); | |
2661 | task->taskgroup->workshare = true; | |
2662 | } | |
2663 | ||
2664 | void | |
2665 | GOMP_workshare_task_reduction_unregister (bool cancelled) | |
2666 | { | |
2667 | struct gomp_thread *thr = gomp_thread (); | |
2668 | struct gomp_task *task = thr->task; | |
2669 | struct gomp_team *team = thr->ts.team; | |
2670 | uintptr_t *data = task->taskgroup->reductions; | |
2671 | ialias_call (GOMP_taskgroup_end) (); | |
2672 | if (thr->ts.team_id == 0) | |
2673 | ialias_call (GOMP_taskgroup_reduction_unregister) (data); | |
2674 | else | |
2675 | htab_free ((struct htab *) data[5]); | |
2676 | ||
2677 | if (!cancelled) | |
2678 | gomp_team_barrier_wait (&team->barrier); | |
2679 | } | |
2680 | ||
20906c66 JJ |
2681 | int |
2682 | omp_in_final (void) | |
2683 | { | |
2684 | struct gomp_thread *thr = gomp_thread (); | |
2685 | return thr->task && thr->task->final_task; | |
2686 | } | |
2687 | ||
2688 | ialias (omp_in_final) | |
a6d22fb2 | 2689 | |
0ec4e93f JJ |
2690 | int |
2691 | omp_in_explicit_task (void) | |
2692 | { | |
2693 | struct gomp_thread *thr = gomp_thread (); | |
2694 | struct gomp_task *task = thr->task; | |
2695 | return task && task->kind != GOMP_TASK_IMPLICIT; | |
2696 | } | |
2697 | ||
2698 | ialias (omp_in_explicit_task) | |
2699 | ||
a6d22fb2 KCY |
/* Fulfill the allow-completion event EVENT, which is really a pointer to
   the detachable task it belongs to.  For an undeferred task, post the
   completion semaphore the creating thread waits on.  For a deferred task,
   either clear task->detach_team if the task has not finished running yet,
   or perform the post-completion bookkeeping (dependencies, parent,
   taskgroup) and free the task.  */

void
omp_fulfill_event (omp_event_handle_t event)
{
  struct gomp_task *task = (struct gomp_task *) event;
  if (!task->deferred_p)
    {
      /* Undeferred task: the creator is blocked on completion_sem.
	 A nonzero count means the event was already posted.  */
      if (gomp_sem_getcount (task->completion_sem) > 0)
	gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);

      gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
		  task);
      gomp_sem_post (task->completion_sem);
      return;
    }

  /* For a deferred task, detach_team doubles as the "not yet fulfilled"
     flag; NULL means invalid or already fulfilled.  */
  struct gomp_team *team = __atomic_load_n (&task->detach_team,
					    MEMMODEL_RELAXED);
  if (!team)
    gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
		"been fulfilled!\n", task);

  gomp_mutex_lock (&team->task_lock);
  if (task->kind != GOMP_TASK_DETACHED)
    {
      /* The task has not finished running yet.  Clearing detach_team
	 tells the runner that the event is already fulfilled, so it will
	 complete the task itself when its body finishes.  */
      gomp_debug (0,
		  "omp_fulfill_event: %p event fulfilled for unfinished "
		  "task\n", task);
      __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  /* The task already finished running and was parked as
     GOMP_TASK_DETACHED; complete it here: release dependent tasks,
     detach it from parent and taskgroup, and update team counters.  */
  gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
	      task);
  size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
  gomp_task_run_post_remove_parent (task);
  gomp_clear_parent (&task->children_queue);
  gomp_task_run_post_remove_taskgroup (task);
  team->task_count--;
  team->task_detach_count--;

  int do_wake = 0;
  /* "Shackled" means the fulfilling thread belongs to TEAM itself.  */
  bool shackled_thread_p = team == gomp_thread ()->ts.team;
  if (new_tasks > 0)
    {
      /* Wake up threads to run new tasks.  */
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->nthreads - team->task_running_count;
      if (do_wake > new_tasks)
	do_wake = new_tasks;
    }

  if (!shackled_thread_p
      && !do_wake
      && team->task_detach_count == 0
      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
    /* Ensure that at least one thread is woken up to signal that the
       barrier can finish.  */
    do_wake = 1;

  /* If we are running in an unshackled thread, the team might vanish before
     gomp_team_barrier_wake is run if we release the lock first, so keep the
     lock for the call in that case.  */
  if (shackled_thread_p)
    gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, do_wake);
  if (!shackled_thread_p)
    gomp_mutex_unlock (&team->task_lock);

  gomp_finish_task (task);
  free (task);
}

ialias (omp_fulfill_event)