This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp5] Fix task reduction handling in implicit parallel regions
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 8 Oct 2018 19:58:43 +0200
- Subject: [gomp5] Fix task reduction handling in implicit parallel regions
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
In implicit parallel regions, thr->ts.team is NULL and thr->task is often NULL as well.
For task reductions we need both to be non-NULL, so in that case this patch creates
an implicit team with 1 thread, like we already do for target nowait.
Tested on x86_64-linux, committed to gomp-5_0-branch.
2018-10-08 Jakub Jelinek <jakub@redhat.com>
* task.c (GOMP_taskgroup_reduction_register): If team is NULL, create
implicit team with 1 thread and call GOMP_taskgroup_start again. Don't
mix declarations with statements.
* team.c (gomp_team_end): Determine nesting by thr->ts.level != 0
rather than thr->ts.team != NULL.
* testsuite/libgomp.c-c++-common/task-reduction-4.c: New test.
--- libgomp/task.c.jj 2018-10-08 12:20:53.712125100 +0200
+++ libgomp/task.c 2018-10-08 18:29:51.410292170 +0200
@@ -1968,11 +1968,45 @@ GOMP_taskgroup_reduction_register (uintp
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
- struct gomp_task *task = thr->task;
- unsigned nthreads = team ? team->nthreads : 1;
+ struct gomp_task *task;
+ if (__builtin_expect (team == NULL, 0))
+ {
+ /* The task reduction code needs a team and task, so for
+ orphaned taskgroups just create the implicit team. */
+ struct gomp_task_icv *icv;
+ team = gomp_new_team (1);
+ task = thr->task;
+ icv = task ? &task->icv : &gomp_global_icv;
+ team->prev_ts = thr->ts;
+ thr->ts.team = team;
+ thr->ts.team_id = 0;
+ thr->ts.work_share = &team->work_shares[0];
+ thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+ thr->ts.single_count = 0;
+#endif
+ thr->ts.static_trip = 0;
+ thr->task = &team->implicit_task[0];
+ gomp_init_task (thr->task, NULL, icv);
+ if (task)
+ {
+ thr->task = task;
+ gomp_end_task ();
+ free (task);
+ thr->task = &team->implicit_task[0];
+ }
+#ifdef LIBGOMP_USE_PTHREADS
+ else
+ pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+ GOMP_taskgroup_start ();
+ }
+ unsigned nthreads = team->nthreads;
size_t total_cnt = 0;
- uintptr_t *d = data;
- uintptr_t *old = task->taskgroup->reductions;
+ uintptr_t *d = data, *old;
+ struct htab *old_htab = NULL, *new_htab;
+ task = thr->task;
+ old = task->taskgroup->reductions;
do
{
size_t sz = d[1] * nthreads;
@@ -1992,13 +2026,12 @@ GOMP_taskgroup_reduction_register (uintp
d = (uintptr_t *) d[4];
}
while (1);
- struct htab *old_htab = NULL;
if (old && old[5])
{
old_htab = (struct htab *) old[5];
total_cnt += htab_elements (old_htab);
}
- struct htab *new_htab = htab_create (total_cnt);
+ new_htab = htab_create (total_cnt);
if (old_htab)
{
/* Copy old hash table, like in htab_expand. */
--- libgomp/team.c.jj 2018-07-27 12:57:16.000000000 +0200
+++ libgomp/team.c 2018-10-08 19:05:58.135130888 +0200
@@ -945,7 +945,7 @@ gomp_team_end (void)
gomp_end_task ();
thr->ts = team->prev_ts;
- if (__builtin_expect (thr->ts.team != NULL, 0))
+ if (__builtin_expect (thr->ts.level != 0, 0))
{
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
--- libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c.jj 2018-10-08 18:35:52.181268647 +0200
+++ libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c 2018-10-08 18:35:52.181268647 +0200
@@ -0,0 +1,70 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+void
+bar (long long int *p)
+{ /* Multiplies p[0] by 2 directly and by 3 inside an in_reduction task.  */
+ p[0] *= 2;
+ #pragma omp task in_reduction (*: p[0])
+ p[0] *= 3;
+}
+
+void
+foo (long long int *p, long long int *q)
+{ /* Nested task_reduction taskgroups: 5 bar (p) calls, one p[0] *= 2, 4 bar (q) calls.  */
+ #pragma omp taskgroup task_reduction (*: p[0])
+ {
+ #pragma omp task in_reduction (*: p[0])
+ bar (p);
+ #pragma omp task in_reduction (*: p[0])
+ bar (p);
+ bar (p); /* Called directly here, not from an explicit task.  */
+ #pragma omp taskgroup task_reduction (*: q[0])
+ {
+ #pragma omp task in_reduction (*: q[0])
+ bar (q);
+ #pragma omp task in_reduction (*: q[0])
+ bar (q);
+ #pragma omp task in_reduction (*: q[0])
+ bar (q);
+ bar (q); /* Called directly here, not from an explicit task.  */
+ #pragma omp task in_reduction (*: p[0])
+ { /* This task opens a further taskgroup that again reduces on p[0].  */
+ #pragma omp taskgroup task_reduction (*: p[0])
+ {
+ #pragma omp task in_reduction (*: p[0])
+ bar (p);
+ p[0] *= 2; /* Extra factor of 2 applied to p outside of bar.  */
+ #pragma omp task in_reduction (*: p[0])
+ bar (p);
+ }
+ }
+ }
+ }
+}
+
+int
+main ()
+{ /* Runs foo in three contexts; each must yield p == 6*6*6*6*6*2 and q == 6*6*6*6.  */
+ long long int p = 1LL, q = 1LL;
+ foo (&p, &q); /* Called with no enclosing OpenMP construct in main.  */
+ if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+ abort ();
+ p = 1LL; /* Reset both accumulators before the next scenario.  */
+ q = 1LL;
+ #pragma omp taskgroup
+ foo (&p, &q); /* Called inside an enclosing taskgroup.  */
+ if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+ abort ();
+ p = 1LL; /* Reset both accumulators before the next scenario.  */
+ q = 1LL;
+ #pragma omp parallel
+ #pragma omp single
+ foo (&p, &q); /* Called from a single construct inside an explicit parallel.  */
+ if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+ abort ();
+ return 0;
+}
Jakub