[libgomp, WIP, GSoC'19] Modification to a single queue, single execution path.

김규래 msca8h@naver.com
Sun Jul 21 19:50:00 GMT 2019


Finished unifying the three queues into team->task_queue.
All the tests passed except for some unsupported target tests.
 
=== libgomp Summary ===
 
# of expected passes 6749
# of expected failures 4
# of unsupported tests 349
 
I also tried to make taskwait_end, taskgroup_end, and maybe_wait_for_dependencies share the same execution routines.
The current state of that is pretty rough.
I think there are too many mutex locks and unlocks.
I haven't tested the performance of this patch yet; I'll try that soon for the sake of curiosity.
I'll try to reduce the locked regions and then split the queues into a multiqueue.
 
2019-07-22  Khu-rai Kim  <msca8h@naver.com>
 
* libgomp/libgomp.h: Removed task->children_queue and
taskgroup->taskgroup_queue, added a children counter (num_children)
for task, and reduced task->pnode[3] to a single pnode.
Added a new task kind, GOMP_DONE, to track the lifecycle of
dangling parents.
* libgomp/task.c: Unified all queues into team->task_queue.
taskwait_end, taskgroup_end, and maybe_wait_for_dependencies share
the same task execution routine. Parents that finish executing while
children remain are kept until all of their children are done
executing.
* libgomp/taskloop.c: Unified all queues into team->task_queue.
 
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 9f433160ab5..3a615f1d9af 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -405,7 +405,11 @@ enum gomp_task_kind
      but not yet completed.  Once that completes, they will be readded
      into the queues as GOMP_TASK_WAITING in order to perform the var
      unmapping.  */
-  GOMP_TASK_ASYNC_RUNNING
+  GOMP_TASK_ASYNC_RUNNING,
+
+  /* The task is kept only for dependency tracking purposes
+     and is ready to be freed at any time.  */
+  GOMP_DONE
 };
 
 struct gomp_task_depend_entry
@@ -447,7 +451,7 @@ struct gomp_task
   /* Parent of this task.  */
   struct gomp_task *parent;
   /* Children of this task.  */
-  struct priority_queue children_queue;
+  /* struct priority_queue children_queue; */
   /* Taskgroup this task belongs in.  */
   struct gomp_taskgroup *taskgroup;
   /* Tasks that depend on this task.  */
@@ -461,13 +465,16 @@ struct gomp_task
      into the various queues to be scheduled.  */
   size_t num_dependees;
 
+  /* Number of children created and queued from this task.  */
+  size_t num_children;
+
   /* Priority of this task.  */
   int priority;
   /* The priority node for this task in each of the different queues.
      We put this here to avoid allocating space for each priority
      node.  Then we play offsetof() games to convert between pnode[]
      entries and the gomp_task in which they reside.  */
-  struct priority_node pnode[3];
+  struct priority_node pnode;
 
   struct gomp_task_icv icv;
   void (*fn) (void *);
@@ -491,7 +498,7 @@ struct gomp_taskgroup
 {
   struct gomp_taskgroup *prev;
   /* Queue of tasks that belong in this taskgroup.  */
-  struct priority_queue taskgroup_queue;
+  /* struct priority_queue taskgroup_queue; */
   uintptr_t *reductions;
   bool in_taskgroup_wait;
   bool cancelled;
@@ -1211,7 +1218,7 @@ extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW;
 static inline size_t
 priority_queue_offset (enum priority_queue_type type)
 {
-  return offsetof (struct gomp_task, pnode[(int) type]);
+  return offsetof (struct gomp_task, pnode);
 }
 
 /* Return the task associated with a priority NODE of type TYPE.  */
diff --git a/libgomp/task.c b/libgomp/task.c
index 15177ac8824..df822526c3f 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -81,11 +81,12 @@ gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
   task->final_task = false;
   task->copy_ctors_done = false;
   task->parent_depends_on = false;
-  priority_queue_init (&task->children_queue);
+  // priority_queue_init (&task->children_queue);
   task->taskgroup = NULL;
   task->dependers = NULL;
   task->depend_hash = NULL;
   task->depend_count = 0;
+  task->num_children = 0;
 }
 
 /* Clean up a task, after completing it.  */
@@ -100,61 +101,6 @@ gomp_end_task (void)
   thr->task = task->parent;
 }
 
-/* Clear the parent field of every task in LIST.  */
-
-static inline void
-gomp_clear_parent_in_list (struct priority_list *list)
-{
-  struct priority_node *p = list->tasks;
-  if (p)
-    do
-      {
- priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
- p = p->next;
-      }
-    while (p != list->tasks);
-}
-
-/* Splay tree version of gomp_clear_parent_in_list.
-
-   Clear the parent field of every task in NODE within SP, and free
-   the node when done.  */
-
-static void
-gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
-{
-  if (!node)
-    return;
-  prio_splay_tree_node left = node->left, right = node->right;
-  gomp_clear_parent_in_list (&node->key.l);
-#if _LIBGOMP_CHECKING_
-  memset (node, 0xaf, sizeof (*node));
-#endif
-  /* No need to remove the node from the tree.  We're nuking
-     everything, so just free the nodes and our caller can clear the
-     entire splay tree.  */
-  free (node);
-  gomp_clear_parent_in_tree (sp, left);
-  gomp_clear_parent_in_tree (sp, right);
-}
-
-/* Clear the parent field of every task in Q and remove every task
-   from Q.  */
-
-static inline void
-gomp_clear_parent (struct priority_queue *q)
-{
-  if (priority_queue_multi_p (q))
-    {
-      gomp_clear_parent_in_tree (&q->t, q->t.root);
-      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
- No need to remove anything.  We can just nuke everything.  */
-      q->t.root = NULL;
-    }
-  else
-    gomp_clear_parent_in_list (&q->l);
-}
-
 /* Helper function for GOMP_task and gomp_create_target_task.
 
    For a TASK with in/out dependencies, fill in the various dependency
@@ -182,8 +128,8 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
     }
   else
     {
-      ndepend = (uintptr_t) depend[1]; /* total # */
-      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
+      ndepend = (uintptr_t) depend[1];      /* total # */
+      size_t nout = (uintptr_t) depend[2];    /* # of out: and inout: */
       size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
       /* For now we treat mutexinoutset like out, which is compliant, but
  inefficient.  */
@@ -235,8 +181,8 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
       task->depend[i].redundant = false;
       task->depend[i].redundant_out = false;
 
-      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
-       &task->depend[i], INSERT);
+      hash_entry_type *slot
+ = htab_find_slot (&parent->depend_hash, &task->depend[i], INSERT);
       hash_entry_type out = NULL, last = NULL;
       if (*slot)
  {
@@ -282,13 +228,12 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
  continue;
        else if (tsk->dependers->n_elem == tsk->dependers->allocated)
  {
-   tsk->dependers->allocated
-     = tsk->dependers->allocated * 2 + 2;
+   tsk->dependers->allocated = tsk->dependers->allocated * 2 + 2;
    tsk->dependers
      = gomp_realloc (tsk->dependers,
      sizeof (struct gomp_dependers_vec)
-     + (tsk->dependers->allocated
-        * sizeof (struct gomp_task *)));
+       + (tsk->dependers->allocated
+ * sizeof (struct gomp_task *)));
  }
        tsk->dependers->elem[tsk->dependers->n_elem++] = task;
        task->num_dependees++;
@@ -371,8 +316,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  {
    if (thr->task->taskgroup->cancelled)
      return;
-   if (thr->task->taskgroup->workshare
-       && thr->task->taskgroup->prev
+   if (thr->task->taskgroup->workshare && thr->task->taskgroup->prev
        && thr->task->taskgroup->prev->cancelled)
      return;
  }
@@ -383,8 +327,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
   else if (priority > gomp_max_task_priority_var)
     priority = gomp_max_task_priority_var;
 
-  if (!if_clause || team == NULL
-      || (thr->task && thr->task->final_task)
+  if (!if_clause || team == NULL || (thr->task && thr->task->final_task)
       || team->task_count > 64 * team->nthreads)
     {
       struct gomp_task task;
@@ -395,8 +338,8 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  depend clauses for non-deferred tasks other than this, because
  the parent task is suspended until the child task finishes and thus
  it can't start further child tasks.  */
-      if ((flags & GOMP_TASK_FLAG_DEPEND)
-   && thr->task && thr->task->depend_hash)
+      if ((flags & GOMP_TASK_FLAG_DEPEND) && thr->task
+   && thr->task->depend_hash)
  gomp_task_maybe_wait_for_dependencies (depend);
 
       gomp_init_task (&task, thr->task, gomp_icv (false));
@@ -429,12 +372,6 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  child thread, but seeing a stale non-NULL value is not a
  problem.  Once past the task_lock acquisition, this thread
  will see the real value of task.children.  */
-      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
- {
-   gomp_mutex_lock (&team->task_lock);
-   gomp_clear_parent (&task.children_queue);
-   gomp_mutex_unlock (&team->task_lock);
- }
       gomp_end_task ();
     }
   else
@@ -449,8 +386,8 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
       if (flags & GOMP_TASK_FLAG_DEPEND)
  depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
         * sizeof (struct gomp_task_depend_entry));
-      task = gomp_malloc (sizeof (*task) + depend_size
-   + arg_size + arg_align - 1);
+      task
+ = gomp_malloc (sizeof (*task) + depend_size + arg_size + arg_align - 1);
       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
        & ~(uintptr_t) (arg_align - 1));
       gomp_init_task (task, parent, gomp_icv (false));
@@ -474,8 +411,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
  tasks.  */
-      if (__builtin_expect (gomp_cancel_var, 0)
-   && !task->copy_ctors_done)
+      if (__builtin_expect (gomp_cancel_var, 0) && !task->copy_ctors_done)
  {
    if (gomp_team_barrier_cancelled (&team->barrier))
      {
@@ -489,14 +425,17 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
      {
        if (taskgroup->cancelled)
  goto do_cancel;
-       if (taskgroup->workshare
-   && taskgroup->prev
+       if (taskgroup->workshare && taskgroup->prev
    && taskgroup->prev->cancelled)
  goto do_cancel;
      }
  }
+
       if (taskgroup)
- taskgroup->num_children++;
+ ++taskgroup->num_children;
+      ++parent->num_children;
+      ++team->task_count;
+
       if (depend_size)
  {
    gomp_task_handle_depend (task, parent, depend);
@@ -514,38 +453,22 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
      }
  }
 
-      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
-      task, priority,
-      PRIORITY_INSERT_BEGIN,
-      /*adjust_parent_depends_on=*/false,
-      task->parent_depends_on);
-      if (taskgroup)
- priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
-        task, priority,
-        PRIORITY_INSERT_BEGIN,
-        /*adjust_parent_depends_on=*/false,
-        task->parent_depends_on);
-
-      priority_queue_insert (PQ_TEAM, &team->task_queue,
-      task, priority,
+      priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
       PRIORITY_INSERT_END,
       /*adjust_parent_depends_on=*/false,
       task->parent_depends_on);
-
-      ++team->task_count;
       ++team->task_queued_count;
       gomp_team_barrier_set_task_pending (&team->barrier);
-      do_wake = team->task_running_count + !parent->in_tied_task
- < team->nthreads;
+      do_wake
+ = team->task_running_count + !parent->in_tied_task < team->nthreads;
       gomp_mutex_unlock (&team->task_lock);
       if (do_wake)
  gomp_team_barrier_wake (&team->barrier, 1);
     }
 }
 
-ialias (GOMP_taskgroup_start)
-ialias (GOMP_taskgroup_end)
-ialias (GOMP_taskgroup_reduction_register)
+ialias (GOMP_taskgroup_start) ialias (GOMP_taskgroup_end)
+  ialias (GOMP_taskgroup_reduction_register)
 
 #define TYPE long
 #define UTYPE unsigned long
@@ -563,10 +486,9 @@ ialias (GOMP_taskgroup_reduction_register)
 #undef UTYPE
 #undef GOMP_taskloop
 
-static void inline
-priority_queue_move_task_first (enum priority_queue_type type,
- struct priority_queue *head,
- struct gomp_task *task)
+    static void inline priority_queue_move_task_first (
+      enum priority_queue_type type, struct priority_queue *head,
+      struct gomp_task *task)
 {
 #if _LIBGOMP_CHECKING_
   if (!priority_queue_task_in_queue_p (type, head, task))
@@ -584,9 +506,8 @@ priority_queue_move_task_first (enum priority_queue_type type,
   else
     list = &head->l;
   priority_list_remove (list, task_to_priority_node (type, task), 0);
-  priority_list_insert (type, list, task, task->priority,
- PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
- task->parent_depends_on);
+  priority_list_insert (type, list, task, task->priority, PRIORITY_INSERT_BEGIN,
+ type == PQ_CHILDREN, task->parent_depends_on);
 }
 
 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
@@ -598,45 +519,40 @@ static void
 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
 {
   struct gomp_task *parent = task->parent;
-  if (parent)
-    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
-     task);
-
   struct gomp_taskgroup *taskgroup = task->taskgroup;
-  if (taskgroup)
-    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
-     task);
-
   priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
- PRIORITY_INSERT_BEGIN, false,
- task->parent_depends_on);
+ PRIORITY_INSERT_BEGIN, false, task->parent_depends_on);
   task->kind = GOMP_TASK_WAITING;
-  if (parent && parent->taskwait)
+  if (parent)
     {
-      if (parent->taskwait->in_taskwait)
+      if (parent->taskwait)
  {
-   /* One more task has had its dependencies met.
-      Inform any waiters.  */
-   parent->taskwait->in_taskwait = false;
-   gomp_sem_post (&parent->taskwait->taskwait_sem);
+   if (parent->taskwait->in_taskwait)
+     {
+       /* One more task has had its dependencies met.
+ Inform any waiters.  */
+       parent->taskwait->in_taskwait = false;
+       gomp_sem_post (&parent->taskwait->taskwait_sem);
+     }
+   else if (parent->taskwait->in_depend_wait)
+     {
+       /* One more task has had its dependencies met.
+ Inform any waiters.  */
+       parent->taskwait->in_depend_wait = false;
+       gomp_sem_post (&parent->taskwait->taskwait_sem);
+     }
  }
-      else if (parent->taskwait->in_depend_wait)
+    }
+  if (taskgroup)
+    {
+      if (taskgroup->in_taskgroup_wait)
  {
    /* One more task has had its dependencies met.
       Inform any waiters.  */
-   parent->taskwait->in_depend_wait = false;
-   gomp_sem_post (&parent->taskwait->taskwait_sem);
+   taskgroup->in_taskgroup_wait = false;
+   gomp_sem_post (&taskgroup->taskgroup_sem);
  }
     }
-  if (taskgroup && taskgroup->in_taskgroup_wait)
-    {
-      /* One more task has had its dependencies met.
- Inform any waiters.  */
-      taskgroup->in_taskgroup_wait = false;
-      gomp_sem_post (&taskgroup->taskgroup_sem);
-    }
-
-  ++team->task_queued_count;
   gomp_team_barrier_set_task_pending (&team->barrier);
   /* I'm afraid this can't be done after releasing team->task_lock,
      as gomp_target_task_completion is run from unrelated thread and
@@ -674,10 +590,10 @@ static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
 /* Called for nowait target tasks.  */
 
 bool
-gomp_create_target_task (struct gomp_device_descr *devicep,
- void (*fn) (void *), size_t mapnum, void **hostaddrs,
- size_t *sizes, unsigned short *kinds,
- unsigned int flags, void **depend, void **args,
+gomp_create_target_task (struct gomp_device_descr *devicep, void (*fn) (void *),
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, unsigned int flags,
+ void **depend, void **args,
  enum gomp_target_task_state state)
 {
   struct gomp_thread *thr = gomp_thread ();
@@ -692,8 +608,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
  {
    if (thr->task->taskgroup->cancelled)
      return true;
-   if (thr->task->taskgroup->workshare
-       && thr->task->taskgroup->prev
+   if (thr->task->taskgroup->workshare && thr->task->taskgroup->prev
        && thr->task->taskgroup->prev->cancelled)
      return true;
  }
@@ -733,11 +648,10 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
  tgt_size = 0;
     }
 
-  task = gomp_malloc (sizeof (*task) + depend_size
-       + sizeof (*ttask)
-       + mapnum * (sizeof (void *) + sizeof (size_t)
-   + sizeof (unsigned short))
-       + tgt_size);
+  task = gomp_malloc (
+    sizeof (*task) + depend_size + sizeof (*ttask)
+    + mapnum * (sizeof (void *) + sizeof (size_t) + sizeof (unsigned short))
+    + tgt_size);
   gomp_init_task (task, parent, gomp_icv (false));
   task->priority = 0;
   task->kind = GOMP_TASK_WAITING;
@@ -794,8 +708,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
  {
    if (taskgroup->cancelled)
      goto do_cancel;
-   if (taskgroup->workshare
-       && taskgroup->prev
+   if (taskgroup->workshare && taskgroup->prev
        && taskgroup->prev->cancelled)
      goto do_cancel;
  }
@@ -806,7 +719,9 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
       if (task->num_dependees)
  {
    if (taskgroup)
-     taskgroup->num_children++;
+     ++taskgroup->num_children;
+   ++parent->num_children;
+   ++team->task_count;
    gomp_mutex_unlock (&team->task_lock);
    return true;
  }
@@ -819,27 +734,19 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
       free (task);
       return false;
     }
-  if (taskgroup)
-    taskgroup->num_children++;
+
   /* For async offloading, if we don't need to wait for dependencies,
      run the gomp_target_task_fn right away, essentially schedule the
      mapping part of the task in the current thread.  */
-  if (devicep != NULL
-      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+
+  if (taskgroup)
+    ++taskgroup->num_children;
+  ++parent->num_children;
+  ++team->task_count;
+
+  if (devicep != NULL && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     {
-      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
-      PRIORITY_INSERT_END,
-      /*adjust_parent_depends_on=*/false,
-      task->parent_depends_on);
-      if (taskgroup)
- priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
-        task, 0, PRIORITY_INSERT_END,
-        /*adjust_parent_depends_on=*/false,
-        task->parent_depends_on);
-      task->pnode[PQ_TEAM].next = NULL;
-      task->pnode[PQ_TEAM].prev = NULL;
       task->kind = GOMP_TASK_TIED;
-      ++team->task_count;
       gomp_mutex_unlock (&team->task_lock);
 
       thr->task = task;
@@ -858,24 +765,16 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
       gomp_mutex_unlock (&team->task_lock);
       return true;
     }
-  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
- PRIORITY_INSERT_BEGIN,
- /*adjust_parent_depends_on=*/false,
- task->parent_depends_on);
-  if (taskgroup)
-    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
-    PRIORITY_INSERT_BEGIN,
-    /*adjust_parent_depends_on=*/false,
-    task->parent_depends_on);
+
   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
  PRIORITY_INSERT_END,
  /*adjust_parent_depends_on=*/false,
  task->parent_depends_on);
-  ++team->task_count;
+
   ++team->task_queued_count;
+
   gomp_team_barrier_set_task_pending (&team->barrier);
-  do_wake = team->task_running_count + !parent->in_tied_task
-     < team->nthreads;
+  do_wake = team->task_running_count + !parent->in_tied_task < team->nthreads;
   gomp_mutex_unlock (&team->task_lock);
   if (do_wake)
     gomp_team_barrier_wake (&team->barrier, 1);
@@ -905,12 +804,10 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
  V        V
  PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
 
-static void inline
-priority_list_upgrade_task (struct priority_list *list,
-     struct priority_node *node)
+static void inline priority_list_upgrade_task (struct priority_list *list,
+        struct priority_node *node)
 {
-  struct priority_node *last_parent_depends_on
-    = list->last_parent_depends_on;
+  struct priority_node *last_parent_depends_on = list->last_parent_depends_on;
   if (last_parent_depends_on)
     {
       node->prev->next = node->next;
@@ -933,166 +830,22 @@ priority_list_upgrade_task (struct priority_list *list,
   list->last_parent_depends_on = node;
 }
 
-/* Given a parent_depends_on TASK in its parent's children_queue, move
-   it to the front of its priority so it is run as soon as possible.
-
-   PARENT is passed as an optimization.
-
-   (This function could be defined in priority_queue.c, but we want it
-   inlined, and putting it in priority_queue.h is not an option, given
-   that gomp_task has not been properly defined at that point).  */
-
-static void inline
-priority_queue_upgrade_task (struct gomp_task *task,
-      struct gomp_task *parent)
-{
-  struct priority_queue *head = &parent->children_queue;
-  struct priority_node *node = &task->pnode[PQ_CHILDREN];
-#if _LIBGOMP_CHECKING_
-  if (!task->parent_depends_on)
-    gomp_fatal ("priority_queue_upgrade_task: task must be a "
- "parent_depends_on task");
-  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
-    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
-#endif
-  if (priority_queue_multi_p (head))
-    {
-      struct priority_list *list
- = priority_queue_lookup_priority (head, task->priority);
-      priority_list_upgrade_task (list, node);
-    }
-  else
-    priority_list_upgrade_task (&head->l, node);
-}
-
-/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
-   the way in LIST so that other tasks can be considered for
-   execution.  LIST contains tasks of type TYPE.
-
-   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
-   if applicable.  */
-
-static void inline
-priority_list_downgrade_task (enum priority_queue_type type,
-       struct priority_list *list,
-       struct gomp_task *child_task)
-{
-  struct priority_node *node = task_to_priority_node (type, child_task);
-  if (list->tasks == node)
-    list->tasks = node->next;
-  else if (node->next != list->tasks)
-    {
-      /* The task in NODE is about to become TIED and TIED tasks
- cannot come before WAITING tasks.  If we're about to
- leave the queue in such an indeterminate state, rewire
- things appropriately.  However, a TIED task at the end is
- perfectly fine.  */
-      struct gomp_task *next_task = priority_node_to_task (type, node->next);
-      if (next_task->kind == GOMP_TASK_WAITING)
- {
-   /* Remove from list.  */
-   node->prev->next = node->next;
-   node->next->prev = node->prev;
-   /* Rewire at the end.  */
-   node->next = list->tasks;
-   node->prev = list->tasks->prev;
-   list->tasks->prev->next = node;
-   list->tasks->prev = node;
- }
-    }
-
-  /* If the current task is the last_parent_depends_on for its
-     priority, adjust last_parent_depends_on appropriately.  */
-  if (__builtin_expect (child_task->parent_depends_on, 0)
-      && list->last_parent_depends_on == node)
-    {
-      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
-      if (node->prev != node
-   && prev_child->kind == GOMP_TASK_WAITING
-   && prev_child->parent_depends_on)
- list->last_parent_depends_on = node->prev;
-      else
- {
-   /* There are no more parent_depends_on entries waiting
-      to run, clear the list.  */
-   list->last_parent_depends_on = NULL;
- }
-    }
-}
-
-/* Given a TASK in HEAD that is about to be executed, move it out of
-   the way so that other tasks can be considered for execution.  HEAD
-   contains tasks of type TYPE.
-
-   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
-   if applicable.
-
-   (This function could be defined in priority_queue.c, but we want it
-   inlined, and putting it in priority_queue.h is not an option, given
-   that gomp_task has not been properly defined at that point).  */
-
-static void inline
-priority_queue_downgrade_task (enum priority_queue_type type,
-        struct priority_queue *head,
-        struct gomp_task *task)
-{
-#if _LIBGOMP_CHECKING_
-  if (!priority_queue_task_in_queue_p (type, head, task))
-    gomp_fatal ("Attempt to downgrade missing task %p", task);
-#endif
-  if (priority_queue_multi_p (head))
-    {
-      struct priority_list *list
- = priority_queue_lookup_priority (head, task->priority);
-      priority_list_downgrade_task (type, list, task);
-    }
-  else
-    priority_list_downgrade_task (type, &head->l, task);
-}
-
-/* Setup CHILD_TASK to execute.  This is done by setting the task to
-   TIED, and updating all relevant queues so that CHILD_TASK is no
-   longer chosen for scheduling.  Also, remove CHILD_TASK from the
-   overall team task queue entirely.
-
-   Return TRUE if task or its containing taskgroup has been
-   cancelled.  */
-
 static inline bool
-gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
-    struct gomp_team *team)
+gomp_task_run_pre (struct gomp_task *task, struct gomp_team *team)
 {
 #if _LIBGOMP_CHECKING_
-  if (child_task->parent)
-    priority_queue_verify (PQ_CHILDREN,
-    &child_task->parent->children_queue, true);
-  if (child_task->taskgroup)
-    priority_queue_verify (PQ_TASKGROUP,
-    &child_task->taskgroup->taskgroup_queue, false);
   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
 #endif
+  struct gomp_taskgroup *taskgroup = task->taskgroup;
 
-  /* Task is about to go tied, move it out of the way.  */
-  if (parent)
-    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
-    child_task);
-
-  /* Task is about to go tied, move it out of the way.  */
-  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
-  if (taskgroup)
-    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
-    child_task);
-
-  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
- MEMMODEL_RELAXED);
-  child_task->pnode[PQ_TEAM].next = NULL;
-  child_task->pnode[PQ_TEAM].prev = NULL;
-  child_task->kind = GOMP_TASK_TIED;
+  priority_queue_remove (PQ_TEAM, &team->task_queue, task, MEMMODEL_RELAXED);
+  task->pnode.next = NULL;
+  task->pnode.prev = NULL;
+  task->kind = GOMP_TASK_TIED;
 
   if (--team->task_queued_count == 0)
     gomp_team_barrier_clear_task_pending (&team->barrier);
-  if (__builtin_expect (gomp_cancel_var, 0)
-      && !child_task->copy_ctors_done)
+  if (__builtin_expect (gomp_cancel_var, 0) && !task->copy_ctors_done)
     {
       if (gomp_team_barrier_cancelled (&team->barrier))
  return true;
@@ -1100,8 +853,7 @@ gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
  {
    if (taskgroup->cancelled)
      return true;
-   if (taskgroup->workshare
-       && taskgroup->prev
+   if (taskgroup->workshare && taskgroup->prev
        && taskgroup->prev->cancelled)
      return true;
  }
@@ -1154,7 +906,6 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
   for (i = 0; i < count; i++)
     {
       struct gomp_task *task = child_task->dependers->elem[i];
-
       /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
  TASK's remaining dependencies.  Once TASK has no other
  depenencies, put it into the various queues so it will get
@@ -1162,14 +913,14 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
       if (--task->num_dependees != 0)
  continue;
 
+      priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
+      PRIORITY_INSERT_END,
+      /*adjust_parent_depends_on=*/false,
+      task->parent_depends_on);
+
       struct gomp_taskgroup *taskgroup = task->taskgroup;
       if (parent)
  {
-   priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
- task, task->priority,
- PRIORITY_INSERT_BEGIN,
- /*adjust_parent_depends_on=*/true,
- task->parent_depends_on);
    if (parent->taskwait)
      {
        if (parent->taskwait->in_taskwait)
@@ -1190,11 +941,6 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
  }
       if (taskgroup)
  {
-   priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
- task, task->priority,
- PRIORITY_INSERT_BEGIN,
- /*adjust_parent_depends_on=*/false,
- task->parent_depends_on);
    if (taskgroup->in_taskgroup_wait)
      {
        /* One more task has had its dependencies met.
@@ -1203,12 +949,6 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
        gomp_sem_post (&taskgroup->taskgroup_sem);
      }
  }
-      priority_queue_insert (PQ_TEAM, &team->task_queue,
-      task, task->priority,
-      PRIORITY_INSERT_END,
-      /*adjust_parent_depends_on=*/false,
-      task->parent_depends_on);
-      ++team->task_count;
       ++team->task_queued_count;
       ++ret;
     }
@@ -1250,22 +990,26 @@ gomp_task_run_post_remove_parent (struct gomp_task *child_task)
      synchronize with gomp_task_maybe_wait_for_dependencies so it can
      clean up and return.  */
   if (__builtin_expect (child_task->parent_depends_on, 0)
-      && --parent->taskwait->n_depend == 0
-      && parent->taskwait->in_depend_wait)
+      && --parent->taskwait->n_depend == 0 && parent->taskwait->in_depend_wait)
     {
       parent->taskwait->in_depend_wait = false;
       gomp_sem_post (&parent->taskwait->taskwait_sem);
     }
 
-  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
-      child_task, MEMMODEL_RELEASE)
-      && parent->taskwait && parent->taskwait->in_taskwait)
+  if (--parent->num_children == 0)
     {
-      parent->taskwait->in_taskwait = false;
-      gomp_sem_post (&parent->taskwait->taskwait_sem);
+      if (parent->kind == GOMP_DONE)
+ {
+   gomp_finish_task (parent);
+   free (parent);
+   parent = NULL;
+ }
+      else if (parent->taskwait && parent->taskwait->in_taskwait)
+ {
+   parent->taskwait->in_taskwait = false;
+   gomp_sem_post (&parent->taskwait->taskwait_sem);
+ }
     }
-  child_task->pnode[PQ_CHILDREN].next = NULL;
-  child_task->pnode[PQ_CHILDREN].prev = NULL;
 }
 
 /* Remove CHILD_TASK from its taskgroup.  */
@@ -1276,27 +1020,131 @@ gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
   if (taskgroup == NULL)
     return;
-  bool empty = priority_queue_remove (PQ_TASKGROUP,
-       &taskgroup->taskgroup_queue,
-       child_task, MEMMODEL_RELAXED);
-  child_task->pnode[PQ_TASKGROUP].next = NULL;
-  child_task->pnode[PQ_TASKGROUP].prev = NULL;
-  if (taskgroup->num_children > 1)
-    --taskgroup->num_children;
-  else
+  if (--taskgroup->num_children == 0)
     {
       /* We access taskgroup->num_children in GOMP_taskgroup_end
  outside of the task lock mutex region, so
  need a release barrier here to ensure memory
  written by child_task->fn above is flushed
  before the NULL is written.  */
-      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
+      if (taskgroup->in_taskgroup_wait)
+ {
+   taskgroup->in_taskgroup_wait = false;
+   gomp_sem_post (&taskgroup->taskgroup_sem);
+ }
     }
-  if (empty && taskgroup->in_taskgroup_wait)
+}
+
+/* Run at most one task from the team queue.  Must be called with
+   team->task_lock held; the lock is held again on return.
+   Returns false if the team queue is empty or its next task is not
+   GOMP_TASK_WAITING, so the caller should check its stop criterion
+   and wait; returns true if a task was picked up from the queue.
+   TEAM and THR are passed as an optimization.  */
+
+static inline bool
+gomp_execute_task (struct gomp_team *team, struct gomp_thread *thr,
+    struct gomp_task *task)
+{
+  bool cancelled = false;
+  bool ignored;
+  int do_wake = 0;
+  struct gomp_task *to_free = NULL;
+  struct gomp_task *next_task = NULL;
+
+#if _LIBGOMP_CHECKING_
+  if (priority_queue_empty_p (&team->task_queue, MEMMODEL_ACQUIRE)
+      && __atomic_load_n (&team->task_queued_count, MEMMODEL_ACQUIRE) != 0)
+    gomp_fatal ("Queue empty while queued task count is nonzero.");
+#endif
+
+  if (priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
+    return false;
+
+  next_task = priority_queue_next_task (PQ_TEAM, &team->task_queue, PQ_IGNORED,
+ NULL, &ignored);
+
+  if (next_task->kind == GOMP_TASK_WAITING)
+    {
+      cancelled = gomp_task_run_pre (next_task, team);
+      if (__builtin_expect (cancelled, 0))
+ goto finish_cancelled;
+    }
+  else
     {
-      taskgroup->in_taskgroup_wait = false;
-      gomp_sem_post (&taskgroup->taskgroup_sem);
+      /* All tasks we are waiting for are either running in other
+ threads, or they are tasks that have not had their
+ dependencies met (so they're not even in the queue).  Wait
+ for them.  */
+      next_task = NULL;
+      return false;
     }
+  gomp_mutex_unlock (&team->task_lock);
+  if (do_wake)
+    {
+      gomp_team_barrier_wake (&team->barrier, do_wake);
+      do_wake = 0;
+    }
+  if (next_task)
+    {
+      thr->task = next_task;
+      if (__builtin_expect (next_task->fn == NULL, 0))
+ {
+   if (gomp_target_task_fn (next_task->fn_data))
+     {
+       thr->task = task;
+       gomp_mutex_lock (&team->task_lock);
+       next_task->kind = GOMP_TASK_ASYNC_RUNNING;
+       struct gomp_target_task *ttask
+ = (struct gomp_target_task *) next_task->fn_data;
+       /* If GOMP_PLUGIN_target_task_completion has run already
+ in between gomp_target_task_fn and the mutex lock,
+ perform the requeuing here.  */
+       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
+ gomp_target_task_completion (team, next_task);
+       else
+ ttask->state = GOMP_TARGET_TASK_RUNNING;
+       next_task = NULL;
+       return true;
+     }
+ }
+      else
+ next_task->fn (next_task->fn_data);
+      thr->task = task;
+    }
+  else
+    {
+      gomp_mutex_lock (&team->task_lock);
+      return false;
+    }
+  gomp_mutex_lock (&team->task_lock);
+  if (next_task)
+    {
+    finish_cancelled:;
+      size_t new_tasks = gomp_task_run_post_handle_depend (next_task, team);
+
+      gomp_task_run_post_remove_parent (next_task);
+      gomp_task_run_post_remove_taskgroup (next_task);
+
+      to_free = next_task;
+      to_free->kind = GOMP_DONE;
+      next_task = NULL;
+      team->task_count--;
+      if (new_tasks > 1)
+ {
+   do_wake
+     = team->nthreads - team->task_running_count - !task->in_tied_task;
+   if (do_wake > new_tasks)
+     do_wake = new_tasks;
+ }
+    }
+  if (to_free && to_free->num_children == 0)
+    {
+      gomp_finish_task (to_free);
+      free (to_free);
+      to_free = NULL;
+    }
+  return true;
 }
 
 void
@@ -1322,21 +1170,18 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
       gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
     }
 
-  while (1)
+  while (true)
     {
       bool cancelled = false;
       if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
  {
    bool ignored;
-   child_task
-     = priority_queue_next_task (PQ_TEAM, &team->task_queue,
- PQ_IGNORED, NULL,
- &ignored);
-   cancelled = gomp_task_run_pre (child_task, child_task->parent,
- team);
+   child_task = priority_queue_next_task (PQ_TEAM, &team->task_queue,
+ PQ_IGNORED, NULL, &ignored);
+   cancelled = gomp_task_run_pre (child_task, team);
    if (__builtin_expect (cancelled, 0))
      {
-       if (to_free)
+       if (to_free && to_free->num_children == 0)
  {
    gomp_finish_task (to_free);
    free (to_free);
@@ -1353,7 +1198,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
    gomp_team_barrier_wake (&team->barrier, do_wake);
    do_wake = 0;
  }
-      if (to_free)
+      if (to_free && to_free->num_children == 0)
  {
    gomp_finish_task (to_free);
    free (to_free);
@@ -1392,13 +1237,13 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
  {
- finish_cancelled:;
+ finish_cancelled:;
    size_t new_tasks
      = gomp_task_run_post_handle_depend (child_task, team);
    gomp_task_run_post_remove_parent (child_task);
-   gomp_clear_parent (&child_task->children_queue);
    gomp_task_run_post_remove_taskgroup (child_task);
    to_free = child_task;
+   to_free->kind = GOMP_DONE;
    child_task = NULL;
    if (!cancelled)
      team->task_running_count--;
@@ -1430,10 +1275,7 @@ GOMP_taskwait (void)
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
   struct gomp_task *task = thr->task;
-  struct gomp_task *child_task = NULL;
-  struct gomp_task *to_free = NULL;
   struct gomp_taskwait taskwait;
-  int do_wake = 0;
 
   /* The acquire barrier on load of task->children here synchronizes
      with the write of a NULL in gomp_task_run_post_remove_parent.  It is
@@ -1442,54 +1284,24 @@ GOMP_taskwait (void)
      child thread task work function are seen before we exit from
      GOMP_taskwait.  */
   if (task == NULL
-      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
+      || __atomic_load_n (&task->num_children, MEMMODEL_ACQUIRE) == 0)
     return;
 
   memset (&taskwait, 0, sizeof (taskwait));
-  bool child_q = false;
   gomp_mutex_lock (&team->task_lock);
   while (1)
     {
-      bool cancelled = false;
-      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
+      if (__atomic_load_n (&task->num_children, MEMMODEL_RELAXED) == 0)
  {
    bool destroy_taskwait = task->taskwait != NULL;
    task->taskwait = NULL;
    gomp_mutex_unlock (&team->task_lock);
-   if (to_free)
-     {
-       gomp_finish_task (to_free);
-       free (to_free);
-     }
    if (destroy_taskwait)
      gomp_sem_destroy (&taskwait.taskwait_sem);
    return;
  }
-      struct gomp_task *next_task
- = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
-     PQ_TEAM, &team->task_queue, &child_q);
-      if (next_task->kind == GOMP_TASK_WAITING)
+      if (!gomp_execute_task (team, thr, task))
  {
-   child_task = next_task;
-   cancelled
-     = gomp_task_run_pre (child_task, task, team);
-   if (__builtin_expect (cancelled, 0))
-     {
-       if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
-   to_free = NULL;
- }
-       goto finish_cancelled;
-     }
- }
-      else
- {
- /* All tasks we are waiting for are either running in other
-    threads, or they are tasks that have not had their
-    dependencies met (so they're not even in the queue).  Wait
-    for them.  */
    if (task->taskwait == NULL)
      {
        taskwait.in_depend_wait = false;
@@ -1497,77 +1309,9 @@ GOMP_taskwait (void)
        task->taskwait = &taskwait;
      }
    taskwait.in_taskwait = true;
- }
-      gomp_mutex_unlock (&team->task_lock);
-      if (do_wake)
- {
-   gomp_team_barrier_wake (&team->barrier, do_wake);
-   do_wake = 0;
- }
-      if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
-   to_free = NULL;
- }
-      if (child_task)
- {
-   thr->task = child_task;
-   if (__builtin_expect (child_task->fn == NULL, 0))
-     {
-       if (gomp_target_task_fn (child_task->fn_data))
- {
-   thr->task = task;
-   gomp_mutex_lock (&team->task_lock);
-   child_task->kind = GOMP_TASK_ASYNC_RUNNING;
-   struct gomp_target_task *ttask
-     = (struct gomp_target_task *) child_task->fn_data;
-   /* If GOMP_PLUGIN_target_task_completion has run already
-      in between gomp_target_task_fn and the mutex lock,
-      perform the requeuing here.  */
-   if (ttask->state == GOMP_TARGET_TASK_FINISHED)
-     gomp_target_task_completion (team, child_task);
-   else
-     ttask->state = GOMP_TARGET_TASK_RUNNING;
-   child_task = NULL;
-   continue;
- }
-     }
-   else
-     child_task->fn (child_task->fn_data);
-   thr->task = task;
- }
-      else
- gomp_sem_wait (&taskwait.taskwait_sem);
-      gomp_mutex_lock (&team->task_lock);
-      if (child_task)
- {
- finish_cancelled:;
-   size_t new_tasks
-     = gomp_task_run_post_handle_depend (child_task, team);
-
-   if (child_q)
-     {
-       priority_queue_remove (PQ_CHILDREN, &task->children_queue,
-      child_task, MEMMODEL_RELAXED);
-       child_task->pnode[PQ_CHILDREN].next = NULL;
-       child_task->pnode[PQ_CHILDREN].prev = NULL;
-     }
-
-   gomp_clear_parent (&child_task->children_queue);
-
-   gomp_task_run_post_remove_taskgroup (child_task);
-
-   to_free = child_task;
-   child_task = NULL;
-   team->task_count--;
-   if (new_tasks > 1)
-     {
-       do_wake = team->nthreads - team->task_running_count
- - !task->in_tied_task;
-       if (do_wake > new_tasks)
- do_wake = new_tasks;
-     }
+   gomp_mutex_unlock (&team->task_lock);
+   gomp_sem_wait (&taskwait.taskwait_sem);
+   gomp_mutex_lock (&team->task_lock);
  }
     }
 }
@@ -1590,8 +1334,7 @@ GOMP_taskwait_depend (void **depend)
  {
    if (thr->task->taskgroup->cancelled)
      return;
-   if (thr->task->taskgroup->workshare
-       && thr->task->taskgroup->prev
+   if (thr->task->taskgroup->workshare && thr->task->taskgroup->prev
        && thr->task->taskgroup->prev->cancelled)
      return;
  }
@@ -1629,9 +1372,6 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
   size_t n = 2;
   size_t i;
   size_t num_awaited = 0;
-  struct gomp_task *child_task = NULL;
-  struct gomp_task *to_free = NULL;
-  int do_wake = 0;
 
   if (ndepend == 0)
     {
@@ -1674,11 +1414,9 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
        {
  tsk->parent_depends_on = true;
  ++num_awaited;
- /* If depenency TSK itself has no dependencies and is
-    ready to run, move it up front so that we run it as
-    soon as possible.  */
- if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
-   priority_queue_upgrade_task (tsk, task);
+ /*  Previously, the priorities of these dependencies were upgraded.
+     It is unclear whether skipping that upgrade can lead to a
+     deadlock in the single queue design.  */
        }
    }
     }
@@ -1689,133 +1427,26 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
     }
 
   memset (&taskwait, 0, sizeof (taskwait));
-  taskwait.n_depend = num_awaited;
   gomp_sem_init (&taskwait.taskwait_sem, 0);
+  taskwait.n_depend = num_awaited;
   task->taskwait = &taskwait;
 
-  while (1)
+  while (true)
     {
-      bool cancelled = false;
       if (taskwait.n_depend == 0)
  {
    task->taskwait = NULL;
    gomp_mutex_unlock (&team->task_lock);
-   if (to_free)
-     {
-       gomp_finish_task (to_free);
-       free (to_free);
-     }
    gomp_sem_destroy (&taskwait.taskwait_sem);
    return;
  }
 
-      /* Theoretically when we have multiple priorities, we should
- chose between the highest priority item in
- task->children_queue and team->task_queue here, so we should
- use priority_queue_next_task().  However, since we are
- running an undeferred task, perhaps that makes all tasks it
- depends on undeferred, thus a priority of INF?  This would
- make it unnecessary to take anything into account here,
- but the dependencies.
-
- On the other hand, if we want to use priority_queue_next_task(),
- care should be taken to only use priority_queue_remove()
- below if the task was actually removed from the children
- queue.  */
-      bool ignored;
-      struct gomp_task *next_task
- = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
-     PQ_IGNORED, NULL, &ignored);
-
-      if (next_task->kind == GOMP_TASK_WAITING)
- {
-   child_task = next_task;
-   cancelled
-     = gomp_task_run_pre (child_task, task, team);
-   if (__builtin_expect (cancelled, 0))
-     {
-       if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
-   to_free = NULL;
- }
-       goto finish_cancelled;
-     }
- }
-      else
- /* All tasks we are waiting for are either running in other
-    threads, or they are tasks that have not had their
-    dependencies met (so they're not even in the queue).  Wait
-    for them.  */
- taskwait.in_depend_wait = true;
-      gomp_mutex_unlock (&team->task_lock);
-      if (do_wake)
- {
-   gomp_team_barrier_wake (&team->barrier, do_wake);
-   do_wake = 0;
- }
-      if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
-   to_free = NULL;
- }
-      if (child_task)
- {
-   thr->task = child_task;
-   if (__builtin_expect (child_task->fn == NULL, 0))
-     {
-       if (gomp_target_task_fn (child_task->fn_data))
- {
-   thr->task = task;
-   gomp_mutex_lock (&team->task_lock);
-   child_task->kind = GOMP_TASK_ASYNC_RUNNING;
-   struct gomp_target_task *ttask
-     = (struct gomp_target_task *) child_task->fn_data;
-   /* If GOMP_PLUGIN_target_task_completion has run already
-      in between gomp_target_task_fn and the mutex lock,
-      perform the requeuing here.  */
-   if (ttask->state == GOMP_TARGET_TASK_FINISHED)
-     gomp_target_task_completion (team, child_task);
-   else
-     ttask->state = GOMP_TARGET_TASK_RUNNING;
-   child_task = NULL;
-   continue;
- }
-     }
-   else
-     child_task->fn (child_task->fn_data);
-   thr->task = task;
- }
-      else
- gomp_sem_wait (&taskwait.taskwait_sem);
-      gomp_mutex_lock (&team->task_lock);
-      if (child_task)
+      if (!gomp_execute_task (team, thr, task))
  {
- finish_cancelled:;
-   size_t new_tasks
-     = gomp_task_run_post_handle_depend (child_task, team);
-   if (child_task->parent_depends_on)
-     --taskwait.n_depend;
-
-   priority_queue_remove (PQ_CHILDREN, &task->children_queue,
- child_task, MEMMODEL_RELAXED);
-   child_task->pnode[PQ_CHILDREN].next = NULL;
-   child_task->pnode[PQ_CHILDREN].prev = NULL;
-
-   gomp_clear_parent (&child_task->children_queue);
-   gomp_task_run_post_remove_taskgroup (child_task);
-   to_free = child_task;
-   child_task = NULL;
-   team->task_count--;
-   if (new_tasks > 1)
-     {
-       do_wake = team->nthreads - team->task_running_count
- - !task->in_tied_task;
-       if (do_wake > new_tasks)
- do_wake = new_tasks;
-     }
+   taskwait.in_depend_wait = true;
+   gomp_mutex_unlock (&team->task_lock);
+   gomp_sem_wait (&taskwait.taskwait_sem);
+   gomp_mutex_lock (&team->task_lock);
  }
     }
 }
@@ -1834,7 +1465,7 @@ gomp_taskgroup_init (struct gomp_taskgroup *prev)
   struct gomp_taskgroup *taskgroup
     = gomp_malloc (sizeof (struct gomp_taskgroup));
   taskgroup->prev = prev;
-  priority_queue_init (&taskgroup->taskgroup_queue);
+  // priority_queue_init (&taskgroup->taskgroup_queue);
   taskgroup->reductions = prev ? prev->reductions : NULL;
   taskgroup->in_taskgroup_wait = false;
   taskgroup->cancelled = false;
@@ -1867,15 +1498,11 @@ GOMP_taskgroup_end (void)
   struct gomp_team *team = thr->ts.team;
   struct gomp_task *task = thr->task;
   struct gomp_taskgroup *taskgroup;
-  struct gomp_task *child_task = NULL;
-  struct gomp_task *to_free = NULL;
-  int do_wake = 0;
 
   if (team == NULL)
     return;
   taskgroup = task->taskgroup;
-  if (__builtin_expect (taskgroup == NULL, 0)
-      && thr->ts.level == 0)
+  if (__builtin_expect (taskgroup == NULL, 0) && thr->ts.level == 0)
     {
       /* This can happen if GOMP_taskgroup_start is called when
  thr->ts.team == NULL, but inside of the taskgroup there
@@ -1895,134 +1522,30 @@ GOMP_taskgroup_end (void)
   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
     goto finish;
 
-  bool unused;
   gomp_mutex_lock (&team->task_lock);
-  while (1)
+  while (true)
     {
-      bool cancelled = false;
-      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
-   MEMMODEL_RELAXED))
+      if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_RELAXED) == 0)
  {
-   if (taskgroup->num_children)
-     {
-       if (priority_queue_empty_p (&task->children_queue,
-   MEMMODEL_RELAXED))
- goto do_wait;
-       child_task
- = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
-     PQ_TEAM, &team->task_queue,
-     &unused);
-     }
-   else
-     {
-       gomp_mutex_unlock (&team->task_lock);
-       if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
- }
-       goto finish;
-     }
- }
-      else
- child_task
-   = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
-       PQ_TEAM, &team->task_queue, &unused);
-      if (child_task->kind == GOMP_TASK_WAITING)
- {
-   cancelled
-     = gomp_task_run_pre (child_task, child_task->parent, team);
-   if (__builtin_expect (cancelled, 0))
-     {
-       if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
-   to_free = NULL;
- }
-       goto finish_cancelled;
-     }
+   gomp_mutex_unlock (&team->task_lock);
+   goto finish;
  }
-      else
+      if (!gomp_execute_task (team, thr, task))
  {
-   child_task = NULL;
- do_wait:
- /* All tasks we are waiting for are either running in other
-    threads, or they are tasks that have not had their
-    dependencies met (so they're not even in the queue).  Wait
-    for them.  */
    taskgroup->in_taskgroup_wait = true;
- }
-      gomp_mutex_unlock (&team->task_lock);
-      if (do_wake)
- {
-   gomp_team_barrier_wake (&team->barrier, do_wake);
-   do_wake = 0;
- }
-      if (to_free)
- {
-   gomp_finish_task (to_free);
-   free (to_free);
-   to_free = NULL;
- }
-      if (child_task)
- {
-   thr->task = child_task;
-   if (__builtin_expect (child_task->fn == NULL, 0))
-     {
-       if (gomp_target_task_fn (child_task->fn_data))
- {
-   thr->task = task;
-   gomp_mutex_lock (&team->task_lock);
-   child_task->kind = GOMP_TASK_ASYNC_RUNNING;
-   struct gomp_target_task *ttask
-     = (struct gomp_target_task *) child_task->fn_data;
-   /* If GOMP_PLUGIN_target_task_completion has run already
-      in between gomp_target_task_fn and the mutex lock,
-      perform the requeuing here.  */
-   if (ttask->state == GOMP_TARGET_TASK_FINISHED)
-     gomp_target_task_completion (team, child_task);
-   else
-     ttask->state = GOMP_TARGET_TASK_RUNNING;
-   child_task = NULL;
-   continue;
- }
-     }
-   else
-     child_task->fn (child_task->fn_data);
-   thr->task = task;
- }
-      else
- gomp_sem_wait (&taskgroup->taskgroup_sem);
-      gomp_mutex_lock (&team->task_lock);
-      if (child_task)
- {
- finish_cancelled:;
-   size_t new_tasks
-     = gomp_task_run_post_handle_depend (child_task, team);
-   gomp_task_run_post_remove_parent (child_task);
-   gomp_clear_parent (&child_task->children_queue);
-   gomp_task_run_post_remove_taskgroup (child_task);
-   to_free = child_task;
-   child_task = NULL;
-   team->task_count--;
-   if (new_tasks > 1)
-     {
-       do_wake = team->nthreads - team->task_running_count
- - !task->in_tied_task;
-       if (do_wake > new_tasks)
- do_wake = new_tasks;
-     }
+   gomp_mutex_unlock (&team->task_lock);
+   gomp_sem_wait (&taskgroup->taskgroup_sem);
+   gomp_mutex_lock (&team->task_lock);
  }
     }
 
- finish:
+finish:
   task->taskgroup = taskgroup->prev;
   gomp_sem_destroy (&taskgroup->taskgroup_sem);
   free (taskgroup);
 }
 
-static inline __attribute__((always_inline)) void
+static inline __attribute__ ((always_inline)) void
 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
  unsigned nthreads)
 {
@@ -2095,7 +1618,7 @@ gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
       to hash also on the first sizeof (uintptr_t) bytes which contain
       a pointer.  Hide the cast from the compiler.  */
    hash_entry_type n;
-   __asm ("" : "=g" (n) : "0" (p));
+   __asm("" : "=g"(n) : "0"(p));
    *htab_find_slot (&new_htab, n, INSERT) = n;
  }
       if (d[4] == (uintptr_t) old)
@@ -2192,14 +1715,13 @@ GOMP_taskgroup_reduction_unregister (uintptr_t *data)
 }
 ialias (GOMP_taskgroup_reduction_unregister)
 
-/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
-   original list item or address of previously remapped original list
-   item to address of the private copy, store that to ptrs[i].
-   For i < cntorig, additionally set ptrs[cnt+i] to the address of
-   the original list item.  */
+  /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
+     original list item or address of previously remapped original list
+     item to address of the private copy, store that to ptrs[i].
+     For i < cntorig, additionally set ptrs[cnt+i] to the address of
+     the original list item.  */
 
-void
-GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
+  void GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_task *task = thr->task;
@@ -2211,12 +1733,12 @@ GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
   for (i = 0; i < cnt; ++i)
     {
       hash_entry_type ent, n;
-      __asm ("" : "=g" (ent) : "0" (ptrs + i));
+      __asm("" : "=g"(ent) : "0"(ptrs + i));
       n = htab_find (reduction_htab, ent);
       if (n)
  {
    uintptr_t *p;
-   __asm ("" : "=g" (p) : "0" (n));
+   __asm("" : "=g"(p) : "0"(n));
    /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
       p[1] is the offset within the allocated chunk for each
       thread, p[2] is the array registered with
@@ -2238,7 +1760,8 @@ GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
  }
       if (d == NULL)
  gomp_fatal ("couldn't find matching task_reduction or reduction with "
-     "task modifier for %p", ptrs[i]);
+     "task modifier for %p",
+     ptrs[i]);
       uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
       ptrs[i] = (void *) (d[2] + id * d[1] + off);
       if (__builtin_expect (i < cntorig, 0))
@@ -2259,7 +1782,8 @@ GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
      }
    if (lo > hi)
      gomp_fatal ("couldn't find matching task_reduction or reduction "
- "with task modifier for %p", ptrs[i]);
+ "with task modifier for %p",
+ ptrs[i]);
  }
     }
 }
diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c
index 5d3f810a8f2..4943ca06b2e 100644
--- a/libgomp/taskloop.c
+++ b/libgomp/taskloop.c
@@ -216,13 +216,13 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  task_step -= step;
        fn (arg);
        arg += arg_size;
-       if (!priority_queue_empty_p (&task[i].children_queue,
-    MEMMODEL_RELAXED))
- {
-   gomp_mutex_lock (&team->task_lock);
-   gomp_clear_parent (&task[i].children_queue);
-   gomp_mutex_unlock (&team->task_lock);
- }
+       /* if (!priority_queue_empty_p (&task[i].children_queue, */
+       /*    MEMMODEL_RELAXED)) */
+       /* { */
+       /*   gomp_mutex_lock (&team->task_lock); */
+       /*   gomp_clear_parent (&task[i].children_queue); */
+       /*   gomp_mutex_unlock (&team->task_lock); */
+       /* } */
        gomp_end_task ();
      }
  }
@@ -248,13 +248,6 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
      if (i == nfirst)
        task_step -= step;
      fn (data);
-     if (!priority_queue_empty_p (&task.children_queue,
- MEMMODEL_RELAXED))
-       {
- gomp_mutex_lock (&team->task_lock);
- gomp_clear_parent (&task.children_queue);
- gomp_mutex_unlock (&team->task_lock);
-       }
      gomp_end_task ();
    }
     }
@@ -329,24 +322,15 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  }
       if (taskgroup)
  taskgroup->num_children += num_tasks;
+      parent->num_children += num_tasks;
+      team->task_count += num_tasks;
       for (i = 0; i < num_tasks; i++)
  {
    struct gomp_task *task = tasks[i];
-   priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
- task, priority,
- PRIORITY_INSERT_BEGIN,
- /*last_parent_depends_on=*/false,
- task->parent_depends_on);
-   if (taskgroup)
-     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
-    task, priority, PRIORITY_INSERT_BEGIN,
-    /*last_parent_depends_on=*/false,
-    task->parent_depends_on);
    priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
  PRIORITY_INSERT_END,
  /*last_parent_depends_on=*/false,
  task->parent_depends_on);
-   ++team->task_count;
    ++team->task_queued_count;
  }
       gomp_team_barrier_set_task_pending (&team->barrier);



More information about the Gcc-patches mailing list