This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [gomp4] Library side of depend clause support


On Fri, Sep 27, 2013 at 01:48:36AM +0200, Jakub Jelinek wrote:
> Perhaps.  What if I do just minor cleanup (use flexible array members for
> the reallocated vectors, and perhaps keep only the last out/inout task
> in the hash table chains rather than all of them), retest, commit and then
> we can discuss/incrementally improve it?

Here is what I've committed now, the incremental changes were really only
using a structure with flex array member for the dependers vectors,
removing/making redundant earlier !ent->is_in when adding !is_in into the
chain and addition of new testcases.

Let's improve it incrementally later.

2013-09-27  Jakub Jelinek  <jakub@redhat.com>

	* libgomp.h: Include stdlib.h.
	(struct gomp_task_depend_entry,
	struct gomp_dependers_vec): New types.
	(struct gomp_task): Add dependers, depend_hash, depend_count,
	num_dependees and depend fields.
	(struct gomp_taskgroup): Add num_children field.
	(gomp_finish_task): Free depend_hash if non-NULL.
	* libgomp_g.h (GOMP_task): Add depend argument.
	* hashtab.h: New file.
	* task.c: Include hashtab.h.
	(hash_entry_type): New typedef.
	(htab_alloc, htab_free, htab_hash, htab_eq): New inlines.
	(gomp_init_task): Clear dependers, depend_hash and depend_count
	fields.
	(GOMP_task): Add depend argument, handle depend clauses.  Increment
	num_children field in taskgroup.
	(gomp_task_run_pre): Don't increment task_running_count here,
	nor clear task_pending bit.
	(gomp_task_run_post_handle_depend_hash,
	gomp_task_run_post_handle_dependers,
	gomp_task_run_post_handle_depend): New functions.
	(gomp_task_run_post_remove_parent): Clear in_taskwait before
	signalling corresponding semaphore.
	(gomp_task_run_post_remove_taskgroup): Decrement num_children
	field and make the decrement to 0 MEMMODEL_RELEASE operation,
	rather than storing NULL to taskgroup->children.  Clear
	in_taskgroup_wait before signalling corresponding semaphore.
	(gomp_barrier_handle_tasks): Move task_running_count increment
	and task_pending bit clearing here.  Call
	gomp_task_run_post_handle_depend.  If more than one new task
	has been queued, wake other threads if needed.
	(GOMP_taskwait): Call gomp_task_run_post_handle_depend.  If more
	than one new task has been queued, wake other threads if needed.
	After waiting on taskwait_sem, enter critical section again.
	(GOMP_taskgroup_start): Initialize num_children field.
	(GOMP_taskgroup_end): Check num_children instead of children
	before critical section.  If children is NULL, but num_children
	is non-zero, wait on taskgroup_sem.  Call
	gomp_task_run_post_handle_depend.  If more than one new task has
	been queued, wake other threads if needed.  After waiting on
	taskgroup_sem, enter critical section again.
	* testsuite/libgomp.c/depend-1.c: New test.
	* testsuite/libgomp.c/depend-2.c: New test.
	* testsuite/libgomp.c/depend-3.c: New test.
	* testsuite/libgomp.c/depend-4.c: New test.

--- libgomp/libgomp.h.jj	2013-09-26 09:43:10.903930832 +0200
+++ libgomp/libgomp.h	2013-09-27 09:05:17.025402127 +0200
@@ -39,6 +39,7 @@
 
 #include <pthread.h>
 #include <stdbool.h>
+#include <stdlib.h>
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
 # pragma GCC visibility push(hidden)
@@ -253,7 +254,26 @@ enum gomp_task_kind
   GOMP_TASK_TIED
 };
 
+struct gomp_task;
 struct gomp_taskgroup;
+struct htab;
+
+struct gomp_task_depend_entry
+{
+  void *addr;
+  struct gomp_task_depend_entry *next;
+  struct gomp_task_depend_entry *prev;
+  struct gomp_task *task;
+  bool is_in;
+  bool redundant;
+};
+
+struct gomp_dependers_vec
+{
+  size_t n_elem;
+  size_t allocated;
+  struct gomp_task *elem[];
+};
 
 /* This structure describes a "task" to be run by a thread.  */
 
@@ -268,6 +288,10 @@ struct gomp_task
   struct gomp_task *next_taskgroup;
   struct gomp_task *prev_taskgroup;
   struct gomp_taskgroup *taskgroup;
+  struct gomp_dependers_vec *dependers;
+  struct htab *depend_hash;
+  size_t depend_count;
+  size_t num_dependees;
   struct gomp_task_icv icv;
   void (*fn) (void *);
   void *fn_data;
@@ -277,6 +301,7 @@ struct gomp_task
   bool final_task;
   bool copy_ctors_done;
   gomp_sem_t taskwait_sem;
+  struct gomp_task_depend_entry depend[];
 };
 
 struct gomp_taskgroup
@@ -286,6 +311,7 @@ struct gomp_taskgroup
   bool in_taskgroup_wait;
   bool cancelled;
   gomp_sem_t taskgroup_sem;
+  size_t num_children;
 };
 
 /* This structure describes a "team" of threads.  These are the threads
@@ -525,6 +551,8 @@ extern void gomp_barrier_handle_tasks (g
 static void inline
 gomp_finish_task (struct gomp_task *task)
 {
+  if (__builtin_expect (task->depend_hash != NULL, 0))
+    free (task->depend_hash);
   gomp_sem_destroy (&task->taskwait_sem);
 }
 
--- libgomp/libgomp_g.h.jj	2013-09-26 09:43:10.902930838 +0200
+++ libgomp/libgomp_g.h	2013-09-26 10:08:44.820160094 +0200
@@ -178,7 +178,7 @@ extern bool GOMP_cancellation_point (int
 /* task.c */
 
 extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
-		       long, long, bool, unsigned);
+		       long, long, bool, unsigned, void **);
 extern void GOMP_taskwait (void);
 extern void GOMP_taskyield (void);
 extern void GOMP_taskgroup_start (void);
--- libgomp/hashtab.h.jj	2013-09-26 10:08:51.031128932 +0200
+++ libgomp/hashtab.h	2013-09-26 21:07:17.757697391 +0200
@@ -0,0 +1,443 @@
+/* An expandable hash tables datatype.
+   Copyright (C) 1999-2013
+   Free Software Foundation, Inc.
+   Contributed by Vladimir Makarov <vmakarov@cygnus.com>.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* The hash table code copied from include/hashtab.[hc] and adjusted,
+   so that the hash table entries are in the flexible array at the end
+   of the control structure, no callbacks are used and the elements in the
+   table are of the hash_entry_type type.
+   Before including this file, define hash_entry_type type and
+   htab_alloc and htab_free functions.  After including it, define
+   htab_hash and htab_eq inline functions.   */
+
+/* This package implements basic hash table functionality.  It is possible
+   to search for an entry, create an entry and destroy an entry.
+
+   Elements in the table are generic pointers.
+
+   The size of the table is not fixed; if the occupancy of the table
+   grows too high the hash table will be expanded.
+
+   The abstract data implementation is based on generalized Algorithm D
+   from Knuth's book "The art of computer programming".  Hash table is
+   expanded by creation of new hash table and transferring elements from
+   the old table to the new table.  */
+
+/* The type for a hash code.  */
+typedef unsigned int hashval_t;
+
+static inline hashval_t htab_hash (hash_entry_type);
+static inline bool htab_eq (hash_entry_type, hash_entry_type);
+
+/* This macro defines reserved value for empty table entry.  */
+
+#define HTAB_EMPTY_ENTRY    ((hash_entry_type) 0)
+
+/* This macro defines reserved value for table entry which contained
+   a deleted element. */
+
+#define HTAB_DELETED_ENTRY  ((hash_entry_type) 1)
+
+/* Hash tables are of the following type.  The structure
+   (implementation) of this type is not needed for using the hash
+   tables.  All work with hash table should be executed only through
+   functions mentioned below.  The size of this structure is subject to
+   change.  */
+
+struct htab {
+  /* Current size (in entries) of the hash table.  */
+  size_t size;
+
+  /* Current number of elements including also deleted elements.  */
+  size_t n_elements;
+
+  /* Current number of deleted elements in the table.  */
+  size_t n_deleted;
+
+  /* Current size (in entries) of the hash table, as an index into the
+     table of primes.  */
+  unsigned int size_prime_index;
+
+  /* Table itself.  */
+  hash_entry_type entries[];
+};
+
+typedef struct htab *htab_t;
+
+/* An enum saying whether we insert into the hash table or not.  */
+enum insert_option {NO_INSERT, INSERT};
+
+/* Table of primes and multiplicative inverses.
+
+   Note that these are not minimally reduced inverses.  Unlike when generating
+   code to divide by a constant, we want to be able to use the same algorithm
+   all the time.  All of these inverses (are implied to) have bit 32 set.
+
+   For the record, the function that computed the table is in
+   libiberty/hashtab.c.  */
+
+struct prime_ent
+{
+  hashval_t prime;
+  hashval_t inv;
+  hashval_t inv_m2;	/* inverse of prime-2 */
+  hashval_t shift;
+};
+
+static struct prime_ent const prime_tab[] = {
+  {          7, 0x24924925, 0x9999999b, 2 },
+  {         13, 0x3b13b13c, 0x745d1747, 3 },
+  {         31, 0x08421085, 0x1a7b9612, 4 },
+  {         61, 0x0c9714fc, 0x15b1e5f8, 5 },
+  {        127, 0x02040811, 0x0624dd30, 6 },
+  {        251, 0x05197f7e, 0x073260a5, 7 },
+  {        509, 0x01824366, 0x02864fc8, 8 },
+  {       1021, 0x00c0906d, 0x014191f7, 9 },
+  {       2039, 0x0121456f, 0x0161e69e, 10 },
+  {       4093, 0x00300902, 0x00501908, 11 },
+  {       8191, 0x00080041, 0x00180241, 12 },
+  {      16381, 0x000c0091, 0x00140191, 13 },
+  {      32749, 0x002605a5, 0x002a06e6, 14 },
+  {      65521, 0x000f00e2, 0x00110122, 15 },
+  {     131071, 0x00008001, 0x00018003, 16 },
+  {     262139, 0x00014002, 0x0001c004, 17 },
+  {     524287, 0x00002001, 0x00006001, 18 },
+  {    1048573, 0x00003001, 0x00005001, 19 },
+  {    2097143, 0x00004801, 0x00005801, 20 },
+  {    4194301, 0x00000c01, 0x00001401, 21 },
+  {    8388593, 0x00001e01, 0x00002201, 22 },
+  {   16777213, 0x00000301, 0x00000501, 23 },
+  {   33554393, 0x00001381, 0x00001481, 24 },
+  {   67108859, 0x00000141, 0x000001c1, 25 },
+  {  134217689, 0x000004e1, 0x00000521, 26 },
+  {  268435399, 0x00000391, 0x000003b1, 27 },
+  {  536870909, 0x00000019, 0x00000029, 28 },
+  { 1073741789, 0x0000008d, 0x00000095, 29 },
+  { 2147483647, 0x00000003, 0x00000007, 30 },
+  /* Avoid "decimal constant so large it is unsigned" for 4294967291.  */
+  { 0xfffffffb, 0x00000006, 0x00000008, 31 }
+};
+
+/* The following function returns an index into the above table of the
+   nearest prime number which is greater than N, and near a power of two. */
+
+static unsigned int
+higher_prime_index (unsigned long n)
+{
+  unsigned int low = 0;
+  unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
+
+  while (low != high)
+    {
+      unsigned int mid = low + (high - low) / 2;
+      if (n > prime_tab[mid].prime)
+	low = mid + 1;
+      else
+	high = mid;
+    }
+
+  /* If we've run out of primes, abort.  */
+  if (n > prime_tab[low].prime)
+    abort ();
+
+  return low;
+}
+
+/* Return the current size of given hash table.  */
+
+static inline size_t
+htab_size (htab_t htab)
+{
+  return htab->size;
+}
+
+/* Return the current number of elements in given hash table. */
+
+static inline size_t
+htab_elements (htab_t htab)
+{
+  return htab->n_elements - htab->n_deleted;
+}
+
+/* Return X % Y.  */
+
+static inline hashval_t
+htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
+{
+  /* The multiplicative inverses computed above are for 32-bit types, and
+     requires that we be able to compute a highpart multiply.  */
+  if (sizeof (hashval_t) * __CHAR_BIT__ <= 32)
+    {
+      hashval_t t1, t2, t3, t4, q, r;
+
+      t1 = ((unsigned long long)x * inv) >> 32;
+      t2 = x - t1;
+      t3 = t2 >> 1;
+      t4 = t1 + t3;
+      q  = t4 >> shift;
+      r  = x - (q * y);
+
+      return r;
+    }
+
+  /* Otherwise just use the native division routines.  */
+  return x % y;
+}
+
+/* Compute the primary hash for HASH given HTAB's current size.  */
+
+static inline hashval_t
+htab_mod (hashval_t hash, htab_t htab)
+{
+  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+  return htab_mod_1 (hash, p->prime, p->inv, p->shift);
+}
+
+/* Compute the secondary hash for HASH given HTAB's current size.  */
+
+static inline hashval_t
+htab_mod_m2 (hashval_t hash, htab_t htab)
+{
+  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+  return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
+}
+
+/* Create hash table of size SIZE.  */
+
+static htab_t
+htab_create (size_t size)
+{
+  htab_t result;
+  unsigned int size_prime_index;
+
+  size_prime_index = higher_prime_index (size);
+  size = prime_tab[size_prime_index].prime;
+
+  result = (htab_t) htab_alloc (sizeof (struct htab)
+				+ size * sizeof (hash_entry_type));
+  result->size = size;
+  result->n_elements = 0;
+  result->n_deleted = 0;
+  result->size_prime_index = size_prime_index;
+  memset (result->entries, 0, size * sizeof (hash_entry_type));
+  return result;
+}
+
+/* Similar to htab_find_slot, but without several unwanted side effects:
+    - Does not call htab_eq when it finds an existing entry.
+    - Does not change the count of elements in the hash table.
+   This function also assumes there are no deleted entries in the table.
+   HASH is the hash value for the element to be inserted.  */
+
+static hash_entry_type *
+find_empty_slot_for_expand (htab_t htab, hashval_t hash)
+{
+  hashval_t index = htab_mod (hash, htab);
+  size_t size = htab_size (htab);
+  hash_entry_type *slot = htab->entries + index;
+  hashval_t hash2;
+
+  if (*slot == HTAB_EMPTY_ENTRY)
+    return slot;
+  else if (*slot == HTAB_DELETED_ENTRY)
+    abort ();
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      slot = htab->entries + index;
+      if (*slot == HTAB_EMPTY_ENTRY)
+	return slot;
+      else if (*slot == HTAB_DELETED_ENTRY)
+	abort ();
+    }
+}
+
+/* The following function changes size of memory allocated for the
+   entries and repeatedly inserts the table elements.  The occupancy
+   of the table after the call will be about 50%.  Naturally the hash
+   table must already exist.  Remember also that the place of the
+   table entries is changed.  */
+
+static htab_t
+htab_expand (htab_t htab)
+{
+  htab_t nhtab;
+  hash_entry_type *olimit;
+  hash_entry_type *p;
+  size_t osize, elts;
+
+  osize = htab->size;
+  olimit = htab->entries + osize;
+  elts = htab_elements (htab);
+
+  /* Resize only when table after removal of unused elements is either
+     too full or too empty.  */
+  if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
+    nhtab = htab_create (elts * 2);
+  else
+    nhtab = htab_create (osize - 1);
+  nhtab->n_elements = htab->n_elements - htab->n_deleted;
+
+  p = htab->entries;
+  do
+    {
+      hash_entry_type x = *p;
+
+      if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
+	*find_empty_slot_for_expand (nhtab, htab_hash (x)) = x;
+
+      p++;
+    }
+  while (p < olimit);
+
+  htab_free (htab);
+  return nhtab;
+}
+
+/* This function searches for a hash table entry equal to the given
+   element.  It cannot be used to insert or delete an element.  */
+
+static hash_entry_type
+htab_find (htab_t htab, const hash_entry_type element)
+{
+  hashval_t index, hash2, hash = htab_hash (element);
+  size_t size;
+  hash_entry_type entry;
+
+  size = htab_size (htab);
+  index = htab_mod (hash, htab);
+
+  entry = htab->entries[index];
+  if (entry == HTAB_EMPTY_ENTRY
+      || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+    return entry;
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      entry = htab->entries[index];
+      if (entry == HTAB_EMPTY_ENTRY
+	  || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+	return entry;
+    }
+}
+
+/* This function searches for a hash table slot containing an entry
+   equal to the given element.  To delete an entry, call this with
+   insert=NO_INSERT, then call htab_clear_slot on the slot returned
+   (possibly after doing some checks).  To insert an entry, call this
+   with insert=INSERT, then write the value you want into the returned
+   slot.  */
+
+static hash_entry_type *
+htab_find_slot (htab_t *htabp, const hash_entry_type element,
+		enum insert_option insert)
+{
+  hash_entry_type *first_deleted_slot;
+  hashval_t index, hash2, hash = htab_hash (element);
+  size_t size;
+  hash_entry_type entry;
+  htab_t htab = *htabp;
+
+  size = htab_size (htab);
+  if (insert == INSERT && size * 3 <= htab->n_elements * 4)
+    {
+      htab = *htabp = htab_expand (htab);
+      size = htab_size (htab);
+    }
+
+  index = htab_mod (hash, htab);
+
+  first_deleted_slot = NULL;
+
+  entry = htab->entries[index];
+  if (entry == HTAB_EMPTY_ENTRY)
+    goto empty_entry;
+  else if (entry == HTAB_DELETED_ENTRY)
+    first_deleted_slot = &htab->entries[index];
+  else if (htab_eq (entry, element))
+    return &htab->entries[index];
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      entry = htab->entries[index];
+      if (entry == HTAB_EMPTY_ENTRY)
+	goto empty_entry;
+      else if (entry == HTAB_DELETED_ENTRY)
+	{
+	  if (!first_deleted_slot)
+	    first_deleted_slot = &htab->entries[index];
+	}
+      else if (htab_eq (entry, element))
+	return &htab->entries[index];
+    }
+
+ empty_entry:
+  if (insert == NO_INSERT)
+    return NULL;
+
+  if (first_deleted_slot)
+    {
+      htab->n_deleted--;
+      *first_deleted_slot = HTAB_EMPTY_ENTRY;
+      return first_deleted_slot;
+    }
+
+  htab->n_elements++;
+  return &htab->entries[index];
+}
+
+/* This function clears a specified slot in a hash table.  It is
+   useful when you've already done the lookup and don't want to do it
+   again.  */
+
+static inline void
+htab_clear_slot (htab_t htab, hash_entry_type *slot)
+{
+  if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
+      || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
+    abort ();
+
+  *slot = HTAB_DELETED_ENTRY;
+  htab->n_deleted++;
+}
+
+/* Returns a hash code for pointer P. Simplified version of evahash */
+
+static inline hashval_t
+hash_pointer (const void *p)
+{
+  uintptr_t v = (uintptr_t) p;
+  if (sizeof (v) > sizeof (hashval_t))
+    v ^= v >> (sizeof (uintptr_t) / 2 * __CHAR_BIT__);
+  return v;
+}
--- libgomp/task.c.jj	2013-09-26 09:43:10.903930832 +0200
+++ libgomp/task.c	2013-09-27 09:30:57.798187840 +0200
@@ -29,6 +29,33 @@
 #include <stdlib.h>
 #include <string.h>
 
+typedef struct gomp_task_depend_entry *hash_entry_type;
+
+static inline void *
+htab_alloc (size_t size)
+{
+  return gomp_malloc (size);
+}
+
+static inline void
+htab_free (void *ptr)
+{
+  free (ptr);
+}
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  return hash_pointer (element->addr);
+}
+
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return x->addr == y->addr;
+}
 
 /* Create a new task data structure.  */
 
@@ -45,6 +72,9 @@ gomp_init_task (struct gomp_task *task,
   task->copy_ctors_done = false;
   task->children = NULL;
   task->taskgroup = NULL;
+  task->dependers = NULL;
+  task->depend_hash = NULL;
+  task->depend_count = 0;
   gomp_sem_init (&task->taskwait_sem, 0);
 }
 
@@ -80,7 +110,8 @@ gomp_clear_parent (struct gomp_task *chi
 
 void
 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
-	   long arg_size, long arg_align, bool if_clause, unsigned flags)
+	   long arg_size, long arg_align, bool if_clause, unsigned flags,
+	   void **depend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
@@ -108,6 +139,38 @@ GOMP_task (void (*fn) (void *), void *da
     {
       struct gomp_task task;
 
+      /* If there are depend clauses and earlier deferred sibling tasks
+	 with depend clauses, check if there isn't a dependency.  If there
+	 is, fall through to the deferred task handling, as we can't
+	 schedule such tasks right away.  There is no need to handle
+	 depend clauses for non-deferred tasks other than this, because
+	 the parent task is suspended until the child task finishes and thus
+	 it can't start further child tasks.  */
+      if ((flags & 8) && thr->task && thr->task->depend_hash)
+	{
+	  struct gomp_task *parent = thr->task;
+	  struct gomp_task_depend_entry elem, *ent = NULL;
+	  size_t ndepend = (uintptr_t) depend[0];
+	  size_t nout = (uintptr_t) depend[1];
+	  size_t i;
+	  gomp_mutex_lock (&team->task_lock);
+	  for (i = 0; i < ndepend; i++)
+	    {
+	      elem.addr = depend[i + 2];
+	      ent = htab_find (parent->depend_hash, &elem);
+	      for (; ent; ent = ent->next)
+		if (i >= nout && ent->is_in)
+		  continue;
+		else
+		  break;
+	      if (ent)
+		break;
+	    }
+	  gomp_mutex_unlock (&team->task_lock);
+	  if (ent)
+	    goto defer;
+	}
+
       gomp_init_task (&task, thr->task, gomp_icv (false));
       task.kind = GOMP_TASK_IFFALSE;
       task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
@@ -146,14 +209,20 @@ GOMP_task (void (*fn) (void *), void *da
     }
   else
     {
+     defer:;
       struct gomp_task *task;
       struct gomp_task *parent = thr->task;
       struct gomp_taskgroup *taskgroup = parent->taskgroup;
       char *arg;
       bool do_wake;
+      size_t depend_size = 0;
 
-      task = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
-      arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+      if (flags & 8)
+	depend_size = ((uintptr_t) depend[0]
+		       * sizeof (struct gomp_task_depend_entry));
+      task = gomp_malloc (sizeof (*task) + depend_size
+			  + arg_size + arg_align - 1);
+      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
 		      & ~(uintptr_t) (arg_align - 1));
       gomp_init_task (task, parent, gomp_icv (false));
       task->kind = GOMP_TASK_IFFALSE;
@@ -171,7 +240,6 @@ GOMP_task (void (*fn) (void *), void *da
       task->kind = GOMP_TASK_WAITING;
       task->fn = fn;
       task->fn_data = arg;
-      task->in_tied_task = true;
       task->final_task = (flags & 2) >> 1;
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
@@ -185,6 +253,117 @@ GOMP_task (void (*fn) (void *), void *da
 	  free (task);
 	  return;
 	}
+      if (taskgroup)
+	taskgroup->num_children++;
+      if (depend_size)
+	{
+	  size_t ndepend = (uintptr_t) depend[0];
+	  size_t nout = (uintptr_t) depend[1];
+	  size_t i;
+	  hash_entry_type ent;
+
+	  task->depend_count = ndepend;
+	  task->num_dependees = 0;
+	  if (parent->depend_hash == NULL)
+	    parent->depend_hash
+	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+	  for (i = 0; i < ndepend; i++)
+	    {
+	      task->depend[i].addr = depend[2 + i];
+	      task->depend[i].next = NULL;
+	      task->depend[i].prev = NULL;
+	      task->depend[i].task = task;
+	      task->depend[i].is_in = i >= nout;
+	      task->depend[i].redundant = false;
+
+	      hash_entry_type *slot
+		= htab_find_slot (&parent->depend_hash, &task->depend[i],
+				  INSERT);
+	      hash_entry_type out = NULL;
+	      if (*slot)
+		{
+		  /* If multiple depends on the same task are the
+		     same, all but the first one are redundant.
+		     As inout/out come first, if any of them is
+		     inout/out, it will win, which is the right
+		     semantics.  */
+		  if ((*slot)->task == task)
+		    {
+		      task->depend[i].redundant = true;
+		      continue;
+		    }
+		  for (ent = *slot; ent; ent = ent->next)
+		    {
+		      /* depend(in:...) doesn't depend on earlier
+			 depend(in:...).  */
+		      if (i >= nout && ent->is_in)
+			continue;
+
+		      if (!ent->is_in)
+			out = ent;
+
+		      struct gomp_task *tsk = ent->task;
+		      if (tsk->dependers == NULL)
+			{
+			  tsk->dependers
+			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
+					   + 6 * sizeof (struct gomp_task *));
+			  tsk->dependers->n_elem = 1;
+			  tsk->dependers->allocated = 6;
+			  tsk->dependers->elem[0] = task;
+			  task->num_dependees++;
+			  continue;
+			}
+		      /* We already have some other dependency on tsk
+			 from earlier depend clause.  */
+		      else if (tsk->dependers->n_elem
+			       && (tsk->dependers->elem[tsk->dependers->n_elem
+							- 1]
+				   == task))
+			continue;
+		      else if (tsk->dependers->n_elem
+			       == tsk->dependers->allocated)
+			{
+			  tsk->dependers->allocated
+			    = tsk->dependers->allocated * 2 + 2;
+			  tsk->dependers
+			    = gomp_realloc (tsk->dependers,
+					    sizeof (struct gomp_dependers_vec)
+					    + (tsk->dependers->allocated
+					       * sizeof (struct gomp_task *)));
+			}
+		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
+		      task->num_dependees++;
+		    }
+		  task->depend[i].next = *slot;
+		  (*slot)->prev = &task->depend[i];
+		}
+	      *slot = &task->depend[i];
+
+	      /* There is no need to store more than one depend({,in}out:)
+		 task per address in the hash table chain, because each out
+		 depends on all earlier outs, thus it is enough to record
+		 just the last depend({,in}out:).  For depend(in:), we need
+		 to keep all of the previous ones not terminated yet, because
+		 a later depend({,in}out:) might need to depend on all of
+		 them.  So, if the new task's clause is depend({,in}out:),
+		 we know there is at most one other depend({,in}out:) clause
+		 in the list (out) and to maintain the invariant we now
+		 need to remove it from the list.  */
+	      if (!task->depend[i].is_in && out)
+		{
+		  if (out->next)
+		    out->next->prev = out->prev;
+		  out->prev->next = out->next;
+		  out->redundant = true;
+		}
+	    }
+	  if (task->num_dependees)
+	    {
+	      gomp_mutex_unlock (&team->task_lock);
+	      return;
+	    }
+	}
       if (parent->children)
 	{
 	  task->next_child = parent->children;
@@ -259,12 +438,133 @@ gomp_task_run_pre (struct gomp_task *chi
        || (taskgroup && taskgroup->cancelled))
       && !child_task->copy_ctors_done)
     return true;
-  team->task_running_count++;
-  if (team->task_count == team->task_running_count)
-    gomp_team_barrier_clear_task_pending (&team->barrier);
   return false;
 }
 
+static void
+gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
+{
+  struct gomp_task *parent = child_task->parent;
+  size_t i;
+
+  for (i = 0; i < child_task->depend_count; i++)
+    if (!child_task->depend[i].redundant)
+      {
+	if (child_task->depend[i].next)
+	  child_task->depend[i].next->prev = child_task->depend[i].prev;
+	if (child_task->depend[i].prev)
+	  child_task->depend[i].prev->next = child_task->depend[i].next;
+	else
+	  {
+	    hash_entry_type *slot
+	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
+				NO_INSERT);
+	    if (*slot != &child_task->depend[i])
+	      abort ();
+	    if (child_task->depend[i].next)
+	      *slot = child_task->depend[i].next;
+	    else
+	      htab_clear_slot (parent->depend_hash, slot);
+	  }
+      }
+}
+
+static size_t
+gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+				     struct gomp_team *team)
+{
+  struct gomp_task *parent = child_task->parent;
+  size_t i, count = child_task->dependers->n_elem, ret = 0;
+  for (i = 0; i < count; i++)
+    {
+      struct gomp_task *task = child_task->dependers->elem[i];
+      if (--task->num_dependees != 0)
+	continue;
+
+      struct gomp_taskgroup *taskgroup = task->taskgroup;
+      if (parent)
+	{
+	  if (parent->children)
+	    {
+	      task->next_child = parent->children;
+	      task->prev_child = parent->children->prev_child;
+	      task->next_child->prev_child = task;
+	      task->prev_child->next_child = task;
+	    }
+	  else
+	    {
+	      task->next_child = task;
+	      task->prev_child = task;
+	    }
+	  parent->children = task;
+	  if (parent->in_taskwait)
+	    {
+	      parent->in_taskwait = false;
+	      gomp_sem_post (&parent->taskwait_sem);
+	    }
+	}
+      if (taskgroup)
+	{
+	  if (taskgroup->children)
+	    {
+	      task->next_taskgroup = taskgroup->children;
+	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+	      task->next_taskgroup->prev_taskgroup = task;
+	      task->prev_taskgroup->next_taskgroup = task;
+	    }
+	  else
+	    {
+	      task->next_taskgroup = task;
+	      task->prev_taskgroup = task;
+	    }
+	  taskgroup->children = task;
+	  if (taskgroup->in_taskgroup_wait)
+	    {
+	      taskgroup->in_taskgroup_wait = false;
+	      gomp_sem_post (&taskgroup->taskgroup_sem);
+	    }
+	}
+      if (team->task_queue)
+	{
+	  task->next_queue = team->task_queue;
+	  task->prev_queue = team->task_queue->prev_queue;
+	  task->next_queue->prev_queue = task;
+	  task->prev_queue->next_queue = task;
+	}
+      else
+	{
+	  task->next_queue = task;
+	  task->prev_queue = task;
+	  team->task_queue = task;
+	}
+      ++team->task_count;
+      ++ret;
+    }
+  free (child_task->dependers);
+  child_task->dependers = NULL;
+  if (ret > 1)
+    gomp_team_barrier_set_task_pending (&team->barrier);
+  return ret;
+}
+
+static inline size_t
+gomp_task_run_post_handle_depend (struct gomp_task *child_task,
+				  struct gomp_team *team)
+{
+  if (child_task->depend_count == 0)
+    return 0;
+
+  /* If parent is gone already, the hash table is freed and nothing
+     will use the hash table anymore, no need to remove anything from it.  */
+  if (child_task->parent != NULL)
+    gomp_task_run_post_handle_depend_hash (child_task);
+
+  if (child_task->dependers == NULL)
+    return 0;
+
+  return gomp_task_run_post_handle_dependers (child_task, team);
+}
+
 static inline void
 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
 {
@@ -286,7 +586,10 @@ gomp_task_run_post_remove_parent (struct
 	 before the NULL is written.  */
       __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
       if (parent->in_taskwait)
-	gomp_sem_post (&parent->taskwait_sem);
+	{
+	  parent->in_taskwait = false;
+	  gomp_sem_post (&parent->taskwait_sem);
+	}
     }
 }
 
@@ -298,20 +601,29 @@ gomp_task_run_post_remove_taskgroup (str
     return;
   child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
   child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
-  if (taskgroup->children != child_task)
-    return;
-  if (child_task->next_taskgroup != child_task)
-    taskgroup->children = child_task->next_taskgroup;
+  if (taskgroup->num_children > 1)
+    --taskgroup->num_children;
   else
     {
-      /* We access task->children in GOMP_taskgroup_end
+      /* We access taskgroup->num_children in GOMP_taskgroup_end
 	 outside of the task lock mutex region, so
 	 need a release barrier here to ensure memory
 	 written by child_task->fn above is flushed
 	 before the NULL is written.  */
-      __atomic_store_n (&taskgroup->children, NULL, MEMMODEL_RELEASE);
+      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
+    }
+  if (taskgroup->children != child_task)
+    return;
+  if (child_task->next_taskgroup != child_task)
+    taskgroup->children = child_task->next_taskgroup;
+  else
+    {
+      taskgroup->children = NULL;
       if (taskgroup->in_taskgroup_wait)
-	gomp_sem_post (&taskgroup->taskgroup_sem);
+	{
+	  taskgroup->in_taskgroup_wait = false;
+	  gomp_sem_post (&taskgroup->taskgroup_sem);
+	}
     }
 }
 
@@ -323,6 +635,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
   struct gomp_task *task = thr->task;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   gomp_mutex_lock (&team->task_lock);
   if (gomp_barrier_last_thread (state))
@@ -355,8 +668,17 @@ gomp_barrier_handle_tasks (gomp_barrier_
 		}
 	      goto finish_cancelled;
 	    }
+	  team->task_running_count++;
+	  child_task->in_tied_task = true;
+	  if (team->task_count == team->task_running_count)
+	    gomp_team_barrier_clear_task_pending (&team->barrier);
 	}
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	{
+	  gomp_team_barrier_wake (&team->barrier, do_wake);
+	  do_wake = 0;
+	}
       if (to_free)
 	{
 	  gomp_finish_task (to_free);
@@ -374,7 +696,9 @@ gomp_barrier_handle_tasks (gomp_barrier_
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
 	{
-	 finish_cancelled:
+	 finish_cancelled:;
+	  size_t new_tasks
+	    = gomp_task_run_post_handle_depend (child_task, team);
 	  gomp_task_run_post_remove_parent (child_task);
 	  gomp_clear_parent (child_task->children);
 	  gomp_task_run_post_remove_taskgroup (child_task);
@@ -382,6 +706,12 @@ gomp_barrier_handle_tasks (gomp_barrier_
 	  child_task = NULL;
 	  if (!cancelled)
 	    team->task_running_count--;
+	  if (new_tasks > 1)
+	    {
+	      do_wake = team->nthreads - team->task_running_count;
+	      if (do_wake > new_tasks)
+		do_wake = new_tasks;
+	    }
 	  if (--team->task_count == 0
 	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
 	    {
@@ -404,9 +734,10 @@ GOMP_taskwait (void)
   struct gomp_task *task = thr->task;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   /* The acquire barrier on load of task->children here synchronizes
-     with the write of a NULL in gomp_barrier_handle_tasks.  It is
+     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
      not necessary that we synchronize with other non-NULL writes at
      this point, but we must ensure that all writes to memory by a
      child thread task work function are seen before we exit from
@@ -451,6 +782,11 @@ GOMP_taskwait (void)
 	   in other threads.  Wait for them.  */
 	task->in_taskwait = true;
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	{
+	  gomp_team_barrier_wake (&team->barrier, do_wake);
+	  do_wake = 0;
+	}
       if (to_free)
 	{
 	  gomp_finish_task (to_free);
@@ -464,15 +800,13 @@ GOMP_taskwait (void)
 	  thr->task = task;
 	}
       else
-	{
-	  gomp_sem_wait (&task->taskwait_sem);
-	  task->in_taskwait = false;
-	  return;
-	}
+	gomp_sem_wait (&task->taskwait_sem);
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
 	{
-	 finish_cancelled:
+	 finish_cancelled:;
+	  size_t new_tasks
+	    = gomp_task_run_post_handle_depend (child_task, team);
 	  child_task->prev_child->next_child = child_task->next_child;
 	  child_task->next_child->prev_child = child_task->prev_child;
 	  if (task->children == child_task)
@@ -487,7 +821,13 @@ GOMP_taskwait (void)
 	  to_free = child_task;
 	  child_task = NULL;
 	  team->task_count--;
-	  team->task_running_count--;
+	  if (new_tasks > 1)
+	    {
+	      do_wake = team->nthreads - team->task_running_count
+			- !task->in_tied_task;
+	      if (do_wake > new_tasks)
+		do_wake = new_tasks;
+	    }
 	}
     }
 }
@@ -519,6 +859,7 @@ GOMP_taskgroup_start (void)
   taskgroup->children = NULL;
   taskgroup->in_taskgroup_wait = false;
   taskgroup->cancelled = false;
+  taskgroup->num_children = 0;
   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
   task->taskgroup = taskgroup;
 }
@@ -532,18 +873,29 @@ GOMP_taskgroup_end (void)
   struct gomp_taskgroup *taskgroup;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   if (team == NULL)
     return;
   taskgroup = task->taskgroup;
-  if (__atomic_load_n (&taskgroup->children, MEMMODEL_ACQUIRE) == NULL)
+
+  /* The acquire barrier on load of taskgroup->num_children here
+     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
+     It is not necessary that we synchronize with other non-0 writes at
+     this point, but we must ensure that all writes to memory by a
+     child thread task work function are seen before we exit from
+     GOMP_taskgroup_end.  */
+  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
     goto finish;
+
   gomp_mutex_lock (&team->task_lock);
   while (1)
     {
       bool cancelled = false;
       if (taskgroup->children == NULL)
 	{
+	  if (taskgroup->num_children)
+	    goto do_wait;
 	  gomp_mutex_unlock (&team->task_lock);
 	  if (to_free)
 	    {
@@ -570,10 +922,18 @@ GOMP_taskgroup_end (void)
 	    }
 	}
       else
-	/* All tasks we are waiting for are already running
-	   in other threads.  Wait for them.  */
-	taskgroup->in_taskgroup_wait = true;
+	{
+	 do_wait:
+	  /* All tasks we are waiting for are already running
+	     in other threads.  Wait for them.  */
+	  taskgroup->in_taskgroup_wait = true;
+	}
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	{
+	  gomp_team_barrier_wake (&team->barrier, do_wake);
+	  do_wake = 0;
+	}
       if (to_free)
 	{
 	  gomp_finish_task (to_free);
@@ -587,19 +947,18 @@ GOMP_taskgroup_end (void)
 	  thr->task = task;
 	}
       else
-	{
-	  gomp_sem_wait (&taskgroup->taskgroup_sem);
-	  taskgroup->in_taskgroup_wait = false;
-	  goto finish;
-	}
+	gomp_sem_wait (&taskgroup->taskgroup_sem);
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
 	{
-	 finish_cancelled:
+	 finish_cancelled:;
+	  size_t new_tasks
+	    = gomp_task_run_post_handle_depend (child_task, team);
 	  child_task->prev_taskgroup->next_taskgroup
 	    = child_task->next_taskgroup;
 	  child_task->next_taskgroup->prev_taskgroup
 	    = child_task->prev_taskgroup;
+	  --taskgroup->num_children;
 	  if (taskgroup->children == child_task)
 	    {
 	      if (child_task->next_taskgroup != child_task)
@@ -612,7 +971,13 @@ GOMP_taskgroup_end (void)
 	  to_free = child_task;
 	  child_task = NULL;
 	  team->task_count--;
-	  team->task_running_count--;
+	  if (new_tasks > 1)
+	    {
+	      do_wake = team->nthreads - team->task_running_count
+			- !task->in_tied_task;
+	      if (do_wake > new_tasks)
+		do_wake = new_tasks;
+	    }
 	}
     }
 
--- libgomp/testsuite/libgomp.c/depend-4.c.jj	2013-09-27 11:42:37.283473918 +0200
+++ libgomp/testsuite/libgomp.c/depend-4.c	2013-09-27 11:49:37.781239095 +0200
@@ -0,0 +1,56 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1, y = 2, z = 3;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared (x, y, z) depend(inout: x, y) \
+		       depend (in: z) if (x > 10)
+      {
+	if (x != 1 || y != 2 || z != 3)
+	  abort ();
+	x = 4;
+	y = 5;
+      }
+      /* The above task has depend clauses, but no dependencies
+	 on earlier tasks, and is if (0), so must be scheduled
+	 immediately.  */
+      if (x != 4 || y != 5)
+	abort ();
+    }
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared (x, y) depend(in: x, y)
+      {
+	usleep (10000);
+	if (x != 4 || y != 5 || z != 3)
+	  abort ();
+      }
+      #pragma omp task shared (x, y) depend(in: x, y)
+      {
+	usleep (10000);
+	if (x != 4 || y != 5 || z != 3)
+	  abort ();
+      }
+      #pragma omp task shared (x, y, z) depend(inout: x, y) \
+		       depend (in: z) if (x > 10)
+      {
+	if (x != 4 || y != 5 || z != 3)
+	  abort ();
+	x = 6;
+	y = 7;
+      }
+      /* The above task has depend clauses, and may have dependencies
+	 on earlier tasks, so even though it is if (0), it can be deferred.  */
+    }
+    if (x != 6 || y != 7)
+      abort ();
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-1.c.jj	2013-09-26 17:57:26.011983435 +0200
+++ libgomp/testsuite/libgomp.c/depend-1.c	2013-09-26 21:09:57.128895308 +0200
@@ -0,0 +1,215 @@
+#include <stdlib.h>
+
+void
+dep (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+  }
+}
+
+void
+dep2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp taskwait
+  }
+}
+
+void
+dep3 (void)
+{
+  #pragma omp parallel
+  {
+    int x = 1;
+    #pragma omp single
+    {
+      #pragma omp task shared (x) depend(out: x)
+      x = 2;
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+    }
+  }
+}
+
+void
+firstpriv (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task depend(out: x)
+    x = 2;
+    #pragma omp task depend(in: x)
+    if (x != 1)
+      abort ();
+  }
+}
+
+void
+antidep (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared(x) depend(in: x)
+    if (x != 1)
+      abort ();
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+  }
+}
+
+void
+antidep2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared(x) depend(in: x)
+      if (x != 1)
+	abort ();
+      #pragma omp task shared(x) depend(out: x)
+      x = 2;
+    }
+  }
+}
+
+void
+antidep3 (void)
+{
+  #pragma omp parallel
+  {
+    int x = 1;
+    #pragma omp single
+    {
+      #pragma omp task shared(x) depend(in: x)
+      if (x != 1)
+	abort ();
+      #pragma omp task shared(x) depend(out: x)
+      x = 2;
+    }
+  }
+}
+
+
+void
+outdep (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 0;
+    #pragma omp task shared(x) depend(out: x)
+    x = 1;
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+    #pragma omp taskwait
+    if (x != 2)
+      abort ();
+  }
+}
+
+void
+concurrent (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+  }
+}
+
+void
+concurrent2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp taskwait
+  }
+}
+
+void
+concurrent3 (void)
+{
+  #pragma omp parallel
+  {
+    int x = 1;
+    #pragma omp single
+    {
+      #pragma omp task shared (x) depend(out: x)
+      x = 2;
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+    }
+  }
+}
+
+int
+main ()
+{
+  dep ();
+  dep2 ();
+  dep3 ();
+  firstpriv ();
+  antidep ();
+  antidep2 ();
+  antidep3 ();
+  outdep ();
+  concurrent ();
+  concurrent2 ();
+  concurrent3 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-2.c.jj	2013-09-26 18:56:19.808294100 +0200
+++ libgomp/testsuite/libgomp.c/depend-2.c	2013-09-26 19:46:29.732123749 +0200
@@ -0,0 +1,71 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+void
+foo (int do_sleep)
+{
+  int a[64], i, *p = a + 4, x = 0;
+  asm volatile ("" : "+r" (p));
+  for (i = 0; i < 64; i++)
+    a[i] = i + 8;
+  #pragma omp parallel private (i)
+  {
+    #pragma omp single nowait
+    {
+      for (i = 0; i < 8; i++)
+	{
+	  #pragma omp task depend(out: a[i * 8 : 4])
+	    a[i * 8] += (i + 2) * 9;
+	  #pragma omp task depend(out: p[i * 8 : 2])
+	    p[i * 8] += (i + 3) * 10;
+	  #pragma omp task depend(out: x)
+	    x = 1;
+	}
+      for (i = 0; i < 8; i++)
+	#pragma omp task depend(in: a[i * 8 : 4]) \
+			 depend(inout: a[i * 8 + 4 : 2]) \
+			 depend(in: a[0 : 4]) depend(in: x)
+	{
+	  if (a[0] != 8 + 2 * 9 || x != 1)
+	    abort ();
+	  if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+	    abort ();
+	  if (a[4 + i * 8] != 4 + i * 8 + 8 + (i + 3) * 10)
+	    abort ();
+	  p[i * 8] += a[i * 8];
+	}
+      for (i = 0; i < 8; i++)
+	#pragma omp task depend(inout: a[i * 8 : 4]) \
+			 depend(in: p[i * 8 : 2]) \
+			 depend(in: p[0 : 2], x)
+	{
+	  if (p[0] != 4 + 8 + 3 * 10 + 0 + 8 + 2 * 9 || x != 1)
+	    abort ();
+	  if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+	    abort ();
+	  if (a[4 + i * 8] != (4 + i * 8 + 8 + (i + 3) * 10
+			       + i * 8 + 8 + (i + 2) * 9))
+	    abort ();
+	  a[i * 8] += 2;
+	}
+      for (i = 0; i < 4; i++)
+	#pragma omp task depend(in: a[i * 16 : 4], a[i * 16 + 8 : 4], x)
+	{
+	  if (a[i * 16] != i * 16 + 8 + (2 * i + 2) * 9 + 2 || x != 1)
+	    abort ();
+	  if (p[i * 16 + 4] != i * 16 + 8 + 8 + (2 * i + 1 + 2) * 9 + 2)
+	    abort ();
+	}
+    }
+    if (do_sleep)
+      sleep (1);
+  }
+}
+
+int
+main ()
+{
+  foo (1);
+  foo (0);
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-3.c.jj	2013-09-27 11:32:44.410621977 +0200
+++ libgomp/testsuite/libgomp.c/depend-3.c	2013-09-27 11:39:25.500493830 +0200
@@ -0,0 +1,51 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1, y = 2;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared (x) depend(in: x)
+      {
+	usleep (10000);
+	if (x != 1)
+	  abort ();
+      }
+      #pragma omp taskgroup
+      {
+	#pragma omp task shared (x) depend(in: x)
+	{
+	  usleep (15000);
+	  if (x != 1)
+	    abort ();
+	}
+	#pragma omp task shared (y) depend(inout: y)
+	{
+	  if (y != 2)
+	    abort ();
+	  y = 3;
+	}
+	#pragma omp taskgroup
+	{
+	  #pragma omp task shared (x) depend(in: x)
+	  {
+	    usleep (13000);
+	    if (x != 1)
+	      abort ();
+	  }
+	  #pragma omp taskgroup
+	  {
+	    #pragma omp task shared (x) depend(out: x)
+	    x = 2;
+	  }
+	}
+      }
+    }
+  }
+  return 0;
+}


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]