[gomp4] Library side of depend clause support

Jakub Jelinek <jakub@redhat.com>
Thu Sep 26 19:35:00 GMT 2013


Hi!

This patch adds the library side of depend clause support.
In GOMP_task, before queueing a task, if the task has any depend clauses
we look up the addresses in a hash table (kept in the parent task, because
only sibling tasks are ordered through the depend clause), and if there
are any dependencies on earlier started tasks, the new task
isn't added to the team, parent and taskgroup task queues, but instead
is just added to the dependers vectors of those earlier tasks.  Each task
also keeps a count (num_dependees) of how many other tasks it depends on.
When a task on which something depends finishes, its entries are removed
from the parent's depend address hash table if the parent still exists,
and, whether or not the parent exists anymore, we decrement num_dependees
of every task mentioned in its dependers vector.  Whenever one of those
counters drops to zero, the corresponding task is inserted into all the
relevant task queues.
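
The simplest case this handles is essentially the dep () test from the
new depend-1.c testcase below (compile with -fopenmp): two sibling tasks
ordered through a flow dependence on x, where the second task must not
start until the first one has finished:

  #include <stdlib.h>

  int
  main ()
  {
    int x = 1;
    #pragma omp parallel
    #pragma omp single
    {
      /* Registers &x in the parent task's depend hash table as an
	 out dependence.  */
      #pragma omp task shared (x) depend(out: x)
      x = 2;
      /* Looks up &x, finds the earlier out dependence, and is therefore
	 queued in the first task's dependers vector; it is only moved
	 to the normal task queues once the first task finishes.  */
      #pragma omp task shared (x) depend(in: x)
      if (x != 2)
	abort ();
    }
    return 0;
  }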

Tested on x86_64-linux.  Will commit tomorrow unless somebody complains,
but in any case would appreciate review of the changes.

2013-09-26  Jakub Jelinek  <jakub@redhat.com>

	* libgomp.h: Include stdlib.h.
	(struct gomp_task_depend_entry): New type.
	(struct gomp_task): Add dependers, depend_hash, depend_count,
	num_dependees and depend fields.
	(struct gomp_taskgroup): Add num_children field.
	(gomp_finish_task): Free depend_hash if non-NULL.
	* libgomp_g.h (GOMP_task): Add depend argument.
	* hashtab.h: New file.
	* task.c: Include hashtab.h.
	(hash_entry_type): New typedef.
	(htab_alloc, htab_free, htab_hash, htab_eq): New inlines.
	(gomp_init_task): Clear dependers, depend_hash and depend_count
	fields.
	(GOMP_task): Add depend argument, handle depend clauses.  Increment
	num_children field in taskgroup.
	(gomp_task_run_pre): Don't increment task_running_count here,
	nor clear task_pending bit.
	(gomp_task_run_post_handle_depend_hash,
	gomp_task_run_post_handle_dependers,
	gomp_task_run_post_handle_depend): New functions.
	(gomp_task_run_post_remove_parent): Clear in_taskwait before
	signalling corresponding semaphore.
	(gomp_task_run_post_remove_taskgroup): Decrement num_children
	field and make the decrement to 0 MEMMODEL_RELEASE operation,
	rather than storing NULL to taskgroup->children.  Clear
	in_taskgroup_wait before signalling corresponding semaphore.
	(gomp_barrier_handle_tasks): Move task_running_count increment
	and task_pending bit clearing here.  Call
	gomp_task_run_post_handle_depend.  If more than one new task
	has been queued, wake other threads if needed.
	(GOMP_taskwait): Call gomp_task_run_post_handle_depend.  If more
	than one new task has been queued, wake other threads if needed.
	After waiting on taskwait_sem, enter critical section again.
	(GOMP_taskgroup_start): Initialize num_children field.
	(GOMP_taskgroup_end): Check num_children instead of children
	before critical section.  If children is NULL, but num_children
	is non-zero, wait on taskgroup_sem.  Call
	gomp_task_run_post_handle_depend.  If more than one new task has
	been queued, wake other threads if needed.  After waiting on
	taskgroup_sem, enter critical section again.
	* testsuite/libgomp.c/depend-1.c: New test.
	* testsuite/libgomp.c/depend-2.c: New test.
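
Two encodings are used throughout the task.c changes below; summarizing
them here may help review (both can be read off the new GOMP_task code,
so this is just an illustration):

  /* The new depend argument of GOMP_task, as passed by the compiler:
     depend[0]      number of depend clauses (as uintptr_t)
     depend[1]      number of out/inout clauses; those come first
     depend[2 + i]  address from the i-th depend clause  */

  /* The dependers vector of a task T (tasks that wait on T):
     dependers[0]      number of tasks recorded so far (as uintptr_t)
     dependers[1]      capacity, not counting the two header slots
     dependers[2 + i]  the i-th task that depends on T  */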

--- libgomp/libgomp.h.jj	2013-09-26 09:43:10.903930832 +0200
+++ libgomp/libgomp.h	2013-09-26 17:17:28.267001263 +0200
@@ -39,6 +39,7 @@
 
 #include <pthread.h>
 #include <stdbool.h>
+#include <stdlib.h>
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
 # pragma GCC visibility push(hidden)
@@ -253,7 +254,19 @@ enum gomp_task_kind
   GOMP_TASK_TIED
 };
 
+struct gomp_task;
 struct gomp_taskgroup;
+struct htab;
+
+struct gomp_task_depend_entry
+{
+  void *addr;
+  struct gomp_task_depend_entry *next;
+  struct gomp_task_depend_entry *prev;
+  struct gomp_task *task;
+  bool is_in;
+  bool redundant;
+};
 
 /* This structure describes a "task" to be run by a thread.  */
 
@@ -268,6 +281,10 @@ struct gomp_task
   struct gomp_task *next_taskgroup;
   struct gomp_task *prev_taskgroup;
   struct gomp_taskgroup *taskgroup;
+  struct gomp_task **dependers;
+  struct htab *depend_hash;
+  size_t depend_count;
+  size_t num_dependees;
   struct gomp_task_icv icv;
   void (*fn) (void *);
   void *fn_data;
@@ -277,6 +294,7 @@ struct gomp_task
   bool final_task;
   bool copy_ctors_done;
   gomp_sem_t taskwait_sem;
+  struct gomp_task_depend_entry depend[];
 };
 
 struct gomp_taskgroup
@@ -286,6 +304,7 @@ struct gomp_taskgroup
   bool in_taskgroup_wait;
   bool cancelled;
   gomp_sem_t taskgroup_sem;
+  size_t num_children;
 };
 
 /* This structure describes a "team" of threads.  These are the threads
@@ -525,6 +544,8 @@ extern void gomp_barrier_handle_tasks (g
 static void inline
 gomp_finish_task (struct gomp_task *task)
 {
+  if (__builtin_expect (task->depend_hash != NULL, 0))
+    free (task->depend_hash);
   gomp_sem_destroy (&task->taskwait_sem);
 }
 
--- libgomp/libgomp_g.h.jj	2013-09-26 09:43:10.902930838 +0200
+++ libgomp/libgomp_g.h	2013-09-26 10:08:44.820160094 +0200
@@ -178,7 +178,7 @@ extern bool GOMP_cancellation_point (int
 /* task.c */
 
 extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
-		       long, long, bool, unsigned);
+		       long, long, bool, unsigned, void **);
 extern void GOMP_taskwait (void);
 extern void GOMP_taskyield (void);
 extern void GOMP_taskgroup_start (void);
--- libgomp/hashtab.h.jj	2013-09-26 10:08:51.031128932 +0200
+++ libgomp/hashtab.h	2013-09-26 18:11:51.241624867 +0200
@@ -0,0 +1,443 @@
+/* An expandable hash table datatype.
+   Copyright (C) 1999-2013
+   Free Software Foundation, Inc.
+   Contributed by Vladimir Makarov <vmakarov@cygnus.com>.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* The hash table code copied from include/hashtab.[hc] and adjusted,
+   so that the hash table entries are in the flexible array at the end
+   of the control structure, no callbacks are used and the elements in the
+   table are of the hash_entry_type type.
+   Before including this file, define hash_entry_type type and
+   htab_alloc and htab_free functions.  After including it, define
+   htab_hash and htab_eq inline functions.   */
+
+/* This package implements basic hash table functionality.  It is possible
+   to search for an entry, create an entry and destroy an entry.
+
+   Elements in the table are generic pointers.
+
+   The size of the table is not fixed; if the occupancy of the table
+   grows too high the hash table will be expanded.
+
+   The abstract data implementation is based on generalized Algorithm D
+   from Knuth's book "The art of computer programming".  Hash table is
+   expanded by creation of new hash table and transferring elements from
+   the old table to the new table.  */
+
+/* The type for a hash code.  */
+typedef unsigned int hashval_t;
+
+static inline hashval_t htab_hash (hash_entry_type);
+static inline bool htab_eq (hash_entry_type, hash_entry_type);
+
+/* This macro defines reserved value for empty table entry.  */
+
+#define HTAB_EMPTY_ENTRY    ((hash_entry_type) 0)
+
+/* This macro defines reserved value for table entry which contained
+   a deleted element. */
+
+#define HTAB_DELETED_ENTRY  ((hash_entry_type) 1)
+
+/* Hash tables are of the following type.  The structure
+   (implementation) of this type is not needed for using the hash
+   tables.  All work with hash table should be executed only through
+   functions mentioned below.  The size of this structure is subject to
+   change.  */
+
+struct htab {
+  /* Current size (in entries) of the hash table.  */
+  size_t size;
+
+  /* Current number of elements including also deleted elements.  */
+  size_t n_elements;
+
+  /* Current number of deleted elements in the table.  */
+  size_t n_deleted;
+
+  /* Current size (in entries) of the hash table, as an index into the
+     table of primes.  */
+  unsigned int size_prime_index;
+
+  /* Table itself.  */
+  hash_entry_type entries[];
+};
+
+typedef struct htab *htab_t;
+
+/* An enum saying whether we insert into the hash table or not.  */
+enum insert_option {NO_INSERT, INSERT};
+
+/* Table of primes and multiplicative inverses.
+
+   Note that these are not minimally reduced inverses.  Unlike when generating
+   code to divide by a constant, we want to be able to use the same algorithm
+   all the time.  All of these inverses (are implied to) have bit 32 set.
+
+   For the record, the function that computed the table is in
+   libiberty/hashtab.c.  */
+
+struct prime_ent
+{
+  hashval_t prime;
+  hashval_t inv;
+  hashval_t inv_m2;	/* inverse of prime-2 */
+  hashval_t shift;
+};
+
+static struct prime_ent const prime_tab[] = {
+  {          7, 0x24924925, 0x9999999b, 2 },
+  {         13, 0x3b13b13c, 0x745d1747, 3 },
+  {         31, 0x08421085, 0x1a7b9612, 4 },
+  {         61, 0x0c9714fc, 0x15b1e5f8, 5 },
+  {        127, 0x02040811, 0x0624dd30, 6 },
+  {        251, 0x05197f7e, 0x073260a5, 7 },
+  {        509, 0x01824366, 0x02864fc8, 8 },
+  {       1021, 0x00c0906d, 0x014191f7, 9 },
+  {       2039, 0x0121456f, 0x0161e69e, 10 },
+  {       4093, 0x00300902, 0x00501908, 11 },
+  {       8191, 0x00080041, 0x00180241, 12 },
+  {      16381, 0x000c0091, 0x00140191, 13 },
+  {      32749, 0x002605a5, 0x002a06e6, 14 },
+  {      65521, 0x000f00e2, 0x00110122, 15 },
+  {     131071, 0x00008001, 0x00018003, 16 },
+  {     262139, 0x00014002, 0x0001c004, 17 },
+  {     524287, 0x00002001, 0x00006001, 18 },
+  {    1048573, 0x00003001, 0x00005001, 19 },
+  {    2097143, 0x00004801, 0x00005801, 20 },
+  {    4194301, 0x00000c01, 0x00001401, 21 },
+  {    8388593, 0x00001e01, 0x00002201, 22 },
+  {   16777213, 0x00000301, 0x00000501, 23 },
+  {   33554393, 0x00001381, 0x00001481, 24 },
+  {   67108859, 0x00000141, 0x000001c1, 25 },
+  {  134217689, 0x000004e1, 0x00000521, 26 },
+  {  268435399, 0x00000391, 0x000003b1, 27 },
+  {  536870909, 0x00000019, 0x00000029, 28 },
+  { 1073741789, 0x0000008d, 0x00000095, 29 },
+  { 2147483647, 0x00000003, 0x00000007, 30 },
+  /* Avoid "decimal constant so large it is unsigned" for 4294967291.  */
+  { 0xfffffffb, 0x00000006, 0x00000008, 31 }
+};
+
+/* The following function returns an index into the above table of the
+   nearest prime number which is greater than N, and near a power of two. */
+
+static unsigned int
+higher_prime_index (unsigned long n)
+{
+  unsigned int low = 0;
+  unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
+
+  while (low != high)
+    {
+      unsigned int mid = low + (high - low) / 2;
+      if (n > prime_tab[mid].prime)
+	low = mid + 1;
+      else
+	high = mid;
+    }
+
+  /* If we've run out of primes, abort.  */
+  if (n > prime_tab[low].prime)
+    abort ();
+
+  return low;
+}
+
+/* Return the current size of given hash table.  */
+
+static inline size_t
+htab_size (htab_t htab)
+{
+  return htab->size;
+}
+
+/* Return the current number of elements in given hash table. */
+
+static inline size_t
+htab_elements (htab_t htab)
+{
+  return htab->n_elements - htab->n_deleted;
+}
+
+/* Return X % Y.  */
+
+static inline hashval_t
+htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
+{
+  /* The multiplicative inverses computed above are for 32-bit types, and
+     requires that we be able to compute a highpart multiply.  */
+  if (sizeof (hashval_t) * __CHAR_BIT__ <= 32)
+    {
+      hashval_t t1, t2, t3, t4, q, r;
+
+      t1 = ((unsigned long long)x * inv) >> 32;
+      t2 = x - t1;
+      t3 = t2 >> 1;
+      t4 = t1 + t3;
+      q  = t4 >> shift;
+      r  = x - (q * y);
+
+      return r;
+    }
+
+  /* Otherwise just use the native division routines.  */
+  return x % y;
+}
+
+/* Compute the primary hash for HASH given HTAB's current size.  */
+
+static inline hashval_t
+htab_mod (hashval_t hash, htab_t htab)
+{
+  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+  return htab_mod_1 (hash, p->prime, p->inv, p->shift);
+}
+
+/* Compute the secondary hash for HASH given HTAB's current size.  */
+
+static inline hashval_t
+htab_mod_m2 (hashval_t hash, htab_t htab)
+{
+  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+  return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
+}
+
+/* Create hash table of size SIZE.  */
+
+static htab_t
+htab_create (size_t size)
+{
+  htab_t result;
+  unsigned int size_prime_index;
+
+  size_prime_index = higher_prime_index (size);
+  size = prime_tab[size_prime_index].prime;
+
+  result = (htab_t) htab_alloc (sizeof (struct htab)
+				+ size * sizeof (hash_entry_type));
+  result->size = size;
+  result->n_elements = 0;
+  result->n_deleted = 0;
+  result->size_prime_index = size_prime_index;
+  memset (result->entries, 0, size * sizeof (hash_entry_type));
+  return result;
+}
+
+/* Similar to htab_find_slot, but without several unwanted side effects:
+    - Does not call htab_eq when it finds an existing entry.
+    - Does not change the count of elements in the hash table.
+   This function also assumes there are no deleted entries in the table.
+   HASH is the hash value for the element to be inserted.  */
+
+static hash_entry_type *
+find_empty_slot_for_expand (htab_t htab, hashval_t hash)
+{
+  hashval_t index = htab_mod (hash, htab);
+  size_t size = htab_size (htab);
+  hash_entry_type *slot = htab->entries + index;
+  hashval_t hash2;
+
+  if (*slot == HTAB_EMPTY_ENTRY)
+    return slot;
+  else if (*slot == HTAB_DELETED_ENTRY)
+    abort ();
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      slot = htab->entries + index;
+      if (*slot == HTAB_EMPTY_ENTRY)
+	return slot;
+      else if (*slot == HTAB_DELETED_ENTRY)
+	abort ();
+    }
+}
+
+/* The following function changes size of memory allocated for the
+   entries and repeatedly inserts the table elements.  The occupancy
+   of the table after the call will be about 50%.  Naturally the hash
+   table must already exist.  Remember also that the place of the
+   table entries is changed.  */
+
+static htab_t
+htab_expand (htab_t htab)
+{
+  htab_t nhtab;
+  hash_entry_type *olimit;
+  hash_entry_type *p;
+  size_t osize, elts;
+
+  osize = htab->size;
+  olimit = htab->entries + osize;
+  elts = htab_elements (htab);
+
+  /* Resize only when table after removal of unused elements is either
+     too full or too empty.  */
+  if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
+    nhtab = htab_create (elts * 2);
+  else
+    nhtab = htab_create (osize - 1);
+  nhtab->n_elements = htab->n_elements - htab->n_deleted;
+
+  p = htab->entries;
+  do
+    {
+      hash_entry_type x = *p;
+
+      if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
+	*find_empty_slot_for_expand (nhtab, htab_hash (x)) = x;
+
+      p++;
+    }
+  while (p < olimit);
+
+  htab_free (htab);
+  return nhtab;
+}
+
+/* This function searches for a hash table entry equal to the given
+   element.  It cannot be used to insert or delete an element.  */
+
+static hash_entry_type
+htab_find (htab_t htab, const hash_entry_type element)
+{
+  hashval_t index, hash2, hash = htab_hash (element);
+  size_t size;
+  hash_entry_type entry;
+
+  size = htab_size (htab);
+  index = htab_mod (hash, htab);
+
+  entry = htab->entries[index];
+  if (entry == HTAB_EMPTY_ENTRY
+      || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+    return entry;
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      entry = htab->entries[index];
+      if (entry == HTAB_EMPTY_ENTRY
+	  || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+	return entry;
+    }
+}
+
+/* This function searches for a hash table slot containing an entry
+   equal to the given element.  To delete an entry, call this with
+   insert=NO_INSERT, then call htab_clear_slot on the slot returned
+   (possibly after doing some checks).  To insert an entry, call this
+   with insert=INSERT, then write the value you want into the returned
+   slot.  */
+
+static hash_entry_type *
+htab_find_slot (htab_t *htabp, const hash_entry_type element,
+		enum insert_option insert)
+{
+  hash_entry_type *first_deleted_slot;
+  hashval_t index, hash2, hash = htab_hash (element);
+  size_t size;
+  hash_entry_type entry;
+  htab_t htab = *htabp;
+
+  size = htab_size (htab);
+  if (insert == INSERT && size * 3 <= htab->n_elements * 4)
+    {
+      htab = *htabp = htab_expand (htab);
+      size = htab_size (htab);
+    }
+
+  index = htab_mod (hash, htab);
+
+  first_deleted_slot = NULL;
+
+  entry = htab->entries[index];
+  if (entry == HTAB_EMPTY_ENTRY)
+    goto empty_entry;
+  else if (entry == HTAB_DELETED_ENTRY)
+    first_deleted_slot = &htab->entries[index];
+  else if (htab_eq (entry, element))
+    return &htab->entries[index];
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      entry = htab->entries[index];
+      if (entry == HTAB_EMPTY_ENTRY)
+	goto empty_entry;
+      else if (entry == HTAB_DELETED_ENTRY)
+	{
+	  if (!first_deleted_slot)
+	    first_deleted_slot = &htab->entries[index];
+	}
+      else if (htab_eq (entry, element))
+	return &htab->entries[index];
+    }
+
+ empty_entry:
+  if (insert == NO_INSERT)
+    return NULL;
+
+  if (first_deleted_slot)
+    {
+      htab->n_deleted--;
+      *first_deleted_slot = HTAB_EMPTY_ENTRY;
+      return first_deleted_slot;
+    }
+
+  htab->n_elements++;
+  return &htab->entries[index];
+}
+
+/* This function clears a specified slot in a hash table.  It is
+   useful when you've already done the lookup and don't want to do it
+   again.  */
+
+static inline void
+htab_clear_slot (htab_t htab, hash_entry_type *slot)
+{
+  if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
+      || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
+    abort ();
+
+  *slot = HTAB_DELETED_ENTRY;
+  htab->n_deleted++;
+}
+
+/* Returns a hash code for pointer P. Simplified version of evahash */
+
+static inline hashval_t
+hash_pointer (const void *p)
+{
+  uintptr_t v = (uintptr_t) p;
+  if (sizeof (v) > sizeof (hashval_t))
+    v ^= v >> (sizeof (uintptr_t) / 2 * __CHAR_BIT__);
+  return v;
+}
--- libgomp/task.c.jj	2013-09-26 09:43:10.903930832 +0200
+++ libgomp/task.c	2013-09-26 19:40:17.092019688 +0200
@@ -29,6 +29,33 @@
 #include <stdlib.h>
 #include <string.h>
 
+typedef struct gomp_task_depend_entry *hash_entry_type;
+
+static inline void *
+htab_alloc (size_t size)
+{
+  return gomp_malloc (size);
+}
+
+static inline void
+htab_free (void *ptr)
+{
+  free (ptr);
+}
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  return hash_pointer (element->addr);
+}
+
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return x->addr == y->addr;
+}
 
 /* Create a new task data structure.  */
 
@@ -45,6 +72,9 @@ gomp_init_task (struct gomp_task *task,
   task->copy_ctors_done = false;
   task->children = NULL;
   task->taskgroup = NULL;
+  task->dependers = NULL;
+  task->depend_hash = NULL;
+  task->depend_count = 0;
   gomp_sem_init (&task->taskwait_sem, 0);
 }
 
@@ -80,7 +110,8 @@ gomp_clear_parent (struct gomp_task *chi
 
 void
 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
-	   long arg_size, long arg_align, bool if_clause, unsigned flags)
+	   long arg_size, long arg_align, bool if_clause, unsigned flags,
+	   void **depend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
@@ -108,6 +139,38 @@ GOMP_task (void (*fn) (void *), void *da
     {
       struct gomp_task task;
 
+      /* If there are depend clauses and earlier deferred sibling tasks
+	 with depend clauses, check if there isn't a dependency.  If there
+	 is, fall through to the deferred task handling, as we can't
+	 schedule such tasks right away.  There is no need to handle
+	 depend clauses for non-deferred tasks other than this, because
+	 the parent task is suspended until the child task finishes and thus
+	 it can't start further child tasks.  */
+      if ((flags & 8) && thr->task && thr->task->depend_hash)
+	{
+	  struct gomp_task *parent = thr->task;
+	  struct gomp_task_depend_entry elem, *ent = NULL;
+	  size_t ndepend = (uintptr_t) depend[0];
+	  size_t nout = (uintptr_t) depend[1];
+	  size_t i;
+	  gomp_mutex_lock (&team->task_lock);
+	  for (i = 0; i < ndepend; i++)
+	    {
+	      elem.addr = depend[i + 2];
+	      ent = htab_find (parent->depend_hash, &elem);
+	      for (; ent; ent = ent->next)
+		if (i >= nout && ent->is_in)
+		  continue;
+		else
+		  break;
+	      if (ent)
+		break;
+	    }
+	  gomp_mutex_unlock (&team->task_lock);
+	  if (ent)
+	    goto defer;
+	}
+
       gomp_init_task (&task, thr->task, gomp_icv (false));
       task.kind = GOMP_TASK_IFFALSE;
       task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
@@ -146,14 +209,20 @@ GOMP_task (void (*fn) (void *), void *da
     }
   else
     {
+     defer:;
       struct gomp_task *task;
       struct gomp_task *parent = thr->task;
       struct gomp_taskgroup *taskgroup = parent->taskgroup;
       char *arg;
       bool do_wake;
+      size_t depend_size = 0;
 
-      task = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
-      arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+      if (flags & 8)
+	depend_size = ((uintptr_t) depend[0]
+		       * sizeof (struct gomp_task_depend_entry));
+      task = gomp_malloc (sizeof (*task) + depend_size
+			  + arg_size + arg_align - 1);
+      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
 		      & ~(uintptr_t) (arg_align - 1));
       gomp_init_task (task, parent, gomp_icv (false));
       task->kind = GOMP_TASK_IFFALSE;
@@ -171,7 +240,6 @@ GOMP_task (void (*fn) (void *), void *da
       task->kind = GOMP_TASK_WAITING;
       task->fn = fn;
       task->fn_data = arg;
-      task->in_tied_task = true;
       task->final_task = (flags & 2) >> 1;
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
@@ -185,6 +253,99 @@ GOMP_task (void (*fn) (void *), void *da
 	  free (task);
 	  return;
 	}
+      if (taskgroup)
+	taskgroup->num_children++;
+      if (depend_size)
+	{
+	  size_t ndepend = (uintptr_t) depend[0];
+	  size_t nout = (uintptr_t) depend[1];
+	  size_t i;
+	  hash_entry_type ent;
+
+	  task->depend_count = ndepend;
+	  task->num_dependees = 0;
+	  if (parent->depend_hash == NULL)
+	    parent->depend_hash
+	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+	  for (i = 0; i < ndepend; i++)
+	    {
+	      task->depend[i].addr = depend[2 + i];
+	      task->depend[i].next = NULL;
+	      task->depend[i].prev = NULL;
+	      task->depend[i].task = task;
+	      task->depend[i].is_in = i >= nout;
+	      task->depend[i].redundant = false;
+	      hash_entry_type *slot
+		= htab_find_slot (&parent->depend_hash, &task->depend[i],
+				  INSERT);
+	      if (*slot)
+		{
+		  /* If multiple depends on the same task are the
+		     same, all but the first one are redundant.
+		     As inout/out come first, if any of them is
+		     inout/out, it will win, which is the right
+		     semantics.  */
+		  if ((*slot)->task == task)
+		    {
+		      task->depend[i].redundant = true;
+		      continue;
+		    }
+		  for (ent = *slot; ent; ent = ent->next)
+		    {
+		      /* depend(in:...) doesn't depend on earlier
+			 depend(in:...).  */
+		      if (i >= nout && ent->is_in)
+			continue;
+		      struct gomp_task *tsk = ent->task;
+		      if (tsk->dependers == NULL)
+			{
+			  tsk->dependers
+			    = gomp_malloc (8 * sizeof (struct gomp_task *));
+			  tsk->dependers[0]
+			    = (struct gomp_task *) (uintptr_t) 1;
+			  tsk->dependers[1]
+			    = (struct gomp_task *) (uintptr_t) (8 - 2);
+			  tsk->dependers[2] = task;
+			  task->num_dependees++;
+			  continue;
+			}
+		      /* We already have some other dependency on tsk
+			 from earlier depend clause.  */
+		      else if (tsk->dependers[0]
+			       && (tsk->dependers[((uintptr_t)
+						   tsk->dependers[0]) + 1]
+				   == task))
+			continue;
+		      else if ((uintptr_t) tsk->dependers[0]
+			       == (uintptr_t) tsk->dependers[1])
+			{
+			  size_t count = ((uintptr_t) tsk->dependers[1]
+					  + 2) * 2;
+			  tsk->dependers
+			    = gomp_realloc (tsk->dependers,
+					    count
+					    * sizeof (struct gomp_task *));
+			  tsk->dependers[1]
+			    = (struct gomp_task *) (uintptr_t) (count - 2);
+			}
+		      tsk->dependers[((uintptr_t) tsk->dependers[0]) + 2]
+			= task;
+		      tsk->dependers[0]
+			= (struct gomp_task *)
+			  (((uintptr_t) tsk->dependers[0]) + 1);
+		      task->num_dependees++;
+		    }
+		  task->depend[i].next = *slot;
+		  (*slot)->prev = &task->depend[i];
+		}
+	      *slot = &task->depend[i];
+	    }
+	  if (task->num_dependees)
+	    {
+	      gomp_mutex_unlock (&team->task_lock);
+	      return;
+	    }
+	}
       if (parent->children)
 	{
 	  task->next_child = parent->children;
@@ -259,12 +420,133 @@ gomp_task_run_pre (struct gomp_task *chi
        || (taskgroup && taskgroup->cancelled))
       && !child_task->copy_ctors_done)
     return true;
-  team->task_running_count++;
-  if (team->task_count == team->task_running_count)
-    gomp_team_barrier_clear_task_pending (&team->barrier);
   return false;
 }
 
+static void
+gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
+{
+  struct gomp_task *parent = child_task->parent;
+  size_t i;
+
+  for (i = 0; i < child_task->depend_count; i++)
+    if (!child_task->depend[i].redundant)
+      {
+	if (child_task->depend[i].next)
+	  child_task->depend[i].next->prev = child_task->depend[i].prev;
+	if (child_task->depend[i].prev)
+	  child_task->depend[i].prev->next = child_task->depend[i].next;
+	else
+	  {
+	    hash_entry_type *slot
+	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
+				NO_INSERT);
+	    if (*slot != &child_task->depend[i])
+	      abort ();
+	    if (child_task->depend[i].next)
+	      *slot = child_task->depend[i].next;
+	    else
+	      htab_clear_slot (parent->depend_hash, slot);
+	  }
+      }
+}
+
+static size_t
+gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+				     struct gomp_team *team)
+{
+  struct gomp_task *parent = child_task->parent;
+  size_t i, count = (uintptr_t) child_task->dependers[0], ret = 0;
+  for (i = 0; i < count; i++)
+    {
+      struct gomp_task *task = child_task->dependers[i + 2];
+      if (--task->num_dependees != 0)
+	continue;
+
+      struct gomp_taskgroup *taskgroup = task->taskgroup;
+      if (parent)
+	{
+	  if (parent->children)
+	    {
+	      task->next_child = parent->children;
+	      task->prev_child = parent->children->prev_child;
+	      task->next_child->prev_child = task;
+	      task->prev_child->next_child = task;
+	    }
+	  else
+	    {
+	      task->next_child = task;
+	      task->prev_child = task;
+	    }
+	  parent->children = task;
+	  if (parent->in_taskwait)
+	    {
+	      parent->in_taskwait = false;
+	      gomp_sem_post (&parent->taskwait_sem);
+	    }
+	}
+      if (taskgroup)
+	{
+	  if (taskgroup->children)
+	    {
+	      task->next_taskgroup = taskgroup->children;
+	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+	      task->next_taskgroup->prev_taskgroup = task;
+	      task->prev_taskgroup->next_taskgroup = task;
+	    }
+	  else
+	    {
+	      task->next_taskgroup = task;
+	      task->prev_taskgroup = task;
+	    }
+	  taskgroup->children = task;
+	  if (taskgroup->in_taskgroup_wait)
+	    {
+	      taskgroup->in_taskgroup_wait = false;
+	      gomp_sem_post (&taskgroup->taskgroup_sem);
+	    }
+	}
+      if (team->task_queue)
+	{
+	  task->next_queue = team->task_queue;
+	  task->prev_queue = team->task_queue->prev_queue;
+	  task->next_queue->prev_queue = task;
+	  task->prev_queue->next_queue = task;
+	}
+      else
+	{
+	  task->next_queue = task;
+	  task->prev_queue = task;
+	  team->task_queue = task;
+	}
+      ++team->task_count;
+      ++ret;
+    }
+  free (child_task->dependers);
+  child_task->dependers = NULL;
+  if (ret > 1)
+    gomp_team_barrier_set_task_pending (&team->barrier);
+  return ret;
+}
+
+static inline size_t
+gomp_task_run_post_handle_depend (struct gomp_task *child_task,
+				  struct gomp_team *team)
+{
+  if (child_task->depend_count == 0)
+    return 0;
+
+  /* If parent is gone already, the hash table is freed and nothing
+     will use the hash table anymore, no need to remove anything from it.  */
+  if (child_task->parent != NULL)
+    gomp_task_run_post_handle_depend_hash (child_task);
+
+  if (child_task->dependers == NULL)
+    return 0;
+
+  return gomp_task_run_post_handle_dependers (child_task, team);
+}
+
 static inline void
 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
 {
@@ -286,7 +568,10 @@ gomp_task_run_post_remove_parent (struct
 	 before the NULL is written.  */
       __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
       if (parent->in_taskwait)
-	gomp_sem_post (&parent->taskwait_sem);
+	{
+	  parent->in_taskwait = false;
+	  gomp_sem_post (&parent->taskwait_sem);
+	}
     }
 }
 
@@ -298,20 +583,29 @@ gomp_task_run_post_remove_taskgroup (str
     return;
   child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
   child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
-  if (taskgroup->children != child_task)
-    return;
-  if (child_task->next_taskgroup != child_task)
-    taskgroup->children = child_task->next_taskgroup;
+  if (taskgroup->num_children > 1)
+    --taskgroup->num_children;
   else
     {
-      /* We access task->children in GOMP_taskgroup_end
+      /* We access taskgroup->num_children in GOMP_taskgroup_end
 	 outside of the task lock mutex region, so
 	 need a release barrier here to ensure memory
 	 written by child_task->fn above is flushed
 	 before the NULL is written.  */
-      __atomic_store_n (&taskgroup->children, NULL, MEMMODEL_RELEASE);
+      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
+    }
+  if (taskgroup->children != child_task)
+    return;
+  if (child_task->next_taskgroup != child_task)
+    taskgroup->children = child_task->next_taskgroup;
+  else
+    {
+      taskgroup->children = NULL;
       if (taskgroup->in_taskgroup_wait)
-	gomp_sem_post (&taskgroup->taskgroup_sem);
+	{
+	  taskgroup->in_taskgroup_wait = false;
+	  gomp_sem_post (&taskgroup->taskgroup_sem);
+	}
     }
 }
 
@@ -323,6 +617,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
   struct gomp_task *task = thr->task;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   gomp_mutex_lock (&team->task_lock);
   if (gomp_barrier_last_thread (state))
@@ -355,8 +650,17 @@ gomp_barrier_handle_tasks (gomp_barrier_
 		}
 	      goto finish_cancelled;
 	    }
+	  team->task_running_count++;
+	  child_task->in_tied_task = true;
+	  if (team->task_count == team->task_running_count)
+	    gomp_team_barrier_clear_task_pending (&team->barrier);
 	}
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	{
+	  gomp_team_barrier_wake (&team->barrier, do_wake);
+	  do_wake = 0;
+	}
       if (to_free)
 	{
 	  gomp_finish_task (to_free);
@@ -374,7 +678,9 @@ gomp_barrier_handle_tasks (gomp_barrier_
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
 	{
-	 finish_cancelled:
+	 finish_cancelled:;
+	  size_t new_tasks
+	    = gomp_task_run_post_handle_depend (child_task, team);
 	  gomp_task_run_post_remove_parent (child_task);
 	  gomp_clear_parent (child_task->children);
 	  gomp_task_run_post_remove_taskgroup (child_task);
@@ -382,6 +688,12 @@ gomp_barrier_handle_tasks (gomp_barrier_
 	  child_task = NULL;
 	  if (!cancelled)
 	    team->task_running_count--;
+	  if (new_tasks > 1)
+	    {
+	      do_wake = team->nthreads - team->task_running_count;
+	      if (do_wake > new_tasks)
+		do_wake = new_tasks;
+	    }
 	  if (--team->task_count == 0
 	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
 	    {
@@ -404,9 +716,10 @@ GOMP_taskwait (void)
   struct gomp_task *task = thr->task;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   /* The acquire barrier on load of task->children here synchronizes
-     with the write of a NULL in gomp_barrier_handle_tasks.  It is
+     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
      not necessary that we synchronize with other non-NULL writes at
      this point, but we must ensure that all writes to memory by a
      child thread task work function are seen before we exit from
@@ -451,6 +764,11 @@ GOMP_taskwait (void)
 	   in other threads.  Wait for them.  */
 	task->in_taskwait = true;
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	{
+	  gomp_team_barrier_wake (&team->barrier, do_wake);
+	  do_wake = 0;
+	}
       if (to_free)
 	{
 	  gomp_finish_task (to_free);
@@ -464,15 +782,13 @@ GOMP_taskwait (void)
 	  thr->task = task;
 	}
       else
-	{
-	  gomp_sem_wait (&task->taskwait_sem);
-	  task->in_taskwait = false;
-	  return;
-	}
+	gomp_sem_wait (&task->taskwait_sem);
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
 	{
-	 finish_cancelled:
+	 finish_cancelled:;
+	  size_t new_tasks
+	    = gomp_task_run_post_handle_depend (child_task, team);
 	  child_task->prev_child->next_child = child_task->next_child;
 	  child_task->next_child->prev_child = child_task->prev_child;
 	  if (task->children == child_task)
@@ -487,7 +803,13 @@ GOMP_taskwait (void)
 	  to_free = child_task;
 	  child_task = NULL;
 	  team->task_count--;
-	  team->task_running_count--;
+	  if (new_tasks > 1)
+	    {
+	      do_wake = team->nthreads - team->task_running_count
+			- !task->in_tied_task;
+	      if (do_wake > new_tasks)
+		do_wake = new_tasks;
+	    }
 	}
     }
 }
@@ -519,6 +841,7 @@ GOMP_taskgroup_start (void)
   taskgroup->children = NULL;
   taskgroup->in_taskgroup_wait = false;
   taskgroup->cancelled = false;
+  taskgroup->num_children = 0;
   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
   task->taskgroup = taskgroup;
 }
@@ -532,18 +855,29 @@ GOMP_taskgroup_end (void)
   struct gomp_taskgroup *taskgroup;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   if (team == NULL)
     return;
   taskgroup = task->taskgroup;
-  if (__atomic_load_n (&taskgroup->children, MEMMODEL_ACQUIRE) == NULL)
+
+  /* The acquire barrier on load of taskgroup->num_children here
+     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
+     It is not necessary that we synchronize with other non-0 writes at
+     this point, but we must ensure that all writes to memory by a
+     child thread task work function are seen before we exit from
+     GOMP_taskgroup_end.  */
+  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
     goto finish;
+
   gomp_mutex_lock (&team->task_lock);
   while (1)
     {
       bool cancelled = false;
       if (taskgroup->children == NULL)
 	{
+	  if (taskgroup->num_children)
+	    goto do_wait;
 	  gomp_mutex_unlock (&team->task_lock);
 	  if (to_free)
 	    {
@@ -570,10 +904,18 @@ GOMP_taskgroup_end (void)
 	    }
 	}
       else
-	/* All tasks we are waiting for are already running
-	   in other threads.  Wait for them.  */
-	taskgroup->in_taskgroup_wait = true;
+	{
+	 do_wait:
+	  /* All tasks we are waiting for are already running
+	     in other threads.  Wait for them.  */
+	  taskgroup->in_taskgroup_wait = true;
+	}
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	{
+	  gomp_team_barrier_wake (&team->barrier, do_wake);
+	  do_wake = 0;
+	}
       if (to_free)
 	{
 	  gomp_finish_task (to_free);
@@ -587,19 +929,18 @@ GOMP_taskgroup_end (void)
 	  thr->task = task;
 	}
       else
-	{
-	  gomp_sem_wait (&taskgroup->taskgroup_sem);
-	  taskgroup->in_taskgroup_wait = false;
-	  goto finish;
-	}
+	gomp_sem_wait (&taskgroup->taskgroup_sem);
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
 	{
-	 finish_cancelled:
+	 finish_cancelled:;
+	  size_t new_tasks
+	    = gomp_task_run_post_handle_depend (child_task, team);
 	  child_task->prev_taskgroup->next_taskgroup
 	    = child_task->next_taskgroup;
 	  child_task->next_taskgroup->prev_taskgroup
 	    = child_task->prev_taskgroup;
+	  --taskgroup->num_children;
 	  if (taskgroup->children == child_task)
 	    {
 	      if (child_task->next_taskgroup != child_task)
@@ -612,7 +953,13 @@ GOMP_taskgroup_end (void)
 	  to_free = child_task;
 	  child_task = NULL;
 	  team->task_count--;
-	  team->task_running_count--;
+	  if (new_tasks > 1)
+	    {
+	      do_wake = team->nthreads - team->task_running_count
+			- !task->in_tied_task;
+	      if (do_wake > new_tasks)
+		do_wake = new_tasks;
+	    }
 	}
     }
 
--- libgomp/testsuite/libgomp.c/depend-1.c.jj	2013-09-26 17:57:26.011983435 +0200
+++ libgomp/testsuite/libgomp.c/depend-1.c	2013-09-26 18:49:55.262270240 +0200
@@ -0,0 +1,154 @@
+#include <stdlib.h>
+
+void
+dep (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+  }
+}
+
+void
+dep2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp taskwait
+  }
+}
+
+void
+firstpriv (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task depend(out: x)
+    x = 2;
+    #pragma omp task depend(in: x)
+    if (x != 1)
+      abort ();
+  }
+}
+
+void
+antidep (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared(x) depend(in: x)
+    if (x != 1)
+      abort ();
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+  }
+}
+
+void
+antidep2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared(x) depend(in: x)
+      if (x != 1)
+	abort ();
+      #pragma omp task shared(x) depend(out: x)
+      x = 2;
+    }
+  }
+}
+
+void
+outdep (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 0;
+    #pragma omp task shared(x) depend(out: x)
+    x = 1;
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+    #pragma omp taskwait
+    if (x != 2)
+      abort ();
+  }
+}
+
+void
+concurrent (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+  }
+}
+
+void
+concurrent2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp taskwait
+  }
+}
+
+int
+main ()
+{
+  dep ();
+  dep2 ();
+  firstpriv ();
+  antidep ();
+  antidep2 ();
+  outdep ();
+  concurrent ();
+  concurrent2 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-2.c.jj	2013-09-26 18:56:19.808294100 +0200
+++ libgomp/testsuite/libgomp.c/depend-2.c	2013-09-26 19:46:29.732123749 +0200
@@ -0,0 +1,71 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+void
+foo (int do_sleep)
+{
+  int a[64], i, *p = a + 4, x = 0;
+  asm volatile ("" : "+r" (p));
+  for (i = 0; i < 64; i++)
+    a[i] = i + 8;
+  #pragma omp parallel private (i)
+  {
+    #pragma omp single nowait
+    {
+      for (i = 0; i < 8; i++)
+	{
+	  #pragma omp task depend(out: a[i * 8 : 4])
+	    a[i * 8] += (i + 2) * 9;
+	  #pragma omp task depend(out: p[i * 8 : 2])
+	    p[i * 8] += (i + 3) * 10;
+	  #pragma omp task depend(out: x)
+	    x = 1;
+	}
+      for (i = 0; i < 8; i++)
+	#pragma omp task depend(in: a[i * 8 : 4]) \
+			 depend(inout: a[i * 8 + 4 : 2]) \
+			 depend(in: a[0 : 4]) depend(in: x)
+	{
+	  if (a[0] != 8 + 2 * 9 || x != 1)
+	    abort ();
+	  if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+	    abort ();
+	  if (a[4 + i * 8] != 4 + i * 8 + 8 + (i + 3) * 10)
+	    abort ();
+	  p[i * 8] += a[i * 8];
+	}
+      for (i = 0; i < 8; i++)
+	#pragma omp task depend(inout: a[i * 8 : 4]) \
+			 depend(in: p[i * 8 : 2]) \
+			 depend(in: p[0 : 2], x)
+	{
+	  if (p[0] != 4 + 8 + 3 * 10 + 0 + 8 + 2 * 9 || x != 1)
+	    abort ();
+	  if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+	    abort ();
+	  if (a[4 + i * 8] != (4 + i * 8 + 8 + (i + 3) * 10
+			       + i * 8 + 8 + (i + 2) * 9))
+	    abort ();
+	  a[i * 8] += 2;
+	}
+      for (i = 0; i < 4; i++)
+	#pragma omp task depend(in: a[i * 16 : 4], a[i * 16 + 8 : 4], x)
+	{
+	  if (a[i * 16] != i * 16 + 8 + (2 * i + 2) * 9 + 2 || x != 1)
+	    abort ();
+	  if (p[i * 16 + 4] != i * 16 + 8 + 8 + (2 * i + 1 + 2) * 9 + 2)
+	    abort ();
+	}
+    }
+    if (do_sleep)
+      sleep (1);
+  }
+}
+
+int
+main ()
+{
+  foo (1);
+  foo (0);
+  return 0;
+}


	Jakub