This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [gomp4] Library side of depend clause support
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>
- Cc: Torvald Riegel <triegel at redhat dot com>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 27 Sep 2013 12:08:12 +0200
- Subject: Re: [gomp4] Library side of depend clause support
- Authentication-results: sourceware.org; auth=none
- References: <20130926183624 dot GI30970 at tucnak dot zalov dot cz> <5244BB11 dot 2030708 at redhat dot com> <20130926234836 dot GJ30970 at tucnak dot zalov dot cz>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Fri, Sep 27, 2013 at 01:48:36AM +0200, Jakub Jelinek wrote:
> Perhaps. What if I do just minor cleanup (use flexible array members for
> the reallocated vectors, and perhaps keep only the last out/inout task
> in the hash table chains rather than all of them), retest, commit and then
> we can discuss/incrementally improve it?
Here is what I've committed now; the incremental changes were really only
using a structure with a flex array member for the dependers vectors,
removing / marking redundant the earlier !ent->is_in entry when adding
!is_in into the chain, and adding new testcases.
Let's improve it incrementally later.
2013-09-27 Jakub Jelinek <jakub@redhat.com>
* libgomp.h: Include stdlib.h.
(struct gomp_task_depend_entry,
struct gomp_dependers_vec): New types.
(struct gomp_task): Add dependers, depend_hash, depend_count,
num_dependees and depend fields.
(struct gomp_taskgroup): Add num_children field.
(gomp_finish_task): Free depend_hash if non-NULL.
* libgomp_g.h (GOMP_task): Add depend argument.
* hashtab.h: New file.
* task.c: Include hashtab.h.
(hash_entry_type): New typedef.
(htab_alloc, htab_free, htab_hash, htab_eq): New inlines.
(gomp_init_task): Clear dependers, depend_hash and depend_count
fields.
(GOMP_task): Add depend argument, handle depend clauses. Increment
num_children field in taskgroup.
(gomp_task_run_pre): Don't increment task_running_count here,
nor clear task_pending bit.
(gomp_task_run_post_handle_depend_hash,
gomp_task_run_post_handle_dependers,
gomp_task_run_post_handle_depend): New functions.
(gomp_task_run_post_remove_parent): Clear in_taskwait before
signalling corresponding semaphore.
(gomp_task_run_post_remove_taskgroup): Decrement num_children
field and make the decrement to 0 MEMMODEL_RELEASE operation,
rather than storing NULL to taskgroup->children. Clear
in_taskgroup_wait before signalling corresponding semaphore.
(gomp_barrier_handle_tasks): Move task_running_count increment
and task_pending bit clearing here. Call
gomp_task_run_post_handle_depend. If more than one new task
has been queued, wake other threads if needed.
(GOMP_taskwait): Call gomp_task_run_post_handle_depend. If more
than one new task has been queued, wake other threads if needed.
After waiting on taskwait_sem, enter critical section again.
(GOMP_taskgroup_start): Initialize num_children field.
(GOMP_taskgroup_end): Check num_children instead of children
before critical section. If children is NULL, but num_children
is non-zero, wait on taskgroup_sem. Call
gomp_task_run_post_handle_depend. If more than one new task has
been queued, wake other threads if needed. After waiting on
taskgroup_sem, enter critical section again.
* testsuite/libgomp.c/depend-1.c: New test.
* testsuite/libgomp.c/depend-2.c: New test.
* testsuite/libgomp.c/depend-3.c: New test.
* testsuite/libgomp.c/depend-4.c: New test.
--- libgomp/libgomp.h.jj 2013-09-26 09:43:10.903930832 +0200
+++ libgomp/libgomp.h 2013-09-27 09:05:17.025402127 +0200
@@ -39,6 +39,7 @@
#include <pthread.h>
#include <stdbool.h>
+#include <stdlib.h>
#ifdef HAVE_ATTRIBUTE_VISIBILITY
# pragma GCC visibility push(hidden)
@@ -253,7 +254,26 @@ enum gomp_task_kind
GOMP_TASK_TIED
};
+struct gomp_task;
struct gomp_taskgroup;
+struct htab;
+
+struct gomp_task_depend_entry
+{
+ void *addr;
+ struct gomp_task_depend_entry *next;
+ struct gomp_task_depend_entry *prev;
+ struct gomp_task *task;
+ bool is_in;
+ bool redundant;
+};
+
+struct gomp_dependers_vec
+{
+ size_t n_elem;
+ size_t allocated;
+ struct gomp_task *elem[];
+};
/* This structure describes a "task" to be run by a thread. */
@@ -268,6 +288,10 @@ struct gomp_task
struct gomp_task *next_taskgroup;
struct gomp_task *prev_taskgroup;
struct gomp_taskgroup *taskgroup;
+ struct gomp_dependers_vec *dependers;
+ struct htab *depend_hash;
+ size_t depend_count;
+ size_t num_dependees;
struct gomp_task_icv icv;
void (*fn) (void *);
void *fn_data;
@@ -277,6 +301,7 @@ struct gomp_task
bool final_task;
bool copy_ctors_done;
gomp_sem_t taskwait_sem;
+ struct gomp_task_depend_entry depend[];
};
struct gomp_taskgroup
@@ -286,6 +311,7 @@ struct gomp_taskgroup
bool in_taskgroup_wait;
bool cancelled;
gomp_sem_t taskgroup_sem;
+ size_t num_children;
};
/* This structure describes a "team" of threads. These are the threads
@@ -525,6 +551,8 @@ extern void gomp_barrier_handle_tasks (g
static void inline
gomp_finish_task (struct gomp_task *task)
{
+ if (__builtin_expect (task->depend_hash != NULL, 0))
+ free (task->depend_hash);
gomp_sem_destroy (&task->taskwait_sem);
}
--- libgomp/libgomp_g.h.jj 2013-09-26 09:43:10.902930838 +0200
+++ libgomp/libgomp_g.h 2013-09-26 10:08:44.820160094 +0200
@@ -178,7 +178,7 @@ extern bool GOMP_cancellation_point (int
/* task.c */
extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
- long, long, bool, unsigned);
+ long, long, bool, unsigned, void **);
extern void GOMP_taskwait (void);
extern void GOMP_taskyield (void);
extern void GOMP_taskgroup_start (void);
--- libgomp/hashtab.h.jj 2013-09-26 10:08:51.031128932 +0200
+++ libgomp/hashtab.h 2013-09-26 21:07:17.757697391 +0200
@@ -0,0 +1,443 @@
+/* An expandable hash tables datatype.
+ Copyright (C) 1999-2013
+ Free Software Foundation, Inc.
+ Contributed by Vladimir Makarov <vmakarov@cygnus.com>.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* The hash table code copied from include/hashtab.[hc] and adjusted,
+ so that the hash table entries are in the flexible array at the end
+ of the control structure, no callbacks are used and the elements in the
+ table are of the hash_entry_type type.
+ Before including this file, define hash_entry_type type and
+ htab_alloc and htab_free functions. After including it, define
+ htab_hash and htab_eq inline functions. */
+
+/* This package implements basic hash table functionality. It is possible
+ to search for an entry, create an entry and destroy an entry.
+
+ Elements in the table are generic pointers.
+
+ The size of the table is not fixed; if the occupancy of the table
+ grows too high the hash table will be expanded.
+
+ The abstract data implementation is based on generalized Algorithm D
+ from Knuth's book "The art of computer programming". Hash table is
+ expanded by creation of new hash table and transferring elements from
+ the old table to the new table. */
+
+/* The type for a hash code. */
+typedef unsigned int hashval_t;
+
+static inline hashval_t htab_hash (hash_entry_type);
+static inline bool htab_eq (hash_entry_type, hash_entry_type);
+
+/* This macro defines reserved value for empty table entry. */
+
+#define HTAB_EMPTY_ENTRY ((hash_entry_type) 0)
+
+/* This macro defines reserved value for table entry which contained
+ a deleted element. */
+
+#define HTAB_DELETED_ENTRY ((hash_entry_type) 1)
+
+/* Hash tables are of the following type. The structure
+ (implementation) of this type is not needed for using the hash
+ tables. All work with hash table should be executed only through
+ functions mentioned below. The size of this structure is subject to
+ change. */
+
+struct htab {
+ /* Current size (in entries) of the hash table. */
+ size_t size;
+
+ /* Current number of elements including also deleted elements. */
+ size_t n_elements;
+
+ /* Current number of deleted elements in the table. */
+ size_t n_deleted;
+
+ /* Current size (in entries) of the hash table, as an index into the
+ table of primes. */
+ unsigned int size_prime_index;
+
+ /* Table itself. */
+ hash_entry_type entries[];
+};
+
+typedef struct htab *htab_t;
+
+/* An enum saying whether we insert into the hash table or not. */
+enum insert_option {NO_INSERT, INSERT};
+
+/* Table of primes and multiplicative inverses.
+
+ Note that these are not minimally reduced inverses. Unlike when generating
+ code to divide by a constant, we want to be able to use the same algorithm
+ all the time. All of these inverses (are implied to) have bit 32 set.
+
+ For the record, the function that computed the table is in
+ libiberty/hashtab.c. */
+
+struct prime_ent
+{
+ hashval_t prime;
+ hashval_t inv;
+ hashval_t inv_m2; /* inverse of prime-2 */
+ hashval_t shift;
+};
+
+static struct prime_ent const prime_tab[] = {
+ { 7, 0x24924925, 0x9999999b, 2 },
+ { 13, 0x3b13b13c, 0x745d1747, 3 },
+ { 31, 0x08421085, 0x1a7b9612, 4 },
+ { 61, 0x0c9714fc, 0x15b1e5f8, 5 },
+ { 127, 0x02040811, 0x0624dd30, 6 },
+ { 251, 0x05197f7e, 0x073260a5, 7 },
+ { 509, 0x01824366, 0x02864fc8, 8 },
+ { 1021, 0x00c0906d, 0x014191f7, 9 },
+ { 2039, 0x0121456f, 0x0161e69e, 10 },
+ { 4093, 0x00300902, 0x00501908, 11 },
+ { 8191, 0x00080041, 0x00180241, 12 },
+ { 16381, 0x000c0091, 0x00140191, 13 },
+ { 32749, 0x002605a5, 0x002a06e6, 14 },
+ { 65521, 0x000f00e2, 0x00110122, 15 },
+ { 131071, 0x00008001, 0x00018003, 16 },
+ { 262139, 0x00014002, 0x0001c004, 17 },
+ { 524287, 0x00002001, 0x00006001, 18 },
+ { 1048573, 0x00003001, 0x00005001, 19 },
+ { 2097143, 0x00004801, 0x00005801, 20 },
+ { 4194301, 0x00000c01, 0x00001401, 21 },
+ { 8388593, 0x00001e01, 0x00002201, 22 },
+ { 16777213, 0x00000301, 0x00000501, 23 },
+ { 33554393, 0x00001381, 0x00001481, 24 },
+ { 67108859, 0x00000141, 0x000001c1, 25 },
+ { 134217689, 0x000004e1, 0x00000521, 26 },
+ { 268435399, 0x00000391, 0x000003b1, 27 },
+ { 536870909, 0x00000019, 0x00000029, 28 },
+ { 1073741789, 0x0000008d, 0x00000095, 29 },
+ { 2147483647, 0x00000003, 0x00000007, 30 },
+ /* Avoid "decimal constant so large it is unsigned" for 4294967291. */
+ { 0xfffffffb, 0x00000006, 0x00000008, 31 }
+};
+
+/* The following function returns an index into the above table of the
+ nearest prime number which is greater than N, and near a power of two. */
+
+static unsigned int
+higher_prime_index (unsigned long n)
+{
+ unsigned int low = 0;
+ unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
+
+ while (low != high)
+ {
+ unsigned int mid = low + (high - low) / 2;
+ if (n > prime_tab[mid].prime)
+ low = mid + 1;
+ else
+ high = mid;
+ }
+
+ /* If we've run out of primes, abort. */
+ if (n > prime_tab[low].prime)
+ abort ();
+
+ return low;
+}
+
+/* Return the current size of given hash table. */
+
+static inline size_t
+htab_size (htab_t htab)
+{
+ return htab->size;
+}
+
+/* Return the current number of elements in given hash table. */
+
+static inline size_t
+htab_elements (htab_t htab)
+{
+ return htab->n_elements - htab->n_deleted;
+}
+
+/* Return X % Y. */
+
+static inline hashval_t
+htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
+{
+ /* The multiplicative inverses computed above are for 32-bit types, and
+ requires that we be able to compute a highpart multiply. */
+ if (sizeof (hashval_t) * __CHAR_BIT__ <= 32)
+ {
+ hashval_t t1, t2, t3, t4, q, r;
+
+ t1 = ((unsigned long long)x * inv) >> 32;
+ t2 = x - t1;
+ t3 = t2 >> 1;
+ t4 = t1 + t3;
+ q = t4 >> shift;
+ r = x - (q * y);
+
+ return r;
+ }
+
+ /* Otherwise just use the native division routines. */
+ return x % y;
+}
+
+/* Compute the primary hash for HASH given HTAB's current size. */
+
+static inline hashval_t
+htab_mod (hashval_t hash, htab_t htab)
+{
+ const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+ return htab_mod_1 (hash, p->prime, p->inv, p->shift);
+}
+
+/* Compute the secondary hash for HASH given HTAB's current size. */
+
+static inline hashval_t
+htab_mod_m2 (hashval_t hash, htab_t htab)
+{
+ const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+ return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
+}
+
+/* Create hash table of size SIZE. */
+
+static htab_t
+htab_create (size_t size)
+{
+ htab_t result;
+ unsigned int size_prime_index;
+
+ size_prime_index = higher_prime_index (size);
+ size = prime_tab[size_prime_index].prime;
+
+ result = (htab_t) htab_alloc (sizeof (struct htab)
+ + size * sizeof (hash_entry_type));
+ result->size = size;
+ result->n_elements = 0;
+ result->n_deleted = 0;
+ result->size_prime_index = size_prime_index;
+ memset (result->entries, 0, size * sizeof (hash_entry_type));
+ return result;
+}
+
+/* Similar to htab_find_slot, but without several unwanted side effects:
+ - Does not call htab_eq when it finds an existing entry.
+ - Does not change the count of elements in the hash table.
+ This function also assumes there are no deleted entries in the table.
+ HASH is the hash value for the element to be inserted. */
+
+static hash_entry_type *
+find_empty_slot_for_expand (htab_t htab, hashval_t hash)
+{
+ hashval_t index = htab_mod (hash, htab);
+ size_t size = htab_size (htab);
+ hash_entry_type *slot = htab->entries + index;
+ hashval_t hash2;
+
+ if (*slot == HTAB_EMPTY_ENTRY)
+ return slot;
+ else if (*slot == HTAB_DELETED_ENTRY)
+ abort ();
+
+ hash2 = htab_mod_m2 (hash, htab);
+ for (;;)
+ {
+ index += hash2;
+ if (index >= size)
+ index -= size;
+
+ slot = htab->entries + index;
+ if (*slot == HTAB_EMPTY_ENTRY)
+ return slot;
+ else if (*slot == HTAB_DELETED_ENTRY)
+ abort ();
+ }
+}
+
+/* The following function changes size of memory allocated for the
+ entries and repeatedly inserts the table elements. The occupancy
+ of the table after the call will be about 50%. Naturally the hash
+ table must already exist. Remember also that the place of the
+ table entries is changed. */
+
+static htab_t
+htab_expand (htab_t htab)
+{
+ htab_t nhtab;
+ hash_entry_type *olimit;
+ hash_entry_type *p;
+ size_t osize, elts;
+
+ osize = htab->size;
+ olimit = htab->entries + osize;
+ elts = htab_elements (htab);
+
+ /* Resize only when table after removal of unused elements is either
+ too full or too empty. */
+ if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
+ nhtab = htab_create (elts * 2);
+ else
+ nhtab = htab_create (osize - 1);
+ nhtab->n_elements = htab->n_elements - htab->n_deleted;
+
+ p = htab->entries;
+ do
+ {
+ hash_entry_type x = *p;
+
+ if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
+ *find_empty_slot_for_expand (nhtab, htab_hash (x)) = x;
+
+ p++;
+ }
+ while (p < olimit);
+
+ htab_free (htab);
+ return nhtab;
+}
+
+/* This function searches for a hash table entry equal to the given
+ element. It cannot be used to insert or delete an element. */
+
+static hash_entry_type
+htab_find (htab_t htab, const hash_entry_type element)
+{
+ hashval_t index, hash2, hash = htab_hash (element);
+ size_t size;
+ hash_entry_type entry;
+
+ size = htab_size (htab);
+ index = htab_mod (hash, htab);
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY
+ || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+ return entry;
+
+ hash2 = htab_mod_m2 (hash, htab);
+ for (;;)
+ {
+ index += hash2;
+ if (index >= size)
+ index -= size;
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY
+ || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+ return entry;
+ }
+}
+
+/* This function searches for a hash table slot containing an entry
+ equal to the given element. To delete an entry, call this with
+ insert=NO_INSERT, then call htab_clear_slot on the slot returned
+ (possibly after doing some checks). To insert an entry, call this
+ with insert=INSERT, then write the value you want into the returned
+ slot. */
+
+static hash_entry_type *
+htab_find_slot (htab_t *htabp, const hash_entry_type element,
+ enum insert_option insert)
+{
+ hash_entry_type *first_deleted_slot;
+ hashval_t index, hash2, hash = htab_hash (element);
+ size_t size;
+ hash_entry_type entry;
+ htab_t htab = *htabp;
+
+ size = htab_size (htab);
+ if (insert == INSERT && size * 3 <= htab->n_elements * 4)
+ {
+ htab = *htabp = htab_expand (htab);
+ size = htab_size (htab);
+ }
+
+ index = htab_mod (hash, htab);
+
+ first_deleted_slot = NULL;
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY)
+ goto empty_entry;
+ else if (entry == HTAB_DELETED_ENTRY)
+ first_deleted_slot = &htab->entries[index];
+ else if (htab_eq (entry, element))
+ return &htab->entries[index];
+
+ hash2 = htab_mod_m2 (hash, htab);
+ for (;;)
+ {
+ index += hash2;
+ if (index >= size)
+ index -= size;
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY)
+ goto empty_entry;
+ else if (entry == HTAB_DELETED_ENTRY)
+ {
+ if (!first_deleted_slot)
+ first_deleted_slot = &htab->entries[index];
+ }
+ else if (htab_eq (entry, element))
+ return &htab->entries[index];
+ }
+
+ empty_entry:
+ if (insert == NO_INSERT)
+ return NULL;
+
+ if (first_deleted_slot)
+ {
+ htab->n_deleted--;
+ *first_deleted_slot = HTAB_EMPTY_ENTRY;
+ return first_deleted_slot;
+ }
+
+ htab->n_elements++;
+ return &htab->entries[index];
+}
+
+/* This function clears a specified slot in a hash table. It is
+ useful when you've already done the lookup and don't want to do it
+ again. */
+
+static inline void
+htab_clear_slot (htab_t htab, hash_entry_type *slot)
+{
+ if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
+ || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
+ abort ();
+
+ *slot = HTAB_DELETED_ENTRY;
+ htab->n_deleted++;
+}
+
+/* Returns a hash code for pointer P. Simplified version of evahash */
+
+static inline hashval_t
+hash_pointer (const void *p)
+{
+ uintptr_t v = (uintptr_t) p;
+ if (sizeof (v) > sizeof (hashval_t))
+ v ^= v >> (sizeof (uintptr_t) / 2 * __CHAR_BIT__);
+ return v;
+}
--- libgomp/task.c.jj 2013-09-26 09:43:10.903930832 +0200
+++ libgomp/task.c 2013-09-27 09:30:57.798187840 +0200
@@ -29,6 +29,33 @@
#include <stdlib.h>
#include <string.h>
+typedef struct gomp_task_depend_entry *hash_entry_type;
+
+static inline void *
+htab_alloc (size_t size)
+{
+ return gomp_malloc (size);
+}
+
+static inline void
+htab_free (void *ptr)
+{
+ free (ptr);
+}
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+ return hash_pointer (element->addr);
+}
+
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+ return x->addr == y->addr;
+}
/* Create a new task data structure. */
@@ -45,6 +72,9 @@ gomp_init_task (struct gomp_task *task,
task->copy_ctors_done = false;
task->children = NULL;
task->taskgroup = NULL;
+ task->dependers = NULL;
+ task->depend_hash = NULL;
+ task->depend_count = 0;
gomp_sem_init (&task->taskwait_sem, 0);
}
@@ -80,7 +110,8 @@ gomp_clear_parent (struct gomp_task *chi
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
- long arg_size, long arg_align, bool if_clause, unsigned flags)
+ long arg_size, long arg_align, bool if_clause, unsigned flags,
+ void **depend)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
@@ -108,6 +139,38 @@ GOMP_task (void (*fn) (void *), void *da
{
struct gomp_task task;
+ /* If there are depend clauses and earlier deferred sibling tasks
+ with depend clauses, check if there isn't a dependency. If there
+ is, fall through to the deferred task handling, as we can't
+ schedule such tasks right away. There is no need to handle
+ depend clauses for non-deferred tasks other than this, because
+ the parent task is suspended until the child task finishes and thus
+ it can't start further child tasks. */
+ if ((flags & 8) && thr->task && thr->task->depend_hash)
+ {
+ struct gomp_task *parent = thr->task;
+ struct gomp_task_depend_entry elem, *ent = NULL;
+ size_t ndepend = (uintptr_t) depend[0];
+ size_t nout = (uintptr_t) depend[1];
+ size_t i;
+ gomp_mutex_lock (&team->task_lock);
+ for (i = 0; i < ndepend; i++)
+ {
+ elem.addr = depend[i + 2];
+ ent = htab_find (parent->depend_hash, &elem);
+ for (; ent; ent = ent->next)
+ if (i >= nout && ent->is_in)
+ continue;
+ else
+ break;
+ if (ent)
+ break;
+ }
+ gomp_mutex_unlock (&team->task_lock);
+ if (ent)
+ goto defer;
+ }
+
gomp_init_task (&task, thr->task, gomp_icv (false));
task.kind = GOMP_TASK_IFFALSE;
task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
@@ -146,14 +209,20 @@ GOMP_task (void (*fn) (void *), void *da
}
else
{
+ defer:;
struct gomp_task *task;
struct gomp_task *parent = thr->task;
struct gomp_taskgroup *taskgroup = parent->taskgroup;
char *arg;
bool do_wake;
+ size_t depend_size = 0;
- task = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
- arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+ if (flags & 8)
+ depend_size = ((uintptr_t) depend[0]
+ * sizeof (struct gomp_task_depend_entry));
+ task = gomp_malloc (sizeof (*task) + depend_size
+ + arg_size + arg_align - 1);
+ arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
& ~(uintptr_t) (arg_align - 1));
gomp_init_task (task, parent, gomp_icv (false));
task->kind = GOMP_TASK_IFFALSE;
@@ -171,7 +240,6 @@ GOMP_task (void (*fn) (void *), void *da
task->kind = GOMP_TASK_WAITING;
task->fn = fn;
task->fn_data = arg;
- task->in_tied_task = true;
task->final_task = (flags & 2) >> 1;
gomp_mutex_lock (&team->task_lock);
/* If parallel or taskgroup has been cancelled, don't start new
@@ -185,6 +253,117 @@ GOMP_task (void (*fn) (void *), void *da
free (task);
return;
}
+ if (taskgroup)
+ taskgroup->num_children++;
+ if (depend_size)
+ {
+ size_t ndepend = (uintptr_t) depend[0];
+ size_t nout = (uintptr_t) depend[1];
+ size_t i;
+ hash_entry_type ent;
+
+ task->depend_count = ndepend;
+ task->num_dependees = 0;
+ if (parent->depend_hash == NULL)
+ parent->depend_hash
+ = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+ for (i = 0; i < ndepend; i++)
+ {
+ task->depend[i].addr = depend[2 + i];
+ task->depend[i].next = NULL;
+ task->depend[i].prev = NULL;
+ task->depend[i].task = task;
+ task->depend[i].is_in = i >= nout;
+ task->depend[i].redundant = false;
+
+ hash_entry_type *slot
+ = htab_find_slot (&parent->depend_hash, &task->depend[i],
+ INSERT);
+ hash_entry_type out = NULL;
+ if (*slot)
+ {
+ /* If multiple depends on the same task are the
+ same, all but the first one are redundant.
+ As inout/out come first, if any of them is
+ inout/out, it will win, which is the right
+ semantics. */
+ if ((*slot)->task == task)
+ {
+ task->depend[i].redundant = true;
+ continue;
+ }
+ for (ent = *slot; ent; ent = ent->next)
+ {
+ /* depend(in:...) doesn't depend on earlier
+ depend(in:...). */
+ if (i >= nout && ent->is_in)
+ continue;
+
+ if (!ent->is_in)
+ out = ent;
+
+ struct gomp_task *tsk = ent->task;
+ if (tsk->dependers == NULL)
+ {
+ tsk->dependers
+ = gomp_malloc (sizeof (struct gomp_dependers_vec)
+ + 6 * sizeof (struct gomp_task *));
+ tsk->dependers->n_elem = 1;
+ tsk->dependers->allocated = 6;
+ tsk->dependers->elem[0] = task;
+ task->num_dependees++;
+ continue;
+ }
+ /* We already have some other dependency on tsk
+ from earlier depend clause. */
+ else if (tsk->dependers->n_elem
+ && (tsk->dependers->elem[tsk->dependers->n_elem
+ - 1]
+ == task))
+ continue;
+ else if (tsk->dependers->n_elem
+ == tsk->dependers->allocated)
+ {
+ tsk->dependers->allocated
+ = tsk->dependers->allocated * 2 + 2;
+ tsk->dependers
+ = gomp_realloc (tsk->dependers,
+ sizeof (struct gomp_dependers_vec)
+ + (tsk->dependers->allocated
+ * sizeof (struct gomp_task *)));
+ }
+ tsk->dependers->elem[tsk->dependers->n_elem++] = task;
+ task->num_dependees++;
+ }
+ task->depend[i].next = *slot;
+ (*slot)->prev = &task->depend[i];
+ }
+ *slot = &task->depend[i];
+
+ /* There is no need to store more than one depend({,in}out:)
+ task per address in the hash table chain, because each out
+ depends on all earlier outs, thus it is enough to record
+ just the last depend({,in}out:). For depend(in:), we need
+ to keep all of the previous ones not terminated yet, because
+ a later depend({,in}out:) might need to depend on all of
+ them. So, if the new task's clause is depend({,in}out:),
+ we know there is at most one other depend({,in}out:) clause
+ in the list (out) and to maintain the invariant we now
+ need to remove it from the list. */
+ if (!task->depend[i].is_in && out)
+ {
+ if (out->next)
+ out->next->prev = out->prev;
+ out->prev->next = out->next;
+ out->redundant = true;
+ }
+ }
+ if (task->num_dependees)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+ }
if (parent->children)
{
task->next_child = parent->children;
@@ -259,12 +438,133 @@ gomp_task_run_pre (struct gomp_task *chi
|| (taskgroup && taskgroup->cancelled))
&& !child_task->copy_ctors_done)
return true;
- team->task_running_count++;
- if (team->task_count == team->task_running_count)
- gomp_team_barrier_clear_task_pending (&team->barrier);
return false;
}
+static void
+gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
+{
+ struct gomp_task *parent = child_task->parent;
+ size_t i;
+
+ for (i = 0; i < child_task->depend_count; i++)
+ if (!child_task->depend[i].redundant)
+ {
+ if (child_task->depend[i].next)
+ child_task->depend[i].next->prev = child_task->depend[i].prev;
+ if (child_task->depend[i].prev)
+ child_task->depend[i].prev->next = child_task->depend[i].next;
+ else
+ {
+ hash_entry_type *slot
+ = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
+ NO_INSERT);
+ if (*slot != &child_task->depend[i])
+ abort ();
+ if (child_task->depend[i].next)
+ *slot = child_task->depend[i].next;
+ else
+ htab_clear_slot (parent->depend_hash, slot);
+ }
+ }
+}
+
+static size_t
+gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+ struct gomp_team *team)
+{
+ struct gomp_task *parent = child_task->parent;
+ size_t i, count = child_task->dependers->n_elem, ret = 0;
+ for (i = 0; i < count; i++)
+ {
+ struct gomp_task *task = child_task->dependers->elem[i];
+ if (--task->num_dependees != 0)
+ continue;
+
+ struct gomp_taskgroup *taskgroup = task->taskgroup;
+ if (parent)
+ {
+ if (parent->children)
+ {
+ task->next_child = parent->children;
+ task->prev_child = parent->children->prev_child;
+ task->next_child->prev_child = task;
+ task->prev_child->next_child = task;
+ }
+ else
+ {
+ task->next_child = task;
+ task->prev_child = task;
+ }
+ parent->children = task;
+ if (parent->in_taskwait)
+ {
+ parent->in_taskwait = false;
+ gomp_sem_post (&parent->taskwait_sem);
+ }
+ }
+ if (taskgroup)
+ {
+ if (taskgroup->children)
+ {
+ task->next_taskgroup = taskgroup->children;
+ task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+ task->next_taskgroup->prev_taskgroup = task;
+ task->prev_taskgroup->next_taskgroup = task;
+ }
+ else
+ {
+ task->next_taskgroup = task;
+ task->prev_taskgroup = task;
+ }
+ taskgroup->children = task;
+ if (taskgroup->in_taskgroup_wait)
+ {
+ taskgroup->in_taskgroup_wait = false;
+ gomp_sem_post (&taskgroup->taskgroup_sem);
+ }
+ }
+ if (team->task_queue)
+ {
+ task->next_queue = team->task_queue;
+ task->prev_queue = team->task_queue->prev_queue;
+ task->next_queue->prev_queue = task;
+ task->prev_queue->next_queue = task;
+ }
+ else
+ {
+ task->next_queue = task;
+ task->prev_queue = task;
+ team->task_queue = task;
+ }
+ ++team->task_count;
+ ++ret;
+ }
+ free (child_task->dependers);
+ child_task->dependers = NULL;
+ if (ret > 1)
+ gomp_team_barrier_set_task_pending (&team->barrier);
+ return ret;
+}
+
+static inline size_t
+gomp_task_run_post_handle_depend (struct gomp_task *child_task,
+ struct gomp_team *team)
+{
+ if (child_task->depend_count == 0)
+ return 0;
+
+ /* If parent is gone already, the hash table is freed and nothing
+ will use the hash table anymore, no need to remove anything from it. */
+ if (child_task->parent != NULL)
+ gomp_task_run_post_handle_depend_hash (child_task);
+
+ if (child_task->dependers == NULL)
+ return 0;
+
+ return gomp_task_run_post_handle_dependers (child_task, team);
+}
+
static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
@@ -286,7 +586,10 @@ gomp_task_run_post_remove_parent (struct
before the NULL is written. */
__atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
if (parent->in_taskwait)
- gomp_sem_post (&parent->taskwait_sem);
+ {
+ parent->in_taskwait = false;
+ gomp_sem_post (&parent->taskwait_sem);
+ }
}
}
@@ -298,20 +601,29 @@ gomp_task_run_post_remove_taskgroup (str
return;
child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
- if (taskgroup->children != child_task)
- return;
- if (child_task->next_taskgroup != child_task)
- taskgroup->children = child_task->next_taskgroup;
+ if (taskgroup->num_children > 1)
+ --taskgroup->num_children;
else
{
- /* We access task->children in GOMP_taskgroup_end
+ /* We access taskgroup->num_children in GOMP_taskgroup_end
outside of the task lock mutex region, so
need a release barrier here to ensure memory
written by child_task->fn above is flushed
before the NULL is written. */
- __atomic_store_n (&taskgroup->children, NULL, MEMMODEL_RELEASE);
+ __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
+ }
+ if (taskgroup->children != child_task)
+ return;
+ if (child_task->next_taskgroup != child_task)
+ taskgroup->children = child_task->next_taskgroup;
+ else
+ {
+ taskgroup->children = NULL;
if (taskgroup->in_taskgroup_wait)
- gomp_sem_post (&taskgroup->taskgroup_sem);
+ {
+ taskgroup->in_taskgroup_wait = false;
+ gomp_sem_post (&taskgroup->taskgroup_sem);
+ }
}
}
@@ -323,6 +635,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
struct gomp_task *task = thr->task;
struct gomp_task *child_task = NULL;
struct gomp_task *to_free = NULL;
+ int do_wake = 0;
gomp_mutex_lock (&team->task_lock);
if (gomp_barrier_last_thread (state))
@@ -355,8 +668,17 @@ gomp_barrier_handle_tasks (gomp_barrier_
}
goto finish_cancelled;
}
+ team->task_running_count++;
+ child_task->in_tied_task = true;
+ if (team->task_count == team->task_running_count)
+ gomp_team_barrier_clear_task_pending (&team->barrier);
}
gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ {
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ do_wake = 0;
+ }
if (to_free)
{
gomp_finish_task (to_free);
@@ -374,7 +696,9 @@ gomp_barrier_handle_tasks (gomp_barrier_
gomp_mutex_lock (&team->task_lock);
if (child_task)
{
- finish_cancelled:
+ finish_cancelled:;
+ size_t new_tasks
+ = gomp_task_run_post_handle_depend (child_task, team);
gomp_task_run_post_remove_parent (child_task);
gomp_clear_parent (child_task->children);
gomp_task_run_post_remove_taskgroup (child_task);
@@ -382,6 +706,12 @@ gomp_barrier_handle_tasks (gomp_barrier_
child_task = NULL;
if (!cancelled)
team->task_running_count--;
+ if (new_tasks > 1)
+ {
+ do_wake = team->nthreads - team->task_running_count;
+ if (do_wake > new_tasks)
+ do_wake = new_tasks;
+ }
if (--team->task_count == 0
&& gomp_team_barrier_waiting_for_tasks (&team->barrier))
{
@@ -404,9 +734,10 @@ GOMP_taskwait (void)
struct gomp_task *task = thr->task;
struct gomp_task *child_task = NULL;
struct gomp_task *to_free = NULL;
+ int do_wake = 0;
/* The acquire barrier on load of task->children here synchronizes
- with the write of a NULL in gomp_barrier_handle_tasks. It is
+ with the write of a NULL in gomp_task_run_post_remove_parent. It is
not necessary that we synchronize with other non-NULL writes at
this point, but we must ensure that all writes to memory by a
child thread task work function are seen before we exit from
@@ -451,6 +782,11 @@ GOMP_taskwait (void)
in other threads. Wait for them. */
task->in_taskwait = true;
gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ {
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ do_wake = 0;
+ }
if (to_free)
{
gomp_finish_task (to_free);
@@ -464,15 +800,13 @@ GOMP_taskwait (void)
thr->task = task;
}
else
- {
- gomp_sem_wait (&task->taskwait_sem);
- task->in_taskwait = false;
- return;
- }
+ gomp_sem_wait (&task->taskwait_sem);
gomp_mutex_lock (&team->task_lock);
if (child_task)
{
- finish_cancelled:
+ finish_cancelled:;
+ size_t new_tasks
+ = gomp_task_run_post_handle_depend (child_task, team);
child_task->prev_child->next_child = child_task->next_child;
child_task->next_child->prev_child = child_task->prev_child;
if (task->children == child_task)
@@ -487,7 +821,13 @@ GOMP_taskwait (void)
to_free = child_task;
child_task = NULL;
team->task_count--;
- team->task_running_count--;
+ if (new_tasks > 1)
+ {
+ do_wake = team->nthreads - team->task_running_count
+ - !task->in_tied_task;
+ if (do_wake > new_tasks)
+ do_wake = new_tasks;
+ }
}
}
}
@@ -519,6 +859,7 @@ GOMP_taskgroup_start (void)
taskgroup->children = NULL;
taskgroup->in_taskgroup_wait = false;
taskgroup->cancelled = false;
+ taskgroup->num_children = 0;
gomp_sem_init (&taskgroup->taskgroup_sem, 0);
task->taskgroup = taskgroup;
}
@@ -532,18 +873,29 @@ GOMP_taskgroup_end (void)
struct gomp_taskgroup *taskgroup;
struct gomp_task *child_task = NULL;
struct gomp_task *to_free = NULL;
+ int do_wake = 0;
if (team == NULL)
return;
taskgroup = task->taskgroup;
- if (__atomic_load_n (&taskgroup->children, MEMMODEL_ACQUIRE) == NULL)
+
+ /* The acquire barrier on load of taskgroup->num_children here
+ synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
+ It is not necessary that we synchronize with other non-0 writes at
+ this point, but we must ensure that all writes to memory by a
+ child thread task work function are seen before we exit from
+ GOMP_taskgroup_end. */
+ if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
goto finish;
+
gomp_mutex_lock (&team->task_lock);
while (1)
{
bool cancelled = false;
if (taskgroup->children == NULL)
{
+ if (taskgroup->num_children)
+ goto do_wait;
gomp_mutex_unlock (&team->task_lock);
if (to_free)
{
@@ -570,10 +922,18 @@ GOMP_taskgroup_end (void)
}
}
else
- /* All tasks we are waiting for are already running
- in other threads. Wait for them. */
- taskgroup->in_taskgroup_wait = true;
+ {
+ do_wait:
+ /* All tasks we are waiting for are already running
+ in other threads. Wait for them. */
+ taskgroup->in_taskgroup_wait = true;
+ }
gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ {
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ do_wake = 0;
+ }
if (to_free)
{
gomp_finish_task (to_free);
@@ -587,19 +947,18 @@ GOMP_taskgroup_end (void)
thr->task = task;
}
else
- {
- gomp_sem_wait (&taskgroup->taskgroup_sem);
- taskgroup->in_taskgroup_wait = false;
- goto finish;
- }
+ gomp_sem_wait (&taskgroup->taskgroup_sem);
gomp_mutex_lock (&team->task_lock);
if (child_task)
{
- finish_cancelled:
+ finish_cancelled:;
+ size_t new_tasks
+ = gomp_task_run_post_handle_depend (child_task, team);
child_task->prev_taskgroup->next_taskgroup
= child_task->next_taskgroup;
child_task->next_taskgroup->prev_taskgroup
= child_task->prev_taskgroup;
+ --taskgroup->num_children;
if (taskgroup->children == child_task)
{
if (child_task->next_taskgroup != child_task)
@@ -612,7 +971,13 @@ GOMP_taskgroup_end (void)
to_free = child_task;
child_task = NULL;
team->task_count--;
- team->task_running_count--;
+ if (new_tasks > 1)
+ {
+ do_wake = team->nthreads - team->task_running_count
+ - !task->in_tied_task;
+ if (do_wake > new_tasks)
+ do_wake = new_tasks;
+ }
}
}
--- libgomp/testsuite/libgomp.c/depend-4.c.jj 2013-09-27 11:42:37.283473918 +0200
+++ libgomp/testsuite/libgomp.c/depend-4.c 2013-09-27 11:49:37.781239095 +0200
@@ -0,0 +1,56 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 1, y = 2, z = 3;
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared (x, y, z) depend(inout: x, y) \
+ depend (in: z) if (x > 10)
+ {
+ if (x != 1 || y != 2 || z != 3)
+ abort ();
+ x = 4;
+ y = 5;
+ }
+ /* The above task has depend clauses, but no dependencies
+ on earlier tasks, and is if (0), so must be scheduled
+ immediately. */
+ if (x != 4 || y != 5)
+ abort ();
+ }
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared (x, y) depend(in: x, y)
+ {
+ usleep (10000);
+ if (x != 4 || y != 5 || z != 3)
+ abort ();
+ }
+ #pragma omp task shared (x, y) depend(in: x, y)
+ {
+ usleep (10000);
+ if (x != 4 || y != 5 || z != 3)
+ abort ();
+ }
+ #pragma omp task shared (x, y, z) depend(inout: x, y) \
+ depend (in: z) if (x > 10)
+ {
+ if (x != 4 || y != 5 || z != 3)
+ abort ();
+ x = 6;
+ y = 7;
+ }
+ /* The above task has depend clauses, and may have dependencies
+ on earlier tasks; since it is if (0), it can be deferred. */
+ }
+ if (x != 6 || y != 7)
+ abort ();
+ }
+ return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-1.c.jj 2013-09-26 17:57:26.011983435 +0200
+++ libgomp/testsuite/libgomp.c/depend-1.c 2013-09-26 21:09:57.128895308 +0200
@@ -0,0 +1,215 @@
+#include <stdlib.h>
+
+void
+dep (void)
+{
+ int x = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ }
+}
+
+void
+dep2 (void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 1;
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp taskwait
+ }
+}
+
+void
+dep3 (void)
+{
+ #pragma omp parallel
+ {
+ int x = 1;
+ #pragma omp single
+ {
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ }
+ }
+}
+
+void
+firstpriv (void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 1;
+ #pragma omp task depend(out: x)
+ x = 2;
+ #pragma omp task depend(in: x)
+ if (x != 1)
+ abort ();
+ }
+}
+
+void
+antidep (void)
+{
+ int x = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 1)
+ abort ();
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ }
+}
+
+void
+antidep2 (void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 1;
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 1)
+ abort ();
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ }
+ }
+}
+
+void
+antidep3 (void)
+{
+ #pragma omp parallel
+ {
+ int x = 1;
+ #pragma omp single
+ {
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 1)
+ abort ();
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ }
+ }
+}
+
+
+void
+outdep (void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 0;
+ #pragma omp task shared(x) depend(out: x)
+ x = 1;
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ #pragma omp taskwait
+ if (x != 2)
+ abort ();
+ }
+}
+
+void
+concurrent (void)
+{
+ int x = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ }
+}
+
+void
+concurrent2 (void)
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 1;
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp taskwait
+ }
+}
+
+void
+concurrent3 (void)
+{
+ #pragma omp parallel
+ {
+ int x = 1;
+ #pragma omp single
+ {
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ #pragma omp task shared (x) depend(in: x)
+ if (x != 2)
+ abort ();
+ }
+ }
+}
+
+int
+main ()
+{
+ dep ();
+ dep2 ();
+ dep3 ();
+ firstpriv ();
+ antidep ();
+ antidep2 ();
+ antidep3 ();
+ outdep ();
+ concurrent ();
+ concurrent2 ();
+ concurrent3 ();
+ return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-2.c.jj 2013-09-26 18:56:19.808294100 +0200
+++ libgomp/testsuite/libgomp.c/depend-2.c 2013-09-26 19:46:29.732123749 +0200
@@ -0,0 +1,71 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+void
+foo (int do_sleep)
+{
+ int a[64], i, *p = a + 4, x = 0;
+ asm volatile ("" : "+r" (p));
+ for (i = 0; i < 64; i++)
+ a[i] = i + 8;
+ #pragma omp parallel private (i)
+ {
+ #pragma omp single nowait
+ {
+ for (i = 0; i < 8; i++)
+ {
+ #pragma omp task depend(out: a[i * 8 : 4])
+ a[i * 8] += (i + 2) * 9;
+ #pragma omp task depend(out: p[i * 8 : 2])
+ p[i * 8] += (i + 3) * 10;
+ #pragma omp task depend(out: x)
+ x = 1;
+ }
+ for (i = 0; i < 8; i++)
+ #pragma omp task depend(in: a[i * 8 : 4]) \
+ depend(inout: a[i * 8 + 4 : 2]) \
+ depend(in: a[0 : 4]) depend(in: x)
+ {
+ if (a[0] != 8 + 2 * 9 || x != 1)
+ abort ();
+ if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+ abort ();
+ if (a[4 + i * 8] != 4 + i * 8 + 8 + (i + 3) * 10)
+ abort ();
+ p[i * 8] += a[i * 8];
+ }
+ for (i = 0; i < 8; i++)
+ #pragma omp task depend(inout: a[i * 8 : 4]) \
+ depend(in: p[i * 8 : 2]) \
+ depend(in: p[0 : 2], x)
+ {
+ if (p[0] != 4 + 8 + 3 * 10 + 0 + 8 + 2 * 9 || x != 1)
+ abort ();
+ if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+ abort ();
+ if (a[4 + i * 8] != (4 + i * 8 + 8 + (i + 3) * 10
+ + i * 8 + 8 + (i + 2) * 9))
+ abort ();
+ a[i * 8] += 2;
+ }
+ for (i = 0; i < 4; i++)
+ #pragma omp task depend(in: a[i * 16 : 4], a[i * 16 + 8 : 4], x)
+ {
+ if (a[i * 16] != i * 16 + 8 + (2 * i + 2) * 9 + 2 || x != 1)
+ abort ();
+ if (p[i * 16 + 4] != i * 16 + 8 + 8 + (2 * i + 1 + 2) * 9 + 2)
+ abort ();
+ }
+ }
+ if (do_sleep)
+ sleep (1);
+ }
+}
+
+int
+main ()
+{
+ foo (1);
+ foo (0);
+ return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-3.c.jj 2013-09-27 11:32:44.410621977 +0200
+++ libgomp/testsuite/libgomp.c/depend-3.c 2013-09-27 11:39:25.500493830 +0200
@@ -0,0 +1,51 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int x = 1, y = 2;
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared (x) depend(in: x)
+ {
+ usleep (10000);
+ if (x != 1)
+ abort ();
+ }
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared (x) depend(in: x)
+ {
+ usleep (15000);
+ if (x != 1)
+ abort ();
+ }
+ #pragma omp task shared (y) depend(inout: y)
+ {
+ if (y != 2)
+ abort ();
+ y = 3;
+ }
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared (x) depend(in: x)
+ {
+ usleep (13000);
+ if (x != 1)
+ abort ();
+ }
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared (x) depend(out: x)
+ x = 2;
+ }
+ }
+ }
+ }
+ }
+ return 0;
+}
Jakub