This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp4.5] Fix up doacross with dynamic scheduling
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 13 Nov 2015 19:25:45 +0100
- Subject: [gomp4.5] Fix up doacross with dynamic scheduling
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
Guided scheduling doesn't ensure that all assigned chunks have sizes
of multiple of chunk_size, chunk_size for guided scheduling actually means
just minimum size of the chunk. That means we can't divide the IV or
number of iterations by chunk size though, because the precondition for that
is that all chunks assigned to each thread are multiples of the chunk size.
Fixed thusly, tested on x86_64-linux, committed to gomp-4_5-branch.
2015-11-13 Jakub Jelinek <jakub@redhat.com>
* ordered.c (gomp_doacross_init, GOMP_doacross_post,
GOMP_doacross_wait, gomp_doacross_ull_init, GOMP_doacross_ull_post,
GOMP_doacross_ull_wait): For GFS_GUIDED don't divide number of
iterators or IV by chunk size.
* testsuite/libgomp.c/doacross-3.c: New test.
--- libgomp/ordered.c.jj 2015-10-14 10:24:10.000000000 +0200
+++ libgomp/ordered.c 2015-11-13 19:14:11.877005602 +0100
@@ -297,6 +297,8 @@ gomp_doacross_init (unsigned ncounts, lo
if (ws->sched == GFS_STATIC)
num_ents = team->nthreads;
+ else if (ws->sched == GFS_GUIDED)
+ num_ents = counts[0];
else
num_ents = (counts[0] - 1) / chunk_size + 1;
if (num_bits <= MAX_COLLAPSED_BITS)
@@ -366,6 +368,8 @@ GOMP_doacross_post (long *counts)
if (__builtin_expect (ws->sched == GFS_STATIC, 1))
ent = thr->ts.team_id;
+ else if (ws->sched == GFS_GUIDED)
+ ent = counts[0];
else
ent = counts[0] / doacross->chunk_size;
unsigned long *array = (unsigned long *) (doacross->array
@@ -426,6 +430,8 @@ GOMP_doacross_wait (long first, ...)
else
ent = first / ws->chunk_size % thr->ts.team->nthreads;
}
+ else if (ws->sched == GFS_GUIDED)
+ ent = first;
else
ent = first / doacross->chunk_size;
unsigned long *array = (unsigned long *) (doacross->array
@@ -520,6 +526,8 @@ gomp_doacross_ull_init (unsigned ncounts
if (ws->sched == GFS_STATIC)
num_ents = team->nthreads;
+ else if (ws->sched == GFS_GUIDED)
+ num_ents = counts[0];
else
num_ents = (counts[0] - 1) / chunk_size + 1;
if (num_bits <= MAX_COLLAPSED_BITS)
@@ -595,6 +603,8 @@ GOMP_doacross_ull_post (gomp_ull *counts
if (__builtin_expect (ws->sched == GFS_STATIC, 1))
ent = thr->ts.team_id;
+ else if (ws->sched == GFS_GUIDED)
+ ent = counts[0];
else
ent = counts[0] / doacross->chunk_size_ull;
@@ -676,6 +686,8 @@ GOMP_doacross_ull_wait (gomp_ull first,
else
ent = first / ws->chunk_size_ull % thr->ts.team->nthreads;
}
+ else if (ws->sched == GFS_GUIDED)
+ ent = first;
else
ent = first / doacross->chunk_size_ull;
--- libgomp/testsuite/libgomp.c/doacross-3.c.jj 2015-11-13 19:08:22.191960410 +0100
+++ libgomp/testsuite/libgomp.c/doacross-3.c 2015-11-13 19:09:01.626401650 +0100
@@ -0,0 +1,225 @@
+extern void abort (void);
+
+#define N 256
+int a[N], b[N / 16][8][4], c[N / 32][8][8], g[N / 16][8][6];
+volatile int d, e;
+volatile unsigned long long f;
+
+int
+main ()
+{
+ unsigned long long i;
+ int j, k, l, m;
+ #pragma omp parallel private (l)
+ {
+ #pragma omp for schedule(guided, 3) ordered (1) nowait
+ for (i = 1; i < N + f; i++)
+ {
+ #pragma omp atomic write
+ a[i] = 1;
+ #pragma omp ordered depend(sink: i - 1)
+ if (i > 1)
+ {
+ #pragma omp atomic read
+ l = a[i - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ a[i] = 2;
+ if (i < N - 1)
+ {
+ #pragma omp atomic read
+ l = a[i + 1];
+ if (l == 3)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ a[i] = 3;
+ }
+ #pragma omp for schedule(guided) ordered (3) nowait
+ for (i = 3; i < N / 16 - 1 + f; i++)
+ for (j = 0; j < 8; j += 2)
+ for (k = 1; k <= 3; k++)
+ {
+ #pragma omp atomic write
+ b[i][j][k] = 1;
+ #pragma omp ordered depend(sink: i, j - 2, k - 1) \
+ depend(sink: i - 2, j - 2, k + 1)
+ #pragma omp ordered depend(sink: i - 3, j + 2, k - 2)
+ if (j >= 2 && k > 1)
+ {
+ #pragma omp atomic read
+ l = b[i][j - 2][k - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ b[i][j][k] = 2;
+ if (i >= 5 && j >= 2 && k < 3)
+ {
+ #pragma omp atomic read
+ l = b[i - 2][j - 2][k + 1];
+ if (l < 2)
+ abort ();
+ }
+ if (i >= 6 && j < N / 16 - 3 && k == 3)
+ {
+ #pragma omp atomic read
+ l = b[i - 3][j + 2][k - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ b[i][j][k] = 3;
+ }
+#define A(n) int n;
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3)
+ D(m)
+#undef A
+ #pragma omp for collapse (2) ordered(61) schedule(guided, 15)
+ for (i = 2; i < N / 32 + f; i++)
+ for (j = 7; j > 1; j--)
+ for (k = 6; k >= 0; k -= 2)
+#define A(n) for (n = 4; n < 5; n++)
+ D(m)
+#undef A
+ {
+ #pragma omp atomic write
+ c[i][j][k] = 1;
+#define A(n) ,n
+#define E(n) C(n##0) C(n##1) C(n##2) B(n##30) B(n##31) A(n##320) A(n##321)
+ #pragma omp ordered depend (sink: i, j, k + 2 E(m)) \
+ depend (sink:i - 2, j + 1, k - 4 E(m)) \
+ depend(sink: i - 1, j - 2, k - 2 E(m))
+ if (k <= 4)
+ {
+ l = c[i][j][k + 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ c[i][j][k] = 2;
+ if (i >= 4 && j < 7 && k >= 4)
+ {
+ l = c[i - 2][j + 1][k - 4];
+ if (l < 2)
+ abort ();
+ }
+ if (i >= 3 && j >= 4 && k >= 2)
+ {
+ l = c[i - 1][j - 2][k - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend (source)
+ #pragma omp atomic write
+ c[i][j][k] = 3;
+ }
+ #pragma omp for schedule(guided, 5) ordered (3) nowait
+ for (j = 0; j < N / 16 - 1; j++)
+ for (k = 0; k < 8; k += 2)
+ for (i = 3; i <= 5 + f; i++)
+ {
+ #pragma omp atomic write
+ g[j][k][i] = 1;
+ #pragma omp ordered depend(sink: j, k - 2, i - 1) \
+ depend(sink: j - 2, k - 2, i + 1)
+ #pragma omp ordered depend(sink: j - 3, k + 2, i - 2)
+ if (k >= 2 && i > 3)
+ {
+ #pragma omp atomic read
+ l = g[j][k - 2][i - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ g[j][k][i] = 2;
+ if (j >= 2 && k >= 2 && i < 5)
+ {
+ #pragma omp atomic read
+ l = g[j - 2][k - 2][i + 1];
+ if (l < 2)
+ abort ();
+ }
+ if (j >= 3 && k < N / 16 - 3 && i == 5)
+ {
+ #pragma omp atomic read
+ l = g[j - 3][k + 2][i - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ g[j][k][i] = 3;
+ }
+ #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k)
+ for (i = 2; i < f + 3; i++)
+ for (j = d + 1; j >= 0; j--)
+ for (k = 0; k < d; k++)
+ for (l = 0; l < d + 2; l++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+ if (!e)
+ abort ();
+ }
+ #pragma omp single
+ {
+ if (i != 3 || j != -1 || k != 0)
+ abort ();
+ i = 8; j = 9; k = 10;
+ }
+ #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k, m)
+ for (i = 2; i < f + 3; i++)
+ for (j = d + 1; j >= 0; j--)
+ for (k = 0; k < d + 2; k++)
+ for (m = 0; m < d; m++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, m)
+ abort ();
+ }
+ #pragma omp single
+ if (i != 3 || j != -1 || k != 2 || m != 0)
+ abort ();
+ #pragma omp for collapse(2) ordered(4) nowait
+ for (i = 2; i < f + 3; i++)
+ for (j = d; j > 0; j--)
+ for (k = 0; k < d + 2; k++)
+ for (l = 0; l < d + 4; l++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+ if (!e)
+ abort ();
+ }
+ #pragma omp for nowait
+ for (i = 0; i < N; i++)
+ if (a[i] != 3 * (i >= 1))
+ abort ();
+ #pragma omp for collapse(2) private(k) nowait
+ for (i = 0; i < N / 16; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 4; k++)
+ if (b[i][j][k] != 3 * (i >= 3 && i < N / 16 - 1 && (j & 1) == 0 && k >= 1))
+ abort ();
+ #pragma omp for collapse(3) nowait
+ for (i = 0; i < N / 32; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 8; k++)
+ if (c[i][j][k] != 3 * (i >= 2 && j >= 2 && (k & 1) == 0))
+ abort ();
+ #pragma omp for collapse(2) private(k) nowait
+ for (i = 0; i < N / 16; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 6; k++)
+ if (g[i][j][k] != 3 * (i < N / 16 - 1 && (j & 1) == 0 && k >= 3))
+ abort ();
+ }
+ return 0;
+}
Jakub