This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[gomp4.5] Fix up doacross with dynamic scheduling

From: Jakub Jelinek <jakub at redhat dot com>
To: gcc-patches at gcc dot gnu dot org
Date: Fri, 13 Nov 2015 19:25:45 +0100
Subject: [gomp4.5] Fix up doacross with dynamic scheduling
Authentication-results: sourceware.org; auth=none
Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!

Guided scheduling doesn't ensure that the size of every assigned chunk is
a multiple of chunk_size; for guided scheduling, chunk_size is just the
minimum size of a chunk.  That means we can't divide the IV or the number
of iterations by the chunk size, because the precondition for doing so is
that all chunks assigned to each thread are multiples of the chunk size.

Fixed thusly, tested on x86_64-linux, committed to gomp-4_5-branch.

2015-11-13  Jakub Jelinek  <jakub@redhat.com>

	* ordered.c (gomp_doacross_init, GOMP_doacross_post,
	GOMP_doacross_wait, gomp_doacross_ull_init, GOMP_doacross_ull_post,
	GOMP_doacross_ull_wait): For GFS_GUIDED don't divide number of
	iterators or IV by chunk size.
	* testsuite/libgomp.c/doacross-3.c: New test.

--- libgomp/ordered.c.jj	2015-10-14 10:24:10.000000000 +0200
+++ libgomp/ordered.c	2015-11-13 19:14:11.877005602 +0100
@@ -297,6 +297,8 @@ gomp_doacross_init (unsigned ncounts, lo
 
   if (ws->sched == GFS_STATIC)
     num_ents = team->nthreads;
+  else if (ws->sched == GFS_GUIDED)
+    num_ents = counts[0];
   else
     num_ents = (counts[0] - 1) / chunk_size + 1;
   if (num_bits <= MAX_COLLAPSED_BITS)
@@ -366,6 +368,8 @@ GOMP_doacross_post (long *counts)
 
   if (__builtin_expect (ws->sched == GFS_STATIC, 1))
     ent = thr->ts.team_id;
+  else if (ws->sched == GFS_GUIDED)
+    ent = counts[0];
   else
     ent = counts[0] / doacross->chunk_size;
   unsigned long *array = (unsigned long *) (doacross->array
@@ -426,6 +430,8 @@ GOMP_doacross_wait (long first, ...)
       else
 	ent = first / ws->chunk_size % thr->ts.team->nthreads;
     }
+  else if (ws->sched == GFS_GUIDED)
+    ent = first;
   else
     ent = first / doacross->chunk_size;
   unsigned long *array = (unsigned long *) (doacross->array
@@ -520,6 +526,8 @@ gomp_doacross_ull_init (unsigned ncounts
 
   if (ws->sched == GFS_STATIC)
     num_ents = team->nthreads;
+  else if (ws->sched == GFS_GUIDED)
+    num_ents = counts[0];
   else
     num_ents = (counts[0] - 1) / chunk_size + 1;
   if (num_bits <= MAX_COLLAPSED_BITS)
@@ -595,6 +603,8 @@ GOMP_doacross_ull_post (gomp_ull *counts
 
   if (__builtin_expect (ws->sched == GFS_STATIC, 1))
     ent = thr->ts.team_id;
+  else if (ws->sched == GFS_GUIDED)
+    ent = counts[0];
   else
     ent = counts[0] / doacross->chunk_size_ull;
 
@@ -676,6 +686,8 @@ GOMP_doacross_ull_wait (gomp_ull first,
       else
 	ent = first / ws->chunk_size_ull % thr->ts.team->nthreads;
     }
+  else if (ws->sched == GFS_GUIDED)
+    ent = first;
   else
     ent = first / doacross->chunk_size_ull;
 
--- libgomp/testsuite/libgomp.c/doacross-3.c.jj	2015-11-13 19:08:22.191960410 +0100
+++ libgomp/testsuite/libgomp.c/doacross-3.c	2015-11-13 19:09:01.626401650 +0100
@@ -0,0 +1,225 @@
+extern void abort (void);
+
+#define N 256
+int a[N], b[N / 16][8][4], c[N / 32][8][8], g[N / 16][8][6];
+volatile int d, e;
+volatile unsigned long long f;
+
+int
+main ()
+{
+  unsigned long long i;
+  int j, k, l, m;
+  #pragma omp parallel private (l)
+  {
+    #pragma omp for schedule(guided, 3) ordered (1) nowait
+    for (i = 1; i < N + f; i++)
+      {
+	#pragma omp atomic write
+	a[i] = 1;
+	#pragma omp ordered depend(sink: i - 1)
+	if (i > 1)
+	  {
+	    #pragma omp atomic read
+	    l = a[i - 1];
+	    if (l < 2)
+	      abort ();
+	  }
+	#pragma omp atomic write
+	a[i] = 2;
+	if (i < N - 1)
+	  {
+	    #pragma omp atomic read
+	    l = a[i + 1];
+	    if (l == 3)
+	      abort ();
+	  }
+	#pragma omp ordered depend(source)
+	#pragma omp atomic write
+	a[i] = 3;
+      }
+    #pragma omp for schedule(guided) ordered (3) nowait
+    for (i = 3; i < N / 16 - 1 + f; i++)
+      for (j = 0; j < 8; j += 2)
+	for (k = 1; k <= 3; k++)
+	  {
+	    #pragma omp atomic write
+	    b[i][j][k] = 1;
+	    #pragma omp ordered depend(sink: i, j - 2, k - 1) \
+				depend(sink: i - 2, j - 2, k + 1)
+	    #pragma omp ordered depend(sink: i - 3, j + 2, k - 2)
+	    if (j >= 2 && k > 1)
+	      {
+		#pragma omp atomic read
+		l = b[i][j - 2][k - 1];
+		if (l < 2)
+		  abort ();
+	      }
+	    #pragma omp atomic write
+	    b[i][j][k] = 2;
+	    if (i >= 5 && j >= 2 && k < 3)
+	      {
+		#pragma omp atomic read
+		l = b[i - 2][j - 2][k + 1];
+		if (l < 2)
+		  abort ();
+	      }
+	    if (i >= 6 && j < N / 16 - 3 && k == 3)
+	      {
+		#pragma omp atomic read
+		l = b[i - 3][j + 2][k - 2];
+		if (l < 2)
+		  abort ();
+	      }
+	    #pragma omp ordered depend(source)
+	    #pragma omp atomic write
+	    b[i][j][k] = 3;
+	  }
+#define A(n) int n;
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3)
+    D(m)
+#undef A
+    #pragma omp for collapse (2) ordered(61) schedule(guided, 15)
+    for (i = 2; i < N / 32 + f; i++)
+      for (j = 7; j > 1; j--)
+	for (k = 6; k >= 0; k -= 2)
+#define A(n) for (n = 4; n < 5; n++)
+	  D(m)
+#undef A
+	    {
+	      #pragma omp atomic write
+	      c[i][j][k] = 1;
+#define A(n) ,n
+#define E(n) C(n##0) C(n##1) C(n##2) B(n##30) B(n##31) A(n##320) A(n##321)
+	      #pragma omp ordered depend (sink: i, j, k + 2 E(m)) \
+				  depend (sink:i - 2, j + 1, k - 4 E(m)) \
+				  depend(sink: i - 1, j - 2, k - 2 E(m))
+	      if (k <= 4)
+		{
+		  l = c[i][j][k + 2];
+		  if (l < 2)
+		    abort ();
+		}
+	      #pragma omp atomic write
+	      c[i][j][k] = 2;
+	      if (i >= 4 && j < 7 && k >= 4)
+		{
+		  l = c[i - 2][j + 1][k - 4];
+		  if (l < 2)
+		    abort ();
+		}
+	      if (i >= 3 && j >= 4 && k >= 2)
+		{
+		  l = c[i - 1][j - 2][k - 2];
+		  if (l < 2)
+		    abort ();
+		}
+	      #pragma omp ordered depend (source)
+	      #pragma omp atomic write
+	      c[i][j][k] = 3;
+	    }
+    #pragma omp for schedule(guided, 5) ordered (3) nowait
+    for (j = 0; j < N / 16 - 1; j++)
+      for (k = 0; k < 8; k += 2)
+	for (i = 3; i <= 5 + f; i++)
+	  {
+	    #pragma omp atomic write
+	    g[j][k][i] = 1;
+	    #pragma omp ordered depend(sink: j, k - 2, i - 1) \
+				depend(sink: j - 2, k - 2, i + 1)
+	    #pragma omp ordered depend(sink: j - 3, k + 2, i - 2)
+	    if (k >= 2 && i > 3)
+	      {
+		#pragma omp atomic read
+		l = g[j][k - 2][i - 1];
+		if (l < 2)
+		  abort ();
+	      }
+	    #pragma omp atomic write
+	    g[j][k][i] = 2;
+	    if (j >= 2 && k >= 2 && i < 5)
+	      {
+		#pragma omp atomic read
+		l = g[j - 2][k - 2][i + 1];
+		if (l < 2)
+		  abort ();
+	      }
+	    if (j >= 3 && k < N / 16 - 3 && i == 5)
+	      {
+		#pragma omp atomic read
+		l = g[j - 3][k + 2][i - 2];
+		if (l < 2)
+		  abort ();
+	      }
+	    #pragma omp ordered depend(source)
+	    #pragma omp atomic write
+	    g[j][k][i] = 3;
+	  }
+    #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k)
+    for (i = 2; i < f + 3; i++)
+      for (j = d + 1; j >= 0; j--)
+	for (k = 0; k < d; k++)
+	  for (l = 0; l < d + 2; l++)
+	    {
+	      #pragma omp ordered depend (source)
+	      #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+	      if (!e)
+		abort ();
+	    }
+    #pragma omp single
+    {
+      if (i != 3 || j != -1 || k != 0)
+	abort ();
+      i = 8; j = 9; k = 10;
+    }
+    #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k, m)
+    for (i = 2; i < f + 3; i++)
+      for (j = d + 1; j >= 0; j--)
+	for (k = 0; k < d + 2; k++)
+	  for (m = 0; m < d; m++)
+	    {
+	      #pragma omp ordered depend (source)
+	      #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, m)
+	      abort ();
+	    }
+    #pragma omp single
+    if (i != 3 || j != -1 || k != 2 || m != 0)
+      abort ();
+    #pragma omp for collapse(2) ordered(4) nowait
+    for (i = 2; i < f + 3; i++)
+      for (j = d; j > 0; j--)
+	for (k = 0; k < d + 2; k++)
+	  for (l = 0; l < d + 4; l++)
+	    {
+	      #pragma omp ordered depend (source)
+	      #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+	      if (!e)
+		abort ();
+	    }
+    #pragma omp for nowait
+    for (i = 0; i < N; i++)
+      if (a[i] != 3 * (i >= 1))
+	abort ();
+    #pragma omp for collapse(2) private(k) nowait
+    for (i = 0; i < N / 16; i++)
+      for (j = 0; j < 8; j++)
+	for (k = 0; k < 4; k++)
+	  if (b[i][j][k] != 3 * (i >= 3 && i < N / 16 - 1 && (j & 1) == 0 && k >= 1))
+	    abort ();
+    #pragma omp for collapse(3) nowait
+    for (i = 0; i < N / 32; i++)
+      for (j = 0; j < 8; j++)
+	for (k = 0; k < 8; k++)
+	  if (c[i][j][k] != 3 * (i >= 2 && j >= 2 && (k & 1) == 0))
+	    abort ();
+    #pragma omp for collapse(2) private(k) nowait
+    for (i = 0; i < N / 16; i++)
+      for (j = 0; j < 8; j++)
+	for (k = 0; k < 6; k++)
+	  if (g[i][j][k] != 3 * (i < N / 16 - 1 && (j & 1) == 0 && k >= 3))
+	    abort ();
+  }
+  return 0;
+}

	Jakub
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]