This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] OpenACC gang clause static argument


This patch teaches omp-expand how to use the optional static
argument of the gang clause. In OpenACC this corresponds to setting the
static chunk_size of an acc loop. The idea is that the user requests
that 'n' adjacent loop iterations be executed by the same gang. If two
loops inside the same acc parallel region have the same static argument
and the same number of loop iterations, corresponding iterations of both
loops will be executed by the same gang. This happens for free on nvptx
targets because of the way that threads are scheduled.

I also had to fix a minor bug in the Fortran front end: combined parallel
loops weren't passing gang_expr, gang_static, worker_expr and
vector_expr to the loop handler. This patch fixes that.

This patch has been committed to gomp-4_0-branch.

Cesar
2015-06-11  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/fortran/
	* trans-openmp.c (gfc_trans_omp_clauses): Handle the static
	argument to the gang clause.
	(gfc_trans_oacc_combined_directive): Pass the gang_expr, gang_static,
	worker_expr and vector_expr members to loop_clauses.

	gcc/
	* omp-low.c (extract_omp_for_data): Adjust the chunk_size
	based on the static argument of the gang clause.

	gcc/testsuite/
	* gfortran.dg/goacc/gang-static.f95: New test.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: New test.
	* testsuite/libgomp.oacc-fortran/gang-static-1.f90: New test.

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index f73e366..5a22c49 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -2646,7 +2646,17 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 	  tree gang_var
 	    = gfc_convert_expr_to_tree (block, clauses->gang_expr);
 	  c = build_omp_clause (where.lb->location, OMP_CLAUSE_GANG);
-	  OMP_CLAUSE_GANG_EXPR (c) = gang_var;
+	  if (clauses->gang_static)
+	    OMP_CLAUSE_GANG_STATIC_EXPR (c) = gang_var;
+	  else
+	    OMP_CLAUSE_GANG_EXPR (c) = gang_var;
+	  omp_clauses = gfc_trans_add_clause (c, omp_clauses);
+	}
+      else if (clauses->gang_static)
+	{
+	  /* This corresponds to gang (static: *).  */
+	  c = build_omp_clause (where.lb->location, OMP_CLAUSE_GANG);
+	  OMP_CLAUSE_GANG_STATIC_EXPR (c) = integer_minus_one_node;
 	  omp_clauses = gfc_trans_add_clause (c, omp_clauses);
 	}
       else
@@ -3476,8 +3486,12 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
 	      sizeof (construct_clauses));
       loop_clauses.collapse = construct_clauses.collapse;
       loop_clauses.gang = construct_clauses.gang;
+      loop_clauses.gang_expr = construct_clauses.gang_expr;
+      loop_clauses.gang_static = construct_clauses.gang_static;
       loop_clauses.vector = construct_clauses.vector;
+      loop_clauses.vector_expr = construct_clauses.vector_expr;
       loop_clauses.worker = construct_clauses.worker;
+      loop_clauses.worker_expr = construct_clauses.worker_expr;
       loop_clauses.seq = construct_clauses.seq;
       loop_clauses.independent = construct_clauses.independent;
       construct_clauses.collapse = 0;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e99fd35..12a1d78 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -755,13 +755,26 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
       fd->loop.cond_code = LT_EXPR;
     }
 
-  /* For OpenACC loops, force a chunk size of one, as this avoids the default
-    scheduling where several subsequent iterations are being executed by the
-    same thread.  */
+  /* For OpenACC loops, force a chunk size of one, unless a gang loop
+     contains a static argument.  This avoids the default scheduling where
+     several subsequent iterations are being executed by the same thread.  */
   if (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
     {
       gcc_assert (fd->chunk_size == NULL_TREE);
-      fd->chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
+
+      tree gang = find_omp_clause (gimple_omp_for_clauses (for_stmt),
+				   OMP_CLAUSE_GANG);
+      tree chunk_size = NULL_TREE;
+
+      if (gang)
+	{
+	  chunk_size = OMP_CLAUSE_GANG_STATIC_EXPR (gang);
+	}
+
+      if (!chunk_size || chunk_size == integer_minus_one_node)
+	chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
+
+      fd->chunk_size = chunk_size;
     }
 
   /* Extract the OpenACC gang, worker and vector clauses.  */
diff --git a/gcc/testsuite/gfortran.dg/goacc/gang-static.f95 b/gcc/testsuite/gfortran.dg/goacc/gang-static.f95
new file mode 100644
index 0000000..f515ff2
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/gang-static.f95
@@ -0,0 +1,69 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-omplower" }
+
+program main
+  implicit none
+  integer, parameter :: n = 100
+  integer :: i, a(n), b(n)
+
+  ! Reference data b(i) = i; each loop below must yield a = b + <constant>.
+  b = (/ (i, i = 1, n) /)
+
+  !$acc parallel loop gang (static:*) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 0
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 0, n)
+
+  !$acc parallel loop gang (static:1) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 1
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 1, n)
+
+  !$acc parallel loop gang (static:2) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 2
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 2, n)
+
+  !$acc parallel loop gang (static:5) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 5
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 5, n)
+
+  !$acc parallel loop gang (static:20) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 20
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 20, n)
+
+end program main
+
+! Verify an offloaded loop's result: abort unless a(i) == b(i) + sarg
+! for every i in 1..n.
+subroutine test (a, b, sarg, n)
+  implicit none
+  integer, intent(in) :: n
+  integer, intent(in) :: a(n), b(n), sarg
+
+  if (any (a /= b + sarg)) call abort ()
+end subroutine test
+
+! { dg-final { scan-tree-dump-times "gang\\(static:\\\*\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:1\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:2\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:5\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:20\\)" 1 "omplower" } }
+! { dg-final { cleanup-tree-dump "omplower" } }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c
new file mode 100644
index 0000000..42f4585
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c
@@ -0,0 +1,47 @@
+#include <assert.h>
+
+#define N 100
+
+/* Assert that a[i] == b[i] + sarg for every i in [0, N).  */
+void test(int *a, int *b, int sarg)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    assert (a[i] == b[i] + sarg);
+}
+
+int
+main ()
+{
+  int a[N], b[N];
+  int i;
+  /* Reference data; every loop below must produce a[i] == b[i] + constant.  */
+  for (i = 0; i < N; i++)
+    b[i] = i+1;
+
+#pragma acc parallel loop gang (static:*) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 0;
+
+  test (a, b, 0);
+
+#pragma acc parallel loop gang (static:1) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 1;
+
+  test (a, b, 1);
+
+#pragma acc parallel loop gang (static:5) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 5;
+
+  test (a, b, 5);
+
+#pragma acc parallel loop gang (static:20) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 20;
+
+  test (a, b, 20);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
new file mode 100644
index 0000000..8ff2005
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <assert.h>
+
+#define N 100
+
+/* Check that each run of SARG consecutive elements of A holds the id of
+   the gang (modulo the 10 gangs requested) expected to run that chunk.  */
+void test(int *a, int sarg)
+{
+  int i, j, gang;
+
+  if (sarg == 0)	/* Callers pass 0 for static:*; check it as chunk size 1.  */
+    sarg = 1;
+
+  for (i = 0, gang = 0; i < N; i += sarg, gang++)
+    for (j = 0; j < sarg && i + j < N; j++)
+      assert (a[i + j] == gang % 10);
+}
+
+int
+main ()
+{
+  int a[N];
+  int i;
+  /* Each loop stores __builtin_GOACC_ctaid (0); test compares it to the gang.  */
+#pragma acc parallel loop gang (static:*) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 0);
+
+#pragma acc parallel loop gang (static:1) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 1);
+
+#pragma acc parallel loop gang (static:2) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 2);
+
+#pragma acc parallel loop gang (static:5) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 5);
+
+#pragma acc parallel loop gang (static:20) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 20);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90
new file mode 100644
index 0000000..e562535
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90
@@ -0,0 +1,61 @@
+! { dg-do run }
+
+program main
+  implicit none
+  integer, parameter :: n = 100
+  integer :: i, a(n), b(n)
+
+  ! Reference data b(i) = i; each loop below must yield a = b + <constant>.
+  b = (/ (i, i = 1, n) /)
+
+  !$acc parallel loop gang (static:*) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 0
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 0, n)
+
+  !$acc parallel loop gang (static:1) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 1
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 1, n)
+
+  !$acc parallel loop gang (static:2) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 2
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 2, n)
+
+  !$acc parallel loop gang (static:5) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 5
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 5, n)
+
+  !$acc parallel loop gang (static:20) num_gangs (10)
+  do i = 1, n
+     a(i) = b(i) + 20
+  end do
+  !$acc end parallel loop
+
+  call test (a, b, 20, n)
+
+end program main
+
+! Verify an offloaded loop's result: abort unless a(i) == b(i) + sarg
+! for every i in 1..n.
+subroutine test (a, b, sarg, n)
+  implicit none
+  integer, intent(in) :: n
+  integer, intent(in) :: a(n), b(n), sarg
+
+  if (any (a /= b + sarg)) call abort ()
+end subroutine test

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]