This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [openacc] tile, independent, default, private and firstprivate support in c/++


On 11/04/2015 02:24 AM, Jakub Jelinek wrote:
> Have you verified pt.c does the right thing when instantiating the
> OMP_CLAUSE_TILE clause (I mean primarily the TREE_VEC in there)?
> There really should be testcases for that.

Here's a patch which adds template support for the oacc clauses. Is it
ok for trunk?

Cesar
2015-11-04  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/cp/
	* pt.c (tsubst_omp_clauses): Add support for OMP_CLAUSE_{NUM_GANGS,
	NUM_WORKERS,VECTOR_LENGTH,GANG,WORKER,VECTOR,ASYNC,WAIT,TILE,AUTO,
	INDEPENDENT,SEQ}. 
	(tsubst_expr): Add support for OACC_{KERNELS,PARALLEL,LOOP,DATA,
	ENTER_DATA,EXIT_DATA,UPDATE}.

	gcc/testsuite/
	* g++.dg/goacc/template-reduction.C: New test.
	* g++.dg/goacc/template.C: New test.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index e3f55a7..4424596 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -14395,6 +14395,15 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields,
 	case OMP_CLAUSE_PRIORITY:
 	case OMP_CLAUSE_ORDERED:
 	case OMP_CLAUSE_HINT:
+	case OMP_CLAUSE_NUM_GANGS:
+	case OMP_CLAUSE_NUM_WORKERS:
+	case OMP_CLAUSE_VECTOR_LENGTH:
+	case OMP_CLAUSE_GANG:
+	case OMP_CLAUSE_WORKER:
+	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
+	case OMP_CLAUSE_TILE:
 	  OMP_CLAUSE_OPERAND (nc, 0)
 	    = tsubst_expr (OMP_CLAUSE_OPERAND (oc, 0), args, complain, 
 			   in_decl, /*integral_constant_expression_p=*/false);
@@ -14449,6 +14458,9 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields,
 	case OMP_CLAUSE_THREADS:
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
+	case OMP_CLAUSE_SEQ:
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -15197,6 +15209,15 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
       }
       break;
 
+    case OACC_KERNELS:
+    case OACC_PARALLEL:
+      tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, false, args, complain,
+				in_decl);
+      stmt = begin_omp_parallel ();
+      RECUR (OMP_BODY (t));
+      finish_omp_construct (TREE_CODE (t), stmt, tmp);
+      break;
+
     case OMP_PARALLEL:
       r = push_omp_privatization_clauses (OMP_PARALLEL_COMBINED (t));
       tmp = tsubst_omp_clauses (OMP_PARALLEL_CLAUSES (t), false, true,
@@ -15227,6 +15248,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
     case CILK_FOR:
     case OMP_DISTRIBUTE:
     case OMP_TASKLOOP:
+    case OACC_LOOP:
       {
 	tree clauses, body, pre_body;
 	tree declv = NULL_TREE, initv = NULL_TREE, condv = NULL_TREE;
@@ -15235,7 +15257,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
 	int i;
 
 	r = push_omp_privatization_clauses (OMP_FOR_INIT (t) == NULL_TREE);
-	clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false, true,
+	clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false,
+				      TREE_CODE (t) != OACC_LOOP,
 				      args, complain, in_decl);
 	if (OMP_FOR_INIT (t) != NULL_TREE)
 	  {
@@ -15305,9 +15328,11 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
       pop_omp_privatization_clauses (r);
       break;
 
+    case OACC_DATA:
     case OMP_TARGET_DATA:
     case OMP_TARGET:
-      tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, true,
+      tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false,
+				TREE_CODE (t) != OACC_DATA,
 				args, complain, in_decl);
       keep_next_level (true);
       stmt = begin_omp_structured_block ();
@@ -15331,6 +15356,16 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
       add_stmt (t);
       break;
 
+    case OACC_ENTER_DATA:
+    case OACC_EXIT_DATA:
+    case OACC_UPDATE:
+      tmp = tsubst_omp_clauses (OMP_STANDALONE_CLAUSES (t), false, false,
+				args, complain, in_decl);
+      t = copy_node (t);
+      OMP_STANDALONE_CLAUSES (t) = tmp;
+      add_stmt (t);
+      break;
+
     case OMP_ORDERED:
       tmp = tsubst_omp_clauses (OMP_ORDERED_CLAUSES (t), false, true,
 				args, complain, in_decl);
diff --git a/gcc/testsuite/g++.dg/goacc/template-reduction.C b/gcc/testsuite/g++.dg/goacc/template-reduction.C
new file mode 100644
index 0000000..668eeb3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/goacc/template-reduction.C
@@ -0,0 +1,104 @@
+// This error is temporary.  Remove when support is added for these clauses
+// in the middle end.
+// { dg-prune-output "sorry, unimplemented" }
+
+extern void abort ();
+
+const int n = 100;
+
+// Check explicit template copy map
+
+template<typename T> T
+sum (T array[])
+{
+   T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
+  for (int i = 0; i < n; i++)
+    s += array[i];
+
+  return s;
+}
+
+// Check implicit template copy map
+
+template<typename T> T
+sum ()
+{
+  T s = 0;
+  T array[n];
+
+  for (int i = 0; i < n; i++)
+    array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
+  for (int i = 0; i < n; i++)
+    s += array[i];
+
+  return s;
+}
+
+// Check present and async
+
+template<typename T> T
+async_sum (T array[])
+{
+   T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
+   for (int i = 0; i < n; i++)
+     array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+  for (int i = 0; i < n; i++)
+    s += array[i];
+
+#pragma acc wait
+
+  return s;
+}
+
+// Check present and async and an explicit firstprivate
+
+template<typename T> T
+async_sum (int c)
+{
+   T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1)
+  for (int i = 0; i < n; i++)
+    s += i+c;
+
+#pragma acc wait
+
+  return s;
+}
+
+int
+main()
+{
+  int a[n];
+  int result = 0;
+
+  for (int i = 0; i < n; i++)
+    {
+      a[i] = i+1;
+      result += i+1;
+    }
+
+  if (sum (a) != result)
+    abort ();
+
+  if (sum<int> () != result)
+    abort ();
+
+#pragma acc enter data copyin (a)
+  if (async_sum (a) != result)
+    abort ();
+
+  if (async_sum<int> (1) != result)
+    abort ();
+#pragma acc exit data delete (a)
+
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/goacc/template.C b/gcc/testsuite/g++.dg/goacc/template.C
new file mode 100644
index 0000000..f899d6a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/goacc/template.C
@@ -0,0 +1,140 @@
+// This error is temporary.  Remove when support is added for these clauses
+// in the middle end.
+// { dg-prune-output "sorry, unimplemented" }
+
+#pragma acc routine
+template <typename T> T
+accDouble(int val)
+{
+  return val * 2;
+}
+
+template<typename T> T
+oacc_parallel_copy (T a)
+{
+  T b = 0;
+  char w = 1;
+  int x = 2;
+  float y = 3;
+  double z = 4;
+
+#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
+  {
+    b = a;
+  }
+
+#pragma acc parallel num_gangs (a) copy (w, x, y, z)
+  {
+    w = accDouble<char>(w);
+    x = accDouble<int>(x);
+    y = accDouble<float>(y);
+    z = accDouble<double>(z);
+  }
+
+#pragma acc parallel num_gangs (a) if (1)
+  {
+#pragma acc loop auto tile (a, 3)
+  for (int i = 0; i < a; i++)
+    for (int j = 0; j < 5; j++)
+      b = a;
+
+#pragma acc loop seq
+  for (int i = 0; i < a; i++)
+    b = a;
+  }
+
+  T c;
+
+#pragma acc parallel num_workers (10)
+  {
+#pragma acc atomic capture
+    c = b++;
+
+#pragma acc atomic update
+    c++;
+
+#pragma acc atomic read
+    b = a;
+
+#pragma acc atomic write
+    b = a;
+  }
+
+#pragma acc parallel reduction (+:c)
+  {
+    c = 1;
+  }
+
+#pragma acc data if (1) copy (b)
+  {
+    #pragma acc parallel
+    {
+      b = a;
+    }
+  }
+
+#pragma acc enter data copyin (b)
+#pragma acc parallel present (b)
+    {
+      b = a;
+    }
+
+#pragma acc update host (b)
+#pragma acc update self (b)
+#pragma acc update device (b)
+#pragma acc exit data delete (b)
+
+  return b;
+}
+
+template<typename T> T
+oacc_kernels_copy (T a)
+{
+  T b = 0;
+  T c = 0;
+  char w = 1;
+  int x = 2;
+  float y = 3;
+  double z = 4;
+
+#pragma acc kernels copy (w, x, y, z)
+  {
+    w = accDouble<char>(w);
+    x = accDouble<int>(x);
+    y = accDouble<float>(y);
+    z = accDouble<double>(z);
+  }
+
+#pragma acc kernels copyout (b) copyin (a)
+  b = a;
+
+#pragma acc kernels loop reduction (+:c)
+  for (int i = 0; i < 10; i++)
+    {
+      c = 1;
+    }
+
+#pragma acc data if (1) copy (b)
+  {
+    #pragma acc kernels
+    {
+      b = a;
+    }
+  }
+
+#pragma acc enter data copyin (b)
+#pragma acc kernels present (b)
+    {
+      b = a;
+    }
+  return b;
+}
+
+int
+main ()
+{
+  int b = oacc_parallel_copy<int> (5);
+  int c = oacc_kernels_copy<int> (5);
+
+  return b + c;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]