Next set of OpenACC changes: Testsuite
Thomas Schwinge
thomas@codesourcery.com
Tue May 5 09:00:00 GMT 2015
Hi!
On Tue, 05 May 2015 10:54:02 +0200, I wrote:
> In follow-up messages, I'll be posting the separated parts (for easier
> review) of a next set of OpenACC changes that we'd like to commit.
> ChangeLog updates not yet written; will do that before commit, obviously.
gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c | 46 +
.../c-c++-common/goacc-gomp/nesting-fail-1.c | 25 -
gcc/testsuite/c-c++-common/goacc/asyncwait-1.c | 4 +-
gcc/testsuite/c-c++-common/goacc/data-2.c | 12 +-
gcc/testsuite/c-c++-common/goacc/declare-1.c | 84 +
gcc/testsuite/c-c++-common/goacc/declare-2.c | 67 +
gcc/testsuite/c-c++-common/goacc/dtype-1.c | 113 ++
gcc/testsuite/c-c++-common/goacc/dtype-2.c | 31 +
gcc/testsuite/c-c++-common/goacc/host_data-1.c | 14 +
gcc/testsuite/c-c++-common/goacc/host_data-2.c | 14 +
gcc/testsuite/c-c++-common/goacc/host_data-3.c | 16 +
gcc/testsuite/c-c++-common/goacc/host_data-4.c | 15 +
gcc/testsuite/c-c++-common/goacc/kernels-1.c | 6 -
gcc/testsuite/c-c++-common/goacc/kernels-empty.c | 6 +
gcc/testsuite/c-c++-common/goacc/kernels-eternal.c | 11 +
.../c-c++-common/goacc/kernels-noreturn.c | 12 +
gcc/testsuite/c-c++-common/goacc/loop-1.c | 2 -
gcc/testsuite/c-c++-common/goacc/parallel-1.c | 6 -
gcc/testsuite/c-c++-common/goacc/parallel-empty.c | 6 +
.../c-c++-common/goacc/parallel-eternal.c | 11 +
.../c-c++-common/goacc/parallel-noreturn.c | 12 +
gcc/testsuite/c-c++-common/goacc/reduction-1.c | 25 +-
gcc/testsuite/c-c++-common/goacc/reduction-2.c | 22 +-
gcc/testsuite/c-c++-common/goacc/reduction-3.c | 22 +-
gcc/testsuite/c-c++-common/goacc/reduction-4.c | 40 +-
gcc/testsuite/c-c++-common/goacc/routine-1.c | 35 +
gcc/testsuite/c-c++-common/goacc/routine-2.c | 36 +
gcc/testsuite/c-c++-common/goacc/routine-3.c | 52 +
gcc/testsuite/c-c++-common/goacc/routine-4.c | 87 ++
gcc/testsuite/c-c++-common/goacc/tile.c | 26 +
gcc/testsuite/g++.dg/goacc/template-reduction.C | 100 ++
gcc/testsuite/g++.dg/goacc/template.C | 131 ++
gcc/testsuite/gfortran.dg/goacc/cache-1.f95 | 1 -
gcc/testsuite/gfortran.dg/goacc/coarray.f95 | 2 +-
gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 | 1 +
gcc/testsuite/gfortran.dg/goacc/combined_loop.f90 | 2 +-
gcc/testsuite/gfortran.dg/goacc/cray.f95 | 1 -
gcc/testsuite/gfortran.dg/goacc/declare-1.f95 | 3 +-
gcc/testsuite/gfortran.dg/goacc/declare-2.f95 | 44 +
gcc/testsuite/gfortran.dg/goacc/default.f95 | 17 +
gcc/testsuite/gfortran.dg/goacc/dtype-1.f95 | 161 ++
gcc/testsuite/gfortran.dg/goacc/dtype-2.f95 | 39 +
gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 | 2 +-
gcc/testsuite/gfortran.dg/goacc/loop-1.f95 | 1 -
gcc/testsuite/gfortran.dg/goacc/loop-2.f95 | 26 +-
gcc/testsuite/gfortran.dg/goacc/modules.f95 | 55 +
gcc/testsuite/gfortran.dg/goacc/parameter.f95 | 1 -
gcc/testsuite/gfortran.dg/goacc/update.f95 | 5 +
libgomp/testsuite/
.../libgomp.oacc-c++/template-reduction.C | 102 ++
.../libgomp.oacc-c-c++-common/atomic_capture-1.c | 866 +++++++++++
.../libgomp.oacc-c-c++-common/atomic_capture-2.c | 1626 ++++++++++++++++++++
.../libgomp.oacc-c-c++-common/atomic_update-1.c | 760 +++++++++
.../libgomp.oacc-c-c++-common/clauses-1.c | 26 +
.../testsuite/libgomp.oacc-c-c++-common/data-2.c | 44 +-
.../testsuite/libgomp.oacc-c-c++-common/data-3.c | 18 +-
.../libgomp.oacc-c-c++-common/data-clauses.h | 202 +++
.../libgomp.oacc-c-c++-common/kernels-1.c | 182 +--
.../testsuite/libgomp.oacc-c-c++-common/lib-69.c | 70 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-70.c | 79 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-71.c | 55 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-72.c | 60 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-73.c | 64 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-74.c | 91 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-75.c | 89 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-76.c | 88 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-77.c | 91 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-78.c | 91 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-79.c | 91 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-80.c | 95 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-81.c | 106 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-82.c | 43 +-
.../testsuite/libgomp.oacc-c-c++-common/lib-83.c | 22 +-
.../libgomp.oacc-c-c++-common/parallel-1.c | 204 +--
.../libgomp.oacc-c-c++-common/routine-1.c | 40 +
.../libgomp.oacc-c-c++-common/routine-2.c | 41 +
libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h | 44 +-
.../testsuite/libgomp.oacc-c-c++-common/subr.ptx | 222 +--
.../testsuite/libgomp.oacc-c-c++-common/timer.h | 103 --
.../libgomp.oacc-fortran/atomic_capture-1.f90 | 784 ++++++++++
.../libgomp.oacc-fortran/atomic_update-1.f90 | 338 ++++
libgomp/testsuite/libgomp.oacc-fortran/cache-1.f90 | 26 +
.../testsuite/libgomp.oacc-fortran/clauses-1.f90 | 290 ++++
libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 | 231 ++-
libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 | 50 +
libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 | 34 +-
.../testsuite/libgomp.oacc-fortran/data-4-2.f90 | 19 +-
libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 | 19 +-
.../testsuite/libgomp.oacc-fortran/declare-1.f90 | 229 +++
libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 | 24 +
libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 | 28 +
libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 | 79 +
libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90 | 52 +
.../testsuite/libgomp.oacc-fortran/routine-5.f90 | 27 +
diff --git gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
index df45bcf..b38e181 100644
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
@@ -1,4 +1,50 @@
void
+f_acc_data (void)
+{
+#pragma acc data
+ {
+ int i;
+#pragma omp atomic write
+ i = 0;
+ }
+}
+
+void
+f_acc_kernels (void)
+{
+#pragma acc kernels
+ {
+ int i;
+#pragma omp atomic write
+ i = 0;
+ }
+}
+
+void
+f_acc_loop (void)
+{
+ int i;
+
+#pragma acc loop
+ for (i = 0; i < 2; ++i)
+ {
+#pragma omp atomic write
+ i = 0;
+ }
+}
+
+void
+f_acc_parallel (void)
+{
+#pragma acc parallel
+ {
+ int i;
+#pragma omp atomic write
+ i = 0;
+ }
+}
+
+void
f_omp_parallel (void)
{
#pragma omp parallel
diff --git gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c
index 411fb5f..14c6aa6 100644
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c
@@ -216,12 +216,6 @@ f_acc_parallel (void)
#pragma acc parallel
{
-#pragma omp atomic write
- i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
- }
-
-#pragma acc parallel
- {
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
;
}
@@ -286,12 +280,6 @@ f_acc_kernels (void)
#pragma acc kernels
{
-#pragma omp atomic write
- i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
- }
-
-#pragma acc kernels
- {
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
;
}
@@ -356,12 +344,6 @@ f_acc_data (void)
#pragma acc data
{
-#pragma omp atomic write
- i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
- }
-
-#pragma acc data
- {
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
;
}
@@ -434,13 +416,6 @@ f_acc_loop (void)
#pragma acc loop
for (i = 0; i < 2; ++i)
{
-#pragma omp atomic write
- i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
- }
-
-#pragma acc loop
- for (i = 0; i < 2; ++i)
- {
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
;
}
diff --git gcc/testsuite/c-c++-common/goacc/asyncwait-1.c gcc/testsuite/c-c++-common/goacc/asyncwait-1.c
index ccc0106..c6b81b1 100644
--- gcc/testsuite/c-c++-common/goacc/asyncwait-1.c
+++ gcc/testsuite/c-c++-common/goacc/asyncwait-1.c
@@ -116,7 +116,7 @@ f (int N, float *a, float *b)
}
#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 /* { dg-error "expected '\\\)' before end of line" } */
- /* { dg-error "expected integer expression before '\\\)'" "" { target c++ } 118 } */
+ /* { dg-error "expected integer expression list before" "" { target c++ } 118 } */
{
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
@@ -171,7 +171,7 @@ f (int N, float *a, float *b)
#pragma acc wait (1,2,,) /* { dg-error "expected (primary-|)expression before" } */
#pragma acc wait (1 /* { dg-error "expected '\\\)' before end of line" } */
- /* { dg-error "expected integer expression before '\\\)'" "" { target c++ } 173 } */
+ /* { dg-error "expected integer expression list before" "" { target c++ } 173 } */
#pragma acc wait (1,*) /* { dg-error "expected (primary-|)expression before" } */
diff --git gcc/testsuite/c-c++-common/goacc/data-2.c gcc/testsuite/c-c++-common/goacc/data-2.c
index a67d8a4..1043bf8a 100644
--- gcc/testsuite/c-c++-common/goacc/data-2.c
+++ gcc/testsuite/c-c++-common/goacc/data-2.c
@@ -10,12 +10,14 @@ foo (void)
#pragma acc exit data delete (a) if (0)
#pragma acc exit data copyout (b) if (a)
#pragma acc exit data delete (b)
-#pragma acc enter /* { dg-error "expected 'data' in" } */
-#pragma acc exit /* { dg-error "expected 'data' in" } */
+#pragma acc enter /* { dg-error "expected 'data' after" } */
+#pragma acc exit /* { dg-error "expected 'data' after" } */
#pragma acc enter data /* { dg-error "has no data movement clause" } */
-#pragma acc exit data /* { dg-error "has no data movement clause" } */
-#pragma acc enter Data /* { dg-error "invalid pragma before" } */
-#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
+#pragma acc exit data /* { dg-error "no data movement clause" } */
+#pragma acc enter Data /* { dg-error "expected 'data' after" } */
+#pragma acc exit copyout (b) /* { dg-error "expected 'data' after" } */
+#pragma acc enter for /* { dg-error "expected 'data' after" } */
+#pragma acc enter data2 /* { dg-error "expected 'data' after" } */
}
/* { dg-error "has no data movement clause" "" { target *-*-* } 8 } */
diff --git gcc/testsuite/c-c++-common/goacc/declare-1.c gcc/testsuite/c-c++-common/goacc/declare-1.c
new file mode 100644
index 0000000..cf50f02
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/declare-1.c
@@ -0,0 +1,84 @@
+/* Test valid uses of declare directive. */
+/* { dg-do compile } */
+/* { dg-skip-if "not yet" { c++ } } */
+
+int v0;
+#pragma acc declare create(v0)
+
+int v1;
+#pragma acc declare copyin(v1)
+
+int *v2;
+#pragma acc declare deviceptr(v2)
+
+int v3;
+#pragma acc declare device_resident(v3)
+
+int v4;
+#pragma acc declare link(v4)
+
+int v5, v6, v7, v8;
+#pragma acc declare create(v5, v6) copyin(v7, v8)
+
+void
+f (void)
+{
+ int va0;
+#pragma acc declare create(va0)
+
+ int va1;
+#pragma acc declare copyin(va1)
+
+ int *va2;
+#pragma acc declare deviceptr(va2)
+
+ int va3;
+#pragma acc declare device_resident(va3)
+
+ extern int ve0;
+#pragma acc declare create(ve0)
+
+ extern int ve1;
+#pragma acc declare copyin(ve1)
+
+ extern int *ve2;
+#pragma acc declare deviceptr(ve2)
+
+ extern int ve3;
+#pragma acc declare device_resident(ve3)
+
+ extern int ve4;
+#pragma acc declare link(ve4)
+
+ int va5;
+#pragma acc declare copy(va5)
+
+ int va6;
+#pragma acc declare copyout(va6)
+
+ int va7;
+#pragma acc declare present(va7)
+
+ int va8;
+#pragma acc declare present_or_copy(va8)
+
+ int va9;
+#pragma acc declare present_or_copyin(va9)
+
+ int va10;
+#pragma acc declare present_or_copyout(va10)
+
+ int va11;
+#pragma acc declare present_or_create(va11)
+
+ a:
+ {
+ int va0;
+#pragma acc declare create(va0)
+ if (v1)
+ goto a;
+ else
+ goto b;
+ }
+ b:;
+}
diff --git gcc/testsuite/c-c++-common/goacc/declare-2.c gcc/testsuite/c-c++-common/goacc/declare-2.c
new file mode 100644
index 0000000..a2b5d6f
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/declare-2.c
@@ -0,0 +1,67 @@
+/* Test invalid uses of declare directive. */
+/* { dg-do compile } */
+/* { dg-skip-if "not yet" { c++ } } */
+
+#pragma acc declare /* { dg-error "no valid clauses" } */
+
+#pragma acc declare create(undeclared) /* { dg-error "undeclared" } */
+/* { dg-error "no valid clauses" "second error" { target *-*-* } 7 } */
+
+int v0[10];
+#pragma acc declare create(v0[1:3]) /* { dg-error "subarray" } */
+
+int v1;
+#pragma acc declare create(v1, v1) /* { dg-error "more than once" } */
+
+int v2;
+#pragma acc declare create(v2) /* { dg-message "previous directive" } */
+#pragma acc declare copyin(v2) /* { dg-error "more than once" } */
+
+int v3;
+#pragma acc declare copy(v3) /* { dg-error "at file scope" } */
+
+int v4;
+#pragma acc declare copyout(v4) /* { dg-error "at file scope" } */
+
+int v5;
+#pragma acc declare present(v5) /* { dg-error "at file scope" } */
+
+int v6;
+#pragma acc declare present_or_copy(v6) /* { dg-error "at file scope" } */
+
+int v7;
+#pragma acc declare present_or_copyin(v7) /* { dg-error "at file scope" } */
+
+int v8;
+#pragma acc declare present_or_copyout(v8) /* { dg-error "at file scope" } */
+
+int v9;
+#pragma acc declare present_or_create(v9) /* { dg-error "at file scope" } */
+
+void
+f (void)
+{
+ int va0;
+#pragma acc declare link(va0) /* { dg-error "invalid variable" } */
+
+ extern int ve0;
+#pragma acc declare copy(ve0) /* { dg-error "invalid use of" } */
+
+ extern int ve1;
+#pragma acc declare copyout(ve1) /* { dg-error "invalid use of" } */
+
+ extern int ve2;
+#pragma acc declare present(ve2) /* { dg-error "invalid use of" } */
+
+ extern int ve3;
+#pragma acc declare present_or_copy(ve3) /* { dg-error "invalid use of" } */
+
+ extern int ve4;
+#pragma acc declare present_or_copyin(ve4) /* { dg-error "invalid use of" } */
+
+ extern int ve5;
+#pragma acc declare present_or_copyout(ve5) /* { dg-error "invalid use of" } */
+
+ extern int ve6;
+#pragma acc declare present_or_create(ve6) /* { dg-error "invalid use of" } */
+}
diff --git gcc/testsuite/c-c++-common/goacc/dtype-1.c gcc/testsuite/c-c++-common/goacc/dtype-1.c
new file mode 100644
index 0000000..2b4569e
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/dtype-1.c
@@ -0,0 +1,113 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenacc -fdump-tree-omplower" } */
+
+void
+test ()
+{
+ int i1;
+
+ /* ACC PARALLEL DEVICE_TYPE: */
+
+#pragma acc parallel device_type (nVidia) async (1) num_gangs (100) num_workers (100) vector_length (32) wait (1)
+ {
+ }
+
+#pragma acc parallel async (1) num_gangs (1) num_workers (1) vector_length (1) wait (1) dtype (nvidia) async (2) num_gangs (200) num_workers (200) vector_length (64) wait (2)
+ {
+ }
+
+#pragma acc parallel async (1) num_gangs (1) num_workers (1) vector_length (1) wait (1) dtype (nvidia) async (3) num_gangs (300) num_workers (300) vector_length (128) wait (3) device_type (*) async (10) num_gangs (10) num_workers (10) vector_length (10) wait (10)
+ {
+ }
+
+#pragma acc parallel async (1) num_gangs (1) num_workers (1) vector_length (1) wait (1) device_type (nvidia_ptx) async (3) num_gangs (300) num_workers (300) vector_length (128) wait (3) dtype (*) async (10) num_gangs (10) num_workers (10) vector_length (10) wait (10)
+ {
+ }
+
+ /* ACC KERNELS DEVICE_TYPE: */
+
+#pragma acc kernels device_type (nvidia) async wait
+ {
+ }
+
+#pragma acc kernels async wait dtype (nvidia) async (1) wait (1)
+ {
+ }
+
+#pragma acc kernels async wait dtype (nvidia) async (2) wait (2) device_type (*) async (0) wait (0)
+ {
+ }
+
+#pragma acc kernels async wait device_type (nvidia_ptx) async (1) wait (1) dtype (*) async (0) wait (0)
+ {
+ }
+
+ /* ACC LOOP DEVICE_TYPE: */
+
+#pragma acc parallel
+#pragma acc loop dtype (nVidia) gang
+ for (i1 = 1; i1 < 10; i1++)
+ {
+ }
+
+#pragma acc parallel
+#pragma acc loop device_type (nVidia) gang dtype (*) worker
+ for (i1 = 1; i1 < 10; i1++)
+ {
+ }
+
+#pragma acc parallel
+#pragma acc loop dtype (nVidiaGPU) gang device_type (*) vector
+ for (i1 = 1; i1 < 10; i1++)
+ {
+ }
+
+ /* ACC UPDATE DEVICE_TYPE: */
+
+#pragma acc update host(i1) async(1) wait (1)
+
+#pragma acc update host(i1) device_type(nvidia) async(2) wait (2)
+
+#pragma acc update host(i1) async(1) wait (1) device_type(nvidia) async(3) wait (3)
+
+#pragma acc update host(i1) async(4) wait (4) device_type(nvidia) async(5) wait (5) dtype (*) async (6) wait (6)
+
+#pragma acc update host(i1) async(4) wait (4) dtype(nvidia1) async(5) wait (5) dtype (*) async (6) wait (6)
+}
+
+/* ACC ROUTINE DEVICE_TYPE: */
+
+#pragma acc routine (foo1) device_type (nvidia) gang
+#pragma acc routine (foo2) device_type (nvidia) worker
+#pragma acc routine (foo3) dtype (nvidia) vector
+#pragma acc routine (foo5) device_type (nvidia) bind (foo)
+#pragma acc routine (foo6) device_type (nvidia) gang device_type (*) worker
+#pragma acc routine (foo7) dtype (nvidia) worker dtype (*) vector
+#pragma acc routine (foo8) dtype (nvidia) vector device_type (*) gang
+#pragma acc routine (foo9) device_type (nvidia) vector device_type (*) worker
+#pragma acc routine (foo10) device_type (nvidia) bind (foo) dtype (*) gang
+#pragma acc routine (foo11) device_type (gpu) gang device_type (*) worker
+#pragma acc routine (foo12) device_type (gpu) worker dtype (*) worker
+#pragma acc routine (foo13) device_type (gpu) vector device_type (*) worker
+#pragma acc routine (foo14) dtype (gpu) worker dtype (*) worker
+#pragma acc routine (foo15) dtype (gpu) bind (foo) dtype (*) gang
+
+/* { dg-final { scan-tree-dump-times "oacc_parallel wait\\(1\\) vector_length\\(32\\) num_workers\\(100\\) num_gangs\\(100\\) async\\(1\\)" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "oacc_parallel wait\\(1\\) vector_length\\(1\\) num_workers\\(1\\) num_gangs\\(1\\) async\\(1\\) wait\\(2\\) vector_length\\(64\\) num_workers\\(200\\) num_gangs\\(200\\) async\\(2\\)" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "acc_parallel wait\\(1\\) vector_length\\(1\\) num_workers\\(1\\) num_gangs\\(1\\) async\\(1\\) wait\\(3\\) vector_length\\(128\\) num_workers\\(300\\) num_gangs\\(300\\) async\\(3" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "oacc_kernels async\\(-1\\)" 4 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "oacc_kernels async\\(-1\\) wait\\(2\\) async\\(2\\)" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "oacc_kernels async\\(-1\\) wait\\(0\\) async\\(0\\)" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "acc loop gang private\\(i1.0\\) private\\(i1\\)" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "acc loop gang private\\(i1.1\\) private\\(i1\\)" 1 "omplower" } } */
+
+/* { dg-final { scan-tree-dump-times "acc loop vector private\\(i1.2\\) private\\(i1\\)" 1 "omplower" } } */
+
+/* { dg-final { cleanup-tree-dump "omplower" } } */
diff --git gcc/testsuite/c-c++-common/goacc/dtype-2.c gcc/testsuite/c-c++-common/goacc/dtype-2.c
new file mode 100644
index 0000000..b0bd247
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/dtype-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+
+void
+test ()
+{
+ int i1, i2;
+
+ /* ACC PARALLEL DEVICE_TYPE: */
+
+#pragma acc parallel dtype (nVidia) async (1) num_gangs (100) num_workers (100) vector_length (32) wait (1) copy (i1) /* { dg-error "not valid" } */
+ {
+ }
+
+ /* ACC KERNELS DEVICE_TYPE: */
+
+#pragma acc kernels device_type (nvidia) async wait copy (i1) /* { dg-error "not valid" } */
+ {
+ }
+
+ /* ACC LOOP DEVICE_TYPE: */
+
+#pragma acc parallel
+#pragma acc loop device_type (nVidia) gang private (i2) /* { dg-error "not valid" } */
+ for (i1 = 1; i1 < 10; i1++)
+ {
+ }
+
+ /* ACC UPDATE DEVICE_TYPE: */
+
+#pragma acc update host(i1) dtype (nvidia) async(1) wait (1) self (i2) /* { dg-error "not valid" } */
+}
diff --git gcc/testsuite/c-c++-common/goacc/host_data-1.c gcc/testsuite/c-c++-common/goacc/host_data-1.c
new file mode 100644
index 0000000..5e8240f
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/host_data-1.c
@@ -0,0 +1,14 @@
+/* Test valid use of host_data directive. */
+/* { dg-do compile } */
+
+int v0;
+int v1[3][3];
+
+void
+f (void)
+{
+ int v2 = 3;
+#pragma acc host_data use_device(v2, v0, v1)
+ ;
+}
+/* { dg-bogus "sorry, unimplemented: directive not yet implemented" "host_data" { xfail *-*-* } 11 } */
diff --git gcc/testsuite/c-c++-common/goacc/host_data-2.c gcc/testsuite/c-c++-common/goacc/host_data-2.c
new file mode 100644
index 0000000..92fa97b
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/host_data-2.c
@@ -0,0 +1,14 @@
+/* Test invalid use of host_data directive. */
+/* { dg-do compile } */
+
+int v0;
+#pragma acc host_data use_device(v0) /* { dg-error "expected" } */
+
+void
+f (void)
+{
+ int v2 = 3;
+#pragma acc host_data copy(v2) /* { dg-error "not valid for" } */
+ ;
+}
+/* { dg-bogus "sorry, unimplemented: directive not yet implemented" "host_data" { xfail *-*-* } 11 } */
diff --git gcc/testsuite/c-c++-common/goacc/host_data-3.c gcc/testsuite/c-c++-common/goacc/host_data-3.c
new file mode 100644
index 0000000..580f566
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/host_data-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+int main (int argc, char* argv[])
+{
+ int x = 5, y;
+
+ #pragma acc enter data copyin (x)
+ #pragma acc host_data use_device (x)
+ {
+ y = x;
+ }
+ #pragma acc exit data delete (x)
+
+ return y - 5;
+}
+/* { dg-bogus "sorry, unimplemented: directive not yet implemented" "host_data" { xfail *-*-* } 8 } */
diff --git gcc/testsuite/c-c++-common/goacc/host_data-4.c gcc/testsuite/c-c++-common/goacc/host_data-4.c
new file mode 100644
index 0000000..61b1c5b
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/host_data-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+int main (int argc, char* argv[])
+{
+ int x[100];
+
+ #pragma acc enter data copyin (x)
+ /* Specifying an array index is not valid for host_data/use_device. */
+ #pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */
+ ;
+ #pragma acc exit data delete (x)
+
+ return 0;
+}
+/* { dg-bogus "sorry, unimplemented: directive not yet implemented" "host_data" { xfail *-*-* } 9 } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-1.c gcc/testsuite/c-c++-common/goacc/kernels-1.c
deleted file mode 100644
index e91b81c..0000000
--- gcc/testsuite/c-c++-common/goacc/kernels-1.c
+++ /dev/null
@@ -1,6 +0,0 @@
-void
-foo (void)
-{
-#pragma acc kernels
- ;
-}
diff --git gcc/testsuite/c-c++-common/goacc/kernels-empty.c gcc/testsuite/c-c++-common/goacc/kernels-empty.c
new file mode 100644
index 0000000..e91b81c
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/kernels-empty.c
@@ -0,0 +1,6 @@
+void
+foo (void)
+{
+#pragma acc kernels
+ ;
+}
diff --git gcc/testsuite/c-c++-common/goacc/kernels-eternal.c gcc/testsuite/c-c++-common/goacc/kernels-eternal.c
new file mode 100644
index 0000000..edc17d2
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/kernels-eternal.c
@@ -0,0 +1,11 @@
+int
+main (void)
+{
+#pragma acc kernels
+ {
+ while (1)
+ ;
+ }
+
+ return 0;
+}
diff --git gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c
new file mode 100644
index 0000000..1a8cc67
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c
@@ -0,0 +1,12 @@
+int
+main (void)
+{
+
+#pragma acc kernels
+ {
+ __builtin_abort ();
+ }
+
+ return 0;
+}
+
diff --git gcc/testsuite/c-c++-common/goacc/loop-1.c gcc/testsuite/c-c++-common/goacc/loop-1.c
index fea40e0..5e1a248 100644
--- gcc/testsuite/c-c++-common/goacc/loop-1.c
+++ gcc/testsuite/c-c++-common/goacc/loop-1.c
@@ -1,5 +1,3 @@
-/* { dg-skip-if "not yet" { c++ } } */
-
int test1()
{
int i, j, k, b[10];
diff --git gcc/testsuite/c-c++-common/goacc/parallel-1.c gcc/testsuite/c-c++-common/goacc/parallel-1.c
deleted file mode 100644
index a860526..0000000
--- gcc/testsuite/c-c++-common/goacc/parallel-1.c
+++ /dev/null
@@ -1,6 +0,0 @@
-void
-foo (void)
-{
-#pragma acc parallel
- ;
-}
diff --git gcc/testsuite/c-c++-common/goacc/parallel-empty.c gcc/testsuite/c-c++-common/goacc/parallel-empty.c
new file mode 100644
index 0000000..a860526
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/parallel-empty.c
@@ -0,0 +1,6 @@
+void
+foo (void)
+{
+#pragma acc parallel
+ ;
+}
diff --git gcc/testsuite/c-c++-common/goacc/parallel-eternal.c gcc/testsuite/c-c++-common/goacc/parallel-eternal.c
new file mode 100644
index 0000000..51eac76
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/parallel-eternal.c
@@ -0,0 +1,11 @@
+int
+main (void)
+{
+#pragma acc parallel
+ {
+ while (1)
+ ;
+ }
+
+ return 0;
+}
diff --git gcc/testsuite/c-c++-common/goacc/parallel-noreturn.c gcc/testsuite/c-c++-common/goacc/parallel-noreturn.c
new file mode 100644
index 0000000..ec840bd
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/parallel-noreturn.c
@@ -0,0 +1,12 @@
+int
+main (void)
+{
+
+#pragma acc parallel
+ {
+ __builtin_abort ();
+ }
+
+ return 0;
+}
+
diff --git gcc/testsuite/c-c++-common/goacc/reduction-1.c gcc/testsuite/c-c++-common/goacc/reduction-1.c
index 0f50082..8f7c70d 100644
--- gcc/testsuite/c-c++-common/goacc/reduction-1.c
+++ gcc/testsuite/c-c++-common/goacc/reduction-1.c
@@ -22,20 +22,17 @@ main(void)
for (i = 0; i < n; i++)
result *= array[i];
-// result = 0;
-// vresult = 0;
-//
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
+ /* 'max' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (max:result)
+ for (i = 0; i < n; i++)
+ result = result > array[i] ? result : array[i];
+
+ /* 'min' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (min:result)
+ for (i = 0; i < n; i++)
+ result = result < array[i] ? result : array[i];
/* '&' reductions. */
#pragma acc parallel vector_length (vl)
diff --git gcc/testsuite/c-c++-common/goacc/reduction-2.c gcc/testsuite/c-c++-common/goacc/reduction-2.c
index 1f95138..7ff125f 100644
--- gcc/testsuite/c-c++-common/goacc/reduction-2.c
+++ gcc/testsuite/c-c++-common/goacc/reduction-2.c
@@ -22,17 +22,17 @@ main(void)
for (i = 0; i < n; i++)
result *= array[i];
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
+ /* 'max' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (max:result)
+ for (i = 0; i < n; i++)
+ result = result > array[i] ? result : array[i];
+
+ /* 'min' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (min:result)
+ for (i = 0; i < n; i++)
+ result = result < array[i] ? result : array[i];
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
diff --git gcc/testsuite/c-c++-common/goacc/reduction-3.c gcc/testsuite/c-c++-common/goacc/reduction-3.c
index 476e375..cd44559 100644
--- gcc/testsuite/c-c++-common/goacc/reduction-3.c
+++ gcc/testsuite/c-c++-common/goacc/reduction-3.c
@@ -22,17 +22,17 @@ main(void)
for (i = 0; i < n; i++)
result *= array[i];
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
+ /* 'max' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (max:result)
+ for (i = 0; i < n; i++)
+ result = result > array[i] ? result : array[i];
+
+ /* 'min' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (min:result)
+ for (i = 0; i < n; i++)
+ result = result < array[i] ? result : array[i];
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
diff --git gcc/testsuite/c-c++-common/goacc/reduction-4.c gcc/testsuite/c-c++-common/goacc/reduction-4.c
index 73dde86..ec3a9c9 100644
--- gcc/testsuite/c-c++-common/goacc/reduction-4.c
+++ gcc/testsuite/c-c++-common/goacc/reduction-4.c
@@ -16,25 +16,29 @@ main(void)
for (i = 0; i < n; i++)
result += array[i];
- /* Needs support for complex multiplication. */
+ /* '*' reductions. */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+ for (i = 0; i < n; i++)
+ result *= array[i];
-// /* '*' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (*:result)
-// for (i = 0; i < n; i++)
-// result *= array[i];
-//
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
+ /* 'max' reductions. */
+#if 0
+ // error: 'result' has invalid type for 'reduction(max)'
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (max:result)
+ for (i = 0; i < n; i++)
+ result = result > array[i] ? result : array[i];
+#endif
+
+ /* 'min' reductions. */
+#if 0
+ // error: 'result' has invalid type for 'reduction(min)'
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (min:result)
+ for (i = 0; i < n; i++)
+ result = result < array[i] ? result : array[i];
+#endif
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
diff --git gcc/testsuite/c-c++-common/goacc/routine-1.c gcc/testsuite/c-c++-common/goacc/routine-1.c
new file mode 100644
index 0000000..1f89fdb
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/routine-1.c
@@ -0,0 +1,35 @@
+void *malloc (__SIZE_TYPE__);
+void free (void *);
+
+#pragma acc routine
+int
+fact (int n)
+{
+ if (n == 0 || n == 1)
+ return 1;
+
+ return n * fact (n - 1);
+}
+
+int
+main(int argc, char **argv)
+{
+ int *a, i, n = 10;
+
+ a = (int *)malloc (sizeof (int) * n);
+
+#pragma acc parallel copy (a[0:n]) vector_length (5)
+ {
+#pragma acc loop
+ for (i = 0; i < n; i++)
+ a[i] = fact (i);
+ }
+
+ for (i = 0; i < n; i++)
+ if (fact (i) != a[i])
+ return -1;
+
+ free (a);
+
+ return 0;
+}
diff --git gcc/testsuite/c-c++-common/goacc/routine-2.c gcc/testsuite/c-c++-common/goacc/routine-2.c
new file mode 100644
index 0000000..fe2e7f7
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/routine-2.c
@@ -0,0 +1,36 @@
+void *malloc (__SIZE_TYPE__);
+void free (void *);
+
+#pragma acc routine (fact)
+
+int
+fact (int n)
+{
+ if (n == 0 || n == 1)
+ return 1;
+
+ return n * fact (n - 1);
+}
+
+int
+main(int argc, char **argv)
+{
+ int *a, i, n = 10;
+
+ a = (int *)malloc (sizeof (int) * n);
+
+#pragma acc parallel copy (a[0:n]) vector_length (5)
+ {
+#pragma acc loop
+ for (i = 0; i < n; i++)
+ a[i] = fact (i);
+ }
+
+ for (i = 0; i < n; i++)
+ if (fact (i) != a[i])
+ return -1;
+
+ free (a);
+
+ return 0;
+}
diff --git gcc/testsuite/c-c++-common/goacc/routine-3.c gcc/testsuite/c-c++-common/goacc/routine-3.c
new file mode 100644
index 0000000..e35dfc1
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/routine-3.c
@@ -0,0 +1,52 @@
+/* Test valid use of clauses with routine. */
+/* { dg-do compile } */
+
+#pragma acc routine gang
+void
+f1 (void)
+{
+}
+
+#pragma acc routine worker
+void
+f2 (void)
+{
+}
+
+#pragma acc routine vector
+void
+f3 (void)
+{
+}
+
+#pragma acc routine seq
+void
+f4 (void)
+{
+}
+
+#pragma acc routine bind (f4a)
+void
+f5 (void)
+{
+}
+
+typedef int T;
+
+#pragma acc routine bind (T)
+void
+f6 (void)
+{
+}
+
+#pragma acc routine bind ("f7a")
+void
+f7 (void)
+{
+}
+
+#pragma acc routine nohost
+void
+f8 (void)
+{
+}
diff --git gcc/testsuite/c-c++-common/goacc/routine-4.c gcc/testsuite/c-c++-common/goacc/routine-4.c
new file mode 100644
index 0000000..682d901
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/routine-4.c
@@ -0,0 +1,87 @@
+/* Test invalid use of clauses with routine. */
+/* { dg-do compile } */
+
+#pragma acc routine gang worker /* { dg-error "invalid combination" } */
+void
+f1 (void)
+{
+}
+
+#pragma acc routine worker gang /* { dg-error "invalid combination" } */
+void
+f1a (void)
+{
+}
+
+#pragma acc routine gang vector /* { dg-error "invalid combination" } */
+void
+f2 (void)
+{
+}
+
+#pragma acc routine vector gang /* { dg-error "invalid combination" } */
+void
+f2a (void)
+{
+}
+
+#pragma acc routine gang seq /* { dg-error "invalid combination" } */
+void
+f3 (void)
+{
+}
+
+#pragma acc routine seq gang /* { dg-error "invalid combination" } */
+void
+f3a (void)
+{
+}
+
+#pragma acc routine worker vector /* { dg-error "invalid combination" } */
+void
+f4 (void)
+{
+}
+
+#pragma acc routine vector worker /* { dg-error "invalid combination" } */
+void
+f4a (void)
+{
+}
+
+#pragma acc routine worker seq /* { dg-error "invalid combination" } */
+void
+f5 (void)
+{
+}
+
+#pragma acc routine seq worker /* { dg-error "invalid combination" } */
+void
+f5a (void)
+{
+}
+
+#pragma acc routine vector seq /* { dg-error "invalid combination" } */
+void
+f6 (void)
+{
+}
+
+#pragma acc routine seq vector /* { dg-error "invalid combination" } */
+void
+f6a (void)
+{
+}
+
+#pragma acc routine (g1) gang worker /* { dg-error "invalid combination" } */
+#pragma acc routine (g2) worker gang /* { dg-error "invalid combination" } */
+#pragma acc routine (g3) gang vector /* { dg-error "invalid combination" } */
+#pragma acc routine (g4) vector gang /* { dg-error "invalid combination" } */
+#pragma acc routine (g5) gang seq /* { dg-error "invalid combination" } */
+#pragma acc routine (g6) seq gang /* { dg-error "invalid combination" } */
+#pragma acc routine (g7) worker vector /* { dg-error "invalid combination" } */
+#pragma acc routine (g8) vector worker /* { dg-error "invalid combination" } */
+#pragma acc routine (g9) worker seq /* { dg-error "invalid combination" } */
+#pragma acc routine (g10) seq worker /* { dg-error "invalid combination" } */
+#pragma acc routine (g11) vector seq /* { dg-error "invalid combination" } */
+#pragma acc routine (g12) seq vector /* { dg-error "invalid combination" } */
diff --git gcc/testsuite/c-c++-common/goacc/tile.c gcc/testsuite/c-c++-common/goacc/tile.c
new file mode 100644
index 0000000..e127955
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/tile.c
@@ -0,0 +1,26 @@
+int
+main ()
+{
+ int i;
+
+#pragma acc parallel loop tile (10)
+ for (i = 0; i < 100; i++)
+ ;
+
+#pragma acc parallel loop tile (*)
+ for (i = 0; i < 100; i++)
+ ;
+
+#pragma acc parallel loop tile (10, *)
+ for (i = 0; i < 100; i++)
+ ;
+
+#pragma acc parallel loop tile (10, *, i) /* { dg-error "positive constant integer expression" } */
+ for (i = 0; i < 100; i++)
+ ;
+
+ return 0;
+}
+/* { dg-bogus "sorry, unimplemented: Clause not supported yet" "tile" { xfail *-*-* } 6 } */
+/* { dg-bogus "sorry, unimplemented: Clause not supported yet" "tile" { xfail *-*-* } 10 } */
+/* { dg-bogus "sorry, unimplemented: Clause not supported yet" "tile" { xfail *-*-* } 14 } */
diff --git gcc/testsuite/g++.dg/goacc/template-reduction.C gcc/testsuite/g++.dg/goacc/template-reduction.C
new file mode 100644
index 0000000..3618c02
--- /dev/null
+++ gcc/testsuite/g++.dg/goacc/template-reduction.C
@@ -0,0 +1,100 @@
+extern void abort ();
+
+const int n = 100;
+
+// Check explicit template copy map
+
+template<typename T> T
+sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check implicit template copy map
+
+template<typename T> T
+sum ()
+{
+ T s = 0;
+ T array[n];
+
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check present and async
+
+template<typename T> T
+async_sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+#pragma acc wait
+
+ return s;
+}
+
+// Check async and wait without an array: sums i+1 for i in [0, n) so the
+// result matches the 1..n total that main compares against.
+template<typename T> T
+async_sum (int c)
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += i+1;
+
+#pragma acc wait
+
+ return s;
+}
+
+int
+main()
+{
+ int a[n];
+ int result = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = i+1;
+ result += i+1;
+ }
+
+ if (sum (a) != result)
+ abort ();
+
+ if (sum<int> () != result)
+ abort ();
+
+#pragma acc enter data copyin (a)
+ if (async_sum (a) != result)
+ abort ();
+
+ if (async_sum<int> (1) != result)
+ abort ();
+#pragma acc exit data delete (a)
+
+ return 0;
+}
diff --git gcc/testsuite/g++.dg/goacc/template.C gcc/testsuite/g++.dg/goacc/template.C
new file mode 100644
index 0000000..497c004
--- /dev/null
+++ gcc/testsuite/g++.dg/goacc/template.C
@@ -0,0 +1,131 @@
+#include <cstdio>
+
+#pragma acc routine
+template <typename T> T
+accDouble(int val)
+{
+ return val * 2;
+}
+// Exercise OpenACC parallel, loop, atomic, data and update directives on a template parameter.
+template<typename T> T
+oacc_parallel_copy (T a)
+{
+ T b = 0;
+ char w = 1;
+ int x = 2;
+ float y = 3;
+ double z = 4;
+
+#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
+ {
+ b = a;
+ }
+
+#pragma acc parallel num_gangs (a) copy (w, x, y, z)
+ {
+ w = accDouble<char>(w);
+ x = accDouble<int>(x);
+ y = accDouble<float>(y);
+ z = accDouble<double>(z);
+ }
+
+#pragma acc parallel num_gangs (a) if (1)
+ {
+#pragma acc loop independent collapse (2) device_type (nvidia) gang
+ for (int i = 0; i < a; i++)
+ for (int j = 0; j < 5; j++)
+ b = a;
+ }
+
+ T c;
+
+#pragma acc parallel num_workers (10)
+ {
+#pragma acc atomic capture
+ c = b++;
+
+#pragma acc atomic update
+ c++;
+
+#pragma acc atomic read
+ b = a;
+
+#pragma acc atomic write
+ b = a;
+ }
+
+#pragma acc parallel reduction (+:c)
+ {
+ c = 1;
+ }
+
+#pragma acc data if (1) copy (b)
+ {
+ #pragma acc parallel
+ {
+ b = a;
+ }
+ }
+
+#pragma acc enter data copyin (b)
+#pragma acc parallel present (b)
+ {
+ b = a;
+ }
+
+#pragma acc update host (b)
+#pragma acc update self (b)
+#pragma acc update device (b)
+#pragma acc exit data delete (b)
+
+ return b;
+}
+
+template<typename T> T
+oacc_kernels_copy (T a)
+{
+ T b = 0;
+ T c = 0;
+ char w = 1;
+ int x = 2;
+ float y = 3;
+ double z = 4;
+
+#pragma acc kernels copy (w, x, y, z)
+ {
+ w = accDouble<char>(w);
+ x = accDouble<int>(x);
+ y = accDouble<float>(y);
+ z = accDouble<double>(z);
+ }
+
+#pragma acc kernels copyout (b) copyin (a)
+ b = a;
+
+#pragma acc data if (1) copy (b)
+ {
+ #pragma acc kernels
+ {
+ b = a;
+ }
+ }
+
+#pragma acc enter data copyin (b)
+#pragma acc kernels present (b)
+ {
+ b = a;
+ }
+ return b;
+}
+
+int
+main ()
+{
+ int b = oacc_parallel_copy<int> (5);
+ int c = oacc_kernels_copy<int> (5);
+
+ printf ("b = %d\n", b);
+ printf ("c = %d\n", c);
+
+ return 0;
+}
diff --git gcc/testsuite/gfortran.dg/goacc/cache-1.f95 gcc/testsuite/gfortran.dg/goacc/cache-1.f95
index 746cf02..74ab332 100644
--- gcc/testsuite/gfortran.dg/goacc/cache-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/cache-1.f95
@@ -9,4 +9,3 @@ program test
!$acc cache (d)
enddo
end
-! { dg-prune-output "unimplemented" }
diff --git gcc/testsuite/gfortran.dg/goacc/coarray.f95 gcc/testsuite/gfortran.dg/goacc/coarray.f95
index 4f1224e..08e4004 100644
--- gcc/testsuite/gfortran.dg/goacc/coarray.f95
+++ gcc/testsuite/gfortran.dg/goacc/coarray.f95
@@ -32,4 +32,4 @@ contains
!$acc update self (a)
end subroutine oacc1
end module test
-! { dg-prune-output "ACC cache unimplemented" }
+! { dg-bogus "sorry, unimplemented: directive not yet implemented" "host_data" { xfail *-*-* } 19 }
diff --git gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 gcc/testsuite/gfortran.dg/goacc/coarray_2.f90
index f35d4b9..06a2bed 100644
--- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90
+++ gcc/testsuite/gfortran.dg/goacc/coarray_2.f90
@@ -2,6 +2,7 @@
! { dg-additional-options "-fcoarray=lib" }
!
! PR fortran/63861
+! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } }
module test
contains
diff --git gcc/testsuite/gfortran.dg/goacc/combined_loop.f90 gcc/testsuite/gfortran.dg/goacc/combined_loop.f90
index b8be649..58aaa4f 100644
--- gcc/testsuite/gfortran.dg/goacc/combined_loop.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined_loop.f90
@@ -6,7 +6,7 @@ subroutine oacc1()
implicit none
integer :: i
integer :: a
- !$acc parallel loop reduction(+:a) ! { dg-excess-errors "sorry, unimplemented: directive not yet implemented" }
+ !$acc parallel loop reduction(+:a)
do i = 1,5
enddo
end subroutine oacc1
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 8f2c077..28294ee 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -53,4 +53,3 @@ contains
!$acc update self (ptr)
end subroutine oacc1
end module test
-! { dg-prune-output "unimplemented" }
diff --git gcc/testsuite/gfortran.dg/goacc/declare-1.f95 gcc/testsuite/gfortran.dg/goacc/declare-1.f95
index 03540f1..14190a7 100644
--- gcc/testsuite/gfortran.dg/goacc/declare-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/declare-1.f95
@@ -15,6 +15,5 @@ contains
END BLOCK
end function foo
end program test
-! { dg-prune-output "unimplemented" }
-! { dg-final { scan-tree-dump-times "pragma acc declare map\\(force_tofrom:i\\)" 2 "original" } }
+! { dg-final { scan-tree-dump-times "pragma acc data map\\(force_tofrom:i\\)" 2 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
diff --git gcc/testsuite/gfortran.dg/goacc/declare-2.f95 gcc/testsuite/gfortran.dg/goacc/declare-2.f95
new file mode 100644
index 0000000..afdbe2e
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/declare-2.f95
@@ -0,0 +1,44 @@
+
+module amod
+
+contains
+
+subroutine asubr (b)
+ implicit none
+ integer :: b(8)
+
+ !$acc declare copy (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare copyout (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare present (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare present_or_copy (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare present_or_copyin (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare present_or_copyout (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare present_or_create (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare deviceptr (b) ! { dg-error "Invalid clause in module" }
+ !$acc declare create (b) copyin (b) ! { dg-error "present on multiple clauses" }
+
+end subroutine
+
+end module
+
+subroutine bsubr (foo)
+ implicit none
+
+ integer, dimension (:) :: foo
+
+ !$acc declare copy (foo) ! { dg-error "assumed-size dummy array" }
+ !$acc declare copy (foo(1:2)) ! { dg-error "assumed-size dummy array" }
+
+end subroutine
+
+program test
+ integer :: a(8)
+ integer :: b(8)
+ integer :: c(8)
+
+ !$acc declare create (a) copyin (a) ! { dg-error "present on multiple clauses" }
+ !$acc declare copyin (b)
+ !$acc declare copyin (b) ! { dg-error "present on multiple clauses" }
+ !$acc declare copy (c(1:2)) ! { dg-error "Subarray: 'c' not allowed" }
+
+end program
diff --git gcc/testsuite/gfortran.dg/goacc/default.f95 gcc/testsuite/gfortran.dg/goacc/default.f95
new file mode 100644
index 0000000..c1fc52e
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/default.f95
@@ -0,0 +1,17 @@
+! { dg-do compile }
+
+program tile
+ integer i, j, a
+
+ !$acc parallel default (shared) ! { dg-error "Unclassifiable OpenACC directive" }
+ !$acc end parallel ! { dg-error "Unexpected" }
+
+ !$acc parallel default (private) ! { dg-error "Unclassifiable OpenACC directive" }
+ !$acc end parallel ! { dg-error "Unexpected" }
+
+ !$acc parallel default (none)
+ !$acc end parallel
+
+ !$acc parallel default (firstprivate) ! { dg-error "Unclassifiable OpenACC directive" }
+ !$acc end parallel ! { dg-error "Unexpected" }
+end program tile
diff --git gcc/testsuite/gfortran.dg/goacc/dtype-1.f95 gcc/testsuite/gfortran.dg/goacc/dtype-1.f95
new file mode 100644
index 0000000..350e443
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/dtype-1.f95
@@ -0,0 +1,161 @@
+! { dg-do compile }
+! { dg-options "-fopenacc -fdump-tree-omplower" }
+
+program dtype
+ integer i1
+
+!! ACC PARALLEL DEVICE_TYPE:
+
+!$acc parallel dtype (nVidia) async (1) num_gangs (100) &
+!$acc& num_workers (100) vector_length (32) wait (1)
+!$acc end parallel
+
+!$acc parallel async (1) num_gangs (1) num_workers (1) vector_length (1) &
+!$acc& wait (1) device_type (nvidia) async (2) num_gangs (200) &
+!$acc& num_workers (200) vector_length (64) wait (2)
+!$acc end parallel
+
+!$acc parallel async (1) num_gangs (1) num_workers (1) vector_length (1) &
+!$acc& wait (1) device_type (nvidia) async (3) num_gangs (300) &
+!$acc& num_workers (300) vector_length (128) wait (3) dtype (*) &
+!$acc& async (10) num_gangs (10) num_workers (10) vector_length (10) wait (10)
+!$acc end parallel
+
+!$acc parallel async (1) num_gangs (1) num_workers (1) vector_length (1) &
+!$acc& wait (1) dtype (nvidia_ptx) async (3) num_gangs (300) &
+!$acc& num_workers (300) vector_length (128) wait (3) device_type (*) &
+!$acc& async (10) num_gangs (10) num_workers (10) vector_length (10) wait (10)
+!$acc end parallel
+
+!! ACC KERNELS DEVICE_TYPE:
+
+!$acc kernels device_type (nvidia) async wait
+!$acc end kernels
+
+!$acc kernels async wait dtype (nvidia) async (1) wait (1)
+!$acc end kernels
+
+!$acc kernels async wait dtype (nvidia) async (2) wait (2) &
+!$acc& device_type (*) async (0) wait (0)
+!$acc end kernels
+
+!$acc kernels async wait device_type (nvidia_ptx) async (1) wait (1) &
+!$acc& dtype (*) async (0) wait (0)
+!$acc end kernels
+
+!! ACC LOOP DEVICE_TYPE:
+
+!$acc parallel
+!$acc loop device_type (nVidia) gang
+ do i1 = 1, 10
+ end do
+!$acc end parallel
+
+!$acc parallel
+!$acc loop dtype (nVidia) gang dtype (*) worker
+ do i1 = 1, 10
+ end do
+!$acc end parallel
+
+!$acc parallel
+!$acc loop dtype (nVidiaGPU) gang dtype (*) vector
+ do i1 = 1, 10
+ end do
+!$acc end parallel
+
+!! ACC UPDATE:
+
+!$acc update host(i1) async(1) wait (1)
+
+!$acc update host(i1) device_type(nvidia) async(2) wait (2)
+
+!$acc update host(i1) async(1) wait (1) dtype(nvidia) async(3) wait (3)
+
+!$acc update host(i1) async(4) wait (4) device_type(nvidia) async(5) wait (5) &
+!$acc& dtype (*) async (6) wait (6)
+
+!$acc update host(i1) async(4) wait (4) dtype(nvidia1) async(5) &
+!$acc& wait (5) device_type (*) async (6) wait (6)
+end program dtype
+
+!! ACC ROUTINE:
+
+subroutine sr1 ()
+ !$acc routine device_type (nvidia) gang
+end subroutine sr1
+
+subroutine sr2 ()
+ !$acc routine dtype (nvidia) worker
+end subroutine sr2
+
+subroutine sr3 ()
+ !$acc routine device_type (nvidia) vector
+end subroutine sr3
+
+subroutine sr5 ()
+ !$acc routine dtype (nvidia) bind (foo)
+end subroutine sr5
+
+subroutine sr1a ()
+ !$acc routine device_type (nvidia) gang device_type (*) worker
+end subroutine sr1a
+
+subroutine sr2a ()
+ !$acc routine dtype (nvidia) worker dtype (*) vector
+end subroutine sr2a
+
+subroutine sr3a ()
+ !$acc routine dtype (nvidia) vector device_type (*) gang
+end subroutine sr3a
+
+subroutine sr4a ()
+ !$acc routine device_type (nvidia) vector device_type (*) worker
+end subroutine sr4a
+
+subroutine sr5a ()
+ !$acc routine device_type (nvidia) bind (foo) dtype (*) gang
+end subroutine sr5a
+
+subroutine sr1b ()
+ !$acc routine dtype (gpu) gang dtype (*) worker
+end subroutine sr1b
+
+subroutine sr2b ()
+ !$acc routine dtype (gpu) worker device_type (*) worker
+end subroutine sr2b
+
+subroutine sr3b ()
+ !$acc routine device_type (gpu) vector device_type (*) worker
+end subroutine sr3b
+
+subroutine sr4b ()
+ !$acc routine device_type (gpu) worker device_type (*) worker
+end subroutine sr4b
+
+subroutine sr5b ()
+ !$acc routine dtype (gpu) bind (foo) device_type (*) gang
+end subroutine sr5b
+
+! { dg-final { scan-tree-dump-times "oacc_parallel async\\(1\\) wait\\(1\\) num_gangs\\(100\\) num_workers\\(100\\) vector_length\\(32\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_parallel async\\(2\\) wait\\(2\\) num_gangs\\(200\\) num_workers\\(200\\) vector_length\\(64\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_parallel async\\(3\\) wait\\(3\\) num_gangs\\(300\\) num_workers\\(300\\) vector_length\\(128\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_parallel async\\(10\\) wait\\(10\\) num_gangs\\(10\\) num_workers\\(10\\) vector_length\\(10\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_kernels async\\(-1\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_kernels async\\(1\\) wait\\(1\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_kernels async\\(2\\) wait\\(2\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "oacc_kernels async\\(0\\) wait\\(0\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "acc loop private\\(i1\\) gang private\\(i1\\.1\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "acc loop private\\(i1\\) gang private\\(i1\\.2\\)" 1 "omplower" } }
+
+! { dg-final { scan-tree-dump-times "acc loop private\\(i1\\) vector private\\(i1\\.3\\)" 1 "omplower" } }
+
+! { dg-final { cleanup-tree-dump "omplower" } }
diff --git gcc/testsuite/gfortran.dg/goacc/dtype-2.f95 gcc/testsuite/gfortran.dg/goacc/dtype-2.f95
new file mode 100644
index 0000000..a4573e9
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/dtype-2.f95
@@ -0,0 +1,39 @@
+! { dg-do compile }
+
+program dtype
+ integer i1, i2, i3, i4, i5, i6
+
+!! ACC PARALLEL DEVICE_TYPE:
+
+!$acc parallel device_type (nVidia) async (1) num_gangs (100) &
+!$acc& num_workers (100) vector_length (32) wait (1) copy (i1)
+!$acc end parallel
+
+!! ACC KERNELS DEVICE_TYPE:
+
+!$acc kernels dtype (nvidia) async wait copy (i1)
+!$acc end kernels
+
+!! ACC LOOP DEVICE_TYPE:
+
+!$acc parallel
+!$acc loop dtype (nVidia) gang tile (1) private (i1)
+ do i1 = 1, 10
+ end do
+!$acc end parallel
+
+!! ACC UPDATE:
+
+!$acc update host(i1) device_type(nvidia) async(2) wait (2) self(i2)
+
+end program dtype
+
+! { dg-error "Invalid character" "" { target *-*-* } 8 }
+! { dg-error "Unexpected" "" { target *-*-* } 10 }
+
+! { dg-error "Invalid character" "" { target *-*-* } 14 }
+! { dg-error "Unexpected" "" { target *-*-* } 15 }
+
+! { dg-error "Invalid character" "" { target *-*-* } 20 }
+
+! { dg-error "Invalid character" "" { target *-*-* } 27 }
diff --git gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95
index 19e7411..8a25829 100644
--- gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95
+++ gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95
@@ -8,6 +8,6 @@ program test
!$acc host_data use_device(i)
!$acc end host_data
end program test
-! { dg-prune-output "unimplemented" }
+! { dg-bogus "sorry, unimplemented: directive not yet implemented" "host_data" { xfail *-*-* } 8 }
! { dg-final { scan-tree-dump-times "pragma acc host_data use_device\\(i\\)" 1 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index e1b2dfd..817039f 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -168,4 +168,3 @@ subroutine test1
end subroutine test1
end module test
! { dg-prune-output "Deleted" }
-! { dg-prune-output "ACC cache unimplemented" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-2.f95 gcc/testsuite/gfortran.dg/goacc/loop-2.f95
index f85691e..b5e6368 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-2.f95
@@ -66,7 +66,7 @@ program test
!$acc loop seq worker ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc loop gang worker ! { dg-error "conflicts with" }
+ !$acc loop gang worker
DO i = 1,10
ENDDO
@@ -94,10 +94,10 @@ program test
!$acc loop seq vector ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc loop gang vector ! { dg-error "conflicts with" }
+ !$acc loop gang vector
DO i = 1,10
ENDDO
- !$acc loop worker vector ! { dg-error "conflicts with" }
+ !$acc loop worker vector
DO i = 1,10
ENDDO
@@ -239,7 +239,7 @@ program test
!$acc loop seq worker ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc loop gang worker ! { dg-error "conflicts with" }
+ !$acc loop gang worker
DO i = 1,10
ENDDO
@@ -267,10 +267,10 @@ program test
!$acc loop seq vector ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc loop gang vector ! { dg-error "conflicts with" }
+ !$acc loop gang vector
DO i = 1,10
ENDDO
- !$acc loop worker vector ! { dg-error "conflicts with" }
+ !$acc loop worker vector
DO i = 1,10
ENDDO
@@ -392,7 +392,7 @@ program test
!$acc kernels loop seq worker ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc kernels loop gang worker ! { dg-error "conflicts with" }
+ !$acc kernels loop gang worker
DO i = 1,10
ENDDO
@@ -420,10 +420,10 @@ program test
!$acc kernels loop seq vector ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc kernels loop gang vector ! { dg-error "conflicts with" }
+ !$acc kernels loop gang vector
DO i = 1,10
ENDDO
- !$acc kernels loop worker vector ! { dg-error "conflicts with" }
+ !$acc kernels loop worker vector
DO i = 1,10
ENDDO
@@ -544,7 +544,7 @@ program test
!$acc parallel loop seq worker ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc parallel loop gang worker ! { dg-error "conflicts with" }
+ !$acc parallel loop gang worker
DO i = 1,10
ENDDO
@@ -572,10 +572,10 @@ program test
!$acc parallel loop seq vector ! { dg-error "conflicts with" }
DO i = 1,10
ENDDO
- !$acc parallel loop gang vector ! { dg-error "conflicts with" }
+ !$acc parallel loop gang vector
DO i = 1,10
ENDDO
- !$acc parallel loop worker vector ! { dg-error "conflicts with" }
+ !$acc parallel loop worker vector
DO i = 1,10
ENDDO
@@ -646,4 +646,4 @@ program test
!$acc parallel loop gang worker tile(*)
DO i = 1,10
ENDDO
-end
\ No newline at end of file
+end
diff --git gcc/testsuite/gfortran.dg/goacc/modules.f95 gcc/testsuite/gfortran.dg/goacc/modules.f95
new file mode 100644
index 0000000..19a2abe
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/modules.f95
@@ -0,0 +1,55 @@
+! { dg-do compile }
+
+MODULE reduction_test
+
+CONTAINS
+
+SUBROUTINE reduction_kernel(x_min,x_max,y_min,y_max,arr,sum)
+ ! Accumulate arr(x_min:x_max,y_min:y_max) into sum with an OpenACC "+" reduction.
+ IMPLICIT NONE
+
+ INTEGER :: x_min,x_max,y_min,y_max
+ REAL(KIND=8), DIMENSION(x_min-2:x_max+2,y_min-2:y_max+2) :: arr
+ REAL(KIND=8) :: sum
+
+ INTEGER :: j,k
+
+ sum=0.0
+
+!$ACC DATA PRESENT(arr) COPY(sum)
+!$ACC PARALLEL LOOP REDUCTION(+ : sum)
+ DO k=y_min,y_max
+ DO j=x_min,x_max
+ sum=sum+arr(j,k)
+ ENDDO
+ ENDDO
+!$ACC END PARALLEL LOOP
+!$ACC END DATA
+
+END SUBROUTINE reduction_kernel
+
+END MODULE reduction_test
+
+program main
+ use reduction_test
+ implicit none
+ integer :: x_min,x_max,y_min,y_max
+ real(kind=8), dimension(1:10,1:10) :: arr
+ real(kind=8) :: sum
+
+ x_min = 5
+ x_max = 6
+ y_min = 5
+ y_max = 6
+
+ arr(:,:) = 1.0
+
+ sum = 1.0
+
+ !$acc data copy(arr)
+ ! Call the procedure provided by the used module so its explicit interface is checked.
+ call reduction_kernel(x_min,x_max,y_min,y_max,arr,sum)
+
+ !$acc end data
+
+end program
diff --git gcc/testsuite/gfortran.dg/goacc/parameter.f95 gcc/testsuite/gfortran.dg/goacc/parameter.f95
index 1364181..82c25ba 100644
--- gcc/testsuite/gfortran.dg/goacc/parameter.f95
+++ gcc/testsuite/gfortran.dg/goacc/parameter.f95
@@ -29,4 +29,3 @@ contains
!$acc update self (a) ! { dg-error "not a variable" }
end subroutine oacc1
end module test
-! { dg-prune-output "unimplemented" }
diff --git gcc/testsuite/gfortran.dg/goacc/update.f95 gcc/testsuite/gfortran.dg/goacc/update.f95
new file mode 100644
index 0000000..ae23dfc
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/update.f95
@@ -0,0 +1,5 @@
+! { dg-do compile }
+
+program foo
+ !$acc update ! { dg-error "must contain at least one 'device' or 'host/self' clause" }
+end program foo
diff --git libgomp/testsuite/libgomp.oacc-c++/template-reduction.C libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
new file mode 100644
index 0000000..c158b7a
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+
+#include <cstdlib>
+
+const int n = 100;
+
+// Check explicit template copy map
+
+template<typename T> T
+sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop vector_length (10) reduction (+:s) copy (s, array[0:n])
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check implicit template copy map
+
+template<typename T> T
+sum ()
+{
+ T s = 0;
+ T array[n];
+
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop vector_length (10) reduction (+:s) copy (s)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check present and async
+
+template<typename T> T
+async_sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop vector_length (10) async (1) present (array[0:n])
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop vector_length (10) reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+#pragma acc wait
+
+ return s;
+}
+
+// Check present and async
+
+template<typename T> T
+async_sum (int c)
+{
+ T s = 0;
+
+#pragma acc parallel loop vector_length (10) reduction (+:s) copy(s) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += i+1;
+
+#pragma acc wait
+
+ return s;
+}
+
+int
+main()
+{
+ int a[n];
+ int result = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = i+1;
+ result += i+1;
+ }
+
+ if (sum (a) != result)
+ abort ();
+
+ if (sum<int> () != result)
+ abort ();
+
+#pragma acc enter data copyin (a)
+ if (async_sum (a) != result)
+ abort ();
+
+ if (async_sum<int> (1) != result)
+ abort ();
+#pragma acc exit data delete (a)
+
+ return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c
new file mode 100644
index 0000000..ad958cd
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c
@@ -0,0 +1,866 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+ int iexp, igot;
+ long long lexp, lgot;
+ int N = 32;
+ int idata[N];
+ long long ldata[N];
+ float fexp, fgot;
+ float fdata[N];
+ int i;
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ idata[i] = igot++;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ idata[i] = igot--;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ idata[i] = ++igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ idata[i] = --igot;
+ }
+ }
+
+ /* BINOP = + */
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ idata[i] = igot += expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ idata[i] = igot = igot + expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ idata[i] = igot = expr + igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = * */
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ ldata[i] = lgot *= expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ ldata[i] = lgot = lgot * expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ ldata[i] = lgot = expr * lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = - */
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ idata[i] = igot -= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ idata[i] = igot = igot - expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ idata[i] = igot = expr - igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+
+ /* BINOP = / */
+ lgot = 1LL << 32;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ ldata[i] = lgot /= expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << 32;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ ldata[i] = lgot = lgot / expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 2LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL << N;
+
+#pragma acc atomic capture
+ ldata[i] = lgot = expr / lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = & */
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot &= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot = igot & expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot = expr & igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = ^ */
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot ^= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot = igot ^ expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot = expr ^ igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = | */
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot |= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot = igot | expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1 << i;
+
+#pragma acc atomic capture
+ idata[i] = igot = expr | igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = << */
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ ldata[i] = lgot <<= expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL; /* "x = x << expr" capture form: N one-bit left shifts of 1.  */
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ ldata[i] = lgot = lgot << expr; /* Capture into ldata, the array the data clause maps.  */
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ ldata[0] = lgot = expr << lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = >> */
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ ldata[i] = lgot >>= expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ ldata[i] = lgot = lgot >> expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL; /* Shift count for "x = expr >> x"; must stay below the operand width.  */
+ lexp = 1LL << 31; /* (1LL << 32) >> 1.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel
+ {
+ long long expr = 1LL << 32;
+
+#pragma acc atomic capture
+ ldata[0] = lgot = expr >> lgot; /* Executed once: no loop directive on the parallel region.  */
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ fdata[i] = fgot++;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ fdata[i] = fgot--;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ fdata[i] = ++fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic capture
+ fdata[i] = --fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = + */
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot += expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = fgot + expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = expr + fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = * */
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot *= expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = fgot * expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = expr * fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = - */
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot -= expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = fgot - expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 32.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = expr - fgot;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ if (i % 2 == 0)
+ {
+ if (fdata[i] != 31.0)
+ abort ();
+ }
+ else
+ {
+ if (fdata[i] != 1.0)
+ abort ();
+ }
+
+
+ /* BINOP = / */
+ fexp = 1.0;
+ fgot = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot /= expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fexp = 1.0;
+ fgot = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ fdata[i] = fgot = fgot / expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fexp = 1.0;
+ fgot = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel
+ {
+ float expr = 8192.0*8192.0*64.0;
+
+#pragma acc atomic capture
+ fdata[0] = fgot = expr / fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c
new file mode 100644
index 0000000..842f2de
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c
@@ -0,0 +1,1626 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+ int iexp, igot, imax, imin;
+ long long lexp, lgot;
+ int N = 32;
+ int i;
+ int idata[N];
+ long long ldata[N];
+ float fexp, fgot;
+ float fdata[N];
+
+ igot = 1234; /* Exchange test: each iteration captures the old igot and stores its own index.  */
+ iexp = 31; /* NOTE(review): not compared in this test; the check below uses imax/imin.  */
+
+ for (i = 0; i < N; i++)
+ idata[i] = i;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { idata[i] = igot; igot = i; }
+ }
+
+ imax = 0;
+ imin = N;
+
+ for (i = 0; i < N; i++)
+ {
+ imax = idata[i] > imax ? idata[i] : imax; /* Largest captured value.  */
+ imin = idata[i] < imin ? idata[i] : imin; /* Smallest captured value.  */
+ }
+
+ if (imax != 1234 || imin != 0) /* Expect the initial 1234 as maximum and 0 as minimum.  */
+ abort ();
+
+ return 0; /* NOTE(review): unconditional return; every test after this line is unreachable.  */
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { idata[i] = igot; igot++; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { idata[i] = igot; ++igot; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { ++igot; idata[i] = igot; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { igot++; idata[i] = igot; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { idata[i] = igot; igot--; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { idata[i] = igot; --igot; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { --igot; idata[i] = igot; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+#pragma acc atomic capture
+ { igot--; idata[i] = igot; }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = + */
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { idata[i] = igot; igot += expr; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { igot += expr; idata[i] = igot; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { idata[i] = igot; igot = igot + expr; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { idata[i] = igot; igot = expr + igot; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { igot = igot + expr; idata[i] = igot; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+
+ igot = 0;
+ iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { igot = expr + igot; idata[i] = igot; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = * */
+ lgot = 1LL;
+ lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot *= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { lgot *= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot * expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr * lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { lgot = lgot * expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2;
+
+#pragma acc atomic capture
+ { lgot = expr * lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = - */
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { idata[i] = igot; igot -= expr; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { igot -= expr; idata[i] = igot; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 32;
+ iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { idata[i] = igot; igot = igot - expr; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 1;
+ iexp = 1;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { idata[i] = igot; igot = expr - igot; }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ if (i % 2 == 0)
+ {
+ if (idata[i] != 1)
+ abort ();
+ }
+ else
+ {
+ if (idata[i] != 0)
+ abort ();
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 1;
+ iexp = -31;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { igot = igot - expr; idata[i] = igot; }
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 1;
+ iexp = 1;
+
+#pragma acc data copy (igot, idata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = 1;
+
+#pragma acc atomic capture
+ { igot = expr - igot; idata[i] = igot; }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ if (i % 2 == 0)
+ {
+ if (idata[i] != 0)
+ abort ();
+ }
+ else
+ {
+ if (idata[i] != 1)
+ abort ();
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = / */
+ lgot = 1LL << 32;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot /= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << 32;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { lgot /= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << 32;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot / expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 2LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL << N;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr / lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << 32; /* "x = x / expr; v = x" form: halve 2^32 N times.  */
+ lexp = 1LL; /* 2^32 / 2^N with N == 32.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2LL; /* Dividing by 1LL << N would give 0 after the first iteration.  */
+
+#pragma acc atomic capture
+ { lgot = lgot / expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 2LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL << N;
+
+#pragma acc atomic capture
+ { lgot = expr / lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = & */
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot &= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL; /* "x &= expr; v = x" form.  */
+ lexp = 0LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot &= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot & expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr & lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL; /* "x = x & expr; v = x" form.  */
+ lexp = 0LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot = lgot & expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot = expr & lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = ^ */
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1 << i;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot ^= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL; /* "x ^= expr; v = x" form.  */
+ lexp = 0LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot ^= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot ^ expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr ^ lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL; /* "x = x ^ expr; v = x" form.  */
+ lexp = 0LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot = lgot ^ expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = ~0LL;
+ lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot = expr ^ lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = | */
+ lgot = 0LL;
+ lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1 << i;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot |= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 0LL; /* "x |= expr; v = x" form.  */
+ lexp = ~0LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot |= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 0LL;
+ lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot | expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 0LL;
+ lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr | lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 0LL; /* "x = x | expr; v = x" form.  */
+ lexp = ~0LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot = lgot | expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 0LL;
+ lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+ { lgot = expr | lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = << */
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot <<= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL; /* "x <<= expr; v = x" form: N one-bit left shifts of 1.  */
+ lexp = 1LL << N; /* Expected value goes in lexp; 1LL << N does not fit the int iexp.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { lgot <<= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot << expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr << lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { lgot = lgot << expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { lgot = expr << lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = >> */
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot >>= expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << N; /* "x >>= expr; v = x" form: N one-bit right shifts.  */
+ lexp = 1LL; /* Expected value goes in lexp, which the check below reads.  */
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { lgot >>= expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = lgot >> expr; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL << N;
+
+#pragma acc atomic capture
+ { ldata[i] = lgot; lgot = expr >> lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic capture
+ { lgot = lgot >> expr; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot, ldata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL << N;
+
+#pragma acc atomic capture
+ { lgot = expr >> lgot; ldata[i] = lgot; }
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ // FLOAT FLOAT FLOAT
+
+ /* BINOP = + */
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot += expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fgot += expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0; /* Float "v = x; x = x + expr" form: N additions of 1.0.  */
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = fgot + expr; } /* Capture into fdata, the array the data clause maps.  */
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = expr + fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fgot = fgot + expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 0.0;
+ fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fgot = expr + fgot; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = * */
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot *= expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fgot *= expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = fgot * expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = expr * fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0; /* Float "x = x * expr; v = x" form: N doublings of 1.0.  */
+ fexp = 8192.0*8192.0*64.0; /* 2^32, exactly representable as a float.  */
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fgot = fgot * expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 2;
+
+#pragma acc atomic capture
+ { fgot = expr * fgot; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = - */
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot -= expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fgot -= expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 32.0;
+ fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = fgot - expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = expr - fgot; }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ if (i % 2 == 0)
+ {
+ if (fdata[i] != 1.0)
+ abort ();
+ }
+ else
+ {
+ if (fdata[i] != 0.0)
+ abort ();
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = -31.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fgot = fgot - expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1.0;
+ fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fgot = expr - fgot; fdata[i] = fgot; }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ if (i % 2 == 0)
+ {
+ if (fdata[i] != 0.0)
+ abort ();
+ }
+ else
+ {
+ if (fdata[i] != 1.0)
+ abort ();
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = / */
+ fgot = 8192.0*8192.0*64.0;
+ fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot /= expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 8192.0*8192.0*64.0;
+ fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fgot /= expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 8192.0*8192.0*64.0;
+ fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = fgot / expr; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 8192.0*8192.0*64.0; /* Float "v = x; x = expr / x" form, starting from 2^32.  */
+ fexp = 8192.0*8192.0*64.0; /* 1.0 / x toggles between 2^-32 and 2^32; N == 32 applications end on 2^32.  */
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+
+#pragma acc atomic capture
+ { fdata[i] = fgot; fgot = expr / fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 8192.0*8192.0*64.0; /* Float "x = x / expr; v = x" form: halve 2^32 N times.  */
+ fexp = 1.0; /* 2^32 / 2^N with N == 32.  */
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0; /* Dividing by 1LL << N each iteration would underflow to 0.  */
+
+#pragma acc atomic capture
+ { fgot = fgot / expr; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 4.0;
+ fexp = 4.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic capture
+ { fgot = expr / fgot; fdata[i] = fgot; }
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c
new file mode 100644
index 0000000..18ee3aa
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c
@@ -0,0 +1,760 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+ float fexp, fgot; /* Expected and computed values for the floating-point stanzas.  */
+ int iexp, igot; /* Likewise for the bitwise (&, ^, |) stanzas.  */
+ long long lexp, lgot; /* Likewise for the shift (<<, >>) stanzas.  */
+ int N = 32; /* Loop trip count; the int stanzas touch bits 0 .. N-1.  */
+ int i;
+ /* Each stanza seeds *got and *exp, applies one 'acc atomic update' form in a parallel loop, and aborts on mismatch.  */
+ fgot = 1234.0;
+ fexp = 1235.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++) /* Single iteration: exactly one increment is expected.  */
+#pragma acc atomic update
+ fgot++;
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = fgot - N;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic update
+ fgot--;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic update
+ ++fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = fgot - N;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+#pragma acc atomic update
+ --fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = + */
+
+ fgot = 1234.0;
+ fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+#pragma acc atomic update
+ fgot += expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+#pragma acc atomic update
+ fgot = fgot + expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+#pragma acc atomic update
+ fgot = expr + fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0; /* fexp is still 1234 + N from the previous stanza.  */
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 0.5;
+#pragma acc atomic update
+ fgot = (expr + expr) + fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = * */
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp *= 2.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+#pragma acc atomic update
+ fgot *= expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = fexp * 2.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+#pragma acc atomic update
+ fgot = fgot * expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = 2.0 * fexp;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+#pragma acc atomic update
+ fgot = expr * fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0; /* fexp is still the product computed for the previous stanza.  */
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+#pragma acc atomic update
+ fgot = (expr + expr) * fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = - */
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp -= 2.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+#pragma acc atomic update
+ fgot -= expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = fexp - 2.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+#pragma acc atomic update
+ fgot = fgot - expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = 2.0 - fexp;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic update
+ fgot = expr - fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0; /* fexp is carried over: same 'x = 2 - x' recurrence as the previous stanza.  */
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+#pragma acc atomic update
+ fgot = (expr + expr) - fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = / */
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp /= 2.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+#pragma acc atomic update
+ fgot /= expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = fexp / 2.0;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic update
+ fgot = fgot / expr;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = 2.0 / fexp;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 2.0;
+
+#pragma acc atomic update
+ fgot = expr / fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ fgot = 1234.0;
+ fexp = 1234.0;
+
+ for (i = 0; i < N; i++)
+ fexp = 2.0 / fexp;
+
+#pragma acc data copy (fgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ float expr = 1.0;
+#pragma acc atomic update
+ fgot = (expr + expr) / fgot;
+ }
+ }
+
+ if (fexp != fgot)
+ abort ();
+
+ /* BINOP = & */
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = ~(1 << i); /* Every bit set except bit i.  */
+
+#pragma acc atomic update
+ igot &= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = ~(1 << i);
+#pragma acc atomic update
+ igot = igot & expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = ~(1 << i);
+#pragma acc atomic update
+ igot = expr & igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = ~(1 << i);
+ int zero = 0;
+
+#pragma acc atomic update
+ igot = (expr + zero) & igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = ^ */
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+
+#pragma acc atomic update
+ igot ^= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+
+#pragma acc atomic update
+ igot = igot ^ expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+
+#pragma acc atomic update
+ igot = expr ^ igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = ~0;
+ iexp = 0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+ int zero = 0;
+
+#pragma acc atomic update
+ igot = (expr + zero) ^ igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = | */
+
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+
+#pragma acc atomic update
+ igot |= expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+
+#pragma acc atomic update
+ igot = igot | expr;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+
+#pragma acc atomic update
+ igot = expr | igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ igot = 0;
+ iexp = ~0;
+
+#pragma acc data copy (igot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ int expr = (1 << i);
+ int zero = 0;
+
+#pragma acc atomic update
+ igot = (expr + zero) | igot;
+ }
+ }
+
+ if (iexp != igot)
+ abort ();
+
+ /* BINOP = << */
+
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic update
+ lgot <<= expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << N;
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic update
+ lgot = lgot << expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++) /* One iteration: lgot is fed back as the shift count.  */
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic update
+ lgot = expr << lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 2LL;
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL;
+ long long zero = 0LL;
+
+#pragma acc atomic update
+ lgot = (expr + zero) << lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ /* BINOP = >> */
+
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic update
+ lgot >>= expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL << N;
+ lexp = 1LL;
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < N; i++)
+ {
+ long long expr = 1LL;
+
+#pragma acc atomic update
+ lgot = lgot >> expr;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++) /* One iteration: lgot is fed back as the shift count.  */
+ {
+ long long expr = 1LL << N;
+
+#pragma acc atomic update
+ lgot = expr >> lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ lgot = 1LL;
+ lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot)
+ {
+#pragma acc parallel loop
+ for (i = 0; i < 1; i++)
+ {
+ long long expr = 1LL << N;
+ long long zero = 0LL;
+
+#pragma acc atomic update
+ lgot = (expr + zero) >> lgot;
+ }
+ }
+
+ if (lexp != lgot)
+ abort ();
+
+ return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
index 51c0cf5..410c46c 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
@@ -586,6 +586,32 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel pcopy (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
a[i] = 5.0;
b[i] = 7.0;
}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
index f867a66..5fc9fb6 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
@@ -25,7 +25,33 @@ main (int argc, char **argv)
}
#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
-#pragma acc parallel async wait
+#pragma acc parallel async wait present (a[0:N]) present (b[0:N]) present (N)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = a[i];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) delete (N) wait async
+#pragma acc wait
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) async
+#pragma acc enter data copyin (b[0:N]) async wait
+#pragma acc enter data copyin (N) async wait
+#pragma acc parallel async wait present (a[0:N]) present (b[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
b[i] = a[i];
@@ -49,7 +75,7 @@ main (int argc, char **argv)
}
#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1)
-#pragma acc parallel async (1)
+#pragma acc parallel async (1) present (a[0:N]) present (b[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
b[i] = a[i];
@@ -76,17 +102,17 @@ main (int argc, char **argv)
#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (N) async (1)
-#pragma acc parallel async (1) wait (1)
+#pragma acc parallel async (1) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
b[i] = (a[i] * a[i] * a[i]) / a[i];
-#pragma acc parallel async (2) wait (1)
+#pragma acc parallel async (2) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
-#pragma acc parallel async (3) wait (1)
+#pragma acc parallel async (3) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
@@ -120,19 +146,19 @@ main (int argc, char **argv)
#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1)
-#pragma acc parallel async (1) wait (1)
+#pragma acc parallel async (1) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-#pragma acc parallel async (2) wait (1)
+#pragma acc parallel async (2) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-#pragma acc parallel async (3) wait (1)
+#pragma acc parallel async (3) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-#pragma acc parallel wait (1) async (4)
+#pragma acc parallel wait (1) async (4) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
index 747109f..6e173d3 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
@@ -25,7 +25,7 @@ main (int argc, char **argv)
}
#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
-#pragma acc parallel async wait
+#pragma acc parallel async wait present (a[0:N]) present (b[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
b[i] = a[i];
@@ -49,7 +49,7 @@ main (int argc, char **argv)
}
#pragma acc update device (a[0:N], b[0:N]) async (1)
-#pragma acc parallel async (1)
+#pragma acc parallel async (1) present (a[0:N]) present (b[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
b[i] = a[i];
@@ -78,17 +78,17 @@ main (int argc, char **argv)
#pragma acc update device (b[0:N]) async (2)
#pragma acc enter data copyin (c[0:N], d[0:N]) async (3)
-#pragma acc parallel async (1) wait (1,2)
+#pragma acc parallel async (1) wait (1,2) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
b[i] = (a[i] * a[i] * a[i]) / a[i];
-#pragma acc parallel async (2) wait (1,3)
+#pragma acc parallel async (2) wait (1,3) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
-#pragma acc parallel async (3) wait (1,3)
+#pragma acc parallel async (3) wait (1,3) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (N)
#pragma acc loop
for (i = 0; i < N; i++)
d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
@@ -123,19 +123,19 @@ main (int argc, char **argv)
#pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1)
#pragma acc enter data copyin (e[0:N]) async (5)
-#pragma acc parallel async (1) wait (1)
+#pragma acc parallel async (1) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-#pragma acc parallel async (2) wait (1)
+#pragma acc parallel async (2) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-#pragma acc parallel async (3) wait (1)
+#pragma acc parallel async (3) wait (1) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-#pragma acc parallel wait (1,5) async (4)
+#pragma acc parallel wait (1,5) async (4) present (a[0:N]) present (b[0:N]) present (c[0:N]) present (d[0:N]) present (e[0:N]) present (N)
for (int ii = 0; ii < N; ii++)
e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses.h libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses.h
new file mode 100644
index 0000000..8341053
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses.h
@@ -0,0 +1,202 @@
+int i;
+
+int main(void)
+{
+ int j, v; /* j: local counterpart of the global i; v: set to 1 inside every region.  */
+ /* EXEC_DIRECTIVE is expected to be defined by the including test (e.g. as 'kernels').  */
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) copyin (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+#if ACC_MEM_SHARED
+ if (v != 1 || i != 2 || j != 1) /* Shared memory: the region's writes to i, j are expected on the host.  */
+ abort ();
+#else
+ if (v != 1 || i != -1 || j != -2) /* Otherwise copyin is expected to leave the host i, j untouched.  */
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) copyout (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1) /* copyout: the region's values are expected back on the host.  */
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) copy (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1) /* copy: host values go in, the region's values come back.  */
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) create (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+#if ACC_MEM_SHARED
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#else
+ if (v != 1 || i != -1 || j != -2) /* create: no copy back to the host is expected.  */
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1) /* Redundant with the checks below, which also test v.  */
+ abort ();
+#if ACC_MEM_SHARED
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#else
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) present_or_create (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1) /* Redundant with the checks below, which also test v.  */
+ abort ();
+#if ACC_MEM_SHARED
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#else
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+ /* The enclosing data region supplies i, j for the inner 'present' clause.  */
+#pragma acc data copyin (i, j)
+ {
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v) present (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ }
+#if ACC_MEM_SHARED
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#else
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+ /* Same enclosing data region, but the inner directive names no clause for i, j.  */
+#pragma acc data copyin(i, j)
+ {
+#pragma acc EXEC_DIRECTIVE /* copyout */ present_or_copyout (v)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ }
+#if ACC_MEM_SHARED
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#else
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+#endif
+
+ return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
index 3acfdf5..aeb0142 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
@@ -2,183 +2,5 @@
#include <stdlib.h>
-int i;
-
-int main (void)
-{
- int j, v;
-
-#if 0
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != -1 || j != -2)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1)
- abort ();
-#if ACC_MEM_SHARED
- if (i != 2 || j != 1)
- abort ();
-#else
- if (i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1)
- abort ();
-#if ACC_MEM_SHARED
- if (i != 2 || j != 1)
- abort ();
-#else
- if (i != -1 || j != -2)
- abort ();
-#endif
-
-#if 0
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#endif
-
-#if 0
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#endif
-
- return 0;
-}
+#define EXEC_DIRECTIVE kernels
+#include "data-clauses.h"
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
index 5462f12..78c834a 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
@@ -9,46 +9,14 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuModuleLoad (&module, "./subr.ptx");
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuModuleLoad failed: %d\n", r);
@@ -62,20 +30,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
abort ();
@@ -90,31 +44,21 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
- if (acc_async_test (0) != 0)
- {
- fprintf (stderr, "asynchronous operation not running\n");
- abort ();
- }
+ if (acc_async_test (0) == 1)
+ fprintf (stderr, "expected asynchronous operation to be running\n");
- sleep (1);
+ acc_wait_all ();
- if (acc_async_test (0) != 1)
- {
- fprintf (stderr, "found asynchronous operation still running\n");
- abort ();
- }
+ if (acc_async_test (0) == 0)
+ fprintf (stderr, "expected asynchronous operation to be running\n");
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
index 912b266..ee06898 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
@@ -1,6 +1,7 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
+#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -10,47 +11,17 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- const int N = 10;
+ const int N = 3;
int i;
CUstream streams[N];
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t diff;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -65,20 +36,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
for (i = 0; i < N; i++)
{
streams[i] = (CUstream) acc_get_cuda_stream (i);
@@ -96,9 +53,29 @@ main (int argc, char **argv)
abort ();
}
+ gettimeofday (&tv1, NULL);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[0], NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxLaunch failed: %d\n", r);
+ abort ();
+ }
+
+ gettimeofday (&tv2, NULL);
+
+ diff = tv2.tv_sec - tv1.tv_sec;
+
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -112,7 +89,7 @@ main (int argc, char **argv)
}
}
- sleep ((int) (dtime / 1000.0f) + 1);
+ sleep ((diff + 1) * N);
for (i = 0; i < N; i++)
{
@@ -123,10 +100,6 @@ main (int argc, char **argv)
}
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c
index e8584db..8db6bcb 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c
@@ -9,45 +9,13 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -62,20 +30,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
{
@@ -85,7 +39,7 @@ main (int argc, char **argv)
acc_set_cuda_stream (0, stream);
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -98,7 +52,7 @@ main (int argc, char **argv)
abort ();
}
- sleep ((int) (dtime / 1000.0f) + 1);
+ sleep (1);
if (acc_async_test (1) != 1)
{
@@ -106,11 +60,6 @@ main (int argc, char **argv)
abort ();
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
return 0;
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
index e383ba0..920ff5f 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
@@ -10,45 +10,13 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -63,20 +31,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
{
@@ -87,7 +41,7 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -100,7 +54,12 @@ main (int argc, char **argv)
abort ();
}
- sleep ((int) (dtime / 1000.f) + 1);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize () failed: %d\n", r);
+ abort ();
+ }
if (acc_async_test_all () != 1)
{
@@ -108,11 +67,6 @@ main (int argc, char **argv)
abort ();
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
index 43a8b7e..4fa9d5a 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
@@ -1,6 +1,7 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
+#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
@@ -10,47 +11,15 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- const int N = 10;
+ const int N = 6;
int i;
CUstream streams[N];
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -65,20 +34,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
for (i = 0; i < N; i++)
{
streams[i] = (CUstream) acc_get_cuda_stream (i);
@@ -98,13 +53,12 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
-
}
if (acc_async_test_all () != 0)
@@ -113,7 +67,12 @@ main (int argc, char **argv)
abort ();
}
- sleep ((int) (dtime / 1000.0f) + 1);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
if (acc_async_test_all () != 1)
{
@@ -121,11 +80,6 @@ main (int argc, char **argv)
abort ();
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
index 0726ee4..e25d894 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
@@ -5,50 +5,20 @@
#include <stdlib.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -63,19 +33,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
+ gettimeofday (&tv2, NULL);
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
@@ -91,11 +67,9 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
+ gettimeofday (&tv1, NULL);
- start_timer (0);
-
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -104,33 +78,30 @@ main (int argc, char **argv)
acc_wait (0);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (atime < dtime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait (0);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.010 < atime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t2 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
index 1942211..53e285f 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
@@ -6,52 +6,22 @@
#include <stdlib.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime, hitime, lotime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,18 +36,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- N = nprocs;
+ gettimeofday (&tv2, NULL);
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
@@ -93,16 +70,11 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -112,27 +84,18 @@ main (int argc, char **argv)
acc_wait (0);
}
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- hitime = dtime * N;
- hitime += hitime * 0.02;
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- lotime = dtime * N;
- lotime -= lotime * 0.02;
+ t1 *= N;
- if (atime > hitime || atime < lotime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
index 11d9d62..787dcb8 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
@@ -6,52 +6,22 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream *streams;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime, hitime, lotime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,18 +36,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- N = nprocs;
+ gettimeofday (&tv2, NULL);
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
streams = (CUstream *) malloc (N * sizeof (void *));
@@ -98,16 +75,11 @@ main (int argc, char **argv)
abort ();
}
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -117,27 +89,19 @@ main (int argc, char **argv)
acc_wait (i);
}
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- hitime = dtime * N;
- hitime += hitime * 0.02;
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- lotime = dtime * N;
- lotime -= lotime * 0.02;
+ t1 *= N;
- if (atime > hitime || atime < lotime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
free (streams);
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c
index 35a0980..5ef6fd9 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c
@@ -6,50 +6,20 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -64,19 +34,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
+ gettimeofday (&tv2, NULL);
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
@@ -87,11 +63,9 @@ main (int argc, char **argv)
acc_set_cuda_stream (0, stream);
- init_timers (1);
+ gettimeofday (&tv1, NULL);
- start_timer (0);
-
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -100,33 +74,30 @@ main (int argc, char **argv)
acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (atime < dtime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t2 > t1)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.010 < atime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t2 > 1000)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
return 0;
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
index 4f58fb2..0bed15f 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
@@ -6,50 +6,20 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -64,19 +34,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
+ gettimeofday (&tv2, NULL);
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
@@ -92,11 +68,9 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
+ gettimeofday (&tv1, NULL);
- start_timer (0);
-
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -105,33 +79,30 @@ main (int argc, char **argv)
acc_wait_all ();
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (atime < dtime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t2 > (t1 + (t1 * 0.10)))
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait_all ();
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.010 < atime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t2 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
index ef3df13..5723588 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
@@ -6,54 +6,22 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime, hitime, lotime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
-
- devnum = 2;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -68,18 +36,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- N = nprocs;
+ gettimeofday (&tv2, NULL);
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
@@ -105,16 +80,11 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -132,7 +102,7 @@ main (int argc, char **argv)
acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
if (acc_async_test (0) != 1)
abort ();
@@ -140,25 +110,16 @@ main (int argc, char **argv)
if (acc_async_test (1) != 1)
abort ();
- hitime = dtime * N;
- hitime += hitime * 0.02;
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- lotime = dtime * N;
- lotime -= lotime * 0.02;
+ t1 *= N;
- if (atime > hitime || atime < lotime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c
index d521331..ec98119 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c
@@ -6,52 +6,22 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- int N;
+ const int N = 2;
int i;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,38 +36,40 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 200.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- N = nprocs;
+ gettimeofday (&tv2, NULL);
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuStreamCreate failed: %d\n", r);
- abort ();
- }
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
acc_set_cuda_stream (1, stream);
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -109,21 +81,18 @@ main (int argc, char **argv)
acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (atime < dtime)
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ t1 *= N;
+
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
return 0;
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
index d5f18f0..77de9ba 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
@@ -6,52 +6,22 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream *streams, stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,18 +36,25 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
+ gettimeofday (&tv1, NULL);
- dtime = 500.0;
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
- dticks = (unsigned long) (dtime * clkrate);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
- N = nprocs;
+ gettimeofday (&tv2, NULL);
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
streams = (CUstream *) malloc (N * sizeof (void *));
@@ -98,11 +75,6 @@ main (int argc, char **argv)
abort ();
}
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
stream = (CUstream) acc_get_cuda_stream (N);
if (stream != NULL)
abort ();
@@ -117,11 +89,11 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (N, stream))
abort ();
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -129,6 +101,10 @@ main (int argc, char **argv)
}
}
+ gettimeofday (&tv2, NULL);
+
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
acc_wait_all_async (N);
for (i = 0; i <= N; i++)
@@ -145,15 +121,13 @@ main (int argc, char **argv)
abort ();
}
- atime = stop_timer (0);
-
- if (atime < dtime)
+ if ((t1 * N) < t2)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
stream = (CUstream) acc_get_cuda_stream (N + 1);
if (stream != NULL)
@@ -173,35 +147,33 @@ main (int argc, char **argv)
acc_wait (N + 1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.10 < atime)
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t1 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait_all_async (N);
acc_wait (N);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.10 < atime)
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t1 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 3\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
free (streams);
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
index be30a7f..ecf7488 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
@@ -10,46 +10,18 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay2;
CUmodule module;
CUresult r;
- int N;
+ const int N = 32;
int i;
CUstream *streams;
- unsigned long **a, **d_a, *tid, ticks;
+ unsigned long **a, **d_a, *tid;
int nbytes;
- void *kargs[3];
- int clkrate;
- int devnum, nprocs;
+ void *kargs[2];
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,10 +38,6 @@ main (int argc, char **argv)
nbytes = sizeof (int);
- ticks = (unsigned long) (200.0 * clkrate);
-
- N = nprocs;
-
streams = (CUstream *) malloc (N * sizeof (void *));
a = (unsigned long **) malloc (N * sizeof (unsigned long *));
@@ -103,8 +71,7 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
kargs[0] = (void *) &d_a[i];
- kargs[1] = (void *) &ticks;
- kargs[2] = (void *) &tid[i];
+ kargs[1] = (void *) &tid[i];
r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
if (r != CUDA_SUCCESS)
@@ -112,8 +79,6 @@ main (int argc, char **argv)
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
-
- ticks = (unsigned long) (50.0 * clkrate);
}
acc_wait_all_async (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c
index 1c2e52b..51b7ee7 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c
@@ -5,21 +5,19 @@
#include <stdlib.h>
#include <unistd.h>
#include <openacc.h>
-#include "timer.h"
+#include <cuda.h>
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- float atime;
CUstream stream;
CUresult r;
+ struct timeval tv1, tv2;
+ time_t t1;
acc_init (acc_device_nvidia);
- (void) acc_get_device_num (acc_device_nvidia);
-
- init_timers (1);
-
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
abort ();
@@ -34,22 +32,22 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait_all_async (0);
acc_wait (0);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.010 < atime)
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t1 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- fini_timers ();
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c
index fd9df33..9a411fe 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c
@@ -2,205 +2,5 @@
#include <stdlib.h>
-int i;
-
-int main(void)
-{
- int j, v;
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
-#if ACC_MEM_SHARED
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#else
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
-#if ACC_MEM_SHARED
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#else
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1)
- abort ();
-#if ACC_MEM_SHARED
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#else
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1)
- abort ();
-#if ACC_MEM_SHARED
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#else
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-
-#pragma acc data copyin (i, j)
- {
-#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- }
-#if ACC_MEM_SHARED
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#else
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-
-#pragma acc data copyin(i, j)
- {
-#pragma acc parallel /* copyout */ present_or_copyout (v)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- }
-#if ACC_MEM_SHARED
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#else
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- return 0;
-}
+#define EXEC_DIRECTIVE parallel
+#include "data-clauses.h"
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c
new file mode 100644
index 0000000..a27d076
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c
@@ -0,0 +1,40 @@
+/* FIXME: remove -fno-var-tracking and -fno-exceptions from dg-options. */
+
+/* { dg-do run } */
+/* { dg-options "-fno-inline -fno-var-tracking -fno-exceptions" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#pragma acc routine
+int
+fact (int n)
+{
+  /* Recursive factorial; 0! and 1! end the recursion.  */
+  if (n != 0 && n != 1)
+    return fact (n - 1) * n;
+  return 1;
+}
+
+int
+main (void)
+{
+  int *a, i, n = 10;
+
+  a = (int *) malloc (sizeof (int) * n);
+  if (a == NULL)  /* Never hand a null pointer to the copy clause.  */
+    abort ();
+  /* Fill a[] inside the parallel region by calling the routine 'fact'.  */
+#pragma acc parallel copy (a[0:n]) vector_length (5)
+  {
+#pragma acc loop
+    for (i = 0; i < n; i++)
+      a[i] = fact (i);
+  }
+  /* Recompute each value outside the region and compare.  */
+  for (i = 0; i < n; i++)
+    if (a[i] != fact (i))
+      abort ();
+  free (a);
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/routine-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/routine-2.c
new file mode 100644
index 0000000..8ec4d8b
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-2.c
@@ -0,0 +1,41 @@
+/* FIXME: remove -fno-var-tracking and -fno-exceptions from dg-options. */
+
+/* { dg-do run } */
+/* { dg-options "-fno-inline -fno-var-tracking -fno-exceptions" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int fact (int n);
+
+#pragma acc routine (fact)  /* Named form: applies to the 'fact' declared above.  */
+int fact (int n)
+{
+  /* Recursive factorial; 0! and 1! end the recursion.  */
+  if (n == 0 || n == 1)
+    return 1;
+  return n * fact (n - 1);
+}
+
+int
+main (void)
+{
+  int *a, i, n = 10;
+
+  a = (int *) malloc (sizeof (int) * n);
+  if (a == NULL)  /* Never hand a null pointer to the copy clause.  */
+    abort ();
+  /* Fill a[] inside the parallel region by calling the routine 'fact'.  */
+#pragma acc parallel copy (a[0:n]) vector_length (5)
+  {
+#pragma acc loop
+    for (i = 0; i < n; i++)
+      a[i] = fact (i);
+  }
+  /* Recompute each value outside the region and compare.  */
+  for (i = 0; i < n; i++)
+    if (a[i] != fact (i))
+      abort ();
+  free (a);
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
index 9db236c..0c9096f 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
@@ -1,46 +1,24 @@
-#if ACC_DEVICE_TYPE_nvidia
-
#pragma acc routine nohost
-static int clock (void)
-{
- int thetime;
-
- asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime));
-
- return thetime;
-}
-
-#endif
-
void
-delay (unsigned long *d_o, unsigned long delay)
+delay ()  /* Spin through a fixed-length counting loop.  */
{
- int start, ticks;
+  int i, sum = 0;  /* Start at 0: SUM is read before its first write.  */
+  const int N = 500000;  /* Number of loop iterations.  */
- start = clock ();
-
- ticks = 0;
-
- while (ticks < delay)
- ticks = clock () - start;
-
- return;
+  for (i = 0; i < N; i++)
+    sum = sum + 1;
}
+#pragma acc routine nohost
void
-delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid)
+delay2 (unsigned long *d_o, unsigned long tid)  /* Spin, then store TID in d_o[0].  */
{
- int start, ticks;
-
- start = clock ();
-
- ticks = 0;
+  int i, sum = 0;  /* Start at 0: SUM is read before its first write.  */
+  const int N = 500000;  /* Number of loop iterations.  */
- while (ticks < delay)
- ticks = clock () - start;
+  for (i = 0; i < N; i++)
+    sum = sum + 1;
d_o[0] = tid;
-
- return;
}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
index 6f748fc..88b63bf 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
@@ -1,148 +1,90 @@
-// BEGIN PREAMBLE
- .version 3.1
- .target sm_30
+ .version 3.1
+ .target sm_30
.address_size 64
-// END PREAMBLE
-// BEGIN FUNCTION DEF: clock
-.func (.param.u32 %out_retval)clock
-{
-.reg.u32 %retval;
- .reg.u64 %hr10;
- .reg.u32 %r22;
- .reg.u32 %r23;
- .reg.u32 %r24;
- .local.align 8 .b8 %frame[8];
- // #APP
-// 7 "subr.c" 1
- mov.u32 %r24, %clock;
-// 0 "" 2
- // #NO_APP
- st.local.u32 [%frame], %r24;
- ld.local.u32 %r22, [%frame];
- mov.u32 %r23, %r22;
- mov.u32 %retval, %r23;
- st.param.u32 [%out_retval], %retval;
- ret;
- }
-// END FUNCTION DEF
-// BEGIN GLOBAL FUNCTION DEF: delay
-.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2)
-{
- .reg.u64 %ar1;
- .reg.u64 %ar2;
- .reg.u64 %hr10;
- .reg.u64 %r22;
- .reg.u32 %r23;
- .reg.u64 %r24;
- .reg.u64 %r25;
- .reg.u32 %r26;
- .reg.u32 %r27;
- .reg.u32 %r28;
- .reg.u32 %r29;
- .reg.u32 %r30;
- .reg.u64 %r31;
- .reg.pred %r32;
- .local.align 8 .b8 %frame[24];
- ld.param.u64 %ar1, [%in_ar1];
- ld.param.u64 %ar2, [%in_ar2];
- mov.u64 %r24, %ar1;
- st.u64 [%frame+8], %r24;
- mov.u64 %r25, %ar2;
- st.local.u64 [%frame+16], %r25;
+ .visible .entry delay
{
- .param.u32 %retval_in;
- {
- call (%retval_in), clock;
- }
- ld.param.u32 %r26, [%retval_in];
-}
- st.local.u32 [%frame+4], %r26;
- mov.u32 %r27, 0;
- st.local.u32 [%frame], %r27;
- bra $L4;
-$L5:
- {
- .param.u32 %retval_in;
- {
- call (%retval_in), clock;
- }
- ld.param.u32 %r28, [%retval_in];
-}
- mov.u32 %r23, %r28;
- ld.local.u32 %r30, [%frame+4];
- sub.u32 %r29, %r23, %r30;
- st.local.u32 [%frame], %r29;
-$L4:
- ld.local.s32 %r22, [%frame];
- ld.local.u64 %r31, [%frame+16];
- setp.lo.u64 %r32,%r22,%r31;
- @%r32 bra $L5;
+ .reg .u64 %hr10;
+ .reg .u32 %r22;
+ .reg .u32 %r23;
+ .reg .u32 %r24;
+ .reg .u32 %r25;
+ .reg .u32 %r26;
+ .reg .u32 %r27;
+ .reg .u32 %r28;
+ .reg .u32 %r29;
+ .reg .pred %r30;
+ .reg .u64 %frame;
+ .local .align 8 .b8 %farray[16];
+ cvta.local.u64 %frame,%farray;
+ mov.u32 %r22,500000;
+ st.u32 [%frame+8],%r22;
+ mov.u32 %r23,0;
+ st.u32 [%frame],%r23;
+ bra $L2;
+ $L3:
+ ld.u32 %r25,[%frame+4];
+ add.u32 %r24,%r25,1;
+ st.u32 [%frame+4],%r24;
+ ld.u32 %r27,[%frame];
+ add.u32 %r26,%r27,1;
+ st.u32 [%frame],%r26;
+ $L2:
+ ld.u32 %r28,[%frame];
+ ld.u32 %r29,[%frame+8];
+ setp.lt.s32 %r30,%r28,%r29;
+ @%r30
+ bra $L3;
ret;
}
-// END FUNCTION DEF
-// BEGIN GLOBAL FUNCTION DEF: delay2
-.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3)
-{
- .reg.u64 %ar1;
- .reg.u64 %ar2;
- .reg.u64 %ar3;
- .reg.u64 %hr10;
- .reg.u64 %r22;
- .reg.u32 %r23;
- .reg.u64 %r24;
- .reg.u64 %r25;
- .reg.u64 %r26;
- .reg.u32 %r27;
- .reg.u32 %r28;
- .reg.u32 %r29;
- .reg.u32 %r30;
- .reg.u32 %r31;
- .reg.u64 %r32;
- .reg.pred %r33;
- .reg.u64 %r34;
- .reg.u64 %r35;
- .local.align 8 .b8 %frame[32];
- ld.param.u64 %ar1, [%in_ar1];
- ld.param.u64 %ar2, [%in_ar2];
- ld.param.u64 %ar3, [%in_ar3];
- mov.u64 %r24, %ar1;
- st.local.u64 [%frame+8], %r24;
- mov.u64 %r25, %ar2;
- st.local.u64 [%frame+16], %r25;
- mov.u64 %r26, %ar3;
- st.local.u64 [%frame+24], %r26;
- {
- .param.u32 %retval_in;
- {
- call (%retval_in), clock;
- }
- ld.param.u32 %r27, [%retval_in];
-}
- st.local.u32 [%frame+4], %r27;
- mov.u32 %r28, 0;
- st.local.u32 [%frame], %r28;
- bra $L8;
-$L9:
- {
- .param.u32 %retval_in;
+
+ .visible .entry delay2 (.param .u64 %in_ar1, .param .u64 %in_ar2)
{
- call (%retval_in), clock;
- }
- ld.param.u32 %r29, [%retval_in];
-}
- mov.u32 %r23, %r29;
- ld.local.u32 %r31, [%frame+4];
- sub.u32 %r30, %r23, %r31;
- st.local.u32 [%frame], %r30;
-$L8:
- ld.local.s32 %r22, [%frame];
- ld.local.u64 %r32, [%frame+16];
- setp.lo.u64 %r33,%r22,%r32;
- @%r33 bra $L9;
- ld.local.u64 %r34, [%frame+8];
- ld.local.u64 %r35, [%frame+24];
- st.u64 [%r34], %r35;
+ .reg .u64 %ar1;
+ .reg .u64 %ar2;
+ .reg .u64 %hr10;
+ .reg .u64 %r22;
+ .reg .u64 %r23;
+ .reg .u32 %r24;
+ .reg .u32 %r25;
+ .reg .u32 %r26;
+ .reg .u32 %r27;
+ .reg .u32 %r28;
+ .reg .u32 %r29;
+ .reg .u32 %r30;
+ .reg .u32 %r31;
+ .reg .pred %r32;
+ .reg .u64 %r33;
+ .reg .u64 %r34;
+ .reg .u64 %frame;
+ .local .align 8 .b8 %farray[32];
+ cvta.local.u64 %frame,%farray;
+ ld.param.u64 %ar1,[%in_ar1];
+ ld.param.u64 %ar2,[%in_ar2];
+ mov.u64 %r22,%ar1;
+ st.u64 [%frame+16],%r22;
+ mov.u64 %r23,%ar2;
+ st.u64 [%frame+24],%r23;
+ mov.u32 %r24,500000;
+ st.u32 [%frame+8],%r24;
+ mov.u32 %r25,0;
+ st.u32 [%frame],%r25;
+ bra $L5;
+ $L6:
+ ld.u32 %r27,[%frame+4];
+ add.u32 %r26,%r27,1;
+ st.u32 [%frame+4],%r26;
+ ld.u32 %r29,[%frame];
+ add.u32 %r28,%r29,1;
+ st.u32 [%frame],%r28;
+ $L5:
+ ld.u32 %r30,[%frame];
+ ld.u32 %r31,[%frame+8];
+ setp.lt.s32 %r32,%r30,%r31;
+ @%r32
+ bra $L6;
+ ld.u64 %r33,[%frame+16];
+ ld.u64 %r34,[%frame+24];
+ st.u64 [%r33],%r34;
ret;
}
-// END FUNCTION DEF
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
deleted file mode 100644
index 53749da..0000000
--- libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
+++ /dev/null
@@ -1,103 +0,0 @@
-
-#include <stdio.h>
-#include <cuda.h>
-
-static int _Tnum_timers;
-static CUevent *_Tstart_events, *_Tstop_events;
-static CUstream _Tstream;
-
-void
-init_timers (int ntimers)
-{
- int i;
- CUresult r;
-
- _Tnum_timers = ntimers;
-
- _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
- _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
-
- r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuStreamCreate failed: %d\n", r);
- abort ();
- }
-
- for (i = 0; i < _Tnum_timers; i++)
- {
- r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventCreate failed: %d\n", r);
- abort ();
- }
-
- r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventCreate failed: %d\n", r);
- abort ();
- }
- }
-}
-
-void
-fini_timers (void)
-{
- int i;
-
- for (i = 0; i < _Tnum_timers; i++)
- {
- cuEventDestroy (_Tstart_events[i]);
- cuEventDestroy (_Tstop_events[i]);
- }
-
- cuStreamDestroy (_Tstream);
-
- free (_Tstart_events);
- free (_Tstop_events);
-}
-
-void
-start_timer (int timer)
-{
- CUresult r;
-
- r = cuEventRecord (_Tstart_events[timer], _Tstream);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventRecord failed: %d\n", r);
- abort ();
- }
-}
-
-float
-stop_timer (int timer)
-{
- CUresult r;
- float etime;
-
- r = cuEventRecord (_Tstop_events[timer], _Tstream);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventRecord failed: %d\n", r);
- abort ();
- }
-
- r = cuEventSynchronize (_Tstop_events[timer]);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventSynchronize failed: %d\n", r);
- abort ();
- }
-
- r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventElapsedTime failed: %d\n", r);
- abort ();
- }
-
- return etime;
-}
diff --git libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
new file mode 100644
index 0000000..27c5c9e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
@@ -0,0 +1,784 @@
+! { dg-do run }
+
+program main
+  integer i, igot, iexp, iexpr, itmp  ! loop index; updated, expected, operand, captured values
+  real fgot, fexp, ftmp               ! updated, expected, captured values
+  logical lgot, lexp, ltmp            ! updated, expected, captured values
+  integer, parameter :: N = 32        ! loop trip count
+
+ igot = 0
+ iexp = N * 2
+
+ !$acc parallel copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ itmp = igot
+ igot = i + i
+ !$acc end atomic
+ end do
+ !$acc end parallel
+
+ if (igot /= iexp) call abort
+ if (itmp /= iexp - 2) call abort
+
+ fgot = 1234.0
+ fexp = 1266.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = fgot + 1.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp - 1.0) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 1.0
+ fexp = 2.0**32
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = fgot * 2.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp / 2.0) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 32.0
+ fexp = fgot - N
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = fgot - 1.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp + 1.0) call abort
+ if (fgot /= fexp) call abort
+
+  fgot = 2**32.0
+  fexp = 1.0  ! 2**32 halved N = 32 times
+
+  !$acc parallel loop copy (fgot, ftmp)
+  do i = 1, N
+     !$acc atomic capture
+     ftmp = fgot
+     fgot = fgot / 2.0
+     !$acc end atomic
+  end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp * 2.0) call abort  ! captured value is checked against the expected result
+  if (fgot /= fexp) call abort
+
+ lgot = .TRUE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = lgot .and. .FALSE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. .not. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = lgot .or. .FALSE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = lgot .eqv. .TRUE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .TRUE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = lgot .neqv. .TRUE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. .not. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ fgot = 1234.0
+ fexp = 1266.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = 1.0 + fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp - 1.0) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 1.0
+ fexp = 2.0**32
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = 2.0 * fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp / 2.0) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 32.0
+ fexp = 32.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = 2.0 - fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= 2.0 - fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 2.0**16
+ fexp = 2.0**16
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ ftmp = fgot
+ fgot = 2.0 / fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= 2.0 / fexp) call abort
+ if (fgot /= fexp) call abort
+
+ lgot = .TRUE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = .FALSE. .and. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. .not. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = .FALSE. .or. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = .TRUE. .eqv. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .TRUE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ ltmp = lgot
+ lgot = .TRUE. .neqv. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. .not. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ igot = 1
+ iexp = N
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ itmp = igot
+ igot = max (igot, i)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp - 1) call abort
+ if (igot /= iexp) call abort
+
+ igot = N
+ iexp = 1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ itmp = igot
+ igot = min (igot, i)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = ibclr (-2, i)
+ !$acc atomic capture
+ itmp = igot
+ igot = iand (igot, iexpr)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= ibset (iexp, N - 1)) call abort
+ if (igot /= iexp) call abort
+
+ igot = 0
+ iexp = -1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ itmp = igot
+ igot = ior (igot, iexpr)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= ieor (iexp, lshift (1, N - 1))) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ itmp = igot
+ igot = ieor (igot, iexpr)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= ior (iexp, lshift (1, N - 1))) call abort
+ if (igot /= iexp) call abort
+
+ igot = 1
+ iexp = N
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ itmp = igot
+ igot = max (i, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp - 1) call abort
+ if (igot /= iexp) call abort
+
+ igot = N
+ iexp = 1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ itmp = igot
+ igot = min (i, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = ibclr (-2, i)
+ !$acc atomic capture
+ itmp = igot
+ igot = iand (iexpr, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= ibset (iexp, N - 1)) call abort
+ if (igot /= iexp) call abort
+
+ igot = 0
+ iexp = -1
+ !!
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ itmp = igot
+ igot = ior (iexpr, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= ieor (iexp, lshift (1, N - 1))) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ itmp = igot
+ igot = ieor (iexpr, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= ior (iexp, lshift (1, N - 1))) call abort
+ if (igot /= iexp) call abort
+
+ fgot = 1234.0
+ fexp = 1266.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = fgot + 1.0
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 1.0
+ fexp = 2.0**32
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = fgot * 2.0
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 32.0
+ fexp = fgot - N
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = fgot - 1.0
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 2**32.0
+ fexp = 1.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = fgot / 2.0
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ lgot = .TRUE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = lgot .and. .FALSE.
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = lgot .or. .FALSE.
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = lgot .eqv. .TRUE.
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .TRUE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = lgot .neqv. .TRUE.
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ fgot = 1234.0
+ fexp = 1266.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = 1.0 + fgot
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 1.0
+ fexp = 2.0**32
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = 2.0 * fgot
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 32.0
+ fexp = 32.0
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = 2.0 - fgot
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ fgot = 2.0**16
+ fexp = 2.0**16
+
+ !$acc parallel loop copy (fgot, ftmp)
+ do i = 1, N
+ !$acc atomic capture
+ fgot = 2.0 / fgot
+ ftmp = fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (ftmp /= fexp) call abort
+ if (fgot /= fexp) call abort
+
+ lgot = .TRUE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = .FALSE. .and. lgot
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = .FALSE. .or. lgot
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = .TRUE. .eqv. lgot
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .TRUE.
+
+ !$acc parallel copy (lgot, ltmp)
+ !$acc atomic capture
+ lgot = .TRUE. .neqv. lgot
+ ltmp = lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (ltmp .neqv. lexp) call abort
+ if (lgot .neqv. lexp) call abort
+
+ igot = 1
+ iexp = N
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ igot = max (igot, i)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = N
+ iexp = 1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ igot = min (igot, i)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = ibclr (-2, i)
+ !$acc atomic capture
+ igot = iand (igot, iexpr)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = 0
+ iexp = -1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ igot = ior (igot, iexpr)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ igot = ieor (igot, iexpr)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = 1
+ iexp = N
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ igot = max (i, igot)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = N
+ iexp = 1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 1, N
+ !$acc atomic capture
+ igot = min (i, igot)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = ibclr (-2, i)
+ !$acc atomic capture
+ igot = iand (iexpr, igot)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = 0
+ iexp = -1
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ igot = ior (iexpr, igot)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot, itmp)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic capture
+ igot = ieor (iexpr, igot)
+ itmp = igot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (itmp /= iexp) call abort
+ if (igot /= iexp) call abort
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/atomic_update-1.f90 libgomp/testsuite/libgomp.oacc-fortran/atomic_update-1.f90
new file mode 100644
index 0000000..6607c77
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/atomic_update-1.f90
@@ -0,0 +1,338 @@
+! { dg-do run }
+
+program main
+ integer igot, iexp, iexpr
+ real fgot, fexp
+ integer i
+ integer, parameter :: N = 32
+ logical lgot, lexp
+
+ fgot = 1234.0
+ fexp = 1266.0
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = fgot + 1.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ fgot = 1.0
+ fexp = 2.0**32
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = fgot * 2.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ fgot = 32.0
+ fexp = fgot - N
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = fgot - 1.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ fgot = 2.0**32 ! real base, as in the other stanzas; N = 32 atomic halvings must end at 1.0
+ fexp = 1.0
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = fgot / 2.0
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ lgot = .TRUE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = lgot .and. .FALSE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = lgot .or. .FALSE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = lgot .eqv. .TRUE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .TRUE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = lgot .neqv. .TRUE.
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ fgot = 1234.0
+ fexp = 1266.0
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = 1.0 + fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ fgot = 1.0
+ fexp = 2.0**32
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = 2.0 * fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ fgot = 32.0
+ fexp = 32.0
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = 2.0 - fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ fgot = 2.0**16
+ fexp = 2.0**16
+
+ !$acc parallel loop copy (fgot)
+ do i = 1, N
+ !$acc atomic update
+ fgot = 2.0 / fgot
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (fgot /= fexp) call abort
+
+ lgot = .TRUE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = .FALSE. .and. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = .FALSE. .or. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .FALSE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = .TRUE. .eqv. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ lgot = .FALSE.
+ lexp = .TRUE.
+
+ !$acc parallel copy (lgot)
+ !$acc atomic update
+ lgot = .TRUE. .neqv. lgot
+ !$acc end atomic
+ !$acc end parallel
+
+ if (lgot .neqv. lexp) call abort
+
+ igot = 1
+ iexp = N
+
+ !$acc parallel loop copy (igot)
+ do i = 1, N
+ !$acc atomic update
+ igot = max (igot, i)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = N
+ iexp = 1
+
+ !$acc parallel loop copy (igot)
+ do i = 1, N
+ !$acc atomic update
+ igot = min (igot, i)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot)
+ do i = 0, N - 1
+ iexpr = ibclr (-2, i)
+ !$acc atomic update
+ igot = iand (igot, iexpr)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = 0
+ iexp = -1
+
+ !$acc parallel loop copy (igot)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic update
+ igot = ior (igot, iexpr)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic update
+ igot = ieor (igot, iexpr)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = 1
+ iexp = N
+
+ !$acc parallel loop copy (igot)
+ do i = 1, N
+ !$acc atomic update
+ igot = max (i, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = N
+ iexp = 1
+
+ !$acc parallel loop copy (igot)
+ do i = 1, N
+ !$acc atomic update
+ igot = min (i, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot)
+ do i = 0, N - 1
+ iexpr = ibclr (-2, i)
+ !$acc atomic update
+ igot = iand (iexpr, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = 0
+ iexp = -1
+
+ !$acc parallel loop copy (igot)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic update
+ igot = ior (iexpr, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+ igot = -1
+ iexp = 0
+
+ !$acc parallel loop copy (igot)
+ do i = 0, N - 1
+ iexpr = lshift (1, i)
+ !$acc atomic update
+ igot = ieor (iexpr, igot)
+ !$acc end atomic
+ end do
+ !$acc end parallel loop
+
+ if (igot /= iexp) call abort
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/cache-1.f90 libgomp/testsuite/libgomp.oacc-fortran/cache-1.f90
new file mode 100644
index 0000000..f01b8e9
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/cache-1.f90
@@ -0,0 +1,26 @@
+
+program main
+ integer, parameter :: N = 8
+ integer, dimension (N) :: a, b ! declared bounds are 1..N; several cache sections below name index 0
+ integer :: i
+ integer :: idx, len
+
+ idx = 1 ! run-time subscript used by the last two cache directives
+ len = 2 ! run-time upper bound of the a(idx:len) section
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+
+ !$acc cache (a(1:N))
+ !$acc cache (a(0:N))
+ !$acc cache (a(0:N), b(0:N))
+ !$acc cache (a(0))
+ !$acc cache (a(0), a(1), b(0:N))
+ !$acc cache (a(idx))
+ !$acc cache (a(idx:len))
+
+ b(i) = a(i) ! NOTE(review): a is never initialized and b is never checked - presumably only directive handling is under test; confirm
+ end do
+ !$acc end parallel
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90 libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90
new file mode 100644
index 0000000..e6ab78d
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90
@@ -0,0 +1,290 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+program main
+ use openacc
+ implicit none
+
+ integer, parameter :: N = 32
+ real, allocatable :: a(:), b(:), c(:)
+ integer i
+
+ i = 0
+
+ allocate (a(N))
+ allocate (b(N))
+ allocate (c(N))
+
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 3.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 5.0
+ b(:) = 1.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 5.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+
+ call acc_copyin (a, sizeof (a))
+
+ a(:) = 9.0
+
+ !$acc parallel present_or_copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ call acc_copyout (a, sizeof (a))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+
+ !$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 5.0
+ b(:) = 2.0
+
+ call acc_copyin (b, sizeof (b))
+
+ !$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+
+ call acc_copyout (b, sizeof (b))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 3.0
+ b(:) = 4.0
+
+ !$acc parallel copy (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ a(i) = a(i) + 1
+ b(i) = a(i) + 2
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 4.0) call abort ! 3.0 + 1, written back through the copy clause
+ if (b(i) .ne. 6.0) call abort ! computed from the already incremented a: 4.0 + 2
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 4.0
+ b(:) = 7.0
+
+ !$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
+ do i = 1, N
+ a(i) = a(i) + 1
+ b(i) = b(i) + 2
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 3.0
+ b(:) = 7.0
+
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+
+ !$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
+ do i = 1, N
+ a(i) = a(i) + 1
+ b(i) = b(i) + 2
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 7.0) call abort
+ end do
+
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 3.0
+ b(:) = 7.0
+
+ !$acc parallel copyin (a(1:N)) create (c(1:N)) copyout (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+ a(:) = 4.0
+ b(:) = 8.0
+
+ !$acc parallel copyin (a(1:N)) present_or_create (c(1:N)) copyout (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 4.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+ a(:) = 4.0
+
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+ call acc_copyin (c, sizeof (c))
+
+ !$acc parallel present (a(1:N)) present (c(1:N)) present (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+ call acc_copyout (c, sizeof (c))
+
+ do i = 1, N
+ if (a(i) .ne. 4.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+
+ call acc_copyin (a, sizeof (a))
+
+ a(:) = 9.0
+
+ !$acc parallel pcopyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ call acc_copyout (a, sizeof (a))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+
+ !$acc parallel copyin (a(1:N)) pcopyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 5.0
+ b(:) = 7.0
+
+ !$acc parallel copyin (a(1:N)) pcreate (c(1:N)) copyout (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 5.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+end program main
diff --git libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
index 5e94e2d..bf323b3 100644
--- libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
+++ libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
@@ -1,45 +1,212 @@
! { dg-do run }
+! { dg-additional-options "-cpp" }
-program test
- integer, parameter :: N = 8
- real, allocatable :: a(:), b(:)
+function is_mapped (n) result (rc)
+ use openacc
- allocate (a(N))
- allocate (b(N))
+ integer, intent (in) :: n
+ logical rc
- a(:) = 3.0
- b(:) = 0.0
+#if ACC_MEM_SHARED
+ integer i
- !$acc enter data copyin (a(1:N), b(1:N))
+ rc = .TRUE.
+ i = n
+#else
+ rc = acc_is_present (n, sizeof (n))
+#endif
- !$acc parallel
- do i = 1, n
- b(i) = a (i)
- end do
- !$acc end parallel
+end function is_mapped
- !$acc exit data copyout (a(1:N), b(1:N))
+program main
+ integer i, j
+ logical is_mapped
- do i = 1, n
- if (a(i) .ne. 3.0) call abort
- if (b(i) .ne. 3.0) call abort
- end do
+ i = -1
+ j = -2
- a(:) = 5.0
- b(:) = 1.0
+ !$acc data copyin (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
- !$acc enter data copyin (a(1:N), b(1:N))
+ if (i .ne. -1 .or. j .ne. -2) call abort
- !$acc parallel
- do i = 1, n
- b(i) = a (i)
- end do
- !$acc end parallel
+ i = 2
+ j = 1
- !$acc exit data copyout (a(1:N), b(1:N))
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
- do i = 1, n
- if (a(i) .ne. 5.0) call abort
- if (b(i) .ne. 5.0) call abort
- end do
-end program test
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data copyout (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ !$acc parallel present (i, j)
+ i = 4
+ j = 2
+ !$acc end parallel
+ !$acc end data
+
+ if (i .ne. 4 .or. j .ne. 2) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data create (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_copyin (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_copyout (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ !$acc parallel present (i, j)
+ i = 4
+ j = 2
+ !$acc end parallel
+ !$acc end data
+
+ if (i .ne. 4 .or. j .ne. 2) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_copy (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+#if ACC_MEM_SHARED
+ if (i .ne. 2 .or. j .ne. 1) call abort
+#else
+ if (i .ne. -1 .or. j .ne. -2) call abort
+#endif
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_create (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data copyin (i, j)
+ !$acc data present (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data copyin (i, j)
+ !$acc data present (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data
+#if !ACC_MEM_SHARED
+ if (is_mapped (i) .eqv. .TRUE.) call abort
+ if (is_mapped (j) .eqv. .TRUE.) call abort
+#endif
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+end program main
diff --git libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
index 8736c2a..d190700 100644
--- libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
+++ libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
@@ -1,8 +1,14 @@
! { dg-do run }
program test
+ use openacc
integer, parameter :: N = 8
real, allocatable :: a(:,:), b(:,:)
+ real, allocatable :: c(:), d(:)
+ integer i, j
+
+ i = 0
+ j = 0
allocate (a(N,N))
allocate (b(N,N))
@@ -28,4 +34,48 @@ program test
if (b(j,i) .ne. 3.0) call abort
end do
end do
+
+ allocate (c(N))
+ allocate (d(N))
+
+ c(:) = 3.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (c(1:N)) create (d(1:N)) async
+ !$acc wait
+
+ !$acc parallel
+ do i = 1, N
+ d(i) = c(i) + 1
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (c(1:N), d(1:N)) async
+ !$acc wait
+
+ do i = 1, N
+ if (d(i) .ne. 4.0) call abort
+ end do
+
+ c(:) = 3.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (c(1:N)) async
+ !$acc enter data create (d(1:N)) wait
+ !$acc wait
+
+ !$acc parallel
+ do i = 1, N
+ d(i) = c(i) + 1
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (d(1:N)) async
+ !$acc exit data async
+ !$acc wait
+
+ do i = 1, N
+ if (d(i) .ne. 4.0) call abort
+ end do
+
end program test
diff --git libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
index 9868cb0..daf20a5 100644
--- libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
+++ libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
@@ -17,7 +17,7 @@ program asyncwait
!$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async
- !$acc parallel async wait
+ !$acc parallel async wait present (a(1:N)) present (b(1:N)) present (N)
do i = 1, N
b(i) = a(i)
end do
@@ -36,7 +36,7 @@ program asyncwait
!$acc enter data copyin (a(1:N)) copyin (b(1:N)) async (1)
- !$acc parallel async (1) wait (1)
+ !$acc parallel async (1) wait (1) present (a(1:N), b(1:N), N)
do i = 1, N
b(i) = a(i)
end do
@@ -55,28 +55,30 @@ program asyncwait
c(:) = 0.0
d(:) = 0.0
- !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N))
+ !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) &
+ !$acc& create (d(1:N))
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), c(1:N), N)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), c(1:N), N)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), c(1:N), d(1:N), N)
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
!$acc end parallel
!$acc wait (1)
- !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N))
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) &
+ !$acc& copyout (d(1:N))
do i = 1, N
if (a(i) .ne. 3.0) call abort
@@ -91,34 +93,40 @@ program asyncwait
d(:) = 0.0
e(:) = 0.0
- !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) copyin (e(1:N))
+ !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) &
+ !$acc& create (d(1:N)) copyin (e(1:N))
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), c(1:N), d(1:N)) &
+ !$acc& present (e(1:N), N)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), c(1:N), d(1:N)) &
+ !$acc& present (e(1:N), N)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), c(1:N), d(1:N)) &
+ !$acc& present (e(1:N), N)
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
!$acc end parallel
- !$acc parallel wait (1) async (1)
+ !$acc parallel wait (1) async (1) present (a(1:N), b(1:N), c(1:N)) &
+ !$acc& present (d(1:N), e(1:N), N)
do i = 1, N
e(i) = a(i) + b(i) + c(i) + d(i)
end do
!$acc end parallel
!$acc wait (1)
- !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) copyout (e(1:N))
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) &
+ !$acc& copyout (d(1:N)) copyout (e(1:N))
!$acc exit data delete (N)
do i = 1, N
diff --git libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90 libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90
index 16a8598..d1ecf0a 100644
--- libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90
+++ libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90
@@ -19,7 +19,7 @@ program asyncwait
!$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async
- !$acc parallel async wait
+ !$acc parallel async wait present (a(1:N), b(1:N), N)
!$acc loop
do i = 1, N
b(i) = a(i)
@@ -39,7 +39,7 @@ program asyncwait
!$acc update device (a(1:N), b(1:N)) async (1)
- !$acc parallel async (1) wait (1)
+ !$acc parallel async (1) wait (1) present (a(1:N), b(1:N), N)
!$acc loop
do i = 1, N
b(i) = a(i)
@@ -62,19 +62,19 @@ program asyncwait
!$acc enter data copyin (c(1:N), d(1:N)) async (1)
!$acc update device (a(1:N), b(1:N)) async (1)
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), N)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), c(1:N), N)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), d(1:N), N)
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
@@ -100,25 +100,26 @@ program asyncwait
!$acc enter data copyin (e(1:N)) async (1)
!$acc update device (a(1:N), b(1:N), c(1:N), d(1:N)) async (1)
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), N)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), c(1:N), N)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), d(1:N), N)
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
!$acc end parallel
- !$acc parallel wait (1) async (1)
+ !$acc parallel wait (1) async (1) present (a(1:N), b(1:N), c(1:N)) &
+ !$acc& present (d(1:N), e(1:N), N)
do i = 1, N
e(i) = a(i) + b(i) + c(i) + d(i)
end do
diff --git libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 libgomp/testsuite/libgomp.oacc-fortran/data-4.f90
index f6886b0..4e95a9c 100644
--- libgomp/testsuite/libgomp.oacc-fortran/data-4.f90
+++ libgomp/testsuite/libgomp.oacc-fortran/data-4.f90
@@ -17,7 +17,7 @@ program asyncwait
!$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async
- !$acc parallel async wait
+ !$acc parallel async wait present (a(1:N), b(1:N), N)
!$acc loop
do i = 1, N
b(i) = a(i)
@@ -37,7 +37,7 @@ program asyncwait
!$acc update device (a(1:N), b(1:N)) async (1)
- !$acc parallel async (1) wait (1)
+ !$acc parallel async (1) wait (1) present (a(1:N), b(1:N), N)
!$acc loop
do i = 1, N
b(i) = a(i)
@@ -60,19 +60,19 @@ program asyncwait
!$acc enter data copyin (c(1:N), d(1:N)) async (1)
!$acc update device (a(1:N), b(1:N)) async (1)
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), N)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), c(1:N), N)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), d(1:N), N)
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
@@ -98,25 +98,26 @@ program asyncwait
!$acc enter data copyin (e(1:N)) async (1)
!$acc update device (a(1:N), b(1:N), c(1:N), d(1:N)) async (1)
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), b(1:N), N)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), c(1:N), N)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end parallel
- !$acc parallel async (1)
+ !$acc parallel async (1) present (a(1:N), d(1:N), N)
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
!$acc end parallel
- !$acc parallel wait (1) async (1)
+ !$acc parallel wait (1) async (1) present (a(1:N), b(1:N), c(1:N)) &
+ !$acc& present (d(1:N), e(1:N), N)
do i = 1, N
e(i) = a(i) + b(i) + c(i) + d(i)
end do
diff --git libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
new file mode 100644
index 0000000..0bab5bd
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
@@ -0,0 +1,229 @@
+! { dg-do run { target openacc_nvidia_accel_selected } }
+
+subroutine subr6 (a, d) ! sets d(i) = a(i) + a(i) inside a parallel region
+ integer, parameter :: N = 8
+ integer :: i
+ integer :: a(N) ! named in the declare deviceptr directive on the next line
+ !$acc declare deviceptr (a)
+ integer :: d(N)
+
+ i = 0
+
+ !$acc parallel copy (d)
+ do i = 1, N
+ d(i) = a(i) + a(i)
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr5 (a, b, c, d) ! exercises the four present_or_* declare clauses, one per dummy array
+ integer, parameter :: N = 8
+ integer :: i
+ integer :: a(N)
+ !$acc declare present_or_copyin (a)
+ integer :: b(N)
+ !$acc declare present_or_create (b)
+ integer :: c(N)
+ !$acc declare present_or_copyout (c)
+ integer :: d(N)
+ !$acc declare present_or_copy (d)
+
+ i = 0
+
+ !$acc parallel
+ do i = 1, N
+ b(i) = a(i) ! b is only a temporary here
+ c(i) = b(i) ! c receives a's value
+ d(i) = d(i) + b(i) ! d is read and updated, hence the copy-style clause
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr4 (a, b) ! copies a into b; a is declared present, b is declared copyout
+ integer, parameter :: N = 8
+ integer :: i
+ integer :: a(N)
+ !$acc declare present (a)
+ integer :: b(N)
+ !$acc declare copyout (b)
+
+ i = 0
+
+ !$acc parallel
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr3 (a, c) ! assigns c to a in a parallel region, then zeroes c there
+ integer, parameter :: N = 8
+ integer :: i
+ integer :: a(N)
+ !$acc declare present (a)
+ integer :: c(N)
+ !$acc declare copyin (c)
+
+ i = 0
+
+ !$acc parallel
+ do i = 1, N
+ a(i) = c(i)
+ c(i) = 0 ! the caller (subr0) still expects its own c to be 8 after this call
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr2 (a, b, c) ! b = a, then c = b + c + 1, inside a parallel region
+ integer, parameter :: N = 8
+ integer :: i
+ integer :: a(N)
+ !$acc declare present (a)
+ integer :: b(N)
+ !$acc declare create (b)
+ integer :: c(N)
+ !$acc declare copy (c)
+
+ i = 0
+
+ !$acc parallel
+ do i = 1, N
+ b(i) = a(i)
+ c(i) = b(i) + c(i) + 1 ! the caller (subr0) checks c == 8 after this call
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr1 (a, b, c) ! increments every element of a inside a parallel region
+ integer, parameter :: N = 8
+ integer :: i
+ integer :: a(N)
+ !$acc declare present (a)
+ integer :: b(N) ! not referenced in this routine
+ integer :: c(N) ! not referenced in this routine
+
+ i = 0
+
+ !$acc parallel
+ do i = 1, N
+ a(i) = a(i) + 1
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine test (a, e) ! aborts unless acc_is_present (a) equals the expected flag e
+ use openacc
+ logical :: e
+ integer, parameter :: N = 8
+ integer :: a(N)
+
+ if (acc_is_present (a) .neqv. e) call abort
+
+end subroutine
+
+subroutine subr0 (a, b, c, d) ! driver: calls subr1..subr6 and checks presence and values after each
+ integer, parameter :: N = 8
+ integer :: a(N)
+ !$acc declare copy (a)
+ integer :: b(N)
+ integer :: c(N)
+ integer :: d(N)
+
+ call test (a, .true.) ! only a carries a declare directive in this routine
+ call test (b, .false.)
+ call test (c, .false.)
+
+ call subr1 (a, b, c)
+
+ call test (a, .true.)
+ call test (b, .false.)
+ call test (c, .false.)
+
+ call subr2 (a, b, c)
+
+ call test (a, .true.)
+ call test (b, .false.)
+ call test (c, .false.)
+
+ do i = 1, N ! i is implicitly typed: this routine has no implicit none
+ if (c(i) .ne. 8) call abort
+ end do
+
+ call subr3 (a, c)
+
+ call test (a, .true.)
+ call test (b, .false.)
+ call test (c, .false.)
+
+ do i = 1, N
+ if (a(i) .ne. 2) call abort ! 2 is the value main assigned to a before calling subr0
+ if (c(i) .ne. 8) call abort ! subr3 zeroes c only inside its parallel region
+ end do
+
+ call subr4 (a, b)
+
+ call test (a, .true.)
+ call test (b, .false.)
+ call test (c, .false.)
+
+ do i = 1, N
+ if (b(i) .ne. 8) call abort
+ end do
+
+ call subr5 (a, b, c, d)
+
+ call test (a, .true.)
+ call test (b, .false.)
+ call test (c, .false.)
+ call test (d, .false.)
+
+ do i = 1, N
+ if (c(i) .ne. 8) call abort
+ if (d(i) .ne. 13) call abort ! main's initial d = 5 plus the 8 added in subr5
+ end do
+
+ call subr6 (a, d)
+
+ call test (a, .true.)
+ call test (d, .false.)
+
+ do i = 1, N
+ if (d(i) .ne. 16) call abort ! subr6 stores a(i) + a(i)
+ end do
+
+end subroutine
+
+program main
+ use openacc
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: b(N)
+ integer :: c(N)
+ integer :: d(N)
+
+ a(:) = 2 ! distinct start values, so each check in subr0 identifies its source array
+ b(:) = 3
+ c(:) = 4
+ d(:) = 5
+
+ call subr0 (a, b, c, d)
+
+ call test (a, .false.) ! after subr0 returns, none of the arrays is expected to be present
+ call test (b, .false.)
+ call test (c, .false.)
+ call test (d, .false.)
+
+ do i = 1, N ! i is implicitly typed: this program has no implicit none
+ if (a(i) .ne. 8) call abort
+ if (b(i) .ne. 8) call abort
+ if (c(i) .ne. 8) call abort
+ if (d(i) .ne. 16) call abort
+ end do
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90
new file mode 100644
index 0000000..593cde6
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90
@@ -0,0 +1,24 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+
+ integer :: i, n
+
+ n = 1000000 ! trip count of the empty kernel loop below
+
+ !$acc parallel async (0)
+ do i = 1, n
+ end do
+ !$acc end parallel
+
+ call acc_wait_async (0, 1) ! first argument names queue 0 (used by the kernel above), second names queue 1
+
+ if (acc_async_test (0) .neqv. .TRUE.) call abort ! NOTE(review): no blocking wait precedes this test - confirm it cannot race
+
+ if (acc_async_test (1) .neqv. .TRUE.) call abort
+
+ call acc_wait (1)
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
new file mode 100644
index 0000000..cffda87
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
@@ -0,0 +1,28 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+
+ integer :: i, j, nprocs
+ integer, parameter :: N = 1000000
+
+ nprocs = 2 ! number of kernels launched below, on async ids 1..nprocs
+
+ do j = 1, nprocs
+ !$acc parallel async (j)
+ do i = 1, N ! empty body: the kernel does no work
+ end do
+ !$acc end parallel
+ end do
+
+ if (acc_async_test (1) .neqv. .TRUE.) call abort ! NOTE(review): no wait precedes these two tests - confirm they cannot race
+ if (acc_async_test (2) .neqv. .TRUE.) call abort
+
+ call acc_wait_all_async (nprocs + 1) ! async id nprocs + 1 is not used by any kernel above
+
+ if (acc_async_test (nprocs + 1) .neqv. .TRUE.) call abort
+
+ call acc_wait_all ()
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90
new file mode 100644
index 0000000..72a2b49
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90
@@ -0,0 +1,79 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+
+ integer, parameter :: N = 256
+ integer, allocatable :: h(:)
+ integer :: i
+
+ allocate (h(N))
+
+ do i = 1, N
+ h(i) = i
+ end do
+
+ call acc_present_or_copyin (h) ! one-argument form; the two-argument form is used with acc_pcopyin below
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ call acc_copyout (h)
+
+ if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+ do i = 1, N
+ if (h(i) /= i) call abort ! the round trip must leave the data unchanged
+ end do
+
+ do i = 1, N
+ h(i) = i + i
+ end do
+
+ call acc_pcopyin (h, sizeof (h))
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ call acc_copyout (h)
+
+ if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+ do i = 1, N
+ if (h(i) /= i + i) call abort
+ end do
+
+ call acc_create (h)
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ !$acc parallel loop
+ do i = 1, N
+ h(i) = i ! overwrites the host's i + i; checked after the copyout below
+ end do
+ !$acc end parallel loop
+
+ call acc_copyout (h)
+
+ if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+ do i = 1, N
+ if (h(i) /= i) call abort
+ end do
+
+ call acc_present_or_create (h, sizeof (h))
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ call acc_delete (h)
+
+ if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+ call acc_pcreate (h)
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ call acc_delete (h)
+
+ if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90 libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90
new file mode 100644
index 0000000..3a834db
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90
@@ -0,0 +1,52 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+program main
+ use openacc
+ implicit none
+
+ integer, parameter :: N = 256
+ integer, allocatable :: h(:)
+ integer :: i
+
+ allocate (h(N))
+
+ do i = 1, N
+ h(i) = i
+ end do
+
+ call acc_copyin (h) ! h holds h(i) = i at this point
+
+ do i = 1, N
+ h(i) = i + i ! change the host data after the copyin
+ end do
+
+ call acc_update_device (h, sizeof (h))
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ h(:) = 0 ! clobber the host copy; the check below expects i + i back
+
+ call acc_copyout (h, sizeof (h))
+
+ do i = 1, N
+ if (h(i) /= i + i) call abort
+ end do
+
+ call acc_copyin (h, sizeof (h)) ! h holds i + i here, as just verified
+
+ h(:) = 0 ! clobber the host copy again; acc_update_self must restore it
+
+ call acc_update_self (h, sizeof (h))
+
+ if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+ do i = 1, N
+ if (h(i) /= i + i) call abort
+ end do
+
+ call acc_delete (h)
+
+ if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/routine-5.f90 libgomp/testsuite/libgomp.oacc-fortran/routine-5.f90
new file mode 100644
index 0000000..aaeb994
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/routine-5.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+program main
+ integer :: n
+
+ n = 5
+
+ !$acc parallel copy (n)
+ n = func (n) ! func is the internal function defined after contains
+ !$acc end parallel
+
+ if (n .ne. 6) call abort ! 5 + 1
+
+contains
+
+ function func (n) result (rc) ! returns n + 1; carries the !$acc routine directive on the next line
+ !$acc routine gang worker vector seq nohost
+ integer, intent (in) :: n
+ integer :: rc
+
+ rc = n
+ rc = rc + 1
+
+ end function
+
+end program
Grüße,
Thomas
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/fortran/attachments/20150505/c44bcbe3/attachment.sig>
More information about the Fortran
mailing list