This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 10/10] OpenACC 2.0 support for libgomp - new tests


This patch adds a new set of tests for the OpenACC functionality in
libgomp. Many of them do not pass yet because the support they rely on
is still missing elsewhere in GCC.

Julian

xxxx-xx-xx  James Norris  <jnorris@codesourcery.com>
	    Thomas Schwinge  <thomas@codesourcery.com>
	    Tom de Vries  <tom@codesourcery.com>
	    Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	* testsuite/lib/libgomp.exp
	(check_effective_target_openacc_nvidia_accel_present)
	(check_effective_target_openacc_nvidia_accel_selected): New
	functions.
	* testsuite/libgomp.oacc-fortran/fortran.exp: New exp file.
	* testsuite/libgomp.oacc-fortran/*.f: New tests.
	* testsuite/libgomp.oacc-fortran/*.f90: Likewise.
	* testsuite/libgomp.oacc-c/c.exp: New exp file.
	* testsuite/libgomp.oacc-c/subr.ptx: New file.
	* testsuite/libgomp.oacc-c/subr.cu: New file.
	* testsuite/libgomp.oacc-c/timer.h: New file.
	* testsuite/libgomp.oacc-c/*.c: New tests.
	* testsuite/libgomp.oacc-c++/c++.exp: New exp file.
commit a297265c276f7882d33cfb05bb5ab71e05b6d7a1
Author: Julian Brown <julian@codesourcery.com>
Date:   Mon Sep 22 03:27:53 2014 -0700

    OpenACC tests.

diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 094e5ed..b8c3eda 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -239,3 +239,31 @@ proc libgomp_option_proc { option } {
 	return 0
     }
 }
+
+# Return 1 if at least one nvidia board is present.  The probe program below
+# exits with status 0 only when acc_get_num_devices reports a device.
+proc check_effective_target_openacc_nvidia_accel_present { } {
+    return [check_runtime openacc_nvidia_accel_present {
+	#include <openacc.h>
+	int main () {
+	return !(acc_get_num_devices (acc_device_nvidia) > 0);
+	}
+    } "" ]
+}
+
+# Return 1 if at least one nvidia board is present, and the nvidia device type
+# is selected by default by means of setting the environment variable
+# ACC_DEVICE_TYPE.
+
+proc check_effective_target_openacc_nvidia_accel_selected { } {
+    # Both preconditions are tested in their original order; || short-circuits,
+    # so the environment is only inspected once a board is known to exist.
+    if { ![check_effective_target_openacc_nvidia_accel_present]
+	 || ![info exists ::env(ACC_DEVICE_TYPE)] } {
+	return 0
+    }
+
+    # ACC_DEVICE_TYPE is set: the nvidia device type counts as selected
+    # exactly when the variable's value compares equal to "nvidia".
+    return [expr { $::env(ACC_DEVICE_TYPE) == "nvidia" }]
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
new file mode 100644
index 0000000..0f426ed
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -0,0 +1,101 @@
+# OpenACC C++ test driver.
+# This whole file adapted from libgomp.c++/c++.exp.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+global shlib_ext
+
+# Language-specific settings: how to link C++ tests and where the
+# libstdc++ libraries sit relative to the libgomp build directory.
+set shlib_ext [get_shlib_extension]
+set lang_library_path "../libstdc++-v3/src/.libs"
+set lang_link_flags "-lstdc++"
+set lang_test_file_found 0
+# No extra include flags are set here; drop the variable if it exists.
+if { [info exists lang_include_flags] } {
+    unset lang_include_flags
+}
+
+# Initialize dg.
+dg-init
+
+# Enable OpenACC for every test.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+# Look up the libgomp build directory (tested against "" below).
+set blddir [lookfor_file [get_multilibs] libgomp]
+
+
+if { $blddir == "" } {
+    # No build directory: run only if GXX_UNDER_TEST is set.
+    if { [info exists GXX_UNDER_TEST] } {
+        set lang_test_file_found 1
+        # Needs to exist for libgomp.exp.
+        set lang_test_file ""
+    } else {
+        puts "GXX_UNDER_TEST not defined, will not execute c++ tests"
+    }
+} elseif { [file exists "${blddir}/${lang_library_path}/libstdc++.a"] } {
+    # A static libstdc++ is looked for first.
+    set lang_test_file "${lang_library_path}/libstdc++.a"
+    set lang_test_file_found 1
+} elseif { [file exists "${blddir}/${lang_library_path}/libstdc++.${shlib_ext}"] } {
+    # We may have a shared only build, so fall back to a shared libstdc++.
+    set lang_test_file "${lang_library_path}/libstdc++.${shlib_ext}"
+    set lang_test_file_found 1
+} else {
+    # lang_test_file_found stays 0, so nothing below is run.
+    puts "No libstdc++ library found, will not execute c++ tests"
+}
+
+if { $lang_test_file_found } {
+    # Gather a list of all tests.
+    set tests [lsort [glob -nocomplain $srcdir/$subdir/*.C]]
+
+    # Library search path for running the tests; the libstdc++ directory
+    # is added only when a build directory is known.
+    set ld_library_path "$always_ld_library_path"
+    if { $blddir != "" } {
+        append ld_library_path ":${blddir}/${lang_library_path}"
+    }
+    append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+    set_ld_library_path_env_vars
+
+    # Take the libstdc++ include flags from its testsuite_flags script
+    # when that script exists; otherwise pass none.
+    set libstdcxx_includes ""
+    set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags"
+    if { [file exists $flags_file] } {
+	set libstdcxx_includes [exec sh $flags_file --build-includes]
+    }
+
+    # Todo: get list of accelerators from configure options --enable-accelerator.
+    # The host (or fallback) accelerator is run last.
+    set accels [list "nonshm-host" "nvidia" "host"]
+
+    # Test OpenACC with available accelerators.
+    foreach accel $accels {
+	# Tag macro for this device type; '-' in the name becomes '_'.
+	set tagopt "-DACC_DEVICE_TYPE_[string map {- _} $accel]=1"
+
+	# Todo: Determine shared memory or not using run-time test.
+	# For now only "host" is treated as sharing memory.
+	if { $accel == "host" } {
+	    set acc_mem_shared 1
+	} else {
+	    set acc_mem_shared 0
+	}
+	append tagopt " -DACC_MEM_SHARED=$acc_mem_shared"
+
+	# Todo: Verify that this works for both local and remote testing.
+	setenv ACC_DEVICE_TYPE $accel
+
+	# Run all tests with the flags for this accelerator.
+	dg-runtest $tests "$tagopt" $libstdcxx_includes
+    }
+}
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-c/abort-2.c b/libgomp/testsuite/libgomp.oacc-c/abort-2.c
new file mode 100644
index 0000000..5c2efa8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/abort-2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+int
+main (int argc)
+{
+  /* The region aborts only if argc != 1 (presumably never in a plain run).  */
+#pragma acc parallel
+  {
+    if (argc != 1)
+      abort ();
+  }
+
+  return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c/abort.c b/libgomp/testsuite/libgomp.oacc-c/abort.c
new file mode 100644
index 0000000..8fe5aab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/abort.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */
+
+extern void abort ();
+
+int
+main (void)
+{
+  /* Call abort () unconditionally from inside the parallel region.  */
+#pragma acc parallel
+  {
+    abort ();
+  }
+
+  return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c
new file mode 100644
index 0000000..4190c8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c
@@ -0,0 +1,63 @@
+/* Disable the acc_on_device builtin; we want to test the libgomp library
+   function.  */
+/* { dg-additional-options "-fno-builtin-acc_on_device" } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+  /* Host: only acc_device_none and acc_device_host must match.  */
+
+  {
+    if (!acc_on_device (acc_device_none))
+      abort ();
+    if (!acc_on_device (acc_device_host))
+      abort ();
+    if (acc_on_device (acc_device_not_host))
+      abort ();
+    if (acc_on_device (acc_device_nvidia))
+      abort ();
+  }
+
+
+  /* Host via offloading fallback mode (if(0)): same expectations as above.  */
+
+#pragma acc parallel if(0)
+  {
+    if (!acc_on_device (acc_device_none))
+      abort ();
+    if (!acc_on_device (acc_device_host))
+      abort ();
+    if (acc_on_device (acc_device_not_host))
+      abort ();
+    if (acc_on_device (acc_device_nvidia))
+      abort ();
+  }
+
+
+#if !ACC_DEVICE_TYPE_host
+
+  /* Offloaded: not_host must match; nvidia must match only if ACC_DEVICE_TYPE_nvidia.  */
+
+#pragma acc parallel
+  {
+    if (acc_on_device (acc_device_none))
+      abort ();
+    if (acc_on_device (acc_device_host))
+      abort ();
+    if (!acc_on_device (acc_device_not_host))
+      abort ();
+#if ACC_DEVICE_TYPE_nvidia
+    if (!acc_on_device (acc_device_nvidia))
+      abort ();
+#else
+    if (acc_on_device (acc_device_nvidia))
+      abort ();
+#endif
+  }
+#endif
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c/asyncwait-1.c
new file mode 100644
index 0000000..a7548fb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/asyncwait-1.c
@@ -0,0 +1,295 @@
+/* { dg-do compile } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+    int N = 64;
+    float *a, *b;
+    int i;
+    /* Only the diagnostics matter here; each malformed clause states its expected message.  */
+    acc_init (acc_device_nvidia);
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,) /* { dg-error "error: expected '\\)' before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (,1) /* { dg-error "error: expected '\\)' before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,) /* { dg-error "error: expected '\\)' before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2 3) /* { dg-error "error: expected '\\)' before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,,) /* { dg-error "error: expected '\\)' before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 /* { dg-error "error: expected '\\)' before end of line" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (*) /* { dg-error "error: expected '\\)' before '\\*' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (a) /* { dg-error "error: expected '\\)' before 'a'" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (N) /* { dg-error "error: expected '\\)' before 'N'" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+    /* An empty argument list and a bare async carry no expected diagnostic.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async ()
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+    /* The same malformed argument lists, now in a wait clause.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 2) /* { dg-error "error: expected ',' before numeric constant" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (,1) /* { dg-error "error: expected integer expression before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2 3) /* { dg-error "error: expected ',' before numeric constant" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,,) /* { dg-error "error: expected integer expression before ',' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 /* { dg-error "error: expected ',' before end of line" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,*) /* { dg-error "error: expected integer expression before '\\*' token" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,a) /* { dg-error "error: expected integer expression before 'a'" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (a) /* { dg-error "error: expected integer expression before 'a'" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (N) /* { dg-error "error: expected integer expression before 'N'" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+    /* An empty argument list and a bare wait carry no expected diagnostic.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait ()
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+    /* Stand-alone wait directives with malformed argument lists.  */
+#pragma acc wait (1 2) /* { dg-error "error: expected ',' before numeric constant" } */
+
+#pragma acc wait (1,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+
+#pragma acc wait (,1) /* { dg-error "error: expected integer expression before ',' token" } */
+
+#pragma acc wait (1,2,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+
+#pragma acc wait (1,2 3) /* { dg-error "error: expected ',' before numeric constant" } */
+
+#pragma acc wait (1,2,,) /* { dg-error "error: expected integer expression before ',' token" } */
+
+#pragma acc wait (1 /* { dg-error "error: expected ',' before end of line" } */
+
+#pragma acc wait (1,*) /* { dg-error "error: expected integer expression before '\\*' token" } */
+
+#pragma acc wait (1,a) /* { dg-error "error: expected integer expression before 'a'" } */
+
+#pragma acc wait (a) /* { dg-error "error: expected integer expression before 'a'" } */
+
+#pragma acc wait (N) /* { dg-error "error: expected integer expression before 'N'" } */
+
+#pragma acc wait (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+
+#pragma acc wait 1 /* { dg-error "error: expected clause before numeric constant" } */
+
+#pragma acc wait N /* { dg-error "error: expected clause before 'N'" } */
+    /* Stand-alone wait directives with a malformed async clause.  */
+#pragma acc wait async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */
+    /* NOTE(review): the next directive repeats the previous one - confirm it is intended.  */
+#pragma acc wait async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */
+
+#pragma acc wait async (1,) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (,1) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1,2,) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1,2 3) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1,2,,) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1 /* { dg-error "error: expected '\\)' before end of line" } */
+
+#pragma acc wait async (*) /* { dg-error "error: expected '\\)' before '\\*' token" } */
+
+#pragma acc wait async (a) /* { dg-error "error: expected '\\)' before 'a'" } */
+
+#pragma acc wait async (N) /* { dg-error "error: expected '\\)' before 'N'" } */
+
+#pragma acc wait async (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+
+    acc_shutdown (acc_device_nvidia);
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/asyncwait-2.c b/libgomp/testsuite/libgomp.oacc-c/asyncwait-2.c
new file mode 100755
index 0000000..22cef6d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/asyncwait-2.c
@@ -0,0 +1,466 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include "cuda.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+
+int
+main (int argc, char **argv)
+{
+    CUresult r;
+    CUstream stream1;
+    int N = 128; //1024 * 1024;
+    float *a, *b, *c, *d, *e;
+    int i;
+    int nbytes;
+    /* Run kernels asynchronously and check the host copies after each wait.  */
+    acc_init (acc_device_nvidia);
+
+    nbytes = N * sizeof (float);
+
+    a = (float *) malloc (nbytes);
+    b = (float *) malloc (nbytes);
+    c = (float *) malloc (nbytes);
+    d = (float *) malloc (nbytes);
+    e = (float *) malloc (nbytes);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+    /* async with no argument, then wait with no argument.  */
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+    }
+    /* async (1), then wait (1).  */
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+    /* Three kernels on async (1), then a single wait (1).  */
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+    /* As above, plus a fourth kernel (wait (1) async (1)) that sums a, b, c and d into e.  */
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc parallel wait (1) async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 11.0)
+            abort ();
+    }
+
+    /* stream1 is created and passed to acc_set_cuda_stream for async value 1; the async (1) cases are then repeated.  */
+    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+        abort ();
+    }
+
+    acc_set_cuda_stream (1, stream1);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 7.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 7.0)
+            abort ();
+
+        if (b[i] != 49.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc parallel wait (1) async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 17.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+    /* copyin only: results come back through update host with wait (1).  */
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 16.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+    /* copyin only: update host is itself async (1), followed by wait (1).  */
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 25.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+    acc_shutdown (acc_device_nvidia);
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp
new file mode 100644
index 0000000..05a554a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/c.exp
@@ -0,0 +1,84 @@
+# OpenACC C test driver.
+# This whole file adapted from libgomp.c/c.exp.
+
+# Drop the language-specific settings if they are already defined.
+if { [info exists lang_library_path] } {
+    unset lang_library_path
+    unset lang_link_flags
+}
+if { [info exists lang_test_file] } {
+    unset lang_test_file
+}
+if { [info exists lang_include_flags] } {
+    unset lang_include_flags
+}
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+if { ![info exists DEFAULT_CFLAGS] } {
+    set DEFAULT_CFLAGS "-O2"
+}
+
+# Initialize dg.
+dg-init
+
+# Enable OpenACC for every test.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+# Gather a list of all tests.
+set tests [lsort [find $srcdir/$subdir *.c]]
+
+# Library search path for running the tests, including the CUDA libraries.
+# NOTE(review): the CUDA 5.5 install path is hard-coded here and below.
+set ld_library_path $always_ld_library_path
+append ld_library_path \
+    [gcc-set-multilib-library-path $GCC_UNDER_TEST] \
+    ":/opt/nvidia/cuda-5.5/lib64"
+set_ld_library_path_env_vars
+
+# Todo: get list of accelerators from configure options --enable-accelerator.
+# The host (or fallback) accelerator is run last.
+set accels [list "nonshm-host" "nvidia" "host"]
+
+# Test OpenACC with available accelerators.
+# SAVE_ALWAYS_CFLAGS holds the flags that are common to all accelerators.
+set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+foreach accel $accels {
+    # Start every accelerator from the same flags.
+    set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+
+    # Tag macro for this device type; '-' in the name becomes '_'.
+    set tagopt "-DACC_DEVICE_TYPE_[string map {- _} $accel]=1"
+
+    # Todo: Determine shared memory or not using run-time test.
+    # For now only "host" is treated as sharing memory.
+    set acc_mem_shared 0
+    if { $accel == "host" } {
+	set acc_mem_shared 1
+    } elseif { $accel == "nvidia" } {
+	# Copy ptx file (TEMPORARY)
+	remote_download host $srcdir/libgomp.oacc-c/subr.ptx
+
+	# Where cuda.h and the CUDA libraries live
+	# Todo: get that from configure option --with-cuda-driver.
+	lappend ALWAYS_CFLAGS \
+	    "additional_flags=-I/opt/nvidia/cuda-5.5/include" \
+	    "additional_flags=-L/opt/nvidia/cuda-5.5/lib64"
+
+	# Where timer.h lives
+	lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}"
+    }
+    append tagopt " -DACC_MEM_SHARED=$acc_mem_shared"
+
+    # Todo: Verify that this works for both local and remote testing.
+    setenv ACC_DEVICE_TYPE $accel
+
+    # Run all tests with the flags for this accelerator.
+    dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS
+}
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-c/cache-1.c b/libgomp/testsuite/libgomp.oacc-c/cache-1.c
new file mode 100644
index 0000000..5473475
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/cache-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+#define N   2
+    int a[N], b[N];
+    int i;
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3;
+        b[i] = 0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+{
+    int ii;
+
+    for (ii = 0; ii < N; ii++)
+    {
+        const int idx = ii;
+        int n = 1;
+        const int len = n;
+        /* Malformed cache directives, each with its expected diagnostic.  */
+#pragma acc cache /* { dg-error "error: expected '\\(' before end of line" } */
+
+#pragma acc cache (a) /* { dg-error "error: expected '\\\[' before '\\)' token" } */
+
+#pragma acc cache (a[0:N]) copyin (a[0:N]) /* { dg-error "error: expected end of line before 'copyin'" } */
+
+#pragma acc cache () /* { dg-error "error: expected identifier before '\\)' token" } */
+        /* NOTE(review): '}' in the next directive may be a typo for ']' - confirm.  */
+#pragma acc cache (a[0:N] b[0:N}) /* { dg-error "error: expected '\\)' before 'b'" } */
+
+#pragma acc cache (a[0:N] /* { dg-error "error: expected '\\)' before end of line" } */
+        /* Subscript and length must be constants: ii and n are rejected.  */
+#pragma acc cache (a[ii]) /* { dg-error "error: 'ii' is not a constant" } */
+
+#pragma acc cache (a[idx:n]) /* { dg-error "error: 'n' is not a constant" } */
+        /* Well-formed directives: no diagnostic expected (idx and len are const).  */
+#pragma acc cache (a[0:N])
+
+#pragma acc cache (a[0:N], b[0:N])
+
+#pragma acc cache (a[0])
+
+#pragma acc cache (a[0], a[1], b[0:N])
+
+#pragma acc cache (a[idx])
+
+#pragma acc cache (a[idx:len])
+
+        b[ii] = a[ii];
+    }
+}
+
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != b[i])
+            abort ();
+    }
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c
new file mode 100644
index 0000000..8177574
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c
@@ -0,0 +1,623 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+    /* Runs a series of parallel regions with different data clauses, checking host values and acc_is_present after each.  */
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+    c = (float *) malloc(N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+    /* copyin(a) + copyout(b), nothing mapped beforehand: b must come back as 3.0 and neither array may stay present.  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+    /* Same clauses again with fresh values (5.0 / 1.0).  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+    /* Put a on the device while it holds 6.0, then change only the host copy to 9.0.  */
+    d = acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+    /* present_or_copyin on already-present a: the check below expects the region to read 6.0, not 9.0.  */
+#pragma acc parallel present_or_copyin(a[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    acc_free(d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+    /* present_or_copyout on b that is not present: b is expected back as 6.0 and not present afterwards.  */
+#pragma acc parallel copyin(a[0:N]) present_or_copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 2.0;
+    }
+    /* Make b present on the device before the region.  */
+    d = acc_copyin (&b[0], N * sizeof (float));
+    /* present_or_copyout on already-present b: host b is expected to keep 2.0 and b to stay present.  */
+#pragma acc parallel copyin(a[0:N]) present_or_copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* After acc_free of the device pointer, b must no longer be reported present.  */
+    acc_free (d);
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 4.0;
+    }
+    /* copy(a) + copyout(b): expects a == 4.0 and b == 6.0 on the host afterwards.  */
+#pragma acc parallel copy(a[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            a[ii] = a[ii] + 1;
+            b[ii] = a[ii] + 2;
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 7.0;
+    }
+    /* present_or_copy on arrays that are not present: expects a == 5.0, b == 9.0 and nothing left present.  */
+#pragma acc parallel present_or_copy(a[0:N]) present_or_copy(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            a[ii] = a[ii] + 1;
+            b[ii] = b[ii] + 2;
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+    /* Make both arrays present before the region.  */
+    d = acc_copyin (&a[0], N * sizeof (float));  /* d is reassigned on the next line; a's device pointer is looked up again below */
+    d = acc_copyin (&b[0], N * sizeof (float));
+    /* present_or_copy on present arrays: host values are expected unchanged (3.0 / 7.0) and both arrays still present.  */
+#pragma acc parallel present_or_copy(a[0:N]) present_or_copy(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            a[ii] = a[ii] + 1;
+            b[ii] = b[ii] + 2;
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 7.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Tear both mappings down: look up each device pointer, unmap the host address, then free the device memory.  */
+    d = acc_deviceptr(&a[0]);
+    acc_unmap_data (&a[0]);
+    acc_free (d);
+
+    d = acc_deviceptr(&b[0]);
+    acc_unmap_data (&b[0]);
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+    /* create(c): c is only an intermediate inside the region; expects b == a == 3.0 and nothing left present.  */
+#pragma acc parallel copyin(a[0:N]) create(c[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+    /* present_or_create(c) with c not present: same expectations, with 4.0.  */
+#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+    /* Map c onto explicitly allocated device memory before the region.  */
+    d = acc_malloc (N * sizeof (float));
+    acc_map_data(c, d, N * sizeof (float));
+    /* present_or_create(c) with c already mapped: c is expected to remain present afterwards.  */
+#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    d = acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+    /* Map c again for the present(c) case.  */
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+    /* present(c) with c mapped just above: c is expected to remain present afterwards.  */
+#pragma acc parallel copyin(a[0:N]) present(c[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+    /* Make all three arrays present: a through acc_copyin, b and c through acc_map_data.  */
+    acc_copyin (a, N * sizeof (float));
+
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (b, d, N * sizeof (float));
+
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+    /* Region that uses only present clauses.  */
+#pragma acc parallel present(a[0:N]) present(c[0:N]) present(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    if (!acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+    /* Bring b back explicitly before checking the host values.  */
+    acc_copyout (b, N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    d = acc_deviceptr(a);
+
+    acc_unmap_data (a);
+
+    acc_free (d);
+
+    d = acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 6.0;
+    }
+    /* Raw device allocation, handed to the region through deviceptr(d).  */
+    d = acc_malloc(N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) deviceptr(d) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            d[ii] = a[ii];
+            b[ii] = d[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+    /* Repeat three of the cases above using the pcopyin, pcopyout and pcreate clause spellings.  */
+    d = acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+    /* pcopyin on already-present a: expects the region to read 6.0, not the host's 9.0.  */
+#pragma acc parallel pcopyin(a[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    acc_free(d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+    /* pcopyout on b that is not present.  */
+#pragma acc parallel copyin(a[0:N]) pcopyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 7.0;
+    }
+    /* pcreate on c that is not present.  */
+#pragma acc parallel copyin(a[0:N]) pcreate(c[0:N]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-2.c b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c
new file mode 100644
index 0000000..25c2165
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+    /* Maps c for N floats, then names N+1 elements of c in a data clause; the file's dg-shouldfail expects a libgomp error.  */
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+    c = (float *) malloc(N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+    /* Map exactly N floats of c onto freshly allocated device memory.  */
+    d = acc_malloc (N * sizeof (float));
+    acc_map_data(c, d, N * sizeof (float));
+    /* c[0:N+1] is one element longer than the mapping made above.  */
+#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N+1]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+    /* NOTE(review): presumably not reached when the expected runtime failure occurs -- confirm.  */
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    d = acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    return 0;
+}
+/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-1.c b/libgomp/testsuite/libgomp.oacc-c/collapse-1.c
new file mode 100644
index 0000000..80fed6c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/collapse-1.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  int i, j, k, l = 0;
+  int a[3][3][3];
+  /* Fill a[0..1][0..1][0..1] in one parallel region, then verify it in a second one; l records any mismatch.  */
+  memset (a, '\0', sizeof (a));
+  #pragma acc parallel
+  #pragma acc loop collapse(4 - 1)
+    for (i = 0; i < 2; i++)  /* the collapse argument above is the constant expression 4 - 1 */
+      for (j = 0; j < 2; j++)
+	for (k = 0; k < 2; k++)
+	  a[i][j][k] = i + j * 4 + k * 16;
+  #pragma acc parallel
+    {
+      #pragma acc loop collapse(2) reduction(|:l)
+	for (i = 0; i < 2; i++)
+	  for (j = 0; j < 2; j++)
+	    for (k = 0; k < 2; k++)
+	      if (a[i][j][k] != i + j * 4 + k * 16)
+		l = 1;
+    }
+  if (l)  /* l starts at 0 and is only ever set to 1 on a mismatch */
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-2.c b/libgomp/testsuite/libgomp.oacc-c/collapse-2.c
new file mode 100644
index 0000000..44a77f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/collapse-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+  int i, j, k, l = 0, f = 0, x = 0;
+  int m1 = 4, m2 = -5, m3 = 17;
+  /* Count mismatches in a collapse(3) OpenACC loop nest (l) and in the same nest on the host (x); pass when they agree.  */
+  #pragma acc parallel
+  #pragma acc loop collapse(3) reduction(+:l)
+    for (i = -2; i < m1; i++)
+      for (j = m2; j < -2; j++)
+	{
+	  for (k = 13; k < m3; k++)
+	    {
+              if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) !=  9 + f++)
+		l++;
+	    }
+	}
+  /* Same loop nest without any pragma, accumulating into x.  */
+    for (i = -2; i < m1; i++)
+      for (j = m2; j < -2; j++)
+	{
+	  for (k = 13; k < m3; k++)
+	    {
+              if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) !=  9 + f++)
+		x++;
+	    }
+	}
+  /* NOTE(review): f is incremented inside the parallel loop and is not reset between the two nests -- confirm the counts are meant to be comparable.  */
+  if (l != x)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-3.c b/libgomp/testsuite/libgomp.oacc-c/collapse-3.c
new file mode 100644
index 0000000..dcb3b20
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/collapse-3.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=gnu99" } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+  int i2, l = 0, r = 0;
+  int a[3][3][3];
+  /* Like the collapse test with file-scope indices, but with loop variables declared in the for statements; l (device) must equal r (host).  */
+  memset (a, '\0', sizeof (a));
+  #pragma acc parallel
+  #pragma acc loop collapse(4 - 1)
+    for (int i = 0; i < 2; i++)
+      for (int j = 0; j < 2; j++)
+	for (int k = 0; k < 2; k++)
+	  a[i][j][k] = i + j * 4 + k * 16;
+#pragma acc parallel
+    {
+      #pragma acc loop collapse(2) reduction(|:l)
+	for (i2 = 0; i2 < 2; i2++)
+	  for (int j = 0; j < 2; j++)
+	    for (int k = 0; k < 2; k++)
+	      if (a[i2][j][k] != i2 + j * 4 + k * 16)
+		l += 1;
+    }
+  /* Recount the mismatches on the host into r.  */
+    for (i2 = 0; i2 < 2; i2++)
+      for (int j = 0; j < 2; j++)
+	for (int k = 0; k < 2; k++)
+	  if (a[i2][j][k] != i2 + j * 4 + k * 16)
+	    r += 1;
+  /* NOTE(review): the clause above is reduction(|:l) while the body does l += 1 -- confirm the operator is intended.  */
+  if (l != r)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-1.c b/libgomp/testsuite/libgomp.oacc-c/context-1.c
new file mode 100644
index 0000000..fd64570
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-1.c
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int i;
+    /* Host loop: y[i] = a * x[i] + y[i] for every i in [0, n), updating y in place.  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless ctx1 equals both the current CUDA context and the one acc_get_current_cuda_context reports.  */
+    r = cuCtxGetCurrent(&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (ctx1 != ctx2)
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+    /* Second comparison: the context as seen through the OpenACC runtime.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cudaError_t e;
+    cublasHandle_t h;
+    CUcontext pctx, ctx;
+    CUresult r;
+    int dev;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 1 - cuBLAS creates, OpenACC shares.  */
+
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+    /* Record the context that is current after cublasCreate; every later check compares against it.  */
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    e = cudaGetDevice(&dev);
+    if (e != cudaSuccess)
+    {
+        fprintf(stderr, "cudaGetDevice failed: %d\n", e);
+        exit(EXIT_FAILURE);
+    }
+    /* Select for OpenACC the device number that cudaGetDevice reported.  */
+    acc_set_device_num(dev, acc_device_nvidia);
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (!h_X)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (!h_Y1)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (!h_Y2)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* Random inputs; h_Y2 starts as a copy of h_Y1 and later holds the host reference result.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+    /* Copy the inputs to the device through OpenACC, checking the context after each copy.  */
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Device computation through cuBLAS, on the pointers returned by acc_copyin.  */
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Fetch the device result into h_Y1.  */
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Host reference result, computed into h_Y2.  */
+    saxpy(N, alpha, h_X, h_Y2);
+    /* Accumulate the squared difference and the squared reference, then take square roots.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+    /* Fail if the reference norm is below 1e-7 or the error relative to it is 1e-6 or more.  */
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+    /* After acc_shutdown a context must still be current, and it must be the one recorded at the start.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&ctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!ctx)
+    {
+        fprintf(stderr, "Expected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx != ctx)
+    {
+        fprintf(stderr, "Unexpected new context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-2.c b/libgomp/testsuite/libgomp.oacc-c/context-2.c
new file mode 100644
index 0000000..03e7c1b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-2.c
@@ -0,0 +1,223 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int i;
+    /* Host loop: y[i] = a * x[i] + y[i] for every i in [0, n).  Not called by main in this file.  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless ctx1 equals both the current CUDA context and the one acc_get_current_cuda_context reports.  */
+    r = cuCtxGetCurrent(&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (ctx1 != ctx2)
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+    /* Second comparison: the context as seen through the OpenACC runtime.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cudaError_t e;
+    cublasHandle_t h;
+    CUcontext pctx, ctx;
+    CUresult r;
+    int dev;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 2 - cuBLAS creates, OpenACC shares.  */
+
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+    /* Record the context that is current after cublasCreate; every later check compares against it.  */
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    e = cudaGetDevice(&dev);
+    if (e != cudaSuccess)
+    {
+        fprintf(stderr, "cudaGetDevice failed: %d\n", e);
+        exit(EXIT_FAILURE);
+    }
+    /* Select for OpenACC the device number that cudaGetDevice reported.  */
+    acc_set_device_num(dev, acc_device_nvidia);
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* Random inputs; h_Y2 starts as a copy of h_Y1 and later holds the reference result.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+    /* Copy the inputs to the device through OpenACC, checking the context after each copy.  */
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Device computation through cuBLAS, on the pointers returned by acc_copyin.  */
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Fetch the device result into h_Y1.  */
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Reference result computed into h_Y2 by an OpenACC parallel region, followed by another context check.  */
+#pragma acc parallel copyin(h_X[0:N]), copy(h_Y2[0:N]) copyin(alpha)
+    {
+        int i;
+
+        for (i = 0; i < N; i++)
+        {
+            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+        }
+    }
+
+    context_check(pctx);
+    /* Accumulate the squared difference and the squared reference, then take square roots.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+    /* Fail if the reference norm is below 1e-7 or the error relative to it is 1e-6 or more.  */
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+    /* After acc_shutdown a context must still be current, and it must be the one recorded at the start.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&ctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!ctx)
+    {
+        fprintf(stderr, "Expected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx != ctx)
+    {
+        fprintf(stderr, "Unexpected new context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-3.c b/libgomp/testsuite/libgomp.oacc-c/context-3.c
new file mode 100644
index 0000000..85c7e7b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-3.c
@@ -0,0 +1,200 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int i;
+    /* Host loop: y[i] = a * x[i] + y[i] for every i in [0, n), updating y in place.  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless ctx1 equals both the current CUDA context and the one acc_get_current_cuda_context reports.  */
+    r = cuCtxGetCurrent(&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (ctx1 != ctx2)
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+    /* Second comparison: the context as seen through the OpenACC runtime.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cublasHandle_t h;
+    CUcontext pctx;
+    CUresult r;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 3 - OpenACC creates, cuBLAS shares.  */
+
+    acc_set_device_num(0, acc_device_nvidia);
+    /* Record the context that is current after acc_set_device_num; every later check compares against it.  */
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* Random inputs; h_Y2 starts as a copy of h_Y1 and later holds the host reference result.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+    /* Copy both inputs to the device through OpenACC before any cuBLAS call is made.  */
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Create the cuBLAS handle only now; the check that follows expects the context to be unchanged.  */
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Device computation through cuBLAS, on the pointers returned by acc_copyin.  */
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* Fetch the device result into h_Y1.  */
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Host reference result, computed into h_Y2.  */
+    saxpy(N, alpha, h_X, h_Y2);
+    /* Accumulate the squared difference and the squared reference, then take square roots.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+    /* Fail if the reference norm is below 1e-7 or the error relative to it is 1e-6 or more.  */
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* After acc_shutdown no CUDA context is expected to be current; pctx is reused to hold the query result.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx)
+    {
+        fprintf(stderr, "Unexpected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-4.c b/libgomp/testsuite/libgomp.oacc-c/context-4.c
new file mode 100644
index 0000000..7551cf2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-4.c
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+/* Host-side SAXPY: for each k in [0, n) store a * x[k] + y[k] into y[k].
+   x is only read; y is updated in place.  */
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int k;
+
+    for (k = 0; k < n; ++k)
+        y[k] = a * x[k] + y[k];
+}
+
+/* Exit with EXIT_FAILURE unless ctx1 is the current CUDA context, both as
+   reported by cuCtxGetCurrent and by acc_get_current_cuda_context.  */
+void
+context_check(CUcontext ctx1)
+{
+    CUcontext driver_ctx;
+    CUcontext acc_ctx;
+    CUresult rc = cuCtxGetCurrent(&driver_ctx);
+
+    if (rc != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", rc);
+        exit(EXIT_FAILURE);
+    }
+
+    if (driver_ctx != ctx1)
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+
+    acc_ctx = (CUcontext) acc_get_current_cuda_context();
+
+    if (acc_ctx != ctx1)
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+/* Test 4 - OpenACC creates the CUDA context, cuBLAS shares it.  Any API
+   failure, context change or numeric mismatch exits with EXIT_FAILURE.  */
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cublasHandle_t h;
+    CUcontext pctx;
+    CUresult r;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    acc_set_device_num(0, acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+
+#pragma acc parallel copyin(h_X[0:N]), copy(h_Y2[0:N]) copy(alpha)
+    {
+        int i;
+
+        for (i = 0; i < N; i++)
+        {
+            h_Y2[i] = alpha * h_X[i] + h_Y2[i];  /* Reference result.  */
+        }
+    }
+
+    r = cuCtxGetCurrent(&pctx);  /* Context every later check compares to.  */
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];  /* cuBLAS result minus reference.  */
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx)  /* No context may remain current after acc_shutdown.  */
+    {
+        fprintf(stderr, "Unexpected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/data-1.c b/libgomp/testsuite/libgomp.oacc-c/data-1.c
new file mode 100644
index 0000000..e7564cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/data-1.c
@@ -0,0 +1,188 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int i;
+
+/* 1 when ACC_MEM_SHARED is nonzero, else acc_is_present (p, n).  */
+int
+is_mapped (void *p, size_t n)
+{
+#if !ACC_MEM_SHARED
+  return acc_is_present (p, n);
+#endif
+  return 1;
+}
+
+int main(void)
+{
+  int j;  /* Local counterpart of the file-scope i; both go through every clause.  */
+
+  i = -1;
+  j = -2;
+#pragma acc data copyin (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)  /* Values assigned inside the region must survive it.  */
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data copyout (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+
+#pragma acc parallel present (i, j)
+    {
+      i = 4;
+      j = 2;
+    }
+  }
+  if (i != 4 || j != 2)  /* Values stored by the parallel region.  */
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data create (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_copyin (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_copyout (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+
+#pragma acc parallel present (i, j)
+    {
+      i = 4;
+      j = 2;
+    }
+  }
+  if (i != 4 || j != 2)  /* Values stored by the parallel region.  */
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_copy (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
+    abort ();
+#else
+  if (i != -1 || j != -2)  /* Expects the pre-region values back on the host.  */
+    abort ();
+#endif
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_create (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data copyin (i, j)
+  {
+#pragma acc data present (i, j)
+    {
+      if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+        abort ();
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+    }
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data
+  {
+#if !ACC_MEM_SHARED
+    if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j)))  /* No clause: nothing may be mapped.  */
+      abort ();
+#endif
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c
new file mode 100644
index 0000000..41f92d4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+int main(void)
+{
+#define A (void *) 0x123
+  void *a = A;
+  void *a_1, *a_2;
+
+  /* Copy a and take its address inside a region that has deviceptr(a).  */
+#pragma acc data copyout(a_1, a_2)
+#pragma acc kernels deviceptr(a)
+  {
+    a_1 = a;
+    a_2 = &a;
+  }
+
+  /* The pointer value itself must be the same before and after.  */
+  if (a != A || a_1 != a)
+    abort ();
+
+  /* &a may equal the host address only when ACC_MEM_SHARED is set.  */
+#if ACC_MEM_SHARED
+  if (a_2 != &a)
+#else
+  if (a_2 == &a)
+#endif
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c
new file mode 100644
index 0000000..683fefa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */
+
+#include "libgomp_g.h"
+
+extern void abort ();
+
+volatile int i;
+
+void
+f (void *data)
+{
+  if (i != -1)  /* i must still hold the value main stored before the call.  */
+    abort ();
+  i = 42;  /* Marker that main checks once GOACC_kernels returns.  */
+}
+
+int main(void)
+{
+  i = -1;  /* Sentinel that f requires on entry.  */
+  GOACC_kernels (0, f, (const void *) 0,
+		 0, (void *) 0, (void *) 0, (void *) 0,
+		 1, 1, 1, -2, -1);
+  if (i != 42)  /* Fails unless f ran and stored its marker.  */
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
new file mode 100644
index 0000000..232ce8a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */
+
+#include "libgomp_g.h"
+
+extern void abort ();
+
+volatile int i;
+
+void
+f (void *data)
+{
+  if (i != -1)  /* i must still hold the value main stored before the call.  */
+    abort ();
+  i = 42;  /* Marker that main checks once GOACC_parallel returns.  */
+}
+
+int main(void)
+{
+  i = -1;  /* Sentinel that f requires on entry.  */
+  GOACC_parallel (0, f, (const void *) 0,
+		  0, (void *) 0, (void *) 0, (void *) 0,
+		  1, 1, 1, -2, -1);
+  if (i != 42)  /* Fails unless f ran and stored its marker.  */
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/if-1.c b/libgomp/testsuite/libgomp.oacc-c/if-1.c
new file mode 100644
index 0000000..ef2d5fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/if-1.c
@@ -0,0 +1,537 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define N   32
+
+int
+main(int argc, char **argv)
+{
+    float *a, *b, *d_a, *d_b, exp;  /* exp: value b[] is compared against.  */
+    int i;
+    const int one = 1;
+    const int zero = 0;
+    int n;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    d_a = (float *) acc_malloc (N * sizeof (float));  /* Mapped to a by acc_map_data below.  */
+    d_b = (float *) acc_malloc (N * sizeof (float));  /* Mapped to b by acc_map_data below.  */
+
+    for (i = 0; i < N; i++)
+        a[i] = 4.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 5.0;
+#else
+    exp = 4.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+
+#pragma acc parallel if(0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 17.0)  /* a[i] + 1: the acc_device_host branch must have run.  */
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 8.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 9.0;
+#else
+    exp = 8.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+#pragma acc parallel if(zero)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 23.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 17.0;
+#else
+    exp = 16.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 76.0;
+
+#pragma acc parallel if(false)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 77.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+    n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 23.0;
+#else
+    exp = 22.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 18.0;
+
+    n = 0;
+
+#pragma acc parallel if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 19.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 49.0;
+
+    n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 50.0;
+#else
+    exp = 49.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 38.0;
+
+    n = 0;
+
+#pragma acc parallel if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 39.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 91.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 92.0;
+#else
+    exp = 91.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 43.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 44.0;
+#else
+    exp = 43.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 87.0;
+
+#pragma acc parallel if(one == 0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 88.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 9.0;
+    }
+
+    acc_map_data (a, d_a, N * sizeof (float));
+    acc_map_data (b, d_b, N * sizeof (float));
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 12.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N]) if(0)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)  /* 6.0 was never sent: that update device had if(0).  */
+            abort();
+
+        if (b[i] != 9.0)  /* Likewise 12.0 was never sent.  */
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 26.0;
+        b[i] = 21.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(0)
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 0.0)  /* update host had if(0): the host zeros must remain.  */
+            abort();
+
+        if (b[i] != 0.0)
+            abort();
+    }
+
+    acc_unmap_data (a);
+    acc_unmap_data (b);
+
+    acc_free (d_a);
+    acc_free (d_b);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1)
+{
+#pragma acc parallel present(a[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            b[ii] = a[ii];
+        }
+    }
+}
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 8.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0)
+{
+#if !ACC_MEM_SHARED
+    if (acc_is_present (a, N * sizeof (float)))  /* if(0): a must not be mapped.  */
+        abort ();
+#endif
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b, N * sizeof (float)))  /* if(0): b must not be mapped.  */
+        abort ();
+#endif
+}
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 18.0;
+        b[i] = 21.0;
+    }
+
+#pragma acc data copyin(a[0:N]) if(1)
+{
+#if !ACC_MEM_SHARED
+    if (!acc_is_present (a, N * sizeof (float)))
+        abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(0)
+    {
+#if !ACC_MEM_SHARED
+        if (acc_is_present (b, N * sizeof (float)))
+            abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(1)
+        {
+#pragma acc parallel present(a[0:N]) present(b[0:N])
+            {
+                int ii;
+
+                for (ii = 0; ii < N; ii++)
+                {
+                    b[ii] = a[ii];
+                }
+            }
+        }
+
+#if !ACC_MEM_SHARED
+        if (acc_is_present (b, N * sizeof (float)))  /* The inner if(1) mapping must be gone again.  */
+            abort ();
+#endif
+    }
+}
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 18.0)
+            abort ();
+	}
+
+#ifdef XXX_TODO_ENTER_END_DATA  /* Empty placeholder; no code is guarded yet.  */
+#endif
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c
new file mode 100644
index 0000000..64fea61
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c
@@ -0,0 +1,184 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+int i;
+
+int main(void)
+{
+  int j, v;  /* v: set to 1 inside each region to show that its body ran.  */
+
+#if 0  /* Disabled sub-tests; presumably awaiting missing GCC support -- confirm.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
+    abort ();
+#else
+  if (i != -1 || j != -2)  /* Copy-in only: host values expected unchanged.  */
+    abort ();
+#endif
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
+    abort ();
+#else
+  if (i != -1 || j != -2)  /* Nothing is copied back: host values expected unchanged.  */
+    abort ();
+#endif
+
+#if 0
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#endif
+
+#if 0
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#endif
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-1.c b/libgomp/testsuite/libgomp.oacc-c/lib-1.c
new file mode 100644
index 0000000..17129d8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+#if ACC_DEVICE_TYPE_nvidia
+  const acc_device_t kind = acc_device_nvidia;
+  /* Without an NVIDIA device there is nothing to initialize twice.  */
+  if (!acc_get_num_devices (kind))
+    return 0;
+#else
+  const acc_device_t kind = acc_device_host;
+#endif
+
+  /* Initialize the same device type twice in a row.  */
+  acc_init (kind);
+  acc_init (kind);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: device already active" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-10.c b/libgomp/testsuite/libgomp.oacc-c/lib-10.c
new file mode 100644
index 0000000..cf1af8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-10.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  void *dev_mem;
+
+#if ACC_DEVICE_TYPE_nvidia
+  const acc_device_t kind = acc_device_nvidia;
+
+  if (!acc_get_num_devices (acc_device_nvidia))
+    return 0;
+#else
+  const acc_device_t kind = acc_device_host;
+#endif
+
+  /* Zero-byte request after an explicit acc_init: NULL is required.  */
+  acc_init (kind);
+  dev_mem = acc_malloc (0);
+  if (NULL != dev_mem)
+    abort ();
+
+  /* Freeing a null device pointer is exercised as well.  */
+  acc_free (0);
+
+  acc_shutdown (kind);
+
+  /* The same zero-byte request after acc_set_device_type.  */
+  acc_set_device_type (kind);
+  dev_mem = acc_malloc (0);
+  if (NULL != dev_mem)
+    abort ();
+
+  acc_shutdown (kind);
+
+  /* A 1024-byte request after acc_init must give a non-null pointer.  */
+  acc_init (kind);
+  dev_mem = acc_malloc (1024);
+  if (!dev_mem)
+    abort ();
+
+  acc_free (dev_mem);
+  acc_shutdown (kind);
+
+  /* And likewise after acc_set_device_type.  */
+  acc_set_device_type (kind);
+  dev_mem = acc_malloc (1024);
+  if (!dev_mem)
+    abort ();
+
+  acc_free (dev_mem);
+  acc_shutdown (kind);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-11.c b/libgomp/testsuite/libgomp.oacc-c/lib-11.c
new file mode 100644
index 0000000..fdb53a8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-11.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 512;
+  void *d = acc_malloc (N);
+
+  if (d == NULL)
+    abort ();
+
+  /* Free an interior address; char * avoids GNU-only void * arithmetic.  */
+  acc_free ((char *) d + (N >> 1));
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: mem free failed 1" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-12.c b/libgomp/testsuite/libgomp.oacc-c/lib-12.c
new file mode 100644
index 0000000..1c3a127
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-12.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h = (unsigned char *) malloc (N);
+  int k;
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  /* Send the pattern to the device.  */
+  (void) acc_copyin (h, N);
+
+  /* Wipe the host copy so that only the device still has the pattern.  */
+  memset (h, 0, N);
+
+  /* The copy-out has to bring the pattern back to the host.  */
+  acc_copyout (h, N);
+
+  for (k = 0; k < N; k++)
+    {
+      if (h[k] != k)
+        abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-13.c b/libgomp/testsuite/libgomp.oacc-c/lib-13.c
new file mode 100644
index 0000000..7098ef3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-13.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h = (unsigned char *) malloc (N);
+  void *d;
+  int k;
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  d = acc_copyin (h, N);
+
+  /* The first byte of the mapped range must be reported as present.  */
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+
+  /* Ranges running past the end of the mapping must not be.  */
+  if (acc_is_present (h, N + 1) != 0
+      || acc_is_present (h + 1, N) != 0)
+    abort ();
+
+  /* Ranges starting one byte before the mapping must not be reported
+     as present either.  */
+  if (acc_is_present (h - 1, N) != 0
+      || acc_is_present (h - 1, N - 1) != 0)
+    abort ();
+
+  /* The same goes for ranges starting right after the last mapped
+     byte, including an empty one.  */
+  if (acc_is_present (h + N, 0) != 0
+      || acc_is_present (h + N, N) != 0)
+    abort ();
+
+  /* A null host address must not be reported as present.  */
+  if (acc_is_present (0, N) != 0)
+    abort ();
+
+  /* Nor may a zero-length query at the mapped address itself.  */
+  if (acc_is_present (h, 0) != 0)
+    abort ();
+
+  acc_free (d);
+
+  /* After acc_free on the device pointer the range must be gone.  */
+  if (acc_is_present (h, 1) != 0)
+    abort ();
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-14.c b/libgomp/testsuite/libgomp.oacc-c/lib-14.c
new file mode 100644
index 0000000..a9632f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-14.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  int k;
+
+  h = (unsigned char *) malloc (N);
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  d = acc_copyin (h, N);
+
+  /* The first and the last byte of the mapping must be present ...  */
+  if (acc_is_present (h, 1) != 1
+      || acc_is_present (h + N - 1, 1) != 1)
+    abort ();
+
+  /* ... while the bytes just before and just after it must not be.  */
+  if (acc_is_present (h - 1, 1) != 0
+      || acc_is_present (h + N, 1) != 0)
+    abort ();
+
+  /* Every single byte of the mapping must be present.  */
+  for (k = 0; k < N; k++)
+    {
+      if (acc_is_present (h + k, 1) != 1)
+        abort ();
+    }
+
+  /* So must every suffix [h + k, h + N) of it.  */
+  for (k = 0; k < N; k++)
+    {
+      if (acc_is_present (h + k, N - k) != 1)
+        abort ();
+    }
+
+  acc_free (d);
+
+  /* With the device memory freed, none of those suffixes may still be
+     reported as present.  */
+  for (k = 0; k < N; k++)
+    {
+      if (acc_is_present (h + k, N - k) != 0)
+        abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-15.c b/libgomp/testsuite/libgomp.oacc-c/lib-15.c
new file mode 100644
index 0000000..4f6a731
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-15.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h = (unsigned char *) malloc (N);
+  int k;
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  /* Map the buffer and immediately copy it out again.  */
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h, N);
+
+  /* After the copy-out no byte of the buffer may still be reported
+     as present.  */
+  for (k = 0; k < N; k++)
+    {
+      if (acc_is_present (h + k, 1) != 0)
+        abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-16.c b/libgomp/testsuite/libgomp.oacc-c/lib-16.c
new file mode 100644
index 0000000..9d277ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-16.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h = (unsigned char *) malloc (N);
+  int k;
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  /* Map the buffer once ...  */
+  (void) acc_copyin (h, N);
+
+  /* ... and a second time while that mapping still exists; the run is
+     expected to fail here (see the dg-shouldfail directive).  */
+  (void) acc_copyin (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-17.c b/libgomp/testsuite/libgomp.oacc-c/lib-17.c
new file mode 100644
index 0000000..5ff894c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-17.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h = (unsigned char *) malloc (N);
+  int k;
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  /* One copy-in matched by one copy-out.  */
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h, N);
+
+  /* A second copy-out of the same range; the run is expected to fail
+     here (see the dg-shouldfail directive).  */
+  acc_copyout (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-18.c b/libgomp/testsuite/libgomp.oacc-c/lib-18.c
new file mode 100644
index 0000000..2bc3263
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-18.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h = (unsigned char *) malloc (N);
+  void *d;
+  int k;
+
+  /* Host pattern: byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    h[k] = k;
+
+  d = acc_copyin (h, N);
+
+  /* Release the device memory directly through the device pointer.  */
+  acc_free (d);
+
+  /* Copy-out of the host range afterwards; the run is expected to fail
+     here (see the dg-shouldfail directive).  */
+  acc_copyout (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-19.c b/libgomp/testsuite/libgomp.oacc-c/lib-19.c
new file mode 100644
index 0000000..57b1221
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-19.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h[N];
+
+  /* Allocate N buffers, fill buffer i with the byte value i, map it.  */
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+
+      h[i] = (unsigned char *) malloc (N);
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+        {
+          p[j] = i;
+        }
+
+      (void) acc_copyin (p, N);
+    }
+
+  /* Clear all N bytes of every host buffer, so the check below can only
+     pass if acc_copyout really restores the whole buffer.  */
+  for (i = 0; i < N; i++)
+    memset (h[i], 0, N);
+
+  /* Copy each buffer back and verify its complete contents.  */
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+
+      acc_copyout (h[i], N);
+
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+        {
+          if (p[j] != i)
+            abort ();
+        }
+    }
+
+  for (i = 0; i < N; i++)
+    free (h[i]);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-2.c b/libgomp/testsuite/libgomp.oacc-c/lib-2.c
new file mode 100644
index 0000000..9a4501f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  acc_device_t devtype = acc_device_host;
+  /* Negative test: shut the same device type down twice.  */
+#if ACC_DEVICE_TYPE_nvidia
+  devtype = acc_device_nvidia;
+  /* NOTE(review): exiting 0 here contradicts dg-shouldfail -- confirm.  */
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+#endif
+
+  acc_init (devtype);
+
+  acc_shutdown (devtype);
+  /* Second shutdown of the same type; this call is expected to fail.  */
+  acc_shutdown (devtype);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: no device initialized" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-20.c b/libgomp/testsuite/libgomp.oacc-c/lib-20.c
new file mode 100644
index 0000000..b379a8f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-20.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  /* Negative test: copy out one byte more than was copied in.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+  /* N + 1 exceeds the N bytes copied in; this call is expected to fail.  */
+  acc_copyout (h, N + 1);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-21.c b/libgomp/testsuite/libgomp.oacc-c/lib-21.c
new file mode 100644
index 0000000..3a67400
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-21.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  /* Negative test: copy out with a zero length after an N-byte copyin.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+  /* Zero-length copyout; this call is expected to fail.  */
+  acc_copyout (h, 0);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-22.c b/libgomp/testsuite/libgomp.oacc-c/lib-22.c
new file mode 100644
index 0000000..2b86da8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-22.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  /* Negative test: copy out a sub-range that starts at h + 1.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+  /* Range differs from the one copied in; this call is expected to fail.  */
+  acc_copyout (h + 1, N - 1);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-23.c b/libgomp/testsuite/libgomp.oacc-c/lib-23.c
new file mode 100644
index 0000000..38f236d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-23.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h1, *h2;
+  /* Negative test: two N-byte copyins, then one copyout of 2 * N bytes.  */
+  h1 = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h1[i] = 0xab;
+    }
+
+  (void) acc_copyin (h1, N);
+
+  h2 = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h2[i] = 0xde;
+    }
+
+  (void) acc_copyin (h2, N);
+  /* N + N exceeds the N bytes copied in at h1; expected to fail.  */
+  acc_copyout (h1, N + N);
+
+  free (h1);
+  free (h2);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-24.c b/libgomp/testsuite/libgomp.oacc-c/lib-24.c
new file mode 100644
index 0000000..d7de8e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-24.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Create/delete the same block twice, checking presence of each byte.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+
+  acc_delete (h, N);
+  /* After the delete no byte may be reported present.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-25.c b/libgomp/testsuite/libgomp.oacc-c/lib-25.c
new file mode 100644
index 0000000..1145828
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-25.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: call acc_create twice on the same host block.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+  /* Same address and size again; this call is expected to fail.  */
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-26.c b/libgomp/testsuite/libgomp.oacc-c/lib-26.c
new file mode 100644
index 0000000..a23f56e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-26.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_create with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, 0);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-27.c b/libgomp/testsuite/libgomp.oacc-c/lib-27.c
new file mode 100644
index 0000000..074fddb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-27.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_create with a null host address.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (0, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\)\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-28.c b/libgomp/testsuite/libgomp.oacc-c/lib-28.c
new file mode 100644
index 0000000..027f7cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-28.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: create h, then call acc_delete on a null address.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+  /* Null address instead of h; this call is expected to fail.  */
+  acc_delete (0, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-29.c b/libgomp/testsuite/libgomp.oacc-c/lib-29.c
new file mode 100644
index 0000000..a66de0f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-29.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: create h, then call acc_delete with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+  /* Zero length instead of N; this call is expected to fail.  */
+  acc_delete (h, 0);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-3.c b/libgomp/testsuite/libgomp.oacc-c/lib-3.c
new file mode 100644
index 0000000..e823a41
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-3.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  acc_init (acc_device_host);
+  /* Negative test: shut down a different type than acc_init was given.  */
+  acc_shutdown (acc_device_not_host);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-30.c b/libgomp/testsuite/libgomp.oacc-c/lib-30.c
new file mode 100644
index 0000000..ce2bdb4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-30.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: delete only N - 2 bytes of an N-byte created block.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+  /* Shorter than the created range; this call is expected to fail.  */
+  acc_delete (h, N - 2);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-31.c b/libgomp/testsuite/libgomp.oacc-c/lib-31.c
new file mode 100644
index 0000000..25ce5a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-31.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* acc_present_or_create on a fresh block must leave it present.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_present_or_create (h, N);
+  if (!d)
+    abort ();
+
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-32.c b/libgomp/testsuite/libgomp.oacc-c/lib-32.c
new file mode 100755
index 0000000..e3f87a8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-32.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d1, *d2;
+  /* Repeated present_or_create/pcreate calls must return one pointer.  */
+  h = (unsigned char *) malloc (N);
+
+  d1 = acc_present_or_create (h, N);
+  if (!d1)
+    abort ();
+
+  d2 = acc_present_or_create (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+  /* Same check through the acc_pcreate spelling.  */
+  d2 = acc_pcreate (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-33.c b/libgomp/testsuite/libgomp.oacc-c/lib-33.c
new file mode 100755
index 0000000..4abaa02
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-33.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d1, *d2;
+  /* present_or_create of a shorter prefix must return the same pointer.  */
+  h = (unsigned char *) malloc (N);
+
+  d1 = acc_present_or_create (h, N);
+  if (!d1)
+    abort ();
+
+  d2 = acc_present_or_create (h, N - 2);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-34.c b/libgomp/testsuite/libgomp.oacc-c/lib-34.c
new file mode 100755
index 0000000..32d5d51
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-34.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d1, *d2;
+  /* Negative test: present_or_create of a range running past the block.  */
+  h = (unsigned char *) malloc (N);
+
+  d1 = acc_present_or_create (h, N);
+  if (!d1)
+    abort ();
+  /* Starts at h + 2 but still asks for N bytes; expected to fail.  */
+  d2 = acc_present_or_create (h + 2, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-35.c b/libgomp/testsuite/libgomp.oacc-c/lib-35.c
new file mode 100755
index 0000000..ca8edab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-35.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_present_or_create with a null host address.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_present_or_create (0, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-36.c b/libgomp/testsuite/libgomp.oacc-c/lib-36.c
new file mode 100755
index 0000000..cb29397
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-36.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_present_or_create with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_present_or_create (h, 0);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-37.c b/libgomp/testsuite/libgomp.oacc-c/lib-37.c
new file mode 100644
index 0000000..20bee1b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-37.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Round trip: present_or_copyin, clobber the host copy, copyout.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_present_or_copyin (h, N);
+  if (!d)
+    abort ();
+  /* Zero the host copy so the check below depends on the copyout.  */
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-38.c b/libgomp/testsuite/libgomp.oacc-c/lib-38.c
new file mode 100644
index 0000000..1211272
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-38.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d1, *d2;
+  /* A repeated present_or_copyin must return d1 and must not re-copy.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d1 = acc_present_or_copyin (h, N);
+  if (!d1)
+    abort ();
+  /* Change the host data; the copyout check below expects the old values.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+
+  d2 = acc_present_or_copyin (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+  /* NOTE(review): after the copyout above, d1 == d2 needs address reuse?  */
+  d2 = acc_pcopyin (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-39.c b/libgomp/testsuite/libgomp.oacc-c/lib-39.c
new file mode 100644
index 0000000..db1e0b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-39.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_present_or_copyin with a null host address.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_present_or_copyin (0, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-4.c b/libgomp/testsuite/libgomp.oacc-c/lib-4.c
new file mode 100644
index 0000000..84d2ace
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-4.c
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  acc_init (99);
+  /* Negative test: the acc_init (99) call above is expected to fail.  */
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 99 is out of range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-40.c b/libgomp/testsuite/libgomp.oacc-c/lib-40.c
new file mode 100644
index 0000000..cb6c422
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-40.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_present_or_copyin with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_present_or_copyin (h, 0);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-41.c b/libgomp/testsuite/libgomp.oacc-c/lib-41.c
new file mode 100644
index 0000000..01c5f3c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-41.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Change host data after copyin, update the device, verify by copyout.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+  /* The copyout below is expected to return these 0xab values.  */
+  acc_update_device (h, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-42.c b/libgomp/testsuite/libgomp.oacc-c/lib-42.c
new file mode 100644
index 0000000..d577fe3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-42.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  /* Negative test: acc_update_device on a block never copied in.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+  /* No copyin or create precedes this call; it is expected to fail.  */
+  acc_update_device (h, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-43.c b/libgomp/testsuite/libgomp.oacc-c/lib-43.c
new file mode 100644
index 0000000..ceeb155
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-43.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_update_device with a null host address.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+  /* Null address instead of h; this call is expected to fail.  */
+  acc_update_device (0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-44.c b/libgomp/testsuite/libgomp.oacc-c/lib-44.c
new file mode 100644
index 0000000..0cabb0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-44.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_update_device with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+  /* Zero length instead of N; this call is expected to fail.  */
+  acc_update_device (h, 0);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-45.c b/libgomp/testsuite/libgomp.oacc-c/lib-45.c
new file mode 100644
index 0000000..82cbc5e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-45.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Partial update: only the first N - 2 bytes go through update_device.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+
+  acc_update_device (h, N - 2);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N - 2; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+  /* The last two bytes must come back with their original copyin values.  */
+  for (i = N - 2; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-46.c b/libgomp/testsuite/libgomp.oacc-c/lib-46.c
new file mode 100644
index 0000000..36faa28
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-46.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* acc_update_self must restore a host copy clobbered after copyin.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+  /* Zero the host copy so the check below depends on acc_update_self.  */
+  memset (&h[0], 0, N);
+
+  acc_update_self (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-47.c b/libgomp/testsuite/libgomp.oacc-c/lib-47.c
new file mode 100644
index 0000000..a7ff904
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-47.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_update_self with a null host address.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+  /* Null address instead of h; this call is expected to fail.  */
+  acc_update_self (0, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-48.c b/libgomp/testsuite/libgomp.oacc-c/lib-48.c
new file mode 100644
index 0000000..01d3c6c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-48.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_update_self with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+  /* Zero length instead of N; this call is expected to fail.  */
+  acc_update_self (h, 0);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-49.c b/libgomp/testsuite/libgomp.oacc-c/lib-49.c
new file mode 100644
index 0000000..e0c5d2a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-49.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Partial acc_update_self: only the first N - 2 host bytes come back.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_update_self (h, N - 2);
+
+  for (i = 0; i < N - 2; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+  /* The last two bytes were not updated and must still be the memset 0.  */
+  for (i = N - 2; i < N; i++)
+    {
+      if (h[i] != 0)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-5.c b/libgomp/testsuite/libgomp.oacc-c/lib-5.c
new file mode 100644
index 0000000..961a62c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-5.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+  /* The reported type must not be acc_device_default after init either.  */
+  acc_init (acc_device_default);
+
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+
+  acc_shutdown (acc_device_default);
+  /* The remaining checks only run when an nvidia device is reported.  */
+  if (acc_get_num_devices (acc_device_nvidia) != 0)
+    {
+      acc_init (acc_device_nvidia);
+
+      if (acc_get_device_type () != acc_device_nvidia)
+        abort ();
+
+      acc_shutdown (acc_device_nvidia);
+
+      acc_init (acc_device_default);
+
+      acc_set_device_type (acc_device_nvidia);
+
+      if (acc_get_device_type () != acc_device_nvidia)
+        abort ();
+
+      acc_shutdown (acc_device_nvidia);
+    }
+
+  return 0;
+
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-50.c b/libgomp/testsuite/libgomp.oacc-c/lib-50.c
new file mode 100644
index 0000000..662309b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-50.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Map h to an acc_malloc'ed block by hand and check it is present.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  if (acc_is_present (h, N) != 1)
+    abort ();
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-51.c b/libgomp/testsuite/libgomp.oacc-c/lib-51.c
new file mode 100644
index 0000000..38a3bb8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-51.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h[N];
+  void *d[N];
+  /* Map N independent host/device block pairs, then unmap them in turn.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = (unsigned char *) malloc (N);
+      d[i] = acc_malloc (N);
+
+      acc_map_data (h[i], d[i], N);
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h[i], N) != 1)
+	abort ();
+    }
+  /* Each block must stop being present as soon as it is unmapped.  */
+  for (i = 0; i < N; i++)
+    {
+      acc_unmap_data (h[i]);
+
+      if (acc_is_present (h[i], N) != 0)
+	abort ();
+
+      acc_free (d[i]);
+      free (h[i]);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-52.c b/libgomp/testsuite/libgomp.oacc-c/lib-52.c
new file mode 100644
index 0000000..780db31
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-52.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_map_data with a null host address.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+  /* Null host address instead of h; this call is expected to fail.  */
+  acc_map_data (0, d, N);
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\]->\[\h+,\+256\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-53.c b/libgomp/testsuite/libgomp.oacc-c/lib-53.c
new file mode 100644
index 0000000..657adde
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-53.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_map_data with a null device address.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+  /* Null device address instead of d; this call is expected to fail.  */
+  acc_map_data (h, 0, N);
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[\(nil\),\+256\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-54.c b/libgomp/testsuite/libgomp.oacc-c/lib-54.c
new file mode 100644
index 0000000..1f3df80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-54.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_map_data with a zero length.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+  /* Zero length instead of N; this call is expected to fail.  */
+  acc_map_data (h, d, 0);
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-55.c b/libgomp/testsuite/libgomp.oacc-c/lib-55.c
new file mode 100644
index 0000000..bf49e0a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-55.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  int i;
+  void *d;
+  /* Map h to d one byte at a time, then unmap it the same way.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      acc_map_data (h + i, d + i, 1);
+    }
+  /* Every single-byte mapping must be reported present.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      acc_unmap_data (h + i);
+    }
+  /* After unmapping, no byte of h may be reported present.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-56.c b/libgomp/testsuite/libgomp.oacc-c/lib-56.c
new file mode 100644
index 0000000..4ebb06d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-56.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Map only the first half of h; the second half must not be present.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N >> 1);
+
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+  /* h + N/2 is the first byte past the mapped half.  */
+  if (acc_is_present (h + (N >> 1), 1) != 0)
+    abort ();
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-57.c b/libgomp/testsuite/libgomp.oacc-c/lib-57.c
new file mode 100644
index 0000000..f9043a4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-57.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_unmap_data is given d where h was mapped.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+  /* d is the acc_malloc result, not the host block; expected to fail.  */
+  acc_unmap_data (d);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-58.c b/libgomp/testsuite/libgomp.oacc-c/lib-58.c
new file mode 100644
index 0000000..9d6e27d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-58.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_unmap_data with a null address.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+  /* Null address instead of h; this call is expected to fail.  */
+  acc_unmap_data (0);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-59.c b/libgomp/testsuite/libgomp.oacc-c/lib-59.c
new file mode 100644
index 0000000..352962d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-59.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* acc_hostptr/acc_deviceptr must translate each byte while h is mapped.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_hostptr (d + i) != h + i)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_deviceptr (h + i) != d + i)
+	abort ();
+    }
+
+  acc_unmap_data (h);
+  /* Once unmapped, both lookups must return 0 for every byte.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_hostptr (d + i) != 0)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_deviceptr (h + i) != 0)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-6.c b/libgomp/testsuite/libgomp.oacc-c/lib-6.c
new file mode 100644
index 0000000..afdd480
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-6.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  int devnum;
+  /* Exercise device-type selection across set and shutdown calls.  */
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+  /* Without an nvidia device there is nothing more to check.  */
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  acc_set_device_type (acc_device_nvidia);
+
+  if (acc_get_device_type () != acc_device_nvidia)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+  /* Selecting the type again after a shutdown must still take effect.  */
+  acc_set_device_type (acc_device_nvidia);
+
+  if (acc_get_device_type () != acc_device_nvidia)
+    abort ();
+  /* Exactly one host device must be reported.  */
+  devnum = acc_get_num_devices (acc_device_host);
+  if (devnum != 1)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-60.c b/libgomp/testsuite/libgomp.oacc-c/lib-60.c
new file mode 100644
index 0000000..e1f2f43
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-60.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Round-trip one buffer through acc_memcpy_to_device/_from_device.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+  /* The test requires that the copy made no host byte "present".  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+  /* Clear the host buffer so only the copy back can restore the data.  */
+  memset (&h[0], 0, N);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-61.c b/libgomp/testsuite/libgomp.oacc-c/lib-61.c
new file mode 100644
index 0000000..02fd8b6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-61.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h[N];
+  void *d[N];
+  /* Round-trip N separate buffers through the device copy routines.  */
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+      /* Host buffer i is filled with the byte value i.  */
+      h[i] = (unsigned char *) malloc (N);
+
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  p[j] = i;
+	}
+
+      d[i] = acc_malloc (N);
+
+      acc_memcpy_to_device (d[i], h[i], N);
+      /* The test requires that the copy made no host byte "present".  */
+      for (j = 0; j < N; j++)
+	{
+	  if (acc_is_present (h[i] + j, 1) != 0)
+	    abort ();
+	}
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+      /* Clear the host copy so only the copy back can restore the data.  */
+      memset (h[i], 0, N);
+
+      acc_memcpy_from_device (h[i], d[i], N);
+
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)
+	    abort ();
+	}
+
+      for (j = 0; j < N; j++)
+	{
+	  if (acc_is_present (h[i] + j, 1) != 0)
+	    abort ();
+	}
+
+      acc_free (d[i]);
+
+      free (h[i]);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-62.c b/libgomp/testsuite/libgomp.oacc-c/lib-62.c
new file mode 100644
index 0000000..e6178e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-62.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_memcpy_to_device with an oversized byte count.  */
+  acc_init (acc_device_nvidia);
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* 2 * N bytes exceeds both N-byte buffers: expected to fail (dg-shouldfail).  */
+  acc_memcpy_to_device (d, h, N << 1);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid size" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-63.c b/libgomp/testsuite/libgomp.oacc-c/lib-63.c
new file mode 100644
index 0000000..ca237ec
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-63.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_memcpy_to_device with a null device pointer.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* Null device destination: the run is expected to fail (dg-shouldfail).  */
+  acc_memcpy_to_device (0, h, N);
+
+  memset (&h[0], 0, N);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-64.c b/libgomp/testsuite/libgomp.oacc-c/lib-64.c
new file mode 100644
index 0000000..850fd2e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-64.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_memcpy_to_device with a null host pointer.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* Null host source: the run is expected to fail (dg-shouldfail).  */
+  acc_memcpy_to_device (d, 0, N);
+
+  memset (&h[0], 0, N);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-65.c b/libgomp/testsuite/libgomp.oacc-c/lib-65.c
new file mode 100644
index 0000000..26c8cef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-65.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: the same pointer is passed as device and host address.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* D is used as the host source too: expected to fail (dg-shouldfail).  */
+  acc_memcpy_to_device (d, d, N);
+
+  memset (&h[0], 0, N);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host or device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-66.c b/libgomp/testsuite/libgomp.oacc-c/lib-66.c
new file mode 100644
index 0000000..360c05b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-66.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* A zero-length acc_memcpy_to_device must leave the device data intact.  */
+  acc_init (acc_device_nvidia);
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* Copy zero bytes of the cleared buffer; the device must keep 0..N-1.  */
+  acc_memcpy_to_device (d, h, 0);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-67.c b/libgomp/testsuite/libgomp.oacc-c/lib-67.c
new file mode 100644
index 0000000..01b8b2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-67.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_memcpy_from_device with a null host pointer.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* Null host destination: the run is expected to fail (dg-shouldfail).  */
+  acc_memcpy_from_device (0, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-68.c b/libgomp/testsuite/libgomp.oacc-c/lib-68.c
new file mode 100644
index 0000000..3ff5bd7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-68.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: acc_memcpy_from_device with a null device pointer.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* Null device source: the run is expected to fail (dg-shouldfail).  */
+  acc_memcpy_from_device (h, 0, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-69.c b/libgomp/testsuite/libgomp.oacc-c/lib-69.c
new file mode 100644
index 0000000..7cac954
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-69.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test (0) while a delay kernel runs and after it ends.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* No CUDA stream may be associated with async 0 before one is set.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+  /* Launch the delay kernel on the stream now bound to async 0.  */
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  if (acc_async_test (0) != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+  /* Sleep past the requested delay (same expression as the sibling tests).  */
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test (0) != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-7.c b/libgomp/testsuite/libgomp.oacc-c/lib-7.c
new file mode 100644
index 0000000..e78734b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-7.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_none) != 0)  /* "none": must be 0.  */
+    abort ();
+  /* At least one host device must be reported.  */
+  if (acc_get_num_devices (acc_device_host) == 0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-70.c b/libgomp/testsuite/libgomp.oacc-c/lib-70.c
new file mode 100644
index 0000000..e20e278
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-70.c
@@ -0,0 +1,143 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  const int N = 10;
+  int i;
+  CUstream streams[N];
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test on N asyncs, each bound to its own CUDA stream.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* Asyncs 0..N-1 must start without a stream; give each a new one.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+        if (!acc_set_cuda_stream (i, streams[i]))
+	  abort ();
+    }
+  /* Launch one delay kernel per stream; each async must still be busy.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      if (acc_async_test (i) != 0)
+	{
+	  fprintf (stderr, "asynchronous operation not running\n");
+	  abort ();
+	}
+    }
+  /* Sleep past the requested delay; every async must then test complete.  */
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_async_test (i) != 1)
+	{
+	  fprintf (stderr, "found asynchronous operation still running\n");
+	  abort ();
+	}
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-71.c b/libgomp/testsuite/libgomp.oacc-c/lib-71.c
new file mode 100644
index 0000000..a55dfbf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-71.c
@@ -0,0 +1,127 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Negative test: acc_async_test on an async that was never set up.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Only async 0 is given a stream; async 1 is never set up.  */
+  acc_set_cuda_stream (0, stream);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* Testing async 1: the run is expected to fail (see dg-shouldfail).  */
+  if (acc_async_test (1) != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test (1) != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-72.c b/libgomp/testsuite/libgomp.oacc-c/lib-72.c
new file mode 100644
index 0000000..bb84b6f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-72.c
@@ -0,0 +1,128 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test_all while one delay kernel runs and after.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+  /* Launch the delay kernel on the stream now bound to async 0.  */
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  if (acc_async_test_all () != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+  /* Sleep past the requested delay; all asyncs must then test complete.  */
+  sleep ((int) (dtime / 1000.f) + 1);
+
+  if (acc_async_test_all () != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-73.c b/libgomp/testsuite/libgomp.oacc-c/lib-73.c
new file mode 100644
index 0000000..328a8aa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-73.c
@@ -0,0 +1,141 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  const int N = 10;
+  int i;
+  CUstream streams[N];
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test_all with delay kernels queued on N streams.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* Asyncs 0..N-1 must start without a stream; give each a new one.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+  /* Launch one delay kernel on every stream.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+    }
+  /* Right after the launches the kernels must still be running.  */
+  if (acc_async_test_all () != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+  /* Sleep past the requested delay; everything must then be complete.  */
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test_all () != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-74.c b/libgomp/testsuite/libgomp.oacc-c/lib-74.c
new file mode 100644
index 0000000..1b70b4b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-74.c
@@ -0,0 +1,146 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Time acc_wait (0) with a delay kernel pending, then with none.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* No CUDA stream may be associated with async 0 before one is set.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+  /* Timer 0 brackets the kernel launch plus the wait on async 0.  */
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  acc_wait (0);
+
+  atime = stop_timer (0);
+  /* NOTE(review): assumes stop_timer uses dtime's unit; confirm in timer.h.  */
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+  /* A second wait is timed with no new work launched on the stream.  */
+  start_timer (0);
+
+  acc_wait (0);
+
+  atime = stop_timer (0);
+
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-75.c b/libgomp/testsuite/libgomp.oacc-c/lib-75.c
new file mode 100644
index 0000000..4381b3e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-75.c
@@ -0,0 +1,148 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Time N launch-then-acc_wait rounds on one stream against N * dtime.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Load the PTX module and look up its "delay" and "sum" functions.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Delay in device clock ticks, assuming dtime is ms and clkrate is kHz.  */
+  dticks = (unsigned long) (dtime * clkrate);
+  /* One launch/wait round per multiprocessor.  */
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* No CUDA stream may be associated with async 0 before one is set.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+  /* Each round launches one delay kernel and waits for async 0 to drain.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      acc_wait (0);
+    }
+
+  atime = stop_timer (0);
+  /* Accept a total within 2% either side of N * dtime.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-76.c b/libgomp/testsuite/libgomp.oacc-c/lib-76.c
new file mode 100644
index 0000000..b3527e5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-76.c
@@ -0,0 +1,154 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  /* Time N delay kernels run back to back, one per async queue.  */
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N, i;
+  CUstream *streams;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+  /* One array element and one async queue per multiprocessor.  */
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Scale the delay by the device clock rate to get a tick count.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Async queues 0..N-1 must start without a stream; give each its own.  */
+  streams = malloc (N * sizeof (CUstream));
+
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+  /* Waiting inside the loop serializes the kernels: total is N * dtime.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      acc_wait (i);
+    }
+
+  atime = stop_timer (0);
+  /* Accept a 2% deviation either way from the ideal N * dtime.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (streams);
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-77.c b/libgomp/testsuite/libgomp.oacc-c/lib-77.c
new file mode 100644
index 0000000..acc1b14
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-77.c
@@ -0,0 +1,142 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  /* Waiting on an async queue that has no stream must be a runtime error.  */
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate, devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  acc_set_cuda_stream (0, stream);
+
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* Only queue 0 has a stream; the runtime should reject queue 1 here.  */
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-78.c b/libgomp/testsuite/libgomp.oacc-c/lib-78.c
new file mode 100644
index 0000000..76c54ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-78.c
@@ -0,0 +1,147 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* acc_wait_all must block for a pending kernel, then return at once.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Scale the delay by the device clock rate to get a tick count.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* Async queue 0 must start without a stream; install a new one on it.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* First wait: must last at least as long as the kernel's delay.  */
+  acc_wait_all ();
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+  /* Second wait: nothing new was launched, so it must be near-instant.  */
+  start_timer (0);
+
+  acc_wait_all ();
+
+  atime = stop_timer (0);
+
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-79.c b/libgomp/testsuite/libgomp.oacc-c/lib-79.c
new file mode 100644
index 0000000..625acb6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-79.c
@@ -0,0 +1,174 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  /* Check acc_wait_async: queue 1 must not finish before queue 0's work.  */
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N, i;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  /* Ask the runtime which device it picked; the CUDA driver calls below
+     are issued against that same device.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Scale the delay by the device clock rate to get a tick count.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Queue 1 gets a stream but no kernels of its own.  */
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (1, stream))
+    abort ();
+  /* Queue 0 gets a second stream; every kernel below is launched on it.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+    }
+  /* Queue 1 now waits on queue 0, so neither may report completion yet.  */
+  acc_wait_async (0, 1);
+
+  if (acc_async_test (0) != 0)
+    abort ();
+
+  if (acc_async_test (1) != 0)
+    abort ();
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (acc_async_test (0) != 1)
+    abort ();
+
+  if (acc_async_test (1) != 1)
+    abort ();
+  /* Accept a 2% deviation either way from the ideal N * dtime.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-80.c b/libgomp/testsuite/libgomp.oacc-c/lib-80.c
new file mode 100644
index 0000000..0c284de
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-80.c
@@ -0,0 +1,139 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  int N;
+  int i;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Expected-failure test: see the dg-shouldfail directive after main.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+  /* Scale the delay by the device clock rate to get a tick count.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* All kernels below run on one new stream, installed on async queue 1.  */
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+  acc_set_cuda_stream (1, stream);
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+    }
+  /* Both arguments name queue 1; the runtime is expected to reject this.  */
+  acc_wait_async (1, 1);
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: identical parameters" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-81.c b/libgomp/testsuite/libgomp.oacc-c/lib-81.c
new file mode 100644
index 0000000..466eac4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-81.c
@@ -0,0 +1,218 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream *streams, stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Exercise acc_wait_all_async with N busy queues, then with idle ones.  */
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 500.0;
+  /* Scale the delay by the device clock rate to get a tick count.  */
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Async queues 0..N-1 must start without a stream; give each its own.  */
+  streams = malloc (N * sizeof (void *));
+
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+        if (!acc_set_cuda_stream (i, streams[i]))
+	  abort ();
+    }
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* Queue N receives no kernels; it is only the target of the waits.  */
+  stream = acc_get_cuda_stream (N);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (N, stream))
+    abort ();
+
+  start_timer (0);
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+    }
+  /* While the kernels run, no queue (including N) may report completion.  */
+  acc_wait_all_async (N);
+
+  for (i = 0; i <= N; i++)
+    {
+      if (acc_async_test (i) != 0)
+	abort ();
+    }
+
+  acc_wait (N);
+
+  for (i = 0; i <= N; i++)
+    {
+      if (acc_async_test (i) != 1)
+	abort ();
+    }
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+  /* Nothing new is launched from here on; both waits below must be quick.  */
+  start_timer (0);
+
+  stream = acc_get_cuda_stream (N + 1);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (N + 1, stream))
+    abort ();
+
+  acc_wait_all_async (N + 1);
+
+  acc_wait (N + 1);
+
+  atime = stop_timer (0);
+
+  if (0.10 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  acc_wait_all_async (N);
+
+  acc_wait (N);
+
+  atime = stop_timer (0);
+
+  if (0.10 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (streams);
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-82.c b/libgomp/testsuite/libgomp.oacc-c/lib-82.c
new file mode 100644
index 0000000..1064c37
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-82.c
@@ -0,0 +1,151 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  /* Launch one delay2 kernel per async queue, then check each result.  */
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N, i;
+  CUstream *streams;
+  unsigned long **a, **d_a, *tid, ticks;
+  int nbytes;
+  void *kargs[3];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay2");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+  /* Each result slot a[i] is one unsigned long; size the device copy alike.  */
+  nbytes = sizeof (unsigned long);
+
+  ticks = (unsigned long) (200.0 * clkrate);
+
+  N = nprocs;
+
+  streams = malloc (N * sizeof (CUstream));
+
+  a = malloc (N * sizeof (unsigned long *));
+  d_a = malloc (N * sizeof (unsigned long *));
+  tid = malloc (N * sizeof (unsigned long));
+  /* Per queue: a host slot preset to N, a device slot, and a new stream.  */
+  for (i = 0; i < N; i++)
+    {
+      a[i] = malloc (sizeof (unsigned long));
+      *a[i] = N;
+      d_a[i] = acc_malloc (nbytes);
+      tid[i] = i;
+
+      acc_map_data (a[i], d_a[i], nbytes);
+
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      kargs[0] = (void *) &d_a[i];
+      kargs[1] = (void *) &ticks;
+      kargs[2] = (void *) &tid[i];
+
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+      /* Only the first kernel gets the long delay; the rest a shorter one.  */
+      ticks = (unsigned long) (50.0 * clkrate);
+    }
+
+  acc_wait_all_async (0);
+  /* After copyout each host slot must hold its own queue index.  */
+  for (i = 0; i < N; i++)
+    {
+      acc_copyout (a[i], nbytes);
+      if (*a[i] != i)
+	abort ();
+    }
+
+  free (streams);
+
+  for (i = 0; i < N; i++)
+    {
+      free (a[i]);
+    }
+
+  free (a);
+  free (d_a);
+  free (tid);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-83.c b/libgomp/testsuite/libgomp.oacc-c/lib-83.c
new file mode 100644
index 0000000..4b5076b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-83.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  float atime;
+  CUstream stream;
+  CUresult r;
+  /* With no work queued, acc_wait_all_async + acc_wait must be quick.  */
+  acc_init (acc_device_nvidia);
+
+  (void) acc_get_device_num (acc_device_nvidia);
+
+  init_timers (1);
+  /* Async queue 0 must start without a stream; install a new one on it.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  start_timer (0);
+
+  acc_wait_all_async (0);
+
+  acc_wait (0);
+
+  atime = stop_timer (0);
+  /* Fail if the two waits together exceeded the 0.010 timer-unit limit.  */
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  fini_timers ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-84.c b/libgomp/testsuite/libgomp.oacc-c/lib-84.c
new file mode 100644
index 0000000..6c3469b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-84.c
@@ -0,0 +1,66 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  /* Install a new stream on each of N async queues; all must differ.  */
+  const int N = 100;
+  int i;
+  CUstream *streams, s;
+  CUresult r;
+
+  acc_init (acc_device_nvidia);
+
+  (void) acc_get_device_num (acc_device_nvidia);
+
+  streams = malloc (N * sizeof (CUstream));
+
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+  /* Every stream handle must occur exactly once in the array.  */
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      int cnt;
+
+      cnt = 0;
+
+      s = streams[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (s == streams[j])
+	    cnt++;
+	}
+
+      if (cnt != 1)
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-85.c b/libgomp/testsuite/libgomp.oacc-c/lib-85.c
new file mode 100644
index 0000000..b35b195
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-85.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <stdio.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  /* Installing a NULL stream on an async queue must be rejected.  */
+  const int N = 100;
+  int i;
+  CUstream *streams, s;
+  CUresult r;
+
+  acc_init (acc_device_nvidia);
+
+  (void) acc_get_device_num (acc_device_nvidia);
+
+  streams = malloc (N * sizeof (CUstream));
+
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+
+  s = NULL;
+  /* Queue N + 1 has no stream yet; a NULL one must make the call return 0.  */
+  if (acc_set_cuda_stream (N + 1, s) != 0)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-86.c b/libgomp/testsuite/libgomp.oacc-c/lib-86.c
new file mode 100644
index 0000000..b8a8ee9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-86.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+  /* Before any acc_init the current CUDA device must read as zero.  */
+  if (acc_get_current_cuda_device () != 0)
+    abort ();
+
+  acc_init (acc_device_host);
+  /* Initializing the host device must not change that.  */
+  if (acc_get_current_cuda_device () != 0)
+    abort ();
+
+  acc_shutdown (acc_device_host);
+  /* NOTE(review): repeats the device-count check made at the top of main.  */
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  if (acc_get_current_cuda_device () != 0)
+    abort ();
+
+  acc_init (acc_device_nvidia);
+  /* With the NVIDIA device initialized the result must be non-zero...  */
+  if (acc_get_current_cuda_device () == 0)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+  /* ...and zero again once it has been shut down.  */
+  if (acc_get_current_cuda_device () != 0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-87.c b/libgomp/testsuite/libgomp.oacc-c/lib-87.c
new file mode 100644
index 0000000..147d443
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-87.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+  /* Before any acc_init the current CUDA context must read as zero.  */
+  if (acc_get_current_cuda_context () != 0)
+    abort ();
+
+  acc_init (acc_device_host);
+  /* Initializing the host device must not change that.  */
+  if (acc_get_current_cuda_context () != 0)
+    abort ();
+
+  acc_shutdown (acc_device_host);
+  /* NOTE(review): repeats the device-count check made at the top of main.  */
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  if (acc_get_current_cuda_context () != 0)
+    abort ();
+
+  acc_init (acc_device_nvidia);
+  /* With the NVIDIA device initialized the result must be non-zero...  */
+  if (acc_get_current_cuda_context () == 0)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+  /* ...and zero again once it has been shut down.  */
+  if (acc_get_current_cuda_context () != 0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-88.c b/libgomp/testsuite/libgomp.oacc-c/lib-88.c
new file mode 100644
index 0000000..3af4f54
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-88.c
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char *x;
+void *d_x;
+const int N = 256;
+
+static void *
+test (void *arg)
+{
+  int i;
+  /* Thread body: verify the device copy of x, then replace its contents.  */
+  if (acc_get_current_cuda_context () != NULL)
+    abort ();
+
+  if (acc_is_present (x, N) != 1)
+    abort ();
+
+  memset (x, 0, N);
+  /* The copyout must overwrite the zeroed host buffer with 0..N-1.  */
+  acc_copyout (x, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (x[i] != i)
+	abort ();
+
+      x[i] = N - i - 1;
+    }
+  /* Copy the reversed sequence back in for main to check after the join.  */
+  d_x = acc_copyin (x, N);
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  const int nthreads = 1;
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+  /* Share a device copy of x with a worker thread and verify its update.  */
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  acc_init (acc_device_nvidia);
+
+  x = malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      x[i] = i;
+    }
+  /* Put x on the device before the worker thread starts.  */
+  d_x = acc_copyin (x, N);
+
+  if (acc_is_present (x, N) != 1)
+    abort ();
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t));
+
+  for (i = 0; i < nthreads; i++)
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+
+  for (i = 0; i < nthreads; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+  /* The worker copied x back in, so it must still be present.  */
+  if (acc_is_present (x, N) != 1)
+    abort ();
+  /* Clear the host copy so the check below sees only copied-out data.  */
+  memset (x, 0, N);
+
+  acc_copyout (x, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (x[i] != N - i - 1)
+	abort ();
+    }
+
+  if (acc_is_present (x, N) != 0)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-89.c b/libgomp/testsuite/libgomp.oacc-c/lib-89.c
new file mode 100644
index 0000000..4cc97ce
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-89.c
@@ -0,0 +1,118 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+  int i;
+  int tid;
+  unsigned char *p;
+  int devnum;
+  /* Thread body: arg carries this thread's index as an integer.  */
+  tid = (int) (long) arg;
+  /* Select the current NVIDIA device number explicitly for this thread.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+  acc_set_device_num (devnum, acc_device_nvidia);
+
+  if (acc_get_current_cuda_context () == NULL)
+    abort ();
+  /* Fill a private N-byte buffer with the thread index.  */
+  p = malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      p[i] = tid;
+    }
+  /* Publish the host buffer and its device pointer in this thread's slots.  */
+  x[tid] = p;
+
+  d_x[tid] = acc_copyin (p, N);
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+  /* Nothing to test without an NVIDIA device; exit successfully.  */
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  acc_init (acc_device_nvidia);
+  /* One pointer slot per thread: size by element type, not by buffer length N.  */
+  x = malloc (NTHREADS * sizeof (*x));
+  d_x = malloc (NTHREADS * sizeof (*d_x));
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+  /* Each worker receives its index and fills x[index] / d_x[index].  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+  /* Wait for every worker; the value returned through res is not examined.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+  /* Every buffer mapped by a worker must be present from the main thread.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (acc_is_present (x[i], N) != 1)
+	abort ();
+    }
+  /* Zero each host buffer, then copy out so the data must come from the device.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      memset (x[i], 0, N);
+      acc_copyout (x[i], N);
+    }
+  /* Buffer i must hold the index of the thread that filled it and be unmapped.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      unsigned char *p;
+      int j;
+
+      p = x[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)
+	    abort ();
+	}
+
+      if (acc_is_present (x[i], N) != 0)
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-9.c b/libgomp/testsuite/libgomp.oacc-c/lib-9.c
new file mode 100644
index 0000000..c2146ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-9.c
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int num_devices;
+  int devnum;
+  acc_device_t devtype = acc_device_host;
+  /* Use the NVIDIA device type when the ACC_DEVICE_TYPE_nvidia macro is set.  */
+#if ACC_DEVICE_TYPE_nvidia
+  devtype = acc_device_nvidia;
+#endif
+
+  num_devices = acc_get_num_devices (devtype);
+  if (num_devices == 0)
+    return 0;
+
+  acc_init (devtype);
+  /* After an explicit init, each device number set must be read back.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      acc_set_device_num (i, devtype);
+      devnum = acc_get_device_num (devtype);
+      if (devnum != i)
+	abort ();
+    }
+
+  acc_shutdown (devtype);
+  /* Repeat the same checks after shutdown, this time without calling acc_init.  */
+  num_devices = acc_get_num_devices (devtype);
+  if (num_devices == 0)
+    abort ();
+
+  for (i = 0; i < num_devices; i++)
+    {
+      acc_set_device_num (i, devtype);
+      devnum = acc_get_device_num (devtype);
+      if (devnum != i)
+	abort ();
+    }
+
+  acc_shutdown (devtype);
+  /* Final round: explicit init again, then select device 0 and, if any, 1.  */
+  acc_init (devtype);
+
+  acc_set_device_num (0, devtype);
+
+  devnum = acc_get_device_num (devtype);
+  if (devnum != 0)
+    abort();
+
+  if (num_devices > 1)
+    {
+      acc_set_device_num (1, 0); /* NOTE(review): type 0, not devtype - confirm intended.  */
+
+      devnum = acc_get_device_num (devtype);
+      if (devnum != 1)
+	abort();
+  }
+
+  acc_shutdown (devtype);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-90.c b/libgomp/testsuite/libgomp.oacc-c/lib-90.c
new file mode 100644
index 0000000..664bb59
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-90.c
@@ -0,0 +1,137 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+#include <cuda.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+  int i;
+  int tid;
+  unsigned char *p;
+  int devnum;
+  /* Thread body: arg carries this thread's index as an integer.  */
+  tid = (int) (long) arg;
+  /* Select the current NVIDIA device number explicitly for this thread.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+  acc_set_device_num (devnum, acc_device_nvidia);
+
+  if (acc_get_current_cuda_context () == NULL)
+    abort ();
+  /* Fill a private N-byte buffer with the thread index.  */
+  p = malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      p[i] = tid;
+    }
+  /* Publish the host buffer and its device pointer in this thread's slots.  */
+  x[tid] = p;
+
+  d_x[tid] = acc_copyin (p, N);
+  /* Wait on all async activity before the thread exits.  */
+  acc_wait_all ();
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+  CUresult r;
+  CUstream s;
+
+  acc_init (acc_device_nvidia);
+  /* One pointer slot per thread: size by element type, not by buffer length N.  */
+  x = malloc (NTHREADS * sizeof (*x));
+  d_x = malloc (NTHREADS * sizeof (*d_x));
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+  /* Create a CUDA stream and hand it to acc_set_cuda_stream for async value 0.  */
+  r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+  if (!acc_set_cuda_stream (0, s))
+	  abort ();
+  /* Each worker receives its index and fills x[index] / d_x[index].  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+  /* Wait for every worker; the value returned through res is not examined.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+
+  /* Every buffer mapped by a worker must be present from the main thread.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (acc_is_present (x[i], N) != 1)
+	abort ();
+    }
+  /* Query the stream for async value 1; the returned handle is discarded.  */
+  acc_get_cuda_stream (1);
+  /* Zero each host buffer, then copy out so the data must come from the device.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      memset (x[i], 0, N);
+      acc_copyout (x[i], N);
+    }
+
+  acc_wait_all ();
+  /* Buffer i must hold the index of the thread that filled it and be unmapped.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      unsigned char *p;
+      int j;
+
+      p = x[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)
+	    abort ();
+	}
+
+      if (acc_is_present (x[i], N) != 0)
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-91.c b/libgomp/testsuite/libgomp.oacc-c/lib-91.c
new file mode 100644
index 0000000..e00ef4f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-91.c
@@ -0,0 +1,84 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 1024 * 1024;
+  int i;
+  unsigned char *h;
+  void *d;
+  float async, sync;
+  struct timeval start, stop;
+  CUresult r;
+  CUstream s;
+
+  acc_init (acc_device_nvidia);
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+  /* Allocate device memory by hand and associate it with the host buffer h.  */
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+  /* Time 100 update-device directives issued without an async clause.  */
+  gettimeofday (&start, NULL);
+
+  for (i = 0; i < 100; i++)
+    {
+#pragma acc update device(h[0:N])
+    }
+
+  gettimeofday (&stop, NULL);
+
+  sync = (float) (stop.tv_sec - start.tv_sec);
+  sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+  /* NOTE(review): this timed window also covers the stream setup below.  */
+  gettimeofday (&start, NULL);
+
+  r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+  if (!acc_set_cuda_stream (0, s))
+	  abort ();
+  /* Same 100 updates, now with async(0), followed by a wait on everything.  */
+  for (i = 0; i < 100; i++)
+    {
+#pragma acc update device(h[0:N]) async(0)
+    }
+
+  acc_wait_all ();
+
+  gettimeofday (&stop, NULL);
+
+  async = (float) (stop.tv_sec - start.tv_sec);
+  async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+  /* Fail when the async run took more than 1.5 times as long as the sync run.  */
+  if (async > (sync * 1.5))
+    abort ();
+
+  acc_free (d);
+
+  free (h);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-92.c b/libgomp/testsuite/libgomp.oacc-c/lib-92.c
new file mode 100644
index 0000000..0756b29
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-92.c
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+unsigned char **x;
+void **d_x;
+const int N = 32;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+  int i;
+  int tid;
+  unsigned char *p;
+  int devnum;
+  /* Thread body: arg carries this thread's index as an integer.  */
+  tid = (int) (long) arg;
+  /* Select the current NVIDIA device number explicitly for this thread.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+  acc_set_device_num (devnum, acc_device_nvidia);
+
+  if (acc_get_current_cuda_context () == NULL)
+    abort ();
+  /* Copy out the buffer that main mapped for this thread's slot.  */
+  acc_copyout (x[tid], N);
+
+  p = x[tid];
+  /* main filled every buffer with 0..N-1; that pattern must still be there.  */
+  for (i = 0; i < N; i++)
+    {
+      if (p[i] != i)
+	abort ();
+    }
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+  unsigned char *p;
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  acc_init (acc_device_nvidia);
+  /* One pointer slot per thread: size by element type, not by buffer length N.  */
+  x = malloc (NTHREADS * sizeof (*x));
+  d_x = malloc (NTHREADS * sizeof (*d_x));
+  /* Prepare one mapped buffer per worker, so the bound is NTHREADS, not N.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      int j;
+
+      p = malloc (N);
+
+      x[i] = p;
+
+      for (j = 0; j < N; j++)
+	{
+	  p[j] = j;
+	}
+
+      d_x[i] = acc_copyin (p, N);
+    }
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+  /* Query the stream for async value 1; the returned handle is discarded.  */
+  acc_get_cuda_stream (1);
+  /* Each worker receives its index and copies out x[index].  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+  /* Wait for every worker; the value returned through res is not examined.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+  /* The workers copied every buffer out, so none may still be mapped.  */
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (acc_is_present (x[i], N) != 0)
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-1.c b/libgomp/testsuite/libgomp.oacc-c/nested-1.c
new file mode 100644
index 0000000..577ef8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/nested-1.c
@@ -0,0 +1,680 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+    c = (float *) malloc(N * sizeof (float));
+    /* Case 1: data copyin(a) copyout(b); the nested region copies a into b.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 3.0)
+            abort();
+    }
+    /* Neither array may remain mapped once the data region has ended.  */
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 2: the same construct again with different values.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 3: a is copied in as 6.0 first, then changed to 9.0 on the host.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+    d = acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc data present_or_copyin(a[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+    /* b must hold 6.0: the device copy made before the host change is used.  */
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    acc_free(d);
+    /* Case 4: present_or_copyout on b while b is not mapped.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) present_or_copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 5: b is mapped beforehand; host b is expected to stay at 2.0.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 2.0;
+    }
+
+    d = acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc data copyin(a[0:N]) present_or_copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    acc_free (d);
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 6: copy(a) copyout(b); both arrays are updated inside the region.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 4.0;
+    }
+
+#pragma acc data copy(a[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = a[ii] + 1;
+                b[ii] = a[ii] + 2;
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 7: present_or_copy on both arrays while neither is mapped.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc data present_or_copy(a[0:N]) present_or_copy(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = a[ii] + 1;
+                b[ii] = b[ii] + 2;
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 8: both arrays mapped beforehand; host values are expected unchanged.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+
+    d = acc_copyin (&a[0], N * sizeof (float));
+    d = acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc data present_or_copy(a[0:N]) present_or_copy(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = a[ii] + 1;
+                b[ii] = b[ii] + 2;
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 7.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Look up each device pointer, unmap the host array, then free the device memory.  */
+    d = acc_deviceptr(&a[0]);
+    acc_unmap_data (&a[0]);
+    acc_free (d);
+
+    d = acc_deviceptr(&b[0]);
+    acc_unmap_data (&b[0]);
+    acc_free (d);
+
+    /* Case 9: create(c) provides a device-only scratch array between a and b.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc data copyin(a[0:N]) create(c[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort();
+    /* Case 10: present_or_create(c) while c is not mapped.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+#pragma acc data copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort();
+    /* Case 11: present_or_create(c) while c is mapped by acc_map_data.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+
+    d = acc_malloc (N * sizeof (float));
+    acc_map_data(c, d, N * sizeof (float));
+
+#pragma acc data copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+    /* The explicit mapping of c must outlive the data region.  */
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    d = acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+    /* Case 12: present(c) while c is mapped by acc_map_data.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc data copyin(a[0:N]) present(c[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    acc_unmap_data (c);
+
+    if (acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    acc_free (d);
+    /* Case 13: map c, b and a by hand; d is reused, so only the last pointer is kept.  */
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (b, d, N * sizeof (float));
+
+    if (!acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (a, d, N * sizeof (float));
+
+    if (!acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+#pragma acc data present(a[0:N]) present(c[0:N]) present(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = 1.0;
+                c[ii] = 2.0;
+                b[ii] = 4.0;
+            }
+        }
+    }
+
+    if (!acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort();
+    /* Only b is copied out; host a is expected to keep its earlier value 4.0.  */
+    acc_copyout (b, N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort();
+
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    d = acc_deviceptr(a);
+
+    acc_unmap_data (a);
+
+    acc_free (d);
+
+    d = acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+    /* Case 14: deviceptr(d) uses unmapped device memory as scratch storage.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 6.0;
+    }
+
+    d = acc_malloc(N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) deviceptr(d) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            d[ii] = a[ii];
+            b[ii] = d[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    acc_free (d);
+    /* Case 15: same scenario as case 3, using the short spelling pcopyin.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+    d = acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc data pcopyin(a[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    acc_free(d);
+    /* Case 16: same scenario as case 4, using the short spelling pcopyout.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) pcopyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+    /* Case 17: same scenario as case 10, using the short spelling pcreate.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc data copyin(a[0:N]) pcreate(c[0:N]) copyout(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-2.c b/libgomp/testsuite/libgomp.oacc-c/nested-2.c
new file mode 100644
index 0000000..6975467
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/nested-2.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+extern void abort(void);
+
+int
+main(int argc, char *argv[])
+{
+#define N 10
+  char a[N];
+  /* Start from an all-zero array so stale contents cannot pass the check.  */
+  {
+    int i;
+    for (i = 0; i < N; ++i)
+      a[i] = 0;
+  }
+  /* The data region maps a; the nested parallel region only names it present.  */
+#pragma acc data copyout(a)
+  {
+#pragma acc parallel /* will result in a "dummy frame" */ present(a)
+    {
+      int i;
+      for (i = 0; i < N; ++i)
+	a[i] = i;
+    }
+  }
+  /* After the data region ends, the host array must hold 0..N-1.  */
+  {
+    int i;
+    for (i = 0; i < N; ++i)
+      if (a[i] != i)
+	abort();
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/offset-1.c b/libgomp/testsuite/libgomp.oacc-c/offset-1.c
new file mode 100644
index 0000000..0bae23a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/offset-1.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b;
+    int i;
+
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+    /* Case 1: both sub-arrays start at index 2; only elements 2 and 3 are used.  */
+#pragma acc parallel copyin(a[2:4]) copyout(b[2:4])
+    {
+        b[2] = a[2];
+        b[3] = a[3];
+    }
+
+    for (i = 2; i < 4; i++)
+    {
+        if (a[i] != 2.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 1.0;
+    }
+    /* Case 2: both sub-arrays start at index 0 and cover four elements.  */
+#pragma acc parallel copyin(a[0:4]) copyout(b[0:4])
+    {
+        b[0] = a[0];
+        b[1] = a[1];
+        b[2] = a[2];
+        b[3] = a[3];
+    }
+
+    for (i = 0; i < 4; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+        b[i] = 6.0;
+    }
+    /* Case 3: the sub-arrays differ; a[0..3] is copied into b[4..7].  */
+#pragma acc parallel copyin(a[0:4]) copyout(b[4:4])
+    {
+        b[4] = a[0];
+        b[5] = a[1];
+        b[6] = a[2];
+        b[7] = a[3];
+    }
+
+    for (i = 0; i < 4; i++)
+    {
+        if (a[i] != 9.0)
+            abort();
+    }
+
+    for (i = 4; i < 8; i++)
+    {
+        if (b[i] != 9.0)
+            abort();
+    }
+    /* Neither array may remain mapped after the parallel regions.  */
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c
new file mode 100644
index 0000000..fd9df33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c
@@ -0,0 +1,206 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int i;
+
+int main(void)
+{
+  int j, v;
+  /* Case 1: copyin (i, j).  v is set to 1 inside every region to show it ran.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+  /* Case 2: copyout (i, j); the values written in the region must come back.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+  /* Case 3: copy (i, j); values go in and the updated values come back.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+  /* Case 4: create (i, j); host values change only when memory is shared.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+  /* Case 5: present_or_copyin (i, j) while neither variable is mapped.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+  /* Case 6: present_or_copyout (i, j) while neither variable is mapped.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+  /* Case 7: present_or_copy (i, j) while neither variable is mapped.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+  /* Case 8: present_or_create (i, j) while neither variable is mapped.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+  /* Case 9: an enclosing data region maps i and j; the region says present.  */
+  i = -1;
+  j = -2;
+  v = 0;
+
+#pragma acc data copyin (i, j)
+  {
+#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
+    {
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+      v = 1;
+    }
+  }
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+  /* Case 10: as case 9, but the parallel construct has no clause for i and j.  */
+  i = -1;
+  j = -2;
+  v = 0;
+
+#pragma acc data copyin(i, j)
+  {
+#pragma acc parallel /* copyout */ present_or_copyout (v)
+    {
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+      v = 1;
+    }
+  }
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
+#endif
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c
new file mode 100644
index 0000000..6c410d0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 5;
+    int a[N];
+    /* Five ints on the stack; the array length comes from a run-time variable.  */
+    a[0] = 10;
+    a[1] = 10;
+    a[2] = 10;
+    a[3] = 10;
+    a[4] = 10;
+
+    acc_init(acc_device_nvidia);
+    /* Copy the array in and out around the region; results are not checked here.  */
+#pragma acc parallel copy(a[0:N])
+    {
+        a[0] = 5;
+        a[1] = 5;
+        a[2] = 5;
+        a[3] = 5;
+        a[4] = 5;
+    }
+
+    acc_shutdown(acc_device_nvidia);
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/present-1.c b/libgomp/testsuite/libgomp.oacc-c/present-1.c
new file mode 100644
index 0000000..0e284ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/present-1.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+    c = (float *) malloc(N * sizeof (float));
+    /* Only c is mapped onto the device; a and b are never mapped.  */
+    d = acc_malloc(N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+    /* NOTE(review): presumably present(a)/present(b) trigger the expected failure.  */
+#pragma acc data present(a[0:N]) present(c[0:N]) present(b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+    /* Cleanup: look up the device pointer for c, unmap it, and free everything.  */
+    d = acc_deviceptr (c);
+    acc_unmap_data (c);
+    acc_free (d);
+
+    free (a);
+    free (b);
+    free (c);
+
+    return 0;
+}
+/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/present-2.c b/libgomp/testsuite/libgomp.oacc-c/present-2.c
new file mode 100644
index 0000000..41efa70
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/present-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b;
+  int i;
+
+  a = (float *) malloc (N * sizeof (float));
+  b = (float *) malloc (N * sizeof (float));
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 4.0;
+      b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+  {
+
+#pragma acc parallel present(a[0:N])
+    {
+      int ii;
+
+      for (ii = 0; ii < N; ii++)
+	{
+	  b[ii] = a[ii];
+	}
+    }
+
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != 4.0)
+	abort ();
+
+      if (b[i] != 4.0)
+	abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c/reduction-1.c
new file mode 100644
index 0000000..0aa02ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/reduction-1.c
@@ -0,0 +1,186 @@
+/* { dg-do run } */
+/* TODO:
+   <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+   { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* Integer reductions.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  int vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  NOTE(review): both sides start at 0, so they stay 0.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (result != vresult)
+    abort ();
+
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'max' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult > array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+// 
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'min' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult < array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&:result)
+  for (i = 0; i < n; i++)
+    result &= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult &= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '|' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (|:result)
+  for (i = 0; i < n; i++)
+    result |= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult |= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '^' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (^:result)
+  for (i = 0; i < n; i++)
+    result ^= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult ^= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c/reduction-2.c
new file mode 100644
index 0000000..cac6ce4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/reduction-2.c
@@ -0,0 +1,135 @@
+/* { dg-do run } */
+/* TODO:
+   <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+   { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* float reductions.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  float vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  NOTE(review): both sides start at 0, so they stay 0.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (fabs(result - vresult) > .0001)
+    abort ();
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'max' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult > array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+// 
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'min' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult < array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c/reduction-3.c
new file mode 100644
index 0000000..9bcd7fe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/reduction-3.c
@@ -0,0 +1,135 @@
+/* { dg-do run } */
+/* TODO:
+   <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+   { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* double reductions.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  double vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  NOTE(review): both sides start at 0, so they stay 0.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (fabs(result - vresult) > .0001)
+    abort ();
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'max' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult > array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+// 
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'min' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult < array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c/reduction-4.c
new file mode 100644
index 0000000..6cebcf5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/reduction-4.c
@@ -0,0 +1,138 @@
+/* { dg-do run } */
+/* TODO:
+   <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+   { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* complex reductions.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+#include <complex.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  double complex vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* Needs support for complex multiplication.  */
+
+//   /* '*' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (*:result)
+//   for (i = 0; i < n; i++)
+//     result *= array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//     vresult *= array[i];
+// 
+//   if (fabs(result - vresult) > .0001)
+//     abort ();
+//   result = 0;
+//   vresult = 0;
+
+//   /* 'max' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult > array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+// 
+//   result = 0;
+//   vresult = 0;
+// 
+//   /* 'min' reductions.  */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+//   for (i = 0; i < n; i++)
+//       result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+// 
+//   /* Verify the reduction.  */
+//   for (i = 0; i < n; i++)
+//       vresult = vresult < array[i] ? vresult : array[i];
+// 
+//   printf("%d != %d\n", result, vresult);
+//   if (result != vresult)
+//     abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (creal(result) > creal(array[i]));
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (creal(result) > creal(array[i]));
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (creal(result) > creal(array[i]));
+#pragma acc end parallel
+
+  /* Verify the reduction; accumulate in lvresult only, never read lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (creal(result) > creal(array[i]));
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.cu b/libgomp/testsuite/libgomp.oacc-c/subr.cu
new file mode 100644
index 0000000..e86e0fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/subr.cu
@@ -0,0 +1,64 @@
+
+extern "C" __global__ void /* Kernel: busy-wait until `delay' clock () ticks have elapsed.  */
+delay (clock_t * d_o, clock_t delay) /* d_o is not referenced in the body.  */
+{
+  clock_t start, ticks;
+
+  start = clock (); /* Reference point for the elapsed-tick computation.  */
+
+  ticks = 0;
+
+  while (ticks < delay) /* Spin; a delay <= 0 skips the loop entirely.  */
+    ticks = clock () - start;
+}
+
+extern "C" __global__ void /* Kernel: busy-wait for `delay' clock () ticks, then store `tid'.  */
+delay2 (unsigned long *d_o, clock_t delay, unsigned long tid)
+{
+  clock_t start, ticks;
+
+  start = clock (); /* Reference point for the elapsed-tick computation.  */
+
+  ticks = 0;
+
+  while (ticks < delay) /* Spin; a delay <= 0 skips the loop entirely.  */
+    ticks = clock () - start;
+
+  d_o[0] = tid; /* Written only after the wait loop has finished.  */
+}
+
+extern "C" __global__ void /* Kernel: add up d_o[0..N-1] and store the total in d_o[0].  */
+sum (clock_t * d_o, int N)
+{
+  int i;
+  clock_t sum;
+  __shared__ clock_t ticks[32]; /* One slot per thread; indexed by threadIdx.x below.  */
+
+  sum = 0;
+
+  for (i = threadIdx.x; i < N; i += blockDim.x) /* Each thread sums every blockDim.x-th element.  */
+    sum += d_o[i];
+
+  ticks[threadIdx.x] = sum; /* NOTE(review): only in bounds when threadIdx.x < 32 -- confirm launch config.  */
+
+  syncthreads (); /* All partial sums must be stored before they are combined.  */
+
+  for (i = 16; i >= 1; i >>= 1) /* Halving strides 16, 8, 4, 2, 1 fold 32 slots into slot 0.  */
+    {
+      if (threadIdx.x < i)
+	ticks[threadIdx.x] += ticks[threadIdx.x + i];
+
+      syncthreads (); /* Finish this stride before starting the next one.  */
+    }
+
+  d_o[0] = ticks[0]; /* No thread-index guard: every thread performs this store.  */
+}
+
+extern "C" __global__ void /* Kernel: y[i] = x[i] * x[i] for every i in [0, n).  */
+mult (int n, float *x, float *y)
+{
+  int i = blockIdx.x * blockDim.x + threadIdx.x; /* NOTE(review): overwritten by the loop init below, so never used.  */
+
+  for (i = 0; i < n; i++) /* Every thread running the kernel walks the whole range itself.  */
+    y[i] = x[i] * x[i];
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.ptx b/libgomp/testsuite/libgomp.oacc-c/subr.ptx
new file mode 100644
index 0000000..f398b15
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/subr.ptx
@@ -0,0 +1,251 @@
+	.version 1.4
+	.target sm_10, map_f64_to_f32
+	// compiled with /sj/opt/nvidia/cuda-5.5/open64/lib//be
+	// nvopencc 4.1 built on 2013-07-17
+
+	//-----------------------------------------------------------
+	// Compiling /tmp/tmpxft_00007716_00000000-9_subr.cpp3.i (/tmp/ccBI#.6sfiTI)
+	//-----------------------------------------------------------
+
+	//-----------------------------------------------------------
+	// Options:
+	//-----------------------------------------------------------
+	//  Target:ptx, ISA:sm_10, Endian:little, Pointer Size:64
+	//  -O3	(Optimization level)
+	//  -g0	(Debug level)
+	//  -m2	(Report advisories)
+	//-----------------------------------------------------------
+
+	.file	1	"<command-line>"
+	.file	2	"/tmp/tmpxft_00007716_00000000-8_subr.cudafe2.gpu"
+	.file	3	"/usr/lib/gcc/x86_64-redhat-linux/4.4.5/include/stddef.h"
+	.file	4	"/opt/nvidia/cuda-5.5/bin/..//include/crt/device_runtime.h"
+	.file	5	"/opt/nvidia/cuda-5.5/bin/..//include/host_defines.h"
+	.file	6	"/opt/nvidia/cuda-5.5/bin/..//include/builtin_types.h"
+	.file	7	"/opt/nvidia/cuda-5.5/bin/..//include/device_types.h"
+	.file	8	"/opt/nvidia/cuda-5.5/bin/..//include/driver_types.h"
+	.file	9	"/opt/nvidia/cuda-5.5/bin/..//include/surface_types.h"
+	.file	10	"/opt/nvidia/cuda-5.5/bin/..//include/texture_types.h"
+	.file	11	"/opt/nvidia/cuda-5.5/bin/..//include/vector_types.h"
+	.file	12	"/opt/nvidia/cuda-5.5/bin/..//include/device_launch_parameters.h"
+	.file	13	"/opt/nvidia/cuda-5.5/bin/..//include/crt/storage_class.h"
+	.file	14	"/usr/include/bits/types.h"
+	.file	15	"/usr/include/time.h"
+	.file	16	"subr.cu"
+	.file	17	"/opt/nvidia/cuda-5.5/bin/..//include/common_functions.h"
+	.file	18	"/opt/nvidia/cuda-5.5/bin/..//include/math_functions.h"
+	.file	19	"/opt/nvidia/cuda-5.5/bin/..//include/math_constants.h"
+	.file	20	"/opt/nvidia/cuda-5.5/bin/..//include/device_functions.h"
+	.file	21	"/opt/nvidia/cuda-5.5/bin/..//include/sm_11_atomic_functions.h"
+	.file	22	"/opt/nvidia/cuda-5.5/bin/..//include/sm_12_atomic_functions.h"
+	.file	23	"/opt/nvidia/cuda-5.5/bin/..//include/sm_13_double_functions.h"
+	.file	24	"/opt/nvidia/cuda-5.5/bin/..//include/sm_20_atomic_functions.h"
+	.file	25	"/opt/nvidia/cuda-5.5/bin/..//include/sm_32_atomic_functions.h"
+	.file	26	"/opt/nvidia/cuda-5.5/bin/..//include/sm_35_atomic_functions.h"
+	.file	27	"/opt/nvidia/cuda-5.5/bin/..//include/sm_20_intrinsics.h"
+	.file	28	"/opt/nvidia/cuda-5.5/bin/..//include/sm_30_intrinsics.h"
+	.file	29	"/opt/nvidia/cuda-5.5/bin/..//include/sm_32_intrinsics.h"
+	.file	30	"/opt/nvidia/cuda-5.5/bin/..//include/sm_35_intrinsics.h"
+	.file	31	"/opt/nvidia/cuda-5.5/bin/..//include/surface_functions.h"
+	.file	32	"/opt/nvidia/cuda-5.5/bin/..//include/texture_fetch_functions.h"
+	.file	33	"/opt/nvidia/cuda-5.5/bin/..//include/texture_indirect_functions.h"
+	.file	34	"/opt/nvidia/cuda-5.5/bin/..//include/surface_indirect_functions.h"
+	.file	35	"/opt/nvidia/cuda-5.5/bin/..//include/math_functions_dbl_ptx1.h"
+
+
+	.entry delay (
+		.param .u64 __cudaparm_delay_d_o,
+		.param .s64 __cudaparm_delay_delay)
+	{
+	.reg .u32 %rv1;
+	.reg .u32 %r<6>;
+	.reg .u64 %rd<7>;
+	.reg .pred %p<4>;
+	.loc	16	3	0
+$LDWbegin_delay:
+	.loc	16	7	0
+	mov.u32 	%r1, %clock;
+	mov.s32 	%r2, %r1;
+	ld.param.s64 	%rd1, [__cudaparm_delay_delay];
+	mov.u64 	%rd2, 0;
+	setp.le.s64 	%p1, %rd1, %rd2;
+	@%p1 bra 	$Lt_0_1282;
+	cvt.s64.s32 	%rd3, %r2;
+$Lt_0_1794:
+	.loc	16	12	0
+	mov.u32 	%r3, %clock;
+	mov.s32 	%r4, %r3;
+	cvt.s64.s32 	%rd4, %r4;
+	sub.s64 	%rd5, %rd4, %rd3;
+	.loc	16	7	0
+	ld.param.s64 	%rd1, [__cudaparm_delay_delay];
+	.loc	16	12	0
+	setp.gt.s64 	%p2, %rd1, %rd5;
+	@%p2 bra 	$Lt_0_1794;
+$Lt_0_1282:
+	.loc	16	13	0
+	exit;
+$LDWend_delay:
+	} // delay
+
+	.entry delay2 (
+		.param .u64 __cudaparm_delay2_d_o,
+		.param .s64 __cudaparm_delay2_delay,
+		.param .u64 __cudaparm_delay2_tid)
+	{
+	.reg .u32 %rv1;
+	.reg .u32 %r<6>;
+	.reg .u64 %rd<9>;
+	.reg .pred %p<4>;
+	.loc	16	16	0
+$LDWbegin_delay2:
+	.loc	16	20	0
+	mov.u32 	%r1, %clock;
+	mov.s32 	%r2, %r1;
+	ld.param.s64 	%rd1, [__cudaparm_delay2_delay];
+	mov.u64 	%rd2, 0;
+	setp.le.s64 	%p1, %rd1, %rd2;
+	@%p1 bra 	$Lt_1_1282;
+	cvt.s64.s32 	%rd3, %r2;
+$Lt_1_1794:
+	.loc	16	25	0
+	mov.u32 	%r3, %clock;
+	mov.s32 	%r4, %r3;
+	cvt.s64.s32 	%rd4, %r4;
+	sub.s64 	%rd5, %rd4, %rd3;
+	.loc	16	20	0
+	ld.param.s64 	%rd1, [__cudaparm_delay2_delay];
+	.loc	16	25	0
+	setp.gt.s64 	%p2, %rd1, %rd5;
+	@%p2 bra 	$Lt_1_1794;
+$Lt_1_1282:
+	.loc	16	27	0
+	ld.param.u64 	%rd6, [__cudaparm_delay2_tid];
+	ld.param.u64 	%rd7, [__cudaparm_delay2_d_o];
+	st.global.u64 	[%rd7+0], %rd6;
+	.loc	16	28	0
+	exit;
+$LDWend_delay2:
+	} // delay2
+
+	.entry sum (
+		.param .u64 __cudaparm_sum_d_o,
+		.param .s32 __cudaparm_sum_N)
+	{
+	.reg .u32 %r<9>;
+	.reg .u64 %rd<21>;
+	.reg .pred %p<6>;
+	.shared .align 8 .b8 __cuda___cuda_local_var_14805_37_non_const_ticks56[256];
+	.loc	16	31	0
+$LDWbegin_sum:
+	.loc	16	39	0
+	cvt.s32.u16 	%r1, %tid.x;
+	mov.s32 	%r2, %r1;
+	ld.param.u64 	%rd1, [__cudaparm_sum_d_o];
+	ld.param.s32 	%r3, [__cudaparm_sum_N];
+	setp.le.s32 	%p1, %r3, %r1;
+	@%p1 bra 	$Lt_2_5634;
+	cvt.u32.u16 	%r4, %ntid.x;
+	cvt.s64.u32 	%rd2, %r4;
+	mul.wide.u32 	%rd3, %r4, 8;
+	cvt.s64.s32 	%rd4, %r1;
+	mul.wide.s32 	%rd5, %r1, 8;
+	ld.param.u64 	%rd1, [__cudaparm_sum_d_o];
+	add.u64 	%rd6, %rd1, %rd5;
+	mov.s64 	%rd7, 0;
+$Lt_2_3586:
+ //<loop> Loop body line 39, nesting depth: 1, estimated iterations: unknown
+	.loc	16	40	0
+	ld.global.s64 	%rd8, [%rd6+0];
+	add.s64 	%rd7, %rd8, %rd7;
+	add.u32 	%r2, %r2, %r4;
+	add.u64 	%rd6, %rd3, %rd6;
+	.loc	16	39	0
+	ld.param.s32 	%r3, [__cudaparm_sum_N];
+	.loc	16	40	0
+	setp.gt.s32 	%p2, %r3, %r2;
+	@%p2 bra 	$Lt_2_3586;
+	bra.uni 	$Lt_2_3074;
+$Lt_2_5634:
+	mov.s64 	%rd7, 0;
+$Lt_2_3074:
+	mov.u64 	%rd9, __cuda___cuda_local_var_14805_37_non_const_ticks56;
+	.loc	16	42	0
+	cvt.u32.u16 	%r5, %tid.x;
+	cvt.u64.u32 	%rd10, %r5;
+	mul.wide.u32 	%rd11, %r5, 8;
+	add.u64 	%rd12, %rd9, %rd11;
+	st.shared.s64 	[%rd12+0], %rd7;
+	.loc	16	44	0
+	bar.sync 	0;
+	mov.s32 	%r2, 16;
+$Lt_2_4610:
+ //<loop> Loop body line 44, nesting depth: 1, estimated iterations: unknown
+	setp.le.u32 	%p3, %r2, %r5;
+	@%p3 bra 	$Lt_2_4866;
+	.loc	16	49	0
+	ld.shared.s64 	%rd13, [%rd12+0];
+	add.u32 	%r6, %r2, %r5;
+	cvt.u64.u32 	%rd14, %r6;
+	mul.wide.u32 	%rd15, %r6, 8;
+	add.u64 	%rd16, %rd9, %rd15;
+	ld.shared.s64 	%rd17, [%rd16+0];
+	add.s64 	%rd18, %rd13, %rd17;
+	st.shared.s64 	[%rd12+0], %rd18;
+$Lt_2_4866:
+	.loc	16	51	0
+	bar.sync 	0;
+	.loc	16	46	0
+	shr.s32 	%r2, %r2, 1;
+	mov.u32 	%r7, 0;
+	setp.gt.s32 	%p4, %r2, %r7;
+	@%p4 bra 	$Lt_2_4610;
+	.loc	16	54	0
+	ld.shared.s64 	%rd19, [__cuda___cuda_local_var_14805_37_non_const_ticks56+0];
+	st.global.s64 	[%rd1+0], %rd19;
+	.loc	16	55	0
+	exit;
+$LDWend_sum:
+	} // sum
+
+	.entry mult (
+		.param .s32 __cudaparm_mult_n,
+		.param .u64 __cudaparm_mult_x,
+		.param .u64 __cudaparm_mult_y)
+	{
+	.reg .u32 %r<7>;
+	.reg .u64 %rd<4>;
+	.reg .f32 %f<4>;
+	.reg .pred %p<4>;
+	.loc	16	58	0
+$LDWbegin_mult:
+	ld.param.s32 	%r1, [__cudaparm_mult_n];
+	mov.u32 	%r2, 0;
+	setp.le.s32 	%p1, %r1, %r2;
+	@%p1 bra 	$Lt_3_1282;
+	ld.param.s32 	%r1, [__cudaparm_mult_n];
+	mov.s32 	%r3, %r1;
+	ld.param.u64 	%rd1, [__cudaparm_mult_x];
+	ld.param.u64 	%rd2, [__cudaparm_mult_y];
+	mov.s32 	%r4, 0;
+	mov.s32 	%r5, %r3;
+$Lt_3_1794:
+ //<loop> Loop body line 58, nesting depth: 1, estimated iterations: unknown
+	.loc	16	63	0
+	ld.global.f32 	%f1, [%rd1+0];
+	mul.f32 	%f2, %f1, %f1;
+	st.global.f32 	[%rd2+0], %f2;
+	add.s32 	%r4, %r4, 1;
+	add.u64 	%rd2, %rd2, 4;
+	add.u64 	%rd1, %rd1, 4;
+	.loc	16	58	0
+	ld.param.s32 	%r1, [__cudaparm_mult_n];
+	.loc	16	63	0
+	setp.ne.s32 	%p2, %r1, %r4;
+	@%p2 bra 	$Lt_3_1794;
+$Lt_3_1282:
+	.loc	16	64	0
+	exit;
+$LDWend_mult:
+	} // mult
+
diff --git a/libgomp/testsuite/libgomp.oacc-c/timer.h b/libgomp/testsuite/libgomp.oacc-c/timer.h
new file mode 100644
index 0000000..aa6cb3b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/timer.h
@@ -0,0 +1,103 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+
+static int _Tnum_timers;
+static CUevent *_Tstart_events, *_Tstop_events;
+static CUstream _Tstream;
+
+void
+init_timers (int ntimers)
+{
+  int i;
+  CUresult r;
+
+  _Tnum_timers = ntimers;
+
+  _Tstart_events = malloc (_Tnum_timers * sizeof (CUevent));
+  _Tstop_events = malloc (_Tnum_timers * sizeof (CUevent));
+
+  r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  for (i = 0; i < _Tnum_timers; i++)
+    {
+      r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuEventCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuEventCreate failed: %d\n", r);
+	  abort ();
+	}
+    }
+}
+
+void
+fini_timers (void)
+{
+  int i;
+
+  for (i = 0; i < _Tnum_timers; i++)
+    {
+      cuEventDestroy (_Tstart_events[i]);
+      cuEventDestroy (_Tstop_events[i]);
+    }
+
+  cuStreamDestroy (_Tstream);
+
+  free (_Tstart_events);
+  free (_Tstop_events);
+}
+
+void
+start_timer (int timer)
+{
+  CUresult r;
+
+  r = cuEventRecord (_Tstart_events[timer], _Tstream);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventRecord failed: %d\n", r);
+      abort ();
+    }
+}
+
+float
+stop_timer (int timer)
+{
+  CUresult r;
+  float etime;
+
+  r = cuEventRecord (_Tstop_events[timer], _Tstream);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventRecord failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuEventSynchronize (_Tstop_events[timer]);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventSynchronize failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventElapsedTime failed: %d\n", r);
+      abort ();
+    }
+
+  return etime;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/update-1.c b/libgomp/testsuite/libgomp.oacc-c/update-1.c
new file mode 100644
index 0000000..4517273
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/update-1.c
@@ -0,0 +1,280 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c;
+    float *d_a, *d_b, *d_c;
+    int i;
+
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+    c = (float *) malloc(N * sizeof (float));
+
+    d_a = acc_malloc(N * sizeof (float));
+    d_b = acc_malloc(N * sizeof (float));
+    d_c = acc_malloc(N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+    acc_map_data(a, d_a, N * sizeof (float));
+    acc_map_data(b, d_b, N * sizeof (float));
+    acc_map_data(c, d_c, N * sizeof (float));
+
+#pragma acc update device(a[0:N], b[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update self(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 6.0)
+            abort();
+
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 7.0;
+        b[i] = 2.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 7.0)
+            abort();
+
+        if (b[i] != 7.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc update device(a[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 9.0)
+            abort();
+
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+    }
+
+#pragma acc update device(a[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+    }
+
+#pragma acc update device(a[0:N >> 1])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+    for (i = 0; i < (N >> 1); i++)
+    {
+        if (a[i] != 6.0)
+            abort();
+
+        if (b[i] != 6.0)
+            abort();
+    }
+
+    for (i = (N >> 1); i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort();
+
+        if (b[i] != 5.0)
+            abort();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
new file mode 100644
index 0000000..52b030b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
@@ -0,0 +1,14 @@
+! { dg-shouldfail "" { *-*-* } { "*" } { "" } }
+
+! Calls abort unconditionally from inside an OpenACC parallel region.
+! The DejaGnu directive on the first line marks the resulting run-time
+! failure as the expected outcome.
+
+program main
+  implicit none
+
+  !$acc parallel
+  call abort
+  !$acc end parallel
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
new file mode 100644
index 0000000..2ba2bcb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
@@ -0,0 +1,17 @@
+! Aborts from inside an OpenACC parallel region only when the program was
+! started with at least one command-line argument; argc is passed in
+! through a copyin clause.
+
+program main
+  implicit none
+
+  integer :: argc
+  argc = command_argument_count ()
+
+  !$acc parallel copyin(argc)
+  if (argc .ne. 0) then
+     call abort
+  end if
+  !$acc end parallel
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
new file mode 100644
index 0000000..1a10f32
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
@@ -0,0 +1,52 @@
+! { dg-additional-options "-cpp" }
+! TODO: Have to disable the acc_on_device builtin for we want to test the
+! libgomp library function?  The command line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for
+! Fortran.
+!
+! Checks what acc_on_device returns for acc_device_none, acc_device_host,
+! acc_device_not_host and acc_device_nvidia in three places: plain host
+! code, a parallel region with if(.false.), and a parallel region without
+! an if clause.  The last part is compiled only when ACC_DEVICE_TYPE_host
+! is undefined or zero, and its nvidia expectation depends on
+! ACC_DEVICE_TYPE_nvidia.
+
+use openacc
+implicit none
+
+! Host.
+
+if (.not. acc_on_device (acc_device_none)) call abort
+if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
+
+
+! Host via offloading fallback mode.
+
+!$acc parallel if(.false.)
+if (.not. acc_on_device (acc_device_none)) call abort
+if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
+!$acc end parallel
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+
+!$acc parallel
+if (acc_on_device (acc_device_none)) call abort
+if (acc_on_device (acc_device_host)) call abort
+if (.not. acc_on_device (acc_device_not_host)) call abort
+#if ACC_DEVICE_TYPE_nvidia
+if (.not. acc_on_device (acc_device_nvidia)) call abort
+#else
+if (acc_on_device (acc_device_nvidia)) call abort
+#endif
+!$acc end parallel
+
+#endif
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
new file mode 100644
index 0000000..a19045b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
@@ -0,0 +1,52 @@
+! { dg-additional-options "-cpp" }
+! TODO: Have to disable the acc_on_device builtin for we want to test
+! the libgomp library function?  The command line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not
+! for Fortran.
+!
+! Checks what ACC_ON_DEVICE returns for ACC_DEVICE_NONE,
+! ACC_DEVICE_HOST, ACC_DEVICE_NOT_HOST and ACC_DEVICE_NVIDIA in three
+! places: plain host code, a parallel region with IF(.FALSE.), and a
+! parallel region without an IF clause.  The last part is compiled
+! only when ACC_DEVICE_TYPE_host is undefined or zero, and its nvidia
+! expectation depends on ACC_DEVICE_TYPE_nvidia.
+
+      USE OPENACC
+      IMPLICIT NONE
+
+!Host.
+
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+
+
+!Host via offloading fallback mode.
+
+!$ACC PARALLEL IF(.FALSE.)
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+!$ACC END PARALLEL
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+
+!$ACC PARALLEL
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
+!$ACC END PARALLEL
+
+#endif
+
+      END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
new file mode 100644
index 0000000..c391776
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
@@ -0,0 +1,53 @@
+! { dg-additional-options "-cpp" }
+! TODO: Have to disable the acc_on_device builtin for we want to test
+! the libgomp library function?  The command line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not
+! for Fortran.
+!
+! Checks what ACC_ON_DEVICE returns for ACC_DEVICE_NONE,
+! ACC_DEVICE_HOST, ACC_DEVICE_NOT_HOST and ACC_DEVICE_NVIDIA in three
+! places: plain host code, a parallel region with IF(.FALSE.), and a
+! parallel region without an IF clause.  The last part is compiled
+! only when ACC_DEVICE_TYPE_host is undefined or zero, and its nvidia
+! expectation depends on ACC_DEVICE_TYPE_nvidia.  The declarations
+! come from the openacc_lib.h include file.
+
+      IMPLICIT NONE
+      INCLUDE "openacc_lib.h"
+
+!Host.
+
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+
+
+!Host via offloading fallback mode.
+
+!$ACC PARALLEL IF(.FALSE.)
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+!$ACC END PARALLEL
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+
+!$ACC PARALLEL
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
+!$ACC END PARALLEL
+
+#endif
+
+      END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90
new file mode 100644
index 0000000..4c07bc2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90
@@ -0,0 +1,31 @@
+! { dg-do run }
+
+! Fills a(i, j, k) with i + j + k in a collapse(4 - 1) loop nest inside an
+! OpenACC parallel region, then re-checks every element in a collapse(2)
+! loop nest with an .or. reduction on l.  Aborts if l ended up true.
+
+program collapse1
+  integer :: i, j, k, a(1:3, 4:6, 5:7)
+  logical :: l
+  l = .false.
+  a(:, :, :) = 0
+  !$acc parallel
+  !$acc loop collapse(4 - 1)
+    do i = 1, 3
+      do j = 4, 6
+        do k = 5, 7
+          a(i, j, k) = i + j + k
+        end do
+      end do
+    end do
+  !$acc loop collapse(2) reduction(.or.:l)
+    do i = 1, 3
+      do j = 4, 6
+        do k = 5, 7
+          if (a(i, j, k) .ne. (i + j + k)) l = .true.
+        end do
+      end do
+    end do
+  !$acc end parallel
+  if (l) call abort
+end program collapse1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90
new file mode 100644
index 0000000..ca3b638
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90
@@ -0,0 +1,30 @@
+! { dg-do run }
+
+! Fills a(i, j, k) with i + j + k in a collapse(4 - 1) nest of labelled do
+! loops that share terminating label 164, then verifies every element in a
+! collapse(2) nest whose outer loop is the named construct firstdo.
+! Aborts if the .or. reduction on l recorded a mismatch.
+
+program collapse2
+  integer :: i, j, k, a(1:3, 4:6, 5:7)
+  logical :: l
+  l = .false.
+  a(:, :, :) = 0
+  !$acc parallel
+  !$acc loop collapse(4 - 1)
+    do 164 i = 1, 3
+      do 164 j = 4, 6
+        do 164 k = 5, 7
+          a(i, j, k) = i + j + k
+164      end do
+  !$acc loop collapse(2) reduction(.or.:l)
+firstdo: do i = 1, 3
+      do j = 4, 6
+        do k = 5, 7
+          if (a(i, j, k) .ne. (i + j + k)) l = .true.
+        end do
+      end do
+    end do firstdo
+  !$acc end parallel
+  if (l) call abort
+end program collapse2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90
new file mode 100644
index 0000000..50e6100
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90
@@ -0,0 +1,33 @@
+! { dg-do run }
+
+! Sets every element of a to 1 and then to 2, each time in a collapse(3)
+! loop nest inside an OpenACC parallel region.  The nests mix labelled do
+! loops (115, 120) with named do constructs (dokk, dol, doll).  Aborts if
+! any element does not hold the expected value after either region.
+
+program collapse3
+  integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+  !$acc parallel
+  !$acc loop collapse(3)
+    do 115 k=1,3
+dokk: do kk=1,3
+        do kkk=1,3
+          a(k,kk,kkk) = 1
+        enddo
+      enddo dokk
+115   continue
+  !$acc end parallel
+  if (any(a(1:3,1:3,1:3).ne.1)) call abort
+
+  !$acc parallel
+  !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+        do lll=1,3
+          a(l,ll,lll) = 2
+        enddo
+      enddo doll
+120 end do dol
+  !$acc end parallel
+  if (any(a(1:3,1:3,1:3).ne.2)) call abort
+end program collapse3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90
new file mode 100644
index 0000000..41b66db
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90
@@ -0,0 +1,45 @@
+! { dg-do run }
+
+! collapse3.f90:test1
+! Increments a over the index box 2:6, -2:4, 13:18 in a collapse(3) loop
+! with an .or. reduction on l that flags any out-of-range index, repeats
+! the same computation sequentially into b and r, and aborts if l differs
+! from r or a differs from b anywhere inside that box.
+program collapse4
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+    do i = 2, 6
+      do j = -2, 4
+        do k = 13, 18
+          l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          l = l.or.k.lt.13.or.k.gt.18
+          if (.not.l) a(i, j, k) = a(i, j, k) + 1
+        end do
+      end do
+    end do
+  !$acc end parallel
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        ! Guard on r, the host-side flag, so b does not depend on the
+        ! flag l produced by the parallel region.
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+         if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90
new file mode 100644
index 0000000..8c20f04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90
@@ -0,0 +1,56 @@
+! { dg-do run }
+
+! collapse3.f90:test2
+! Increments a over the index box v1:v2, v3:v4, v5:v6 (3:6, -2:4, 13:18)
+! in a collapse(3) loop with an .or. reduction on l that flags any
+! out-of-range index, repeats the computation sequentially into b and r,
+! and aborts if l differs from r or a differs from b inside that box.
+program collapse5
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  integer :: v1, v2, v3, v4, v5, v6
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  v1 = 3
+  v2 = 6
+  v3 = -2
+  v4 = 4
+  v5 = 13
+  v6 = 18
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          l = l.or.k.lt.13.or.k.gt.18
+          if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          ! NOTE(review): m is not declared and never read in this
+          ! program; confirm whether this store is still wanted.
+          m = i * 100 + j * 10 + k
+        end do
+      end do
+    end do
+  !$acc end parallel
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        ! Guard on r, the host-side flag, so b does not depend on the
+        ! flag l produced by the parallel region.
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+         if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse5
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90
new file mode 100644
index 0000000..7404b91
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90
@@ -0,0 +1,54 @@
+! { dg-do run }
+
+! collapse3.f90:test3
+! Increments a over v1:v2, v3:v4, v5:v6 with strides v7, v8, v9 (all 1) in
+! a collapse(3) loop with an .or. reduction on l that flags any
+! out-of-range index, repeats the computation sequentially into b and r,
+! and aborts if l differs from r or a differs from b at a visited index.
+program collapse6
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  v1 = 3
+  v2 = 6
+  v3 = -2
+  v4 = 4
+  v5 = 13
+  v6 = 18
+  v7 = 1
+  v8 = 1
+  v9 = 1
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          l = l.or.k.lt.13.or.k.gt.18
+          if (.not.l) a(i, j, k) = a(i, j, k) + 1
+        end do
+      end do
+    end do
+  !$acc end parallel
+  do i = v1, v2, v7
+    do j = v3, v4, v8
+      do k = v5, v6, v9
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = v1, v2, v7
+    do j = v3, v4, v8
+       do k = v5, v6, v9
+         if (a(i, j, k) .ne. b(i, j, k)) call abort
+       end do
+    end do
+  end do
+end program collapse6
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90
new file mode 100644
index 0000000..12efd8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90
@@ -0,0 +1,45 @@
+! { dg-do run }
+
+! collapse3.f90:test4
+! Increments a over the index box 2:6, -2:4, 13:18 in a collapse(3) loop
+! with an .or. reduction on l that flags any out-of-range index, and
+! repeats the computation sequentially into b and r.  Aborts if l differs
+! from r or if a differs from b anywhere in the full declared bounds
+! 1:7, -3:5, 12:19.
+program collapse7
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+    do i = 2, 6
+      do j = -2, 4
+        do k = 13, 18
+          l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          l = l.or.k.lt.13.or.k.gt.18
+          if (.not.l) a(i, j, k) = a(i, j, k) + 1
+        end do
+      end do
+    end do
+  !$acc end parallel
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = 1, 7
+    do j = -3, 5
+      do k = 12, 19
+         if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse7
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90
new file mode 100644
index 0000000..04fbcfe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+
+! collapse3.f90:test5
+! Increments a over the index box v1:v2, v3:v4, v5:v6 (3:6, -2:4, 13:18)
+! in a collapse(3) loop with an .or. reduction on l that flags any
+! out-of-range index, repeats the computation sequentially into b and r,
+! and aborts if l differs from r or a differs from b inside that box.
+program collapse8
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  integer :: v1, v2, v3, v4, v5, v6
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  v1 = 3
+  v2 = 6
+  v3 = -2
+  v4 = 4
+  v5 = 13
+  v6 = 18
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          l = l.or.k.lt.13.or.k.gt.18
+          if (.not.l) a(i, j, k) = a(i, j, k) + 1
+        end do
+      end do
+    end do
+  !$acc end parallel
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+         if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse8
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
new file mode 100644
index 0000000..ef6d551
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
@@ -0,0 +1,113 @@
+# This whole file adapted from libgomp.fortran/fortran.exp.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+load_gcc_lib gfortran-dg.exp
+
+global shlib_ext
+global ALWAYS_CFLAGS
+
+set shlib_ext [get_shlib_extension]
+set lang_library_path	"../libgfortran/.libs"
+set lang_link_flags	"-lgfortran"
+if [info exists lang_include_flags] then {
+    unset lang_include_flags
+}
+set lang_test_file_found 0
+set quadmath_library_path "../libquadmath/.libs"
+
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+# Fortran tests can run only if libgfortran is found under blddir or, when
+# blddir is empty, if GFORTRAN_UNDER_TEST is set.
+if { $blddir != "" } {
+    set lang_source_re {^.*\.[fF](|90|95|03|08)$}
+    set lang_include_flags "-fintrinsic-modules-path=${blddir}"
+    # Look for a static libgfortran first.
+    if [file exists "${blddir}/${lang_library_path}/libgfortran.a"] {
+        set lang_test_file "${lang_library_path}/libgfortran.a"
+        set lang_test_file_found 1
+	# We may have a shared only build, so look for a shared libgfortran.
+    } elseif [file exists "${blddir}/${lang_library_path}/libgfortran.${shlib_ext}"] {
+        set lang_test_file "${lang_library_path}/libgfortran.${shlib_ext}"
+        set lang_test_file_found 1
+    } else {
+        puts "No libgfortran library found, will not execute fortran tests"
+    }
+} elseif [info exists GFORTRAN_UNDER_TEST] {
+    set lang_test_file_found 1
+    # Needs to exist for libgomp.exp.
+    set lang_test_file ""
+} else {
+    puts "GFORTRAN_UNDER_TEST not defined, will not execute fortran tests"
+}
+
+if { $lang_test_file_found } {
+    # Gather a list of all tests.
+    set tests [lsort [find $srcdir/$subdir *.\[fF\]{,90,95,03,08}]]
+
+    if { $blddir != "" } {
+	if { [file exists "${blddir}/${quadmath_library_path}/libquadmath.a"]
+	     || [file exists "${blddir}/${quadmath_library_path}/libquadmath.${shlib_ext}"] } {
+	    lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/${quadmath_library_path}/"
+	    # Allow for spec substitution.
+	    lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/${quadmath_library_path}/"
+	    set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}:${blddir}/${quadmath_library_path}"
+	} else {
+	    set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+	}
+    } else {
+        set ld_library_path "$always_ld_library_path"
+    }
+    append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+    set_ld_library_path_env_vars
+
+    # Todo: get list of accelerators from configure options --enable-accelerator.
+    set accels { "nonshm-host" "nvidia" }
+
+    # Run on host (or fallback) accelerator.
+    lappend accels "host"
+
+    # Test OpenACC with available accelerators.
+    foreach accel $accels {
+	# Macro that lets the tests tell which device type is under test.
+	set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+
+	# Todo: Determine shared memory or not using run-time test.
+	# nonshm-host gets its own macro spelling with an underscore; only
+	# host is treated as having shared memory.
+	switch $accel {
+	    host {
+		set acc_mem_shared 1
+	    }
+	    nonshm-host {
+		set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1"
+		set acc_mem_shared 0
+	    }
+	    nvidia {
+		set acc_mem_shared 0
+	    }
+	    default {
+		set acc_mem_shared 0
+	    }
+	}
+	set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+	# Todo: Verify that this works for both local and remote testing.
+	setenv ACC_DEVICE_TYPE $accel
+
+	# For Fortran we're doing torture testing, as Fortran has far more tests
+	# with arrays etc. that testing just -O0 or -O2 is insufficient, that is
+	# typically not the case for C/C++.
+	gfortran-dg-runtest $tests "$tagopt" ""
+    }
+}
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
new file mode 100644
index 0000000..51dc452
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
@@ -0,0 +1,17 @@
+! Exercises the OpenACC runtime routines for the host device type through
+! the openacc module: device count, set/get device type, set/get device
+! number, shutdown, then an init/shutdown pair.
+
+use openacc
+
+if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+call acc_set_device_type (acc_device_host)
+if (acc_get_device_type () .ne. acc_device_host) call abort
+call acc_set_device_num (0, acc_device_host)
+if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+call acc_shutdown (acc_device_host)
+
+call acc_init (acc_device_host)
+call acc_shutdown (acc_device_host)
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
new file mode 100644
index 0000000..a54d6a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
@@ -0,0 +1,91 @@
+! { dg-do run }
+
+! Copies three 10x10x10 arrays (integer, complex, real) to the device with
+! acc_copyin and checks with acc_is_present that each whole array and each
+! individual element is reported present.  Exits immediately when no
+! acc_device_nvidia device is reported.  Uses the openacc_lib.h include.
+
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)
+  integer, parameter :: c_size = sizeof (c)
+  integer, parameter :: r_size = sizeof (r)
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+  ! Probe each element using the byte size of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+
+  ! Fills the three arrays: zeros when clear is true; otherwise a_i and a_r
+  ! get the first index i and a_c gets cmplx (i, j).  All loop bounds are
+  ! taken from a_i.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90
new file mode 100644
index 0000000..a54d6a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90
@@ -0,0 +1,91 @@
+! { dg-do run }
+
+! Copies three 10x10x10 arrays (integer, complex, real) to the device with
+! acc_copyin and checks with acc_is_present that each whole array and each
+! individual element is reported present.  Exits immediately when no
+! acc_device_nvidia device is reported.  Uses the openacc_lib.h include.
+
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)
+  integer, parameter :: c_size = sizeof (c)
+  integer, parameter :: r_size = sizeof (r)
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+  ! Probe each element using the byte size of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+
+  ! Fills the three arrays: zeros when clear is true; otherwise a_i and a_r
+  ! get the first index i and a_c gets cmplx (i, j).  All loop bounds are
+  ! taken from a_i.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
new file mode 100644
index 0000000..a9d70b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
@@ -0,0 +1,17 @@
+! Exercises the OpenACC runtime routines for the host device type
+! through the OPENACC module: device count, set/get device type,
+! set/get device number, shutdown, then an init/shutdown pair.
+
+      USE OPENACC
+
+      IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+      CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+      CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+      CALL ACC_INIT (ACC_DEVICE_HOST)
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+      END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
new file mode 100644
index 0000000..56d2cd2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
@@ -0,0 +1,17 @@
+! Exercises the OpenACC runtime routines for the host device type
+! through the openacc_lib.h include file: device count, set/get device
+! type, set/get device number, shutdown, then an init/shutdown pair.
+
+      INCLUDE "openacc_lib.h"
+
+      IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+      CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+      CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+      CALL ACC_INIT (ACC_DEVICE_HOST)
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+      END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
new file mode 100644
index 0000000..3a2b661
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+
+! Walks through the OpenACC runtime routines for the host device type via
+! the openacc module: device counts for acc_device_host (expects 1) and
+! acc_device_none (expects 0), init, get/set device type, set/get device
+! number 0, acc_async_test, acc_wait, acc_wait_all and shutdown.
+
+program main
+  use openacc
+  implicit none
+
+  integer n
+
+  if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+  if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+  call acc_init (acc_device_host)
+
+  if (acc_get_device_type () .ne. acc_device_host) call abort
+
+  call acc_set_device_type (acc_device_host)
+
+  if (acc_get_device_type () .ne. acc_device_host) call abort
+
+  n = 0
+
+  call acc_set_device_num (n, acc_device_host)
+
+  if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+
+  if (.NOT. acc_async_test (n) ) call abort
+
+  call acc_wait (n)
+
+  call acc_wait_all ()
+
+  call acc_shutdown (acc_device_host)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
new file mode 100644
index 0000000..e68eb89
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
@@ -0,0 +1,36 @@
+! { dg-do run }
+
+! Exits immediately when no acc_device_nvidia device is reported.
+! Otherwise initialises that device type, selects device 0 and checks it,
+! and when more than one device is reported also selects and checks
+! device 1, before shutting down.  Uses the openacc module.
+
+program main
+  use openacc
+  implicit none
+
+  integer n
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  n = 0
+
+  call acc_set_device_num (n, acc_device_nvidia)
+
+  if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+  if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+    n = 1
+
+    call acc_set_device_num (n, acc_device_nvidia)
+
+    if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+  end if
+
+  call acc_shutdown (acc_device_nvidia)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
new file mode 100644
index 0000000..401ad66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
@@ -0,0 +1,41 @@
+! { dg-do run }
+
+! Walks through the OpenACC runtime routines for the host device type via
+! the openacc_lib.h include file: device counts for acc_device_host
+! (expects 1) and acc_device_none (expects 0), init, get/set device type,
+! set/get device number 0, acc_async_test, acc_wait, acc_wait_all and
+! shutdown.
+
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer n
+
+  if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+  if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+  call acc_init (acc_device_host)
+
+  if (acc_get_device_type () .ne. acc_device_host) call abort
+
+  call acc_set_device_type (acc_device_host)
+
+  if (acc_get_device_type () .ne. acc_device_host) call abort
+
+  n = 0
+
+  call acc_set_device_num (n, acc_device_host)
+
+  if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+
+  if (.NOT. acc_async_test (n) ) call abort
+
+  call acc_wait (n)
+
+  call acc_wait_all ()
+
+  call acc_shutdown (acc_device_host)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
new file mode 100644
index 0000000..422df53
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
@@ -0,0 +1,37 @@
+! { dg-do run }
+
+! Exits immediately when no acc_device_nvidia device is reported.
+! Otherwise initialises that device type, selects device 0 and checks it,
+! and when more than one device is reported also selects and checks
+! device 1, before shutting down.  Uses the openacc_lib.h include file
+! instead of the openacc module.
+
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer n
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  n = 0
+
+  call acc_set_device_num (n, acc_device_nvidia)
+
+  if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+  if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+    n = 1
+
+    call acc_set_device_num (n, acc_device_nvidia)
+
+    if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+  end if
+
+  call acc_shutdown (acc_device_nvidia)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
new file mode 100644
index 0000000..ad758b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
@@ -0,0 +1,92 @@
+! { dg-do run }
+
+! Copies three 10x10x10 arrays (integer, complex, real) to the device with
+! acc_copyin and checks with acc_is_present that each whole array and each
+! individual element is reported present.  Exits immediately when no
+! acc_device_nvidia device is reported.
+
+program main
+  use openacc
+  use iso_c_binding
+  implicit none
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)
+  integer, parameter :: c_size = sizeof (c)
+  integer, parameter :: r_size = sizeof (r)
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+  ! Probe each element using the byte size of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+
+  ! Fills the three arrays: zeros when clear is true; otherwise a_i and a_r
+  ! get the first index i and a_c gets cmplx (i, j).  All loop bounds are
+  ! taken from a_i.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90
new file mode 100644
index 0000000..ad758b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90
@@ -0,0 +1,83 @@
+! { dg-do run }
+
+program main
+  use openacc
+  use iso_c_binding
+  implicit none
+  ! Check acc_is_present on whole arrays and on single elements after acc_copyin.
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)  ! bytes per integer element
+  integer, parameter :: c_size = sizeof (c)  ! bytes per complex element
+  integer, parameter :: r_size = sizeof (r)  ! bytes per real element
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit  ! no device: nothing to test
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)  ! .FALSE. = fill with index-derived values
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+  do i = 1, 10  ! probe every element with the byte size of its own type
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+
+  subroutine set3d (clear, a_i, a_c, a_r)  ! zero (CLEAR) or fill all three arrays
+  logical clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+
+  lb1 = lbound (a_i, 1)  ! the bounds of A_I are used for all three arrays
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
new file mode 100644
index 0000000..082dd8a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
@@ -0,0 +1,97 @@
+program map
+  ! Run one parallel region per data clause and compare A with host-built B.
+  integer, parameter     :: n = 20, c = 10
+  integer                :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc parallel copy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel copyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! COPYIN
+
+  a(:) = 0
+
+  !$acc parallel copyout (a) copyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)  ! read B inside the region so that copyin is exercised
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  a(:) = 0  ! reset so that the check depends on this region
+
+  !$acc parallel pcopy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a) pcopyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)  ! read B inside the region so that pcopyin is exercised
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+end program map
+
+subroutine check (a, b, n)
+  ! Abort the test unless A and B hold the same N values.
+  integer :: n, a(n), b(n)
+
+  ! Both dummies have explicit shape (n), so the whole-array comparison
+  ! covers exactly elements 1..n.
+  if (any (a .ne. b)) call abort
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
new file mode 100644
index 0000000..db3c6b1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+      program main
+      implicit none
+      include "openacc_lib.h"
+
+!     openacc_lib.h must define openacc_version as 201306.
+      if (.not. (openacc_version .eq. 201306)) call abort
+      end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
new file mode 100644
index 0000000..a14ecdd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+program main
+  use openacc
+  implicit none
+
+  ! The openacc module must provide openacc_version with the value 201306.
+  if (.not. (openacc_version .eq. 201306)) call abort
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
new file mode 100644
index 0000000..e316746
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
@@ -0,0 +1,15 @@
+program test
+  implicit none
+  ! Store to every element of A inside a parallel region that maps a(1:5).
+  integer a(5)
+
+  a = 10;
+  ! NOTE(review): the values written below are never verified on the host.
+  !$acc parallel copy(a(1:5))
+  a(1) = 5
+  a(2) = 5
+  a(3) = 5
+  a(4) = 5
+  a(5) = 5
+  !$acc end parallel
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
new file mode 100644
index 0000000..1a1d4c7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
@@ -0,0 +1,229 @@
+! { dg-do run }
+
+program test
+  implicit none
+  integer, allocatable :: a1(:)
+  integer, allocatable :: b1(:)
+  integer, allocatable :: c1(:)
+  integer, allocatable :: b2(:,:)
+  integer, allocatable :: c3(:,:,:)
+  ! Map allocatable arrays (ranks 1-3, lower bound 1 or 0) into parallel regions.
+  allocate (a1(5))  ! rank 1, bounds 1:5
+  if (.not.allocated (a1)) call abort()
+
+  a1 = 10
+
+  !$acc parallel copy(a1(1:5))
+  a1(1) = 1
+  a1(2) = 2
+  a1(3) = 3
+  a1(4) = 4
+  a1(5) = 5
+  !$acc end parallel
+
+  if (a1(1) .ne. 1) call abort
+  if (a1(2) .ne. 2) call abort
+  if (a1(3) .ne. 3) call abort
+  if (a1(4) .ne. 4) call abort
+  if (a1(5) .ne. 5) call abort
+
+  deallocate(a1)
+
+  allocate (a1(0:4))  ! rank 1, bounds 0:4
+  if (.not.allocated (a1)) call abort()
+
+  a1 = 10
+
+  !$acc parallel copy(a1(0:4))
+  a1(0) = 1
+  a1(1) = 2
+  a1(2) = 3
+  a1(3) = 4
+  a1(4) = 5
+  !$acc end parallel
+
+  if (a1(0) .ne. 1) call abort
+  if (a1(1) .ne. 2) call abort
+  if (a1(2) .ne. 3) call abort
+  if (a1(3) .ne. 4) call abort
+  if (a1(4) .ne. 5) call abort
+
+  deallocate(a1)
+
+  allocate (b2(5,5))  ! rank 2, lower bounds 1
+  if (.not.allocated (b2)) call abort()
+
+  b2 = 11
+
+  !$acc parallel copy(b2(1:5,1:5))
+  b2(1,1) = 1
+  b2(2,2) = 2
+  b2(3,3) = 3
+  b2(4,4) = 4
+  b2(5,5) = 5
+  !$acc end parallel
+
+  if (b2(1,1) .ne. 1) call abort
+  if (b2(2,2) .ne. 2) call abort
+  if (b2(3,3) .ne. 3) call abort
+  if (b2(4,4) .ne. 4) call abort
+  if (b2(5,5) .ne. 5) call abort
+
+  deallocate(b2)
+
+  allocate (b2(0:4,0:4))  ! rank 2, lower bounds 0
+  if (.not.allocated (b2)) call abort()
+
+  b2 = 11
+
+  !$acc parallel copy(b2(0:4,0:4))
+  b2(0,0) = 1
+  b2(1,1) = 2
+  b2(2,2) = 3
+  b2(3,3) = 4
+  b2(4,4) = 5
+  !$acc end parallel
+
+  if (b2(0,0) .ne. 1) call abort
+  if (b2(1,1) .ne. 2) call abort
+  if (b2(2,2) .ne. 3) call abort
+  if (b2(3,3) .ne. 4) call abort
+  if (b2(4,4) .ne. 5) call abort
+
+  deallocate(b2)
+
+  allocate (c3(5,5,5))  ! rank 3, lower bounds 1
+  if (.not.allocated (c3)) call abort()
+
+  c3 = 12
+
+  !$acc parallel copy(c3(1:5,1:5,1:5))
+  c3(1,1,1) = 1
+  c3(2,2,2) = 2
+  c3(3,3,3) = 3
+  c3(4,4,4) = 4
+  c3(5,5,5) = 5
+  !$acc end parallel
+
+  if (c3(1,1,1) .ne. 1) call abort
+  if (c3(2,2,2) .ne. 2) call abort
+  if (c3(3,3,3) .ne. 3) call abort
+  if (c3(4,4,4) .ne. 4) call abort
+  if (c3(5,5,5) .ne. 5) call abort
+
+  deallocate(c3)
+
+  allocate (c3(0:4,0:4,0:4))  ! rank 3, lower bounds 0
+  if (.not.allocated (c3)) call abort()
+
+  c3 = 12
+
+  !$acc parallel copy(c3(0:4,0:4,0:4))
+  c3(0,0,0) = 1
+  c3(1,1,1) = 2
+  c3(2,2,2) = 3
+  c3(3,3,3) = 4
+  c3(4,4,4) = 5
+  !$acc end parallel
+
+  if (c3(0,0,0) .ne. 1) call abort
+  if (c3(1,1,1) .ne. 2) call abort
+  if (c3(2,2,2) .ne. 3) call abort
+  if (c3(3,3,3) .ne. 4) call abort
+  if (c3(4,4,4) .ne. 5) call abort
+
+  deallocate(c3)
+
+  allocate (a1(5))  ! three rank-1 arrays, bounds 1:5
+  if (.not.allocated (a1)) call abort()
+
+  allocate (b1(5))
+  if (.not.allocated (b1)) call abort()
+
+  allocate (c1(5))
+  if (.not.allocated (c1)) call abort()
+
+  a1 = 10
+  b1 = 3
+  c1 = 7
+  ! Copy a1 in, stage it through c1 (create clause), copy the result out via b1.
+  !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5))
+  c1(1) = a1(1)
+  c1(2) = a1(2)
+  c1(3) = a1(3)
+  c1(4) = a1(4)
+  c1(5) = a1(5)
+
+  b1(1) = c1(1)
+  b1(2) = c1(2)
+  b1(3) = c1(3)
+  b1(4) = c1(4)
+  b1(5) = c1(5)
+  !$acc end parallel
+
+  if (b1(1) .ne. 10) call abort
+  if (b1(2) .ne. 10) call abort
+  if (b1(3) .ne. 10) call abort
+  if (b1(4) .ne. 10) call abort
+  if (b1(5) .ne. 10) call abort
+
+  deallocate(a1)
+  deallocate(b1)
+  deallocate(c1)
+
+  allocate (a1(0:4))  ! three rank-1 arrays, bounds 0:4
+  if (.not.allocated (a1)) call abort()
+
+  allocate (b1(0:4))
+  if (.not.allocated (b1)) call abort()
+
+  allocate (c1(0:4))
+  if (.not.allocated (c1)) call abort()
+
+  a1 = 10
+  b1 = 3
+  c1 = 7
+  ! Copy a1 in, stage it through c1 (create clause), copy the result out via b1.
+  !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4))
+  c1(0) = a1(0)
+  c1(1) = a1(1)
+  c1(2) = a1(2)
+  c1(3) = a1(3)
+  c1(4) = a1(4)
+
+  b1(0) = c1(0)
+  b1(1) = c1(1)
+  b1(2) = c1(2)
+  b1(3) = c1(3)
+  b1(4) = c1(4)
+  !$acc end parallel
+
+  if (b1(0) .ne. 10) call abort
+  if (b1(1) .ne. 10) call abort
+  if (b1(2) .ne. 10) call abort
+  if (b1(3) .ne. 10) call abort
+  if (b1(4) .ne. 10) call abort
+
+  deallocate(a1)
+  deallocate(b1)
+  deallocate(c1)
+
+  allocate (a1(5))  ! only the section 2:3 is mapped below
+  if (.not.allocated (a1)) call abort()
+
+  a1 = 10
+
+  !$acc parallel copy(a1(2:3))
+  a1(2) = 2
+  a1(3) = 3
+  !$acc end parallel
+  ! Elements outside the mapped section 2:3 must keep their host value 10.
+  if (a1(1) .ne. 10) call abort
+  if (a1(2) .ne. 2) call abort
+  if (a1(3) .ne. 3) call abort
+  if (a1(4) .ne. 10) call abort
+  if (a1(5) .ne. 10) call abort
+
+  deallocate(a1)
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
new file mode 100644
index 0000000..7c7e078
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
@@ -0,0 +1,228 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! Integer reductions
+
+program reduction_1
+  implicit none
+  ! Compare each OpenACC loop reduction with the same loop run sequentially.
+  integer, parameter    :: n = 1000, vl = 32
+  integer               :: i, vresult, result
+  logical               :: lresult, lvresult
+  integer, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  result = 0
+  vresult = 0
+
+  ! '+' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(+:result)
+  do i = 1, n
+     result = result + array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult + array(i)
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 0
+  vresult = 0
+
+  ! '*' reductions
+  ! (Start value 0 keeps the product at 0; 1 would overflow for n = 1000.)
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(*:result)
+  do i = 1, n
+     result = result * array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult * array(i)
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 0
+  vresult = 0
+
+  ! 'max' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(max:result)
+  do i = 1, n
+     result = max (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = max (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 1
+  vresult = 1
+
+  ! 'min' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(min:result)
+  do i = 1, n
+     result = min (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = min (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 1
+  vresult = 1
+
+  ! 'iand' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(iand:result)
+  do i = 1, n
+     result = iand (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = iand (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 1
+  vresult = 1
+
+  ! 'ior' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(ior:result)
+  do i = 1, n
+     result = ior (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = ior (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 0
+  vresult = 0
+
+  ! 'ieor' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(ieor:result)
+  do i = 1, n
+     result = ieor (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = ieor (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.and.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.and.:lresult)
+  do i = 1, n
+     lresult = lresult .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .and. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort  ! compare the logical results
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.or.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.or.:lresult)
+  do i = 1, n
+     lresult = lresult .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .or. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.eqv.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.eqv.:lresult)
+  do i = 1, n
+     lresult = lresult .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .eqv. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.neqv.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.neqv.:lresult)
+  do i = 1, n
+     lresult = lresult .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .neqv. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+end program reduction_1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
new file mode 100644
index 0000000..d0d34bb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
@@ -0,0 +1,173 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! real reductions
+
+program reduction_2
+  implicit none
+  ! Compare each OpenACC loop reduction with the same loop run sequentially.
+  integer, parameter    :: n = 1000, vl = 32
+  integer               :: i
+  real, parameter       :: e = .001
+  real                  :: vresult, result
+  logical               :: lresult, lvresult
+  real, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  result = 0
+  vresult = 0
+
+  ! '+' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(+:result)
+  do i = 1, n
+     result = result + array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult + array(i)
+  end do
+
+  if (abs (result - vresult) .ge. e) call abort
+
+  result = 1
+  vresult = 1
+
+  ! '*' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(*:result)
+  do i = 1, n
+     result = result * array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult * array(i)
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 0
+  vresult = 0
+
+  ! 'max' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(max:result)
+  do i = 1, n
+     result = max (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = max (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 1
+  vresult = 1
+
+  ! 'min' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(min:result)
+  do i = 1, n
+     result = min (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = min (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  lresult = .false.  ! the logical pair must be defined before its first use
+  lvresult = .false.
+
+  ! '.and.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.and.:lresult)
+  do i = 1, n
+     lresult = lresult .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .and. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort  ! compare the logical results
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.or.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.or.:lresult)
+  do i = 1, n
+     lresult = lresult .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .or. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.eqv.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.eqv.:lresult)
+  do i = 1, n
+     lresult = lresult .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .eqv. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.neqv.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.neqv.:lresult)
+  do i = 1, n
+     lresult = lresult .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .neqv. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+end program reduction_2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
new file mode 100644
index 0000000..b42ca61
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
@@ -0,0 +1,173 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! double precision reductions
+
+program reduction_3
+  implicit none
+  ! Compare each OpenACC loop reduction with the same loop run sequentially.
+  integer, parameter    :: n = 1000, vl = 32
+  integer               :: i
+  double precision, parameter :: e = .001
+  double precision      :: vresult, result
+  logical               :: lresult, lvresult
+  double precision, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  result = 0
+  vresult = 0
+
+  ! '+' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(+:result)
+  do i = 1, n
+     result = result + array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult + array(i)
+  end do
+
+  if (abs (result - vresult) .ge. e) call abort
+
+  result = 1
+  vresult = 1
+
+  ! '*' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(*:result)
+  do i = 1, n
+     result = result * array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult * array(i)
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 0
+  vresult = 0
+
+  ! 'max' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(max:result)
+  do i = 1, n
+     result = max (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = max (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  result = 1
+  vresult = 1
+
+  ! 'min' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(min:result)
+  do i = 1, n
+     result = min (result, array(i))
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = min (vresult, array(i))
+  end do
+
+  if (result.ne.vresult) call abort
+
+  lresult = .false.  ! the logical pair must be defined before its first use
+  lvresult = .false.
+
+  ! '.and.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.and.:lresult)
+  do i = 1, n
+     lresult = lresult .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .and. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort  ! compare the logical results
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.or.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.or.:lresult)
+  do i = 1, n
+     lresult = lresult .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .or. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.eqv.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.eqv.:lresult)
+  do i = 1, n
+     lresult = lresult .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .eqv. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+
+  lresult = .false.
+  lvresult = .false.
+
+  ! '.neqv.' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(.neqv.:lresult)
+  do i = 1, n
+     lresult = lresult .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     lvresult = lvresult .neqv. (array(i) .ge. 5)
+  end do
+
+  if (lresult .neqv. lvresult) call abort
+end program reduction_3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
new file mode 100644
index 0000000..45675c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
@@ -0,0 +1,57 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! complex reductions
+
+program reduction_4
+  implicit none
+  ! Compare a complex '+' loop reduction with the same loop run sequentially.
+  integer, parameter    :: n = 1000, vl = 32
+  integer               :: i
+  complex               :: vresult, result
+  complex, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  result = 0
+  vresult = 0
+
+  ! '+' reductions
+
+  !$acc parallel vector_length(vl)
+  !$acc loop reduction(+:result)
+  do i = 1, n
+     result = result + array(i)
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+     vresult = vresult + array(i)
+  end do
+
+  if (result .ne. vresult) call abort
+
+  result = 1  ! only read by the '*' test below, which is commented out
+  vresult = 1
+
+!  ! '*' reductions
+!
+!  !$acc parallel vector_length(vl)
+!  !$acc loop reduction(*:result)
+!  do i = 1, n
+!     result = result * array(i)
+!  end do
+!  !$acc end parallel
+!
+!  ! Verify the results
+!  do i = 1, n
+!     vresult = vresult * array(i)
+!  end do
+!
+!  if (result.ne.vresult) call abort
+end program reduction_4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
new file mode 100644
index 0000000..e125548
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
@@ -0,0 +1,35 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! subroutine reduction
+
+program reduction
+  ! Compare the sum returned by REDSUB with the same sum built in a host loop.
+  integer, parameter    :: n = 40, c = 10
+  integer               :: k, expected, sum
+
+  call redsub (sum, n, c)
+
+  ! Host reference: add C once per iteration.
+  expected = 0
+  do k = 1, n
+     expected = expected + c
+  end do
+
+  if (sum .ne. expected) call abort ()
+end program reduction
+
+subroutine redsub(sum, n, c)  ! set SUM to C added N times, via a '+' reduction
+  integer :: sum, n, c
+
+  sum = 0
+
+  !$acc parallel vector_length(n) copyin (n, c)
+  !$acc loop reduction(+:sum)
+  do i = 1, n  ! I is implicitly typed (this unit has no IMPLICIT NONE)
+     sum = sum + c
+  end do
+  !$acc end parallel
+end subroutine redsub
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90
new file mode 100644
index 0000000..3390515
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90
@@ -0,0 +1,32 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+  interface  ! explicit interface for the external function FACT
+    recursive function fact (x)
+      !$acc routine
+      integer, intent(in) :: x
+      integer :: fact
+    end function fact
+  end interface
+  integer, parameter :: n = 10
+  integer :: a(n), i
+  !$acc parallel
+  !$acc loop
+  do i = 1, n
+     a(i) = fact (i)  ! call FACT from inside the parallel region
+  end do
+  !$acc end parallel
+  do i = 1, n
+     if (a(i) .ne. fact(i)) call abort  ! compare with FACT called outside the region
+  end do
+end
+recursive function fact (x) result (res)  ! RES = factorial of X; 1 when X < 1
+  !$acc routine
+  integer, intent(in) :: x
+  integer :: res
+  if (x < 1) then
+     res = 1  ! recursion stops here
+  else
+     res = x * fact (x - 1)
+  end if
+end function fact
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90
new file mode 100644
index 0000000..3d418b6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90
@@ -0,0 +1,29 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+  module m1  ! provides FACT, marked as an OpenACC routine, to the main program
+    contains
+    recursive function fact (x) result (res)  ! RES = factorial of X; 1 when X < 1
+      !$acc routine
+      integer, intent(in) :: x
+      integer :: res
+      if (x < 1) then
+         res = 1  ! recursion stops here
+      else
+         res = x * fact (x - 1)
+      end if
+    end function fact
+  end module m1
+  use m1  ! FACT comes from this module
+  integer, parameter :: n = 10
+  integer :: a(n), i
+  !$acc parallel
+  !$acc loop
+  do i = 1, n
+     a(i) = fact (i)  ! call FACT from inside the parallel region
+  end do
+  !$acc end parallel
+  do i = 1, n
+     if (a(i) .ne. fact(i)) call abort  ! compare with FACT called outside the region
+  end do
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90
new file mode 100644
index 0000000..d233a63
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+  integer, parameter :: n = 10
+  integer :: a(n), i
+  integer, external :: fact  ! FACT has no explicit interface in this unit
+  !$acc routine (fact)
+  !$acc parallel
+  !$acc loop
+  do i = 1, n
+     a(i) = fact (i)  ! call FACT from inside the parallel region
+  end do
+  !$acc end parallel
+  do i = 1, n
+     if (a(i) .ne. fact(i)) call abort  ! compare with FACT called outside the region
+  end do
+end
+recursive function fact (x) result (res)  ! RES = factorial of X; 1 when X < 1
+  !$acc routine
+  integer, intent(in) :: x
+  integer :: res
+  if (x < 1) then
+     res = 1  ! recursion stops here
+  else
+     res = x * fact (x - 1)
+  end if
+end function fact
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90
new file mode 100644
index 0000000..3e5fb09
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90
@@ -0,0 +1,23 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+  integer, parameter :: n = 10
+  integer :: a(n), i
+  do i = 1, n
+     a(i) = i  ! start value; the region below adds 1 to each element
+  end do
+  !$acc parallel
+  !$acc loop
+  do i = 1, n
+     call incr(a(i))  ! call INCR from inside the parallel region
+  end do
+  !$acc end parallel
+  do i = 1, n
+     if (a(i) .ne. (i + 1)) call abort
+  end do
+end
+subroutine incr (x)  ! add 1 to X in place
+  !$acc routine
+  integer, intent(inout) :: x
+  x = x + 1
+end subroutine incr
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
new file mode 100644
index 0000000..b39414f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
@@ -0,0 +1,97 @@
+program subarrays
+  ! Run one parallel region per data clause, each mapping the section 1:n.
+  integer, parameter     :: n = 20, c = 10
+  integer                :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc parallel copy (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel copyout (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! COPYIN
+
+  a(:) = 0
+
+  !$acc parallel copyout (a(1:n)) copyin (b(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)  ! read B inside the region so that copyin is exercised
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  a(:) = 0  ! reset so that the check depends on this region
+
+  !$acc parallel pcopy (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)  ! read B inside the region so that pcopyin is exercised
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+end program subarrays
+
+subroutine check (a, b, n)
+  ! Abort the test unless A and B hold the same N values.
+  integer :: n, a(n), b(n)
+
+  ! Both dummies have explicit shape (n), so the whole-array comparison
+  ! covers exactly elements 1..n.
+  if (any (a .ne. b)) call abort
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90
new file mode 100644
index 0000000..81799f6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90
@@ -0,0 +1,100 @@
+program subarrays
+  integer, parameter     :: n = 20, c = 10, low = 5, high = 10
+  integer                :: i, a(n), b(n)
+  ! Run one parallel region per data clause, each mapping only low:high.
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc parallel copy (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = low, high
+     b(i) = i  ! reference values; elements outside low:high stay 0
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel copyout (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = low, high
+     if (a(i) .ne. b(i)) call abort
+  end do
+  call check (a, b, n)
+
+  ! COPYIN
+
+  a(:) = 0
+
+  !$acc parallel copyout (a(low:high)) copyin (b(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  a(:) = 0
+  ! A was just zeroed, so the check depends on the stores made in this region.
+  !$acc parallel pcopy (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a(low:high)) &
+  !$acc & pcopyin (b(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+end program subarrays
+
+subroutine check (a, b, n)
+  ! Abort the test unless A and B hold the same N values.
+  integer :: n, a(n), b(n)
+
+  ! Both dummies have explicit shape (n), so the whole-array comparison
+  ! covers exactly elements 1..n.
+  if (any (a .ne. b)) call abort
+end subroutine check

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]