This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
This patch implements a set of new tests for OpenACC functionality in libgomp. Many of these do not yet work because of missing support elsewhere in GCC.

Julian

xxxx-xx-xx  James Norris  <jnorris@codesourcery.com>
            Thomas Schwinge  <thomas@codesourcery.com>
            Tom de Vries  <tom@codesourcery.com>
            Cesar Philippidis  <cesar@codesourcery.com>

    libgomp/
    * testsuite/lib/libgomp.exp
    (check_effective_target_openacc_nvidia_accel_present)
    (check_effective_target_openacc_nvidia_accel_selected): New functions.
    * testsuite/libgomp.oacc-fortran/fortran.exp: New exp file.
    * testsuite/libgomp.oacc-fortran/*.f: New tests.
    * testsuite/libgomp.oacc-fortran/*.f90: Likewise.
    * testsuite/libgomp.oacc-c/c.exp: New exp file.
    * testsuite/libgomp.oacc-c/subr.ptx: New file.
    * testsuite/libgomp.oacc-c/subr.cu: New file.
    * testsuite/libgomp.oacc-c/timer.h: New file.
    * testsuite/libgomp.oacc-c/*.c: New tests.
    * testsuite/libgomp.oacc-c++/c++.exp: New exp file.
commit a297265c276f7882d33cfb05bb5ab71e05b6d7a1 Author: Julian Brown <julian@codesourcery.com> Date: Mon Sep 22 03:27:53 2014 -0700 OpenACC tests. diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 094e5ed..b8c3eda 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -239,3 +239,31 @@ proc libgomp_option_proc { option } { return 0 } } + +# Return 1 if at least one nvidia board is present. + +proc check_effective_target_openacc_nvidia_accel_present { } { + return [check_runtime openacc_nvidia_accel_present { + #include <openacc.h> + int main () { + return !(acc_get_num_devices (acc_device_nvidia) > 0); + } + } "" ] +} + +# Return 1 if at least one nvidia board is present, and the nvidia device type +# is selected by default by means of setting the environment variable +# ACC_DEVICE_TYPE. + +proc check_effective_target_openacc_nvidia_accel_selected { } { + if { ![check_effective_target_openacc_nvidia_accel_present] } { + return 0; + } + if { ![info exists ::env(ACC_DEVICE_TYPE)] } { + return 0; + } + if { $::env(ACC_DEVICE_TYPE) == "nvidia" } { + return 1; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp new file mode 100644 index 0000000..0f426ed --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp @@ -0,0 +1,101 @@ +# This whole file adapted from libgomp.c++/c++.exp. + +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +global shlib_ext + +set shlib_ext [get_shlib_extension] +set lang_link_flags "-lstdc++" +set lang_test_file_found 0 +set lang_library_path "../libstdc++-v3/src/.libs" +if [info exists lang_include_flags] then { + unset lang_include_flags +} + +# Initialize dg. +dg-init + +# Turn on OpenACC. +# XXX (TEMPORARY): Remove the -flto once that's properly integrated. 
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto" + +set blddir [lookfor_file [get_multilibs] libgomp] + + +if { $blddir != "" } { + # Look for a static libstdc++ first. + if [file exists "${blddir}/${lang_library_path}/libstdc++.a"] { + set lang_test_file "${lang_library_path}/libstdc++.a" + set lang_test_file_found 1 + # We may have a shared only build, so look for a shared libstdc++. + } elseif [file exists "${blddir}/${lang_library_path}/libstdc++.${shlib_ext}"] { + set lang_test_file "${lang_library_path}/libstdc++.${shlib_ext}" + set lang_test_file_found 1 + } else { + puts "No libstdc++ library found, will not execute c++ tests" + } +} elseif { [info exists GXX_UNDER_TEST] } { + set lang_test_file_found 1 + # Needs to exist for libgomp.exp. + set lang_test_file "" +} else { + puts "GXX_UNDER_TEST not defined, will not execute c++ tests" +} + +if { $lang_test_file_found } { + # Gather a list of all tests. + set tests [lsort [glob -nocomplain $srcdir/$subdir/*.C]] + + if { $blddir != "" } { + set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}" + } else { + set ld_library_path "$always_ld_library_path" + } + append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] + set_ld_library_path_env_vars + + set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags" + if { [file exists $flags_file] } { + set libstdcxx_includes [exec sh $flags_file --build-includes] + } else { + set libstdcxx_includes "" + } + + # Todo: get list of accelerators from configure options --enable-accelerator. + set accels { "nonshm-host" "nvidia" } + + # Run on host (or fallback) accelerator. + lappend accels "host" + + # Test OpenACC with available accelerators. + foreach accel $accels { + set tagopt "-DACC_DEVICE_TYPE_$accel=1" + + # Todo: Determine shared memory or not using run-time test. 
+ switch $accel { + host { + set acc_mem_shared 1 + } + nonshm-host { + set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1" + set acc_mem_shared 0 + } + nvidia { + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + # Todo: Verify that this works for both local and remote testing. + setenv ACC_DEVICE_TYPE $accel + + dg-runtest $tests "$tagopt" $libstdcxx_includes + } +} + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-c/abort-2.c b/libgomp/testsuite/libgomp.oacc-c/abort-2.c new file mode 100644 index 0000000..5c2efa8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/abort-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ + +extern void abort (); + +int +main (int argc) +{ + +#pragma acc parallel + { + if (argc != 1) + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c/abort.c b/libgomp/testsuite/libgomp.oacc-c/abort.c new file mode 100644 index 0000000..8fe5aab --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/abort.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */ + +extern void abort (); + +int +main (void) +{ + +#pragma acc parallel + { + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c new file mode 100644 index 0000000..4190c8c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c @@ -0,0 +1,63 @@ +/* Disable the acc_on_device builtin; we want to test the libgomp library + function. */ +/* { dg-additional-options "-fno-builtin-acc_on_device" } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char *argv[]) +{ + /* Host. 
*/ + + { + if (!acc_on_device (acc_device_none)) + abort (); + if (!acc_on_device (acc_device_host)) + abort (); + if (acc_on_device (acc_device_not_host)) + abort (); + if (acc_on_device (acc_device_nvidia)) + abort (); + } + + + /* Host via offloading fallback mode. */ + +#pragma acc parallel if(0) + { + if (!acc_on_device (acc_device_none)) + abort (); + if (!acc_on_device (acc_device_host)) + abort (); + if (acc_on_device (acc_device_not_host)) + abort (); + if (acc_on_device (acc_device_nvidia)) + abort (); + } + + +#if !ACC_DEVICE_TYPE_host + + /* Offloaded. */ + +#pragma acc parallel + { + if (acc_on_device (acc_device_none)) + abort (); + if (acc_on_device (acc_device_host)) + abort (); + if (!acc_on_device (acc_device_not_host)) + abort (); +#if ACC_DEVICE_TYPE_nvidia + if (!acc_on_device (acc_device_nvidia)) + abort (); +#else + if (acc_on_device (acc_device_nvidia)) + abort (); +#endif + } +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c/asyncwait-1.c new file mode 100644 index 0000000..a7548fb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/asyncwait-1.c @@ -0,0 +1,295 @@ +/* { dg-do compile } */ + +#include <openacc.h> +#include <stdlib.h> + +int +main (int argc, char **argv) +{ + int N = 64; + float *a, *b; + int i; + + acc_init (acc_device_nvidia); + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,) /* { dg-error "error: expected '\\)' before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (,1) /* { dg-error "error: 
expected '\\)' before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,) /* { dg-error "error: expected '\\)' before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2 3) /* { dg-error "error: expected '\\)' before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,,) /* { dg-error "error: expected '\\)' before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 /* { dg-error "error: expected '\\)' before end of line" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (*) /* { dg-error "error: expected '\\)' before '\\*' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (a) /* { dg-error "error: expected '\\)' before 'a'" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (N) /* { dg-error "error: expected '\\)' before 'N'" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async () + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 2) /* { dg-error "error: expected 
',' before numeric constant" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,) /* { dg-error "error: expected integer expression before '\\)' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (,1) /* { dg-error "error: expected integer expression before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,) /* { dg-error "error: expected integer expression before '\\)' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2 3) /* { dg-error "error: expected ',' before numeric constant" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,,) /* { dg-error "error: expected integer expression before ',' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 /* { dg-error "error: expected ',' before end of line" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,*) /* { dg-error "error: expected integer expression before '\\*' token" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,a) /* { dg-error "error: expected integer expression before 'a'" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (a) /* { dg-error "error: expected integer expression before 'a'" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (N) /* { dg-error "error: 
expected integer expression before 'N'" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */ + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait () + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1 2) /* { dg-error "error: expected ',' before numeric constant" } */ + +#pragma acc wait (1,) /* { dg-error "error: expected integer expression before '\\)' token" } */ + +#pragma acc wait (,1) /* { dg-error "error: expected integer expression before ',' token" } */ + +#pragma acc wait (1,2,) /* { dg-error "error: expected integer expression before '\\)' token" } */ + +#pragma acc wait (1,2 3) /* { dg-error "error: expected ',' before numeric constant" } */ + +#pragma acc wait (1,2,,) /* { dg-error "error: expected integer expression before ',' token" } */ + +#pragma acc wait (1 /* { dg-error "error: expected ',' before end of line" } */ + +#pragma acc wait (1,*) /* { dg-error "error: expected integer expression before '\\*' token" } */ + +#pragma acc wait (1,a) /* { dg-error "error: expected integer expression before 'a'" } */ + +#pragma acc wait (a) /* { dg-error "error: expected integer expression before 'a'" } */ + +#pragma acc wait (N) /* { dg-error "error: expected integer expression before 'N'" } */ + +#pragma acc wait (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */ + +#pragma acc wait 1 /* { dg-error "error: expected clause before numeric constant" } */ + +#pragma acc wait N /* { dg-error "error: expected clause before 'N'" } */ + +#pragma acc wait async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */ + 
+#pragma acc wait async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */ + +#pragma acc wait async (1,) /* { dg-error "error: expected '\\)' before ',' token" } */ + +#pragma acc wait async (,1) /* { dg-error "error: expected '\\)' before ',' token" } */ + +#pragma acc wait async (1,2,) /* { dg-error "error: expected '\\)' before ',' token" } */ + +#pragma acc wait async (1,2 3) /* { dg-error "error: expected '\\)' before ',' token" } */ + +#pragma acc wait async (1,2,,) /* { dg-error "error: expected '\\)' before ',' token" } */ + +#pragma acc wait async (1 /* { dg-error "error: expected '\\)' before end of line" } */ + +#pragma acc wait async (*) /* { dg-error "error: expected '\\)' before '\\*' token" } */ + +#pragma acc wait async (a) /* { dg-error "error: expected '\\)' before 'a'" } */ + +#pragma acc wait async (N) /* { dg-error "error: expected '\\)' before 'N'" } */ + +#pragma acc wait async (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */ + + acc_shutdown (acc_device_nvidia); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/asyncwait-2.c b/libgomp/testsuite/libgomp.oacc-c/asyncwait-2.c new file mode 100755 index 0000000..22cef6d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/asyncwait-2.c @@ -0,0 +1,466 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <openacc.h> +#include <stdlib.h> +#include "cuda.h" + +#include <stdio.h> +#include <sys/time.h> + +int +main (int argc, char **argv) +{ + CUresult r; + CUstream stream1; + int N = 128; //1024 * 1024; + float *a, *b, *c, *d, *e; + int i; + int nbytes; + + acc_init (acc_device_nvidia); + + nbytes = N * sizeof (float); + + a = (float *) malloc (nbytes); + b = (float *) malloc (nbytes); + c = (float *) malloc (nbytes); + d = (float *) malloc (nbytes); + e = (float *) malloc (nbytes); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc 
data copy (a[0:N]) copy (b[0:N]) copyin (N) + { + +#pragma acc parallel async + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma 
acc parallel wait (1) async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 4.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 11.0) + abort (); + } + + + r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream1); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort (); + + if (b[i] != 49.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + 
+#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc parallel wait (1) async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 17.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 16.0) + abort (); + + if (c[i] != 4.0) + abort (); + } + + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1) + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 25.0) + abort (); + + if (c[i] != 4.0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp new file mode 100644 index 0000000..05a554a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/c.exp @@ -0,0 +1,84 @@ +# This whole file adapted from libgomp.c/c.exp. + +if [info exists lang_library_path] then { + unset lang_library_path + unset lang_link_flags +} +if [info exists lang_test_file] then { + unset lang_test_file +} +if [info exists lang_include_flags] then { + unset lang_include_flags +} + +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +# If a testcase doesn't have special options, use these. +if ![info exists DEFAULT_CFLAGS] then { + set DEFAULT_CFLAGS "-O2" +} + +# Initialize dg. +dg-init + +# Turn on OpenACC. +# XXX (TEMPORARY): Remove the -flto once that's properly integrated. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto" + +# Gather a list of all tests. +set tests [lsort [find $srcdir/$subdir *.c]] + +set ld_library_path $always_ld_library_path +append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +append ld_library_path ":/opt/nvidia/cuda-5.5/lib64" +set_ld_library_path_env_vars + +# Todo: get list of accelerators from configure options --enable-accelerator. +set accels { "nonshm-host" "nvidia" } + +# Run on host (or fallback) accelerator. +lappend accels "host" + +# Test OpenACC with available accelerators. +set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS" +foreach accel $accels { + set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS" + set tagopt "-DACC_DEVICE_TYPE_$accel=1" + + # Todo: Determine shared memory or not using run-time test. + switch $accel { + host { + set acc_mem_shared 1 + } + nonshm-host { + set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1" + set acc_mem_shared 0 + } + nvidia { + # Copy ptx file (TEMPORARY) + remote_download host $srcdir/libgomp.oacc-c/subr.ptx + + # Where cuda.h lives + # Todo: get that from configure option --with-cuda-driver. 
+ lappend ALWAYS_CFLAGS "additional_flags=-I/opt/nvidia/cuda-5.5/include" + lappend ALWAYS_CFLAGS "additional_flags=-L/opt/nvidia/cuda-5.5/lib64" + + # Where timer.h lives + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}" + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + # Todo: Verify that this works for both local and remote testing. + setenv ACC_DEVICE_TYPE $accel + + dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS +} + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-c/cache-1.c b/libgomp/testsuite/libgomp.oacc-c/cache-1.c new file mode 100644 index 0000000..5473475 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/cache-1.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ + +#include <openacc.h> +#include <stdlib.h> + +int +main (int argc, char **argv) +{ +#define N 2 + int a[N], b[N]; + int i; + + for (i = 0; i < N; i++) + { + a[i] = 3; + b[i] = 0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) +{ + int ii; + + for (ii = 0; ii < N; ii++) + { + const int idx = ii; + int n = 1; + const int len = n; + +#pragma acc cache /* { dg-error "error: expected '\\(' before end of line" } */ + +#pragma acc cache (a) /* { dg-error "error: expected '\\\[' before '\\)' token" } */ + +#pragma acc cache (a[0:N]) copyin (a[0:N]) /* { dg-error "error: expected end of line before 'copyin'" } */ + +#pragma acc cache () /* { dg-error "error: expected identifier before '\\)' token" } */ + +#pragma acc cache (a[0:N] b[0:N}) /* { dg-error "error: expected '\\)' before 'b'" } */ + +#pragma acc cache (a[0:N] /* { dg-error "error: expected '\\)' before end of line" } */ + +#pragma acc cache (a[ii]) /* { dg-error "error: 'ii' is not a constant" } */ + +#pragma acc cache (a[idx:n]) /* { dg-error "error: 'n' is not a constant" } */ + +#pragma acc cache (a[0:N]) + +#pragma acc cache (a[0:N], b[0:N]) + +#pragma acc cache (a[0]) + +#pragma acc cache (a[0], a[1], b[0:N]) + +#pragma acc 
cache (a[idx]) + +#pragma acc cache (a[idx:len]) + + b[ii] = a[ii]; + } +} + + + for (i = 0; i < N; i++) + { + if (a[i] != b[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c new file mode 100644 index 0000000..8177574 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c @@ -0,0 +1,623 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + c = (float *) malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 3.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 5.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present_or_copyin(a[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + 
+ if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + acc_free(d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin(a[0:N]) present_or_copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 2.0; + } + + d = acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc parallel copyin(a[0:N]) present_or_copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + acc_free (d); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 4.0; + } + +#pragma acc parallel copy(a[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = a[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 6.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 7.0; + } + +#pragma acc parallel present_or_copy(a[0:N]) present_or_copy(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 9.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } 
+ + d = acc_copyin (&a[0], N * sizeof (float)); + d = acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc parallel present_or_copy(a[0:N]) present_or_copy(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 7.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + d = acc_deviceptr(&a[0]); + acc_unmap_data (&a[0]); + acc_free (d); + + d = acc_deviceptr(&b[0]); + acc_unmap_data (&b[0]); + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + +#pragma acc parallel copyin(a[0:N]) create(c[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + +#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 4.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = acc_malloc (N * sizeof (float)); + acc_map_data(c, d, N * sizeof (float)); + +#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + 
} + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + d = acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + d = acc_malloc(N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin(a[0:N]) present(c[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 4.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + acc_copyin (a, N * sizeof (float)); + + d = acc_malloc(N * sizeof (float)); + acc_map_data (b, d, N * sizeof (float)); + + d = acc_malloc(N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel present(a[0:N]) present(c[0:N]) present(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + if (!acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + acc_copyout (b, N * sizeof (float)); + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 4.0) + abort(); + } + + d = acc_deviceptr(a); + + acc_unmap_data (a); + + acc_free (d); + + d = acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 6.0; + } + + d = acc_malloc(N * sizeof (float)); + +#pragma 
acc parallel copyin(a[0:N]) deviceptr(d) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + d[ii] = a[ii]; + b[ii] = d[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel pcopyin(a[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + acc_free(d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin(a[0:N]) pcopyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 7.0; + } + +#pragma acc parallel copyin(a[0:N]) pcreate(c[0:N]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 5.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-2.c b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c new file mode 100644 index 0000000..25c2165 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + c = (float *) malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = acc_malloc (N * sizeof (float)); + acc_map_data(c, d, N * sizeof (float)); + +#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N+1]) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + d = acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + return 0; +} +/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-1.c b/libgomp/testsuite/libgomp.oacc-c/collapse-1.c new file mode 100644 index 0000000..80fed6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/collapse-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> + +int +main (void) +{ + int i, j, k, l = 0; + int a[3][3][3]; + + memset (a, '\0', sizeof (a)); + #pragma acc parallel + #pragma acc loop collapse(4 - 1) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + a[i][j][k] = i + j * 4 + k * 16; + #pragma acc parallel + { + #pragma acc loop collapse(2) reduction(|:l) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + if (a[i][j][k] != i + j * 4 + k * 16) + l = 1; + 
} + if (l) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-2.c b/libgomp/testsuite/libgomp.oacc-c/collapse-2.c new file mode 100644 index 0000000..44a77f7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/collapse-2.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ + +#include <stdlib.h> + +int +main (void) +{ + int i, j, k, l = 0, f = 0, x = 0; + int m1 = 4, m2 = -5, m3 = 17; + + #pragma acc parallel + #pragma acc loop collapse(3) reduction(+:l) + for (i = -2; i < m1; i++) + for (j = m2; j < -2; j++) + { + for (k = 13; k < m3; k++) + { + if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++) + l++; + } + } + + for (i = -2; i < m1; i++) + for (j = m2; j < -2; j++) + { + for (k = 13; k < m3; k++) + { + if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++) + x++; + } + } + + if (l != x) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-3.c b/libgomp/testsuite/libgomp.oacc-c/collapse-3.c new file mode 100644 index 0000000..dcb3b20 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/collapse-3.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -std=gnu99" } */ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +int +main (void) +{ + int i2, l = 0, r = 0; + int a[3][3][3]; + + memset (a, '\0', sizeof (a)); + #pragma acc parallel + #pragma acc loop collapse(4 - 1) + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + a[i][j][k] = i + j * 4 + k * 16; +#pragma acc parallel + { + #pragma acc loop collapse(2) reduction(|:l) + for (i2 = 0; i2 < 2; i2++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + if (a[i2][j][k] != i2 + j * 4 + k * 16) + l += 1; + } + + for (i2 = 0; i2 < 2; i2++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + if (a[i2][j][k] != i2 + j * 4 + k * 16) + r += 1; + + if (l != r) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-1.c b/libgomp/testsuite/libgomp.oacc-c/context-1.c 
new file mode 100644 index 0000000..fd64570 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-1.c @@ -0,0 +1,213 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> +#include <openacc.h> + +void +saxpy(int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check(CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent(&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf(stderr, "new context established\n"); + exit(EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context(); + + if (ctx1 != ctx3) + { + fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit(EXIT_FAILURE); + } + + return; +} + +int +main(int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext pctx, ctx; + CUresult r; + int dev; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 1 - cuBLAS creates, OpenACC shares. 
*/ + + s = cublasCreate(&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasCreate failed: %d\n", s); + exit(EXIT_FAILURE); + } + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + e = cudaGetDevice(&dev); + if (e != cudaSuccess) + { + fprintf(stderr, "cudaGetDevice failed: %d\n", e); + exit(EXIT_FAILURE); + } + + acc_set_device_num(dev, acc_device_nvidia); + + h_X = (float *)malloc(N * sizeof (float)); + if (!h_X) + { + fprintf(stderr, "malloc failed: for h_X\n"); + exit(EXIT_FAILURE); + } + + h_Y1 = (float *)malloc(N * sizeof (float)); + if (!h_Y1) + { + fprintf(stderr, "malloc failed: for h_Y1\n"); + exit(EXIT_FAILURE); + } + + h_Y2 = (float *)malloc(N * sizeof (float)); + if (!h_Y2) + { + fprintf(stderr, "malloc failed: for h_Y2\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand() / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX; + } + + d_X = acc_copyin(&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf(stderr, "copyin error h_X\n"); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + d_Y = acc_copyin(&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf(stderr, "copyin error h_Y1\n"); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasSaxpy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float)); + + context_check(pctx); + + saxpy(N, alpha, h_X, h_Y2); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float)sqrt((double)error_norm); + ref_norm = (float)sqrt((double)ref_norm); + + if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + 
fprintf(stderr, "math error\n"); + exit(EXIT_FAILURE); + } + + free(h_X); + free(h_Y1); + free(h_Y2); + + acc_free(d_X); + acc_free(d_Y); + + context_check(pctx); + + s = cublasDestroy(h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasDestroy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent(&ctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (!ctx) + { + fprintf(stderr, "Expected context\n"); + exit(EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf(stderr, "Unexpected new context\n"); + exit(EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-2.c b/libgomp/testsuite/libgomp.oacc-c/context-2.c new file mode 100644 index 0000000..03e7c1b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-2.c @@ -0,0 +1,223 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> +#include <openacc.h> + +void +saxpy(int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check(CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent(&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf(stderr, "new context established\n"); + exit(EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context(); + + if (ctx1 != ctx3) + { + fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit(EXIT_FAILURE); + } + + return; +} + +int +main(int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext pctx, ctx; + CUresult r; + int dev; + int i; + const int N 
= 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 2 - cuBLAS creates, OpenACC shares. */ + + s = cublasCreate(&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasCreate failed: %d\n", s); + exit(EXIT_FAILURE); + } + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + e = cudaGetDevice(&dev); + if (e != cudaSuccess) + { + fprintf(stderr, "cudaGetDevice failed: %d\n", e); + exit(EXIT_FAILURE); + } + + acc_set_device_num(dev, acc_device_nvidia); + + h_X = (float *)malloc(N * sizeof (float)); + if (h_X == 0) + { + fprintf(stderr, "malloc failed: for h_X\n"); + exit(EXIT_FAILURE); + } + + h_Y1 = (float *)malloc(N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf(stderr, "malloc failed: for h_Y1\n"); + exit(EXIT_FAILURE); + } + + h_Y2 = (float *)malloc(N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf(stderr, "malloc failed: for h_Y2\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand() / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX; + } + + d_X = acc_copyin(&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf(stderr, "copyin error h_X\n"); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + d_Y = acc_copyin(&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf(stderr, "copyin error h_Y1\n"); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasSaxpy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float)); + + context_check(pctx); + +#pragma acc parallel copyin(h_X[0:N]), copy(h_Y2[0:N]) copyin(alpha) + { + int i; + + for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + context_check(pctx); + + error_norm = 0; 
+ ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float)sqrt((double)error_norm); + ref_norm = (float)sqrt((double)ref_norm); + + if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf(stderr, "math error\n"); + exit(EXIT_FAILURE); + } + + free(h_X); + free(h_Y1); + free(h_Y2); + + acc_free(d_X); + acc_free(d_Y); + + context_check(pctx); + + s = cublasDestroy(h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasDestroy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent(&ctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (!ctx) + { + fprintf(stderr, "Expected context\n"); + exit(EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf(stderr, "Unexpected new context\n"); + exit(EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-3.c b/libgomp/testsuite/libgomp.oacc-c/context-3.c new file mode 100644 index 0000000..85c7e7b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-3.c @@ -0,0 +1,200 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> +#include <openacc.h> + +void +saxpy(int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check(CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent(&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf(stderr, "new context established\n"); + exit(EXIT_FAILURE); + } + + ctx3 = (CUcontext) 
acc_get_current_cuda_context(); + + if (ctx1 != ctx3) + { + fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit(EXIT_FAILURE); + } + + return; +} + +int +main(int argc, char **argv) +{ + cublasStatus_t s; + cublasHandle_t h; + CUcontext pctx; + CUresult r; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 3 - OpenACC creates, cuBLAS shares. */ + + acc_set_device_num(0, acc_device_nvidia); + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + h_X = (float *)malloc(N * sizeof (float)); + if (h_X == 0) + { + fprintf(stderr, "malloc failed: for h_X\n"); + exit(EXIT_FAILURE); + } + + h_Y1 = (float *)malloc(N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf(stderr, "malloc failed: for h_Y1\n"); + exit(EXIT_FAILURE); + } + + h_Y2 = (float *)malloc(N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf(stderr, "malloc failed: for h_Y2\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand() / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX; + } + + d_X = acc_copyin(&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf(stderr, "copyin error h_X\n"); + exit(EXIT_FAILURE); + } + + d_Y = acc_copyin(&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf(stderr, "copyin error h_Y1\n"); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + s = cublasCreate(&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasCreate failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasSaxpy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float)); + + context_check(pctx); + + saxpy(N, alpha, h_X, h_Y2); + + 
error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float)sqrt((double)error_norm); + ref_norm = (float)sqrt((double)ref_norm); + + if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf(stderr, "math error\n"); + exit(EXIT_FAILURE); + } + + free(h_X); + free(h_Y1); + free(h_Y2); + + acc_free(d_X); + acc_free(d_Y); + + context_check(pctx); + + s = cublasDestroy(h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasDestroy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (pctx) + { + fprintf(stderr, "Unexpected context\n"); + exit(EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-4.c b/libgomp/testsuite/libgomp.oacc-c/context-4.c new file mode 100644 index 0000000..7551cf2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-4.c @@ -0,0 +1,213 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> +#include <openacc.h> + +void +saxpy(int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check(CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent(&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf(stderr, "new context established\n"); + exit(EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context(); + + if (ctx1 != ctx3) + { + 
fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit(EXIT_FAILURE); + } + + return; +} + +int +main(int argc, char **argv) +{ + cublasStatus_t s; + cublasHandle_t h; + CUcontext pctx; + CUresult r; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 4 - OpenACC creates, cuBLAS shares. */ + + acc_set_device_num(0, acc_device_nvidia); + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + h_X = (float *)malloc(N * sizeof (float)); + if (h_X == 0) + { + fprintf(stderr, "malloc failed: for h_X\n"); + exit(EXIT_FAILURE); + } + + h_Y1 = (float *)malloc(N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf(stderr, "malloc failed: for h_Y1\n"); + exit(EXIT_FAILURE); + } + + h_Y2 = (float *)malloc(N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf(stderr, "malloc failed: for h_Y2\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand() / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX; + } + +#pragma acc parallel copyin(h_X[0:N]), copy(h_Y2[0:N]) copy(alpha) + { + int i; + + for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + d_X = acc_copyin(&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf(stderr, "copyin error h_Y1\n"); + exit(EXIT_FAILURE); + } + + d_Y = acc_copyin(&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf(stderr, "copyin error h_Y1\n"); + exit(EXIT_FAILURE); + } + + s = cublasCreate(&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasCreate failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + 
fprintf(stderr, "cublasSaxpy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float)); + + context_check(pctx); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float)sqrt((double)error_norm); + ref_norm = (float)sqrt((double)ref_norm); + + if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf(stderr, "math error\n"); + exit(EXIT_FAILURE); + } + + free(h_X); + free(h_Y1); + free(h_Y2); + + acc_free(d_X); + acc_free(d_Y); + + context_check(pctx); + + s = cublasDestroy(h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf(stderr, "cublasDestroy failed: %d\n", s); + exit(EXIT_FAILURE); + } + + context_check(pctx); + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent(&pctx); + if (r != CUDA_SUCCESS) + { + fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r); + exit(EXIT_FAILURE); + } + + if (pctx) + { + fprintf(stderr, "Unexpected context\n"); + exit(EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/data-1.c b/libgomp/testsuite/libgomp.oacc-c/data-1.c new file mode 100644 index 0000000..e7564cc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/data-1.c @@ -0,0 +1,188 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int i; + +int +is_mapped (void *p, size_t n) +{ +#if ACC_MEM_SHARED + return 1; +#else + return acc_is_present (p, n); +#endif +} + +int main(void) +{ + int j; + + i = -1; + j = -2; +#pragma acc data copyin (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data copyout (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof 
(j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + +#pragma acc parallel present (i, j) + { + i = 4; + j = 2; + } + } + if (i != 4 || j != 2) + abort (); + + i = -1; + j = -2; +#pragma acc data create (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data present_or_copyin (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data present_or_copyout (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + +#pragma acc parallel present (i, j) + { + i = 4; + j = 2; + } + } + if (i != 4 || j != 2) + abort (); + + i = -1; + j = -2; +#pragma acc data present_or_copy (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; +#pragma acc data present_or_create (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data copyin (i, j) + { +#pragma acc data present (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + } + if (i != 2 || j != 1) + 
abort (); + + i = -1; + j = -2; +#pragma acc data + { +#if !ACC_MEM_SHARED + if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j))) + abort (); +#endif + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c new file mode 100644 index 0000000..41f92d4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +extern void abort (); + +int main(void) +{ + void *a, *a_1, *a_2; + +#define A (void *) 0x123 + a = A; + +#pragma acc data copyout(a_1, a_2) +#pragma acc kernels deviceptr(a) + { + a_1 = a; + a_2 = &a; + } + + if (a != A) + abort (); + if (a_1 != a) + abort (); +#if ACC_MEM_SHARED + if (a_2 != &a) + abort (); +#else + if (a_2 == &a) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c new file mode 100644 index 0000000..683fefa --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */ + +#include "libgomp_g.h" + +extern void abort (); + +volatile int i; + +void +f (void *data) +{ + if (i != -1) + abort (); + i = 42; +} + +int main(void) +{ + i = -1; + GOACC_kernels (0, f, (const void *) 0, + 0, (void *) 0, (void *) 0, (void *) 0, + 1, 1, 1, -2, -1); + if (i != 42) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c new file mode 100644 index 0000000..232ce8a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */ + +#include "libgomp_g.h" + +extern void abort (); + +volatile 
int i; + +void +f (void *data) +{ + if (i != -1) + abort (); + i = 42; +} + +int main(void) +{ + i = -1; + GOACC_parallel (0, f, (const void *) 0, + 0, (void *) 0, (void *) 0, (void *) 0, + 1, 1, 1, -2, -1); + if (i != 42) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/if-1.c b/libgomp/testsuite/libgomp.oacc-c/if-1.c new file mode 100644 index 0000000..ef2d5fc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/if-1.c @@ -0,0 +1,537 @@ +/* { dg-do run } */ + +#include <openacc.h> +#include <stdlib.h> +#include <stdbool.h> + +#define N 32 + +int +main(int argc, char **argv) +{ + float *a, *b, *d_a, *d_b, exp; + int i; + const int one = 1; + const int zero = 0; + int n; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + d_a = (float *) acc_malloc (N * sizeof (float)); + d_b = (float *) acc_malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + a[i] = 4.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 5.0; +#else + exp = 4.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc parallel if(0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 17.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 8.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 9.0; +#else + exp = 8.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + +#pragma acc parallel 
if(zero) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 23.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 17.0; +#else + exp = 16.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 76.0; + +#pragma acc parallel if(false) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 77.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + + n = 1; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 23.0; +#else + exp = 22.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 18.0; + + n = 0; + +#pragma acc parallel if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 19.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 49.0; + + n = 1; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 50.0; +#else + exp = 49.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 38.0; + + n 
= 0; + +#pragma acc parallel if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 39.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 91.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 92.0; +#else + exp = 91.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 43.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 44.0; +#else + exp = 43.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 87.0; + +#pragma acc parallel if(one == 0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 88.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 9.0; + } + + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + +#pragma acc update device(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 9.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 12.0; + } + +#pragma acc update device(a[0:N], b[0:N]) if(0) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + 
abort(); + + if (b[i] != 9.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 26.0; + b[i] = 21.0; + } + +#pragma acc update device(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(0) + + for (i = 0; i < N; i++) + { + if (a[i] != 0.0) + abort(); + + if (b[i] != 0.0) + abort(); + } + + acc_unmap_data (a); + acc_unmap_data (b); + + acc_free (d_a); + acc_free (d_b); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1) +{ +#pragma acc parallel present(a[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } +} + + for (i = 0; i < N; i++) + { + if (b[i] != 4.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 8.0; + b[i] = 1.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0) +{ +#if !ACC_MEM_SHARED + if (acc_is_present (a, N * sizeof (float))) + abort (); +#endif + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif +} + + for (i = 0; i < N; i++) + { + a[i] = 18.0; + b[i] = 21.0; + } + +#pragma acc data copyin(a[0:N]) if(1) +{ +#if !ACC_MEM_SHARED + if (!acc_is_present (a, N * sizeof (float))) + abort (); +#endif + +#pragma acc data copyout(b[0:N]) if(0) + { +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc data copyout(b[0:N]) if(1) + { +#pragma acc parallel present(a[0:N]) present(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } + } + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + } +} + + for (i = 0; i < N; i++) + { + if (b[i] != 18.0) + abort (); + } + +#ifdef XXX_TODO_ENTER_END_DATA +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c new file mode 100644 index 0000000..64fea61 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c @@ -0,0 +1,184 @@ +/* { dg-do run } */ + +extern void abort (); + +int i; + +int main(void) +{ + int j, v; + +#if 0 + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != -1 || j != -2) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + 
j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif + +#if 0 + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); +#endif + +#if 0 + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-1.c b/libgomp/testsuite/libgomp.oacc-c/lib-1.c new file mode 100644 index 0000000..17129d8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-1.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ + +#include <openacc.h> + +int +main (int argc, char **argv) +{ + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (devtype) == 0) + return 0; +#endif + + acc_init (devtype); + + acc_init (devtype); + + return 0; +} + +/* { dg-shouldfail "libgomp: device already active" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-10.c b/libgomp/testsuite/libgomp.oacc-c/lib-10.c new file mode 100644 index 0000000..cf1af8c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-10.c @@ -0,0 +1,58 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + void *d; + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; +#endif + + acc_init (devtype); + + d = 
acc_malloc (0); + if (d != NULL) + abort (); + + acc_free (0); + + acc_shutdown (devtype); + + acc_set_device_type (devtype); + + d = acc_malloc (0); + if (d != NULL) + abort (); + + acc_shutdown (devtype); + + acc_init (devtype); + + d = acc_malloc (1024); + if (d == NULL) + abort (); + + acc_free (d); + + acc_shutdown (devtype); + + acc_set_device_type (devtype); + + d = acc_malloc (1024); + if (d == NULL) + abort (); + + acc_free (d); + + acc_shutdown (devtype); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-11.c b/libgomp/testsuite/libgomp.oacc-c/lib-11.c new file mode 100644 index 0000000..fdb53a8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-11.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 512; + void *d; + + d = acc_malloc (N); + if (d == NULL) + abort (); + + acc_free (d + (N >> 1)); + + return 0; +} + +/* { dg-shouldfail "libgomp: mem free failed 1" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-12.c b/libgomp/testsuite/libgomp.oacc-c/lib-12.c new file mode 100644 index 0000000..1c3a127 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-12.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + memset (h, 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-13.c b/libgomp/testsuite/libgomp.oacc-c/lib-13.c new file mode 100644 index 0000000..7098ef3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-13.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +#include <stdio.h> + +int +main (int argc, char 
**argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h, N + 1) != 0) + abort (); + + if (acc_is_present (h + 1, N) != 0) + abort (); + + if (acc_is_present (h - 1, N) != 0) + abort (); + + if (acc_is_present (h - 1, N - 1) != 0) + abort (); + + if (acc_is_present (h + N, 0) != 0) + abort (); + + if (acc_is_present (h + N, N) != 0) + abort (); + + if (acc_is_present (0, N) != 0) + abort (); + + if (acc_is_present (h, 0) != 0) + abort (); + + acc_free (d); + + if (acc_is_present (h, 1) != 0) + abort (); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-14.c b/libgomp/testsuite/libgomp.oacc-c/lib-14.c new file mode 100644 index 0000000..a9632f7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-14.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +#include <stdio.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h + N - 1, 1) != 1) + abort (); + + if (acc_is_present (h - 1, 1) != 0) + abort (); + + if (acc_is_present (h + N, 1) != 0) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, N - i) != 1) + abort (); + } + + acc_free (d); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, N - i) != 0) + abort (); + } + + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-15.c b/libgomp/testsuite/libgomp.oacc-c/lib-15.c new file mode 100644 index 0000000..4f6a731 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-15.c @@ 
-0,0 +1,33 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-16.c b/libgomp/testsuite/libgomp.oacc-c/lib-16.c new file mode 100644 index 0000000..9d277ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-16.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + (void) acc_copyin (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-17.c b/libgomp/testsuite/libgomp.oacc-c/lib-17.c new file mode 100644 index 0000000..5ff894c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-17.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N); + + acc_copyout (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-18.c b/libgomp/testsuite/libgomp.oacc-c/lib-18.c new file mode 100644 index 0000000..2bc3263 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-18.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + 
+#include <stdio.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + acc_free (d); + + acc_copyout (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-19.c b/libgomp/testsuite/libgomp.oacc-c/lib-19.c new file mode 100644 index 0000000..57b1221 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-19.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +#include <stdio.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + h[i] = (unsigned char *) malloc (N); + p = h[i]; + + for (j = 0; j < N; j++) + { + p[j] = i; + } + + (void) acc_copyin (p, N); + } + + for (i = 0; i < N; i++) + { + memset (h[i], 0, i); + } + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + acc_copyout (h[i], N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + } + + for (i = 0; i < N; i++) + { + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-2.c b/libgomp/testsuite/libgomp.oacc-c/lib-2.c new file mode 100644 index 0000000..9a4501f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <openacc.h> + +int +main (int argc, char **argv) +{ + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; +#endif + + acc_init (devtype); + + acc_shutdown (devtype); + + acc_shutdown (devtype); + + return 0; +} + +/* { dg-shouldfail "libgomp: no device initialized" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-20.c 
b/libgomp/testsuite/libgomp.oacc-c/lib-20.c new file mode 100644 index 0000000..b379a8f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-20.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N + 1); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-21.c b/libgomp/testsuite/libgomp.oacc-c/lib-21.c new file mode 100644 index 0000000..3a67400 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-21.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, 0); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-22.c b/libgomp/testsuite/libgomp.oacc-c/lib-22.c new file mode 100644 index 0000000..2b86da8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-22.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h + 1, N - 1); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-23.c b/libgomp/testsuite/libgomp.oacc-c/lib-23.c new file mode 100644 index 0000000..38f236d --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/lib-23.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h1, *h2; + + h1 = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h1[i] = 0xab; + } + + (void) acc_copyin (h1, N); + + h2 = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h2[i] = 0xde; + } + + (void) acc_copyin (h2, N); + + acc_copyout (h1, N + N); + + free (h1); + free (h2); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-24.c b/libgomp/testsuite/libgomp.oacc-c/lib-24.c new file mode 100644 index 0000000..d7de8e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-24.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + acc_delete (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + d = acc_create (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + acc_delete (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-25.c b/libgomp/testsuite/libgomp.oacc-c/lib-25.c new file mode 100644 index 0000000..1145828 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-25.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create 
(h, N); + if (!d) + abort (); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-26.c b/libgomp/testsuite/libgomp.oacc-c/lib-26.c new file mode 100644 index 0000000..a23f56e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-26.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, 0); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-27.c b/libgomp/testsuite/libgomp.oacc-c/lib-27.c new file mode 100644 index 0000000..074fddb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-27.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (0, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\)\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-28.c b/libgomp/testsuite/libgomp.oacc-c/lib-28.c new file mode 100644 index 0000000..027f7cc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-28.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (0, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git 
a/libgomp/testsuite/libgomp.oacc-c/lib-29.c b/libgomp/testsuite/libgomp.oacc-c/lib-29.c new file mode 100644 index 0000000..a66de0f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-29.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, 0); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-3.c b/libgomp/testsuite/libgomp.oacc-c/lib-3.c new file mode 100644 index 0000000..e823a41 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-3.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ + +#include <openacc.h> + +int +main (int argc, char **argv) +{ + acc_init (acc_device_host); + + acc_shutdown (acc_device_not_host); + + return 0; +} + +/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-30.c b/libgomp/testsuite/libgomp.oacc-c/lib-30.c new file mode 100644 index 0000000..ce2bdb4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-30.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, N - 2); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-31.c b/libgomp/testsuite/libgomp.oacc-c/lib-31.c new file mode 100644 index 0000000..25ce5a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-31.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + 
void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (h, N); + if (!d) + abort (); + + if (acc_is_present (h, 1) != 1) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-32.c b/libgomp/testsuite/libgomp.oacc-c/lib-32.c new file mode 100755 index 0000000..e3f87a8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-32.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + d2 = acc_pcreate (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-33.c b/libgomp/testsuite/libgomp.oacc-c/lib-33.c new file mode 100755 index 0000000..4abaa02 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-33.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h, N - 2); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-34.c b/libgomp/testsuite/libgomp.oacc-c/lib-34.c new file mode 100755 index 0000000..32d5d51 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-34.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = 
acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h + 2, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-35.c b/libgomp/testsuite/libgomp.oacc-c/lib-35.c new file mode 100755 index 0000000..ca8edab --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-35.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (0, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-36.c b/libgomp/testsuite/libgomp.oacc-c/lib-36.c new file mode 100755 index 0000000..cb29397 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-36.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (h, 0); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-37.c b/libgomp/testsuite/libgomp.oacc-c/lib-37.c new file mode 100644 index 0000000..20bee1b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-37.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (h, N); 
+ if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-38.c b/libgomp/testsuite/libgomp.oacc-c/lib-38.c new file mode 100644 index 0000000..1211272 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-38.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d1 = acc_present_or_copyin (h, N); + if (!d1) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + d2 = acc_present_or_copyin (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + d2 = acc_pcopyin (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-39.c b/libgomp/testsuite/libgomp.oacc-c/lib-39.c new file mode 100644 index 0000000..db1e0b3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-39.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (0, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */ diff --git 
a/libgomp/testsuite/libgomp.oacc-c/lib-4.c b/libgomp/testsuite/libgomp.oacc-c/lib-4.c new file mode 100644 index 0000000..84d2ace --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-4.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ + +#include <openacc.h> + +int +main (int argc, char **argv) +{ + acc_init (99); + + return 0; +} + +/* { dg-shouldfail "libgomp: device 99 is out of range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-40.c b/libgomp/testsuite/libgomp.oacc-c/lib-40.c new file mode 100644 index 0000000..cb6c422 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-40.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (h, 0); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-41.c b/libgomp/testsuite/libgomp.oacc-c/lib-41.c new file mode 100644 index 0000000..01c5f3c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-41.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-42.c 
b/libgomp/testsuite/libgomp.oacc-c/lib-42.c new file mode 100644 index 0000000..d577fe3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-42.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + acc_update_device (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-43.c b/libgomp/testsuite/libgomp.oacc-c/lib-43.c new file mode 100644 index 0000000..ceeb155 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-43.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-44.c b/libgomp/testsuite/libgomp.oacc-c/lib-44.c new file mode 100644 index 0000000..0cabb0d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-44.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + 
+ acc_update_device (h, 0); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-45.c b/libgomp/testsuite/libgomp.oacc-c/lib-45.c new file mode 100644 index 0000000..82cbc5e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-45.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, N - 2); + + acc_copyout (h, N); + + for (i = 0; i < N - 2; i++) + { + if (h[i] != 0xab) + abort (); + } + + for (i = N - 2; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-46.c b/libgomp/testsuite/libgomp.oacc-c/lib-46.c new file mode 100644 index 0000000..36faa28 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-46.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-47.c b/libgomp/testsuite/libgomp.oacc-c/lib-47.c new file mode 100644 index 0000000..a7ff904 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-47.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include 
<string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (0, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-48.c b/libgomp/testsuite/libgomp.oacc-c/lib-48.c new file mode 100644 index 0000000..01d3c6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-48.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, 0); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-49.c b/libgomp/testsuite/libgomp.oacc-c/lib-49.c new file mode 100644 index 0000000..e0c5d2a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-49.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, N - 2); + + for (i = 0; i < N - 2; i++) + { + if (h[i] != i) + abort (); + } + + for (i = N - 
2; i < N; i++) + { + if (h[i] != 0) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-5.c b/libgomp/testsuite/libgomp.oacc-c/lib-5.c new file mode 100644 index 0000000..961a62c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-5.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + if (acc_get_device_type () == acc_device_default) + abort (); + + acc_init (acc_device_default); + + if (acc_get_device_type () == acc_device_default) + abort (); + + acc_shutdown (acc_device_default); + + if (acc_get_num_devices (acc_device_nvidia) != 0) + { + acc_init (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + + acc_init (acc_device_default); + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + } + + return 0; + +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-50.c b/libgomp/testsuite/libgomp.oacc-c/lib-50.c new file mode 100644 index 0000000..662309b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-50.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + if (acc_is_present (h, N) != 1) + abort (); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-51.c b/libgomp/testsuite/libgomp.oacc-c/lib-51.c new file mode 100644 index 0000000..38a3bb8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-51.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char 
*h[N]; + void *d[N]; + + for (i = 0; i < N; i++) + { + h[i] = (unsigned char *) malloc (N); + d[i] = acc_malloc (N); + + acc_map_data (h[i], d[i], N); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h[i], N) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + acc_unmap_data (h[i]); + + if (acc_is_present (h[i], N) != 0) + abort (); + + acc_free (d[i]); + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-52.c b/libgomp/testsuite/libgomp.oacc-c/lib-52.c new file mode 100644 index 0000000..780db31 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-52.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (0, d, N); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[(nil),+256\]->\[\h+,\+256\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-53.c b/libgomp/testsuite/libgomp.oacc-c/lib-53.c new file mode 100644 index 0000000..657adde --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-53.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, 0, N); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[(nil),\+256\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-54.c b/libgomp/testsuite/libgomp.oacc-c/lib-54.c new file mode 100644 index 0000000..1f3df80 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-54.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 
256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, 0); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-55.c b/libgomp/testsuite/libgomp.oacc-c/lib-55.c new file mode 100644 index 0000000..bf49e0a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-55.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + int i; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + for (i = 0; i < N; i++) + { + acc_map_data (h + i, d + i, 1); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + acc_unmap_data (h + i); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-56.c b/libgomp/testsuite/libgomp.oacc-c/lib-56.c new file mode 100644 index 0000000..4ebb06d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-56.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N >> 1); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h + (N >> 1), 1) != 0) + abort (); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-57.c b/libgomp/testsuite/libgomp.oacc-c/lib-57.c new file mode 100644 index 0000000..f9043a4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-57.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include <stdlib.h> 
+#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + acc_unmap_data (d); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-58.c b/libgomp/testsuite/libgomp.oacc-c/lib-58.c new file mode 100644 index 0000000..9d6e27d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-58.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + acc_unmap_data (0); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-59.c b/libgomp/testsuite/libgomp.oacc-c/lib-59.c new file mode 100644 index 0000000..352962d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-59.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + for (i = 0; i < N; i++) + { + if (acc_hostptr (d + i) != h + i) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_deviceptr (h + i) != d + i) + abort (); + } + + acc_unmap_data (h); + + for (i = 0; i < N; i++) + { + if (acc_hostptr (d + i) != 0) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_deviceptr (h + i) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-6.c b/libgomp/testsuite/libgomp.oacc-c/lib-6.c new file mode 100644 index 0000000..afdd480 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/lib-6.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + int devnum; + + if (acc_get_device_type () == acc_device_default) + abort (); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + devnum = acc_get_num_devices (acc_device_host); + if (devnum != 1) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_device_type () == acc_device_default) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-60.c b/libgomp/testsuite/libgomp.oacc-c/lib-60.c new file mode 100644 index 0000000..e1f2f43 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-60.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-61.c b/libgomp/testsuite/libgomp.oacc-c/lib-61.c new file mode 100644 index 0000000..02fd8b6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-61.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int 
+main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + void *d[N]; + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + h[i] = (unsigned char *) malloc (N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + p[j] = i; + } + + d[i] = acc_malloc (N); + + acc_memcpy_to_device (d[i], h[i], N); + + for (j = 0; j < N; j++) + { + if (acc_is_present (h[i] + j, 1) != 0) + abort (); + } + } + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + memset (h[i], 0, N); + + acc_memcpy_from_device (h[i], d[i], N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + for (j = 0; j < N; j++) + { + if (acc_is_present (h[i] + j, 1) != 0) + abort (); + } + + acc_free (d[i]); + + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-62.c b/libgomp/testsuite/libgomp.oacc-c/lib-62.c new file mode 100644 index 0000000..e6178e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-62.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_to_device (d, h, N << 1); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid size" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-63.c b/libgomp/testsuite/libgomp.oacc-c/lib-63.c new file mode 100644 index 0000000..ca237ec --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-63.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int 
+main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (0, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-64.c b/libgomp/testsuite/libgomp.oacc-c/lib-64.c new file mode 100644 index 0000000..850fd2e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-64.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, 0, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-65.c b/libgomp/testsuite/libgomp.oacc-c/lib-65.c new file mode 100644 index 0000000..26c8cef --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-65.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, d, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 
0; +} + +/* { dg-shouldfail "libgomp: invalid host or device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-66.c b/libgomp/testsuite/libgomp.oacc-c/lib-66.c new file mode 100644 index 0000000..360c05b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-66.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_to_device (d, h, 0); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-67.c b/libgomp/testsuite/libgomp.oacc-c/lib-67.c new file mode 100644 index 0000000..01b8b2d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-67.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (0, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-68.c b/libgomp/testsuite/libgomp.oacc-c/lib-68.c new file mode 100644 index 0000000..3ff5bd7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-68.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> 
+#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, 0, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-69.c b/libgomp/testsuite/libgomp.oacc-c/lib-69.c new file mode 100644 index 0000000..7cac954 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-69.c @@ -0,0 +1,131 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + 
fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort(); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (0) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep (1); + + if (acc_async_test (0) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-7.c b/libgomp/testsuite/libgomp.oacc-c/lib-7.c new file mode 100644 index 0000000..e78734b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-7.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_none) != 0) + abort (); + + if (acc_get_num_devices (acc_device_host) == 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-70.c b/libgomp/testsuite/libgomp.oacc-c/lib-70.c new file mode 100644 index 0000000..e20e278 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/lib-70.c @@ -0,0 +1,143 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + const int N = 10; + int i; + CUstream streams[N]; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + for (i = 0; i < N; i++) + { + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = 
cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (i) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + } + + sleep ((int) (dtime / 1000.0f) + 1); + + for (i = 0; i < N; i++) + { + if (acc_async_test (i) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-71.c b/libgomp/testsuite/libgomp.oacc-c/lib-71.c new file mode 100644 index 0000000..a55dfbf --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-71.c @@ -0,0 +1,126 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, 
CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (0, stream); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (1) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.0f) + 1); + + if (acc_async_test (1) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: unknown async \d" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-72.c b/libgomp/testsuite/libgomp.oacc-c/lib-72.c new file mode 100644 index 0000000..bb84b6f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-72.c @@ -0,0 +1,128 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include 
<stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, 
"cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test_all () != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.f) + 1); + + if (acc_async_test_all () != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-73.c b/libgomp/testsuite/libgomp.oacc-c/lib-73.c new file mode 100644 index 0000000..328a8aa --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-73.c @@ -0,0 +1,141 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + const int N = 10; + int i; + CUstream streams[N]; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != 
CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + for (i = 0; i < N; i++) + { + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + } + + if (acc_async_test_all () != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.0f) + 1); + + if (acc_async_test_all () != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-74.c b/libgomp/testsuite/libgomp.oacc-c/lib-74.c new file mode 100644 index 0000000..1b70b4b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-74.c @@ -0,0 +1,146 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + 
CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (0); + + atime = 
stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait (0); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-75.c b/libgomp/testsuite/libgomp.oacc-c/lib-75.c new file mode 100644 index 0000000..4381b3e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-75.c @@ -0,0 +1,148 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + int N; + int i; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf 
(stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + stream = acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (0); + } + + atime = stop_timer (0); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-76.c b/libgomp/testsuite/libgomp.oacc-c/lib-76.c new file mode 100644 index 0000000..b3527e5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-76.c @@ -0,0 +1,154 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + 
CUresult r; + int N; + int i; + CUstream *streams; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + streams = malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 
1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (i); + } + + atime = stop_timer (0); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (streams); + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-77.c b/libgomp/testsuite/libgomp.oacc-c/lib-77.c new file mode 100644 index 0000000..acc1b14 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-77.c @@ -0,0 +1,142 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad 
failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (0, stream); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (1); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait (1); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: unknown async \d" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-78.c b/libgomp/testsuite/libgomp.oacc-c/lib-78.c new file mode 100644 index 0000000..76c54ca --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-78.c @@ -0,0 +1,147 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" + +int +main (int argc, char **argv) +{ + 
CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: 
%d\n", r); + abort (); + } + + acc_wait_all (); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait_all (); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-79.c b/libgomp/testsuite/libgomp.oacc-c/lib-79.c new file mode 100644 index 0000000..625acb6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-79.c @@ -0,0 +1,174 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + int N; + int i; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + devnum = 2; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = 
cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (1, stream)) + abort (); + + stream = acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_async (0, 1); + + if (acc_async_test (0) != 0) + abort (); + + if (acc_async_test (1) != 0) + abort (); + + acc_wait (1); + + atime = stop_timer (0); + + if (acc_async_test (0) != 1) + abort (); + + if (acc_async_test (1) != 1) + abort (); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-80.c 
b/libgomp/testsuite/libgomp.oacc-c/lib-80.c new file mode 100644 index 0000000..0c284de --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-80.c @@ -0,0 +1,139 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" +/* Binds a user-created CUDA stream to async queue 1, launches the "delay" kernel on it once per multiprocessor, then calls acc_wait_async (1, 1). The dg-shouldfail directive at the end of the file marks the run as expected to fail with "libgomp: identical parameters". */ +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + CUstream stream; + int N; + int i; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); +
if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_async (1, 1); + + acc_wait (1); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: identical parameters" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-81.c b/libgomp/testsuite/libgomp.oacc-c/lib-81.c new file mode 100644 index 0000000..466eac4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-81.c @@ -0,0 +1,218 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +#include "timer.h" +/* Binds a new CUDA stream to each of async queues 0..N, launches the "delay" kernel on the first N streams, and checks that acc_async_test reports 0 for every queue after acc_wait_all_async (N) and 1 after acc_wait (N), with the elapsed timer reading at least dtime. Two further wait_all_async/wait pairs issued with no new work must each give a timer reading of at most 0.10. */ +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams, stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 500.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = malloc (nbytes); + d_a = acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + streams = malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = acc_get_cuda_stream (N); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (N, stream)) + abort (); + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_all_async (N); + + for (i = 0; i <= N; i++) + { + if (acc_async_test (i) != 0) + abort (); + } + + acc_wait (N); + + for (i = 0; i <= N; i++) + { + if (acc_async_test (i) != 1) + abort (); + } + + atime =
stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + stream = acc_get_cuda_stream (N + 1); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (N + 1, stream)) + abort (); + + acc_wait_all_async (N + 1); + + acc_wait (N + 1); + + atime = stop_timer (0); + + if (0.10 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + start_timer (0); + + acc_wait_all_async (N); + + acc_wait (N); + + atime = stop_timer (0); + + if (0.10 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (streams); + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-82.c b/libgomp/testsuite/libgomp.oacc-c/lib-82.c new file mode 100644 index 0000000..1064c37 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-82.c @@ -0,0 +1,151 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <cuda.h> +/* Launches the "delay2" kernel once per multiprocessor, each launch on its own CUDA stream bound to async queue i, then copies every mapped word back and aborts unless it equals the launch index. NOTE(review): nbytes is sizeof (int) although the host elements are unsigned long, and only acc_wait_all_async (0) precedes the acc_copyout calls -- confirm both are intended. */ +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay, sum; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams; + unsigned long **a, **d_a, *tid, ticks; + int nbytes; + void *kargs[3]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr,
"cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay2"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&sum, module, "sum"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = sizeof (int); + + ticks = (unsigned long) (200.0 * clkrate); + + N = nprocs; + + streams = malloc (N * sizeof (void *)); + + a = malloc (N * sizeof (unsigned long *)); + d_a = malloc (N * sizeof (unsigned long *)); + tid = malloc (N * sizeof (unsigned long)); + + for (i = 0; i < N; i++) + { + a[i] = malloc (sizeof (unsigned long)); + *a[i] = N; + d_a[i] = acc_malloc (nbytes); + tid[i] = i; + + acc_map_data (a[i], d_a[i], nbytes); + + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + kargs[0] = (void *) &d_a[i]; + kargs[1] = (void *) &ticks; + kargs[2] = (void *) &tid[i]; + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + ticks = (unsigned long) (50.0 * clkrate); + } + + acc_wait_all_async (0); + + for (i = 0; i < N; i++) + { + acc_copyout (a[i], nbytes); + if (*a[i] != i) + abort (); + } + + free (streams); + + for (i = 0; i < N; i++) + { + free (a[i]); + } + + free (a); +
free (d_a); + free (tid); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-83.c b/libgomp/testsuite/libgomp.oacc-c/lib-83.c new file mode 100644 index 0000000..4b5076b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-83.c @@ -0,0 +1,58 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include "timer.h" +/* Binds a new, idle CUDA stream to async queue 0 and checks that acc_wait_all_async (0) followed by acc_wait (0) gives a timer reading of at most 0.010. NOTE(review): CUstream, CUresult and cuStreamCreate are used without including <cuda.h> in this file; presumably timer.h provides it -- confirm. */ +int +main (int argc, char **argv) +{ + float atime; + CUstream stream; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + init_timers (1); + + stream = acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + start_timer (0); + + acc_wait_all_async (0); + + acc_wait (0); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + fini_timers (); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-84.c b/libgomp/testsuite/libgomp.oacc-c/lib-84.c new file mode 100644 index 0000000..6c3469b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-84.c @@ -0,0 +1,66 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <openacc.h> +#include <cuda.h> +/* Creates 100 CUDA streams, binds them to async queues 0..99 (each of which must start out with no stream), and aborts unless every stream handle occurs exactly once in the table. */ +int +main (int argc, char **argv) +{ + const int N = 100; + int i; + CUstream *streams; + CUstream s; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + streams = malloc (N * sizeof (void *)); + + for (i = 0; i < N;
i++) + { + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + int j; + int cnt; + + cnt = 0; + + s = streams[i]; + + for (j = 0; j < N; j++) + { + if (s == streams[j]) + cnt++; + } + + if (cnt != 1) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-85.c b/libgomp/testsuite/libgomp.oacc-c/lib-85.c new file mode 100644 index 0000000..b35b195 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-85.c @@ -0,0 +1,52 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <stdio.h> +#include <cuda.h> +/* Binds new CUDA streams to async queues 0..99, then aborts unless acc_set_cuda_stream (N + 1, NULL) returns 0. */ +int +main (int argc, char **argv) +{ + const int N = 100; + int i; + CUstream *streams; + CUstream s; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + streams = malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + s = NULL; + + if (acc_set_cuda_stream (N + 1, s) != 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-86.c b/libgomp/testsuite/libgomp.oacc-c/lib-86.c new file mode 100644 index 0000000..b8a8ee9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-86.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include <stdlib.h>
+#include <unistd.h> +#include <openacc.h> +/* Requires acc_get_current_cuda_device () to return 0 before any initialisation, while the host device is initialised, and after each shutdown, and to return non-zero only while the nvidia device is initialised. Returns success immediately when no nvidia device is reported. */ +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_device () != 0) + abort (); + + acc_init (acc_device_host); + + if (acc_get_current_cuda_device () != 0) + abort (); + + acc_shutdown (acc_device_host); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_device () != 0) + abort (); + + acc_init (acc_device_nvidia); + + if (acc_get_current_cuda_device () == 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_current_cuda_device () != 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-87.c b/libgomp/testsuite/libgomp.oacc-c/lib-87.c new file mode 100644 index 0000000..147d443 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-87.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +/* Same sequence as the current-device test, applied to acc_get_current_cuda_context (): the result must be 0 except while the nvidia device is initialised. Returns success immediately when no nvidia device is reported. */ +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_init (acc_device_host); + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_shutdown (acc_device_host); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_init (acc_device_nvidia); + + if (acc_get_current_cuda_context () == 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_current_cuda_context () != 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-88.c b/libgomp/testsuite/libgomp.oacc-c/lib-88.c new file mode 100644 index 0000000..3af4f54 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-88.c @@ -0,0 +1,111 @@ +/* { dg-do run } */ + +#include <stdio.h> +#include <pthread.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> +#include
<openacc.h> + +unsigned char *x; +void *d_x; +const int N = 256; +/* Thread body: requires that this thread has no current CUDA context while the mapping of x is nevertheless present; zeroes the host copy, copies x out from the device, checks it holds 0..N-1, refills it with N-1..0 and copies it in again. */ +static void * +test (void *arg) +{ + int i; + + if (acc_get_current_cuda_context () != NULL) + abort (); + + if (acc_is_present (x, N) != 1) + abort (); + + memset (x, 0, N); + + acc_copyout (x, N); + + for (i = 0; i < N; i++) + { + if (x[i] != i) + abort (); + + x[i] = N - i - 1; + } + + d_x = acc_copyin (x, N); + + return 0; +} +/* Copies x in from the main thread, runs one worker thread that replaces the device copy, then copies x out and checks that it holds the worker's values and is no longer present. Returns success immediately when no nvidia device is reported. */ +int +main (int argc, char **argv) +{ + const int nthreads = 1; + int i; + pthread_attr_t attr; + pthread_t *tid; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_init (acc_device_nvidia); + + x = malloc (N); + + for (i = 0; i < N; i++) + { + x[i] = i; + } + + d_x = acc_copyin (x, N); + + if (acc_is_present (x, N) != 1) + abort (); + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t)); + + for (i = 0; i < nthreads; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < nthreads; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + if (acc_is_present (x, N) != 1) + abort (); + + memset (x, 0, N); + + acc_copyout (x, N); + + for (i = 0; i < N; i++) + { + if (x[i] != N - i - 1) + abort (); + } + + if (acc_is_present (x, N) != 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-89.c b/libgomp/testsuite/libgomp.oacc-c/lib-89.c new file mode 100644 index 0000000..4cc97ce --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-89.c @@ -0,0 +1,118 @@ +/* { dg-do run } */ + +#include <stdio.h> +#include <pthread.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <ctype.h> +#include <openacc.h> + +unsigned char **x; +void
**d_x; +const int N = 16; +const int NTHREADS = 32; +/* Thread body: selects the nvidia device, requires a current CUDA context, fills a private N-byte buffer with this thread's id, publishes it in x[tid] and copies it to the device, recording the result in d_x[tid]. */ +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + p = malloc (N); + + for (i = 0; i < N; i++) + { + p[i] = tid; + } + + x[tid] = p; + + d_x[tid] = acc_copyin (p, N); + + return 0; +} +/* Runs NTHREADS workers, then checks from the main thread that every worker's buffer is present, copies back with the worker's id in every byte, and is no longer present afterwards. Returns success immediately when no nvidia device is reported. */ +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_init (acc_device_nvidia); + + x = malloc (NTHREADS * sizeof (unsigned char *)); /* one pointer slot per thread, independent of N */ + d_x = malloc (NTHREADS * sizeof (void *)); /* likewise for the device addresses */ + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 1) + abort (); + } + + for (i = 0; i < NTHREADS; i++) + { + memset (x[i], 0, N); + acc_copyout (x[i], N); + } + + for (i = 0; i < NTHREADS; i++) + { + unsigned char *p; + int j; + + p = x[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-9.c b/libgomp/testsuite/libgomp.oacc-c/lib-9.c new file mode 100644 index 0000000..c2146ad --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-9.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> +
+/* Checks that acc_set_device_num followed by acc_get_device_num round-trips for every device number of the selected type, both after acc_init and after acc_shutdown, and again for devices 0 and 1 after re-initialisation. Returns success immediately when no device of that type is reported. */ int +main (int argc, char **argv) +{ + int i; + int num_devices; + int devnum; + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; +#endif + + num_devices = acc_get_num_devices (devtype); + if (num_devices == 0) + return 0; + + acc_init (devtype); + + for (i = 0; i < num_devices; i++) + { + acc_set_device_num (i, devtype); + devnum = acc_get_device_num (devtype); + if (devnum != i) + abort (); + } + + acc_shutdown (devtype); + + num_devices = acc_get_num_devices (devtype); + if (num_devices == 0) + abort (); + + for (i = 0; i < num_devices; i++) + { + acc_set_device_num (i, devtype); + devnum = acc_get_device_num (devtype); + if (devnum != i) + abort (); + } + + acc_shutdown (devtype); + + acc_init (devtype); + + acc_set_device_num (0, devtype); + + devnum = acc_get_device_num (devtype); + if (devnum != 0) + abort(); + + if (num_devices > 1) + { + acc_set_device_num (1, 0); /* NOTE(review): device type is passed as 0 here rather than devtype -- confirm this is deliberate */ + + devnum = acc_get_device_num (devtype); + if (devnum != 1) + abort(); + } + + acc_shutdown (devtype); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-90.c b/libgomp/testsuite/libgomp.oacc-c/lib-90.c new file mode 100644 index 0000000..664bb59 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-90.c @@ -0,0 +1,137 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <pthread.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <ctype.h> +#include <openacc.h> +#include <cuda.h> + +unsigned char **x; +void **d_x; +const int N = 16; +const int NTHREADS = 32; +/* Thread body: selects the nvidia device, requires a current CUDA context, fills a private N-byte buffer with this thread's id, publishes it in x[tid], copies it to the device and then calls acc_wait_all. */ +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + p = malloc (N); + + for (i =
0; i < N; i++) + { + p[i] = tid; + } + + x[tid] = p; + + d_x[tid] = acc_copyin (p, N); + + acc_wait_all (); + + return 0; +} +/* Binds a new CUDA stream to async queue 0, runs NTHREADS workers, then checks from the main thread that every worker's buffer is present, copies back with the worker's id in every byte, and is no longer present afterwards. */ +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + CUresult r; + CUstream s; + + acc_init (acc_device_nvidia); + + x = malloc (NTHREADS * sizeof (unsigned char *)); /* one pointer slot per thread, independent of N */ + d_x = malloc (NTHREADS * sizeof (void *)); /* likewise for the device addresses */ + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + r = cuStreamCreate (&s, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, s)) + abort (); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 1) + abort (); + } + + acc_get_cuda_stream (1); + + for (i = 0; i < NTHREADS; i++) + { + memset (x[i], 0, N); + acc_copyout (x[i], N); + } + + acc_wait_all (); + + for (i = 0; i < NTHREADS; i++) + { + unsigned char *p; + int j; + + p = x[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-91.c b/libgomp/testsuite/libgomp.oacc-c/lib-91.c new file mode 100644 index 0000000..e00ef4f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-91.c @@ -0,0 +1,84 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include <stdlib.h> +#include <unistd.h> +#include <openacc.h> +#include <sys/time.h> +#include
<stdio.h> +#include <cuda.h> +/* Times 100 synchronous "update device" transfers of an N-byte mapped buffer, then 100 asynchronous ones on queue 0 (bound to a new CUDA stream) followed by acc_wait_all, and aborts if the asynchronous run took more than 1.5 times as long. */ +int +main (int argc, char **argv) +{ + const int N = 1024 * 1024; + int i; + unsigned char *h; + void *d; + float async, sync; + struct timeval start, stop; + CUresult r; + CUstream s; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + gettimeofday (&start, NULL); + + for (i = 0; i < 100; i++) + { +#pragma acc update device(h[0:N]) + } + + gettimeofday (&stop, NULL); + + sync = (float) (stop.tv_sec - start.tv_sec); + sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0); + + gettimeofday (&start, NULL); + + r = cuStreamCreate (&s, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, s)) + abort (); + + for (i = 0; i < 100; i++) + { +#pragma acc update device(h[0:N]) async(0) + } + + acc_wait_all (); + + gettimeofday (&stop, NULL); + + async = (float) (stop.tv_sec - start.tv_sec); + async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0); + + if (async > (sync * 1.5)) + abort (); + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-92.c b/libgomp/testsuite/libgomp.oacc-c/lib-92.c new file mode 100644 index 0000000..0756b29 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-92.c @@ -0,0 +1,112 @@ +/* { dg-do run } */ + +#include <stdio.h> +#include <pthread.h> +#include <stdlib.h> +#include <errno.h> +#include <ctype.h> +#include <openacc.h> + +unsigned char **x; +void **d_x; +const int N = 32; +const int NTHREADS = 32; +/* Thread body: selects the nvidia device, requires a current CUDA context, copies x[tid] out from the device and checks that it holds 0..N-1. */ +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + +
acc_copyout (x[tid], N); + + p = x[tid]; + + for (i = 0; i < N; i++) + { + if (p[i] != i) + abort (); + } + + return 0; +} +/* Copies in one N-byte buffer per worker from the main thread, lets each of the NTHREADS workers copy its own buffer out, and finally checks that none of the buffers is still present. Returns success immediately when no nvidia device is reported. */ +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + unsigned char *p; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_init (acc_device_nvidia); + + x = malloc (NTHREADS * sizeof (unsigned char *)); /* one pointer slot per thread, independent of N */ + d_x = malloc (NTHREADS * sizeof (void *)); /* likewise for the device addresses */ + + for (i = 0; i < NTHREADS; i++) /* one buffer per worker: the workers index x[] by thread id */ + { + int j; + + p = malloc (N); + + x[i] = p; + + for (j = 0; j < N; j++) + { + p[j] = j; + } + + d_x[i] = acc_copyin (p, N); + } + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + acc_get_cuda_stream (1); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-1.c b/libgomp/testsuite/libgomp.oacc-c/nested-1.c new file mode 100644 index 0000000..577ef8c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/nested-1.c @@ -0,0 +1,680 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +/* Runs "acc parallel" regions nested inside "acc data" regions using the copyin, copyout, copy, create, present and deviceptr clauses and their present_or_ and p-prefixed forms, checking after each region both the host values and, via acc_is_present, which arrays are still mapped. */ +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + c = (float *) malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma
acc data copyin(a[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 3.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 5.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc data present_or_copyin(a[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + acc_free(d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) present_or_copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 2.0; + } + + d = acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc data copyin(a[0:N]) present_or_copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if
(a[i] != 5.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + acc_free (d); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 4.0; + } + +#pragma acc data copy(a[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = a[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 6.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 7.0; + } + +#pragma acc data present_or_copy(a[0:N]) present_or_copy(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 9.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + + d = acc_copyin (&a[0], N * sizeof (float)); + d = acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc data present_or_copy(a[0:N]) present_or_copy(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 7.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + d = acc_deviceptr(&a[0]); + acc_unmap_data (&a[0]); + acc_free (d); + + d = acc_deviceptr(&b[0]); + acc_unmap_data (&b[0]); + acc_free (d); + + + for (i = 0; i < N; i++) + { +
a[i] = 3.0; + b[i] = 7.0; + } + +#pragma acc data copyin(a[0:N]) create(c[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + +#pragma acc data copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 4.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = acc_malloc (N * sizeof (float)); + acc_map_data(c, d, N * sizeof (float)); + +#pragma acc data copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + d = acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + d = acc_malloc(N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data copyin(a[0:N]) present(c[0:N]) copyout(b[0:N]) + { +#pragma acc
parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 4.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + acc_unmap_data (c); + + if (acc_is_present (c, (N * sizeof (float)))) + abort(); + + acc_free (d); + + d = acc_malloc(N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + d = acc_malloc(N * sizeof (float)); + acc_map_data (b, d, N * sizeof (float)); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort(); + + d = acc_malloc(N * sizeof (float)); + acc_map_data (a, d, N * sizeof (float)); + + if (!acc_is_present (a, (N * sizeof (float)))) + abort(); + +#pragma acc data present(a[0:N]) present(c[0:N]) present(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = 1.0; + c[ii] = 2.0; + b[ii] = 4.0; + } + } + } + + if (!acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort(); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort(); + + acc_copyout (b, N * sizeof (float)); + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort(); + + if (b[i] != 4.0) + abort(); + } + + d = acc_deviceptr(a); + + acc_unmap_data (a); + + acc_free (d); + + d = acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 6.0; + } + + d = acc_malloc(N * sizeof (float)); + +#pragma acc parallel copyin(a[0:N]) deviceptr(d) copyout(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + d[ii] = a[ii]; + b[ii] = d[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) +
abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc data pcopyin(a[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + acc_free(d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) pcopyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 7.0; + } + +#pragma acc data copyin(a[0:N]) pcreate(c[0:N]) copyout(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 5.0) + abort(); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-2.c b/libgomp/testsuite/libgomp.oacc-c/nested-2.c new file mode 100644 index 0000000..6975467 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/nested-2.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +extern void abort(void); +/* Checks that values written by a parallel region using present(a), nested inside a "data copyout(a)" region, reach the host array. */ +int +main(int argc, char *argv[]) +{ +#define N 10 + char a[N]; + + { + int i; + for (i = 0; i < N; ++i) + a[i] = 0; + } + +#pragma acc
data copyout(a) + { +#pragma acc parallel /* will result in a "dummy frame" */ present(a) + { + int i; + for (i = 0; i < N; ++i) + a[i] = i; + } + } + + { + int i; + for (i = 0; i < N; ++i) + if (a[i] != i) + abort(); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/offset-1.c b/libgomp/testsuite/libgomp.oacc-c/offset-1.c new file mode 100644 index 0000000..0bae23a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/offset-1.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +/* Checks copyin/copyout of array sub-sections that start at zero and non-zero offsets, including a copyout section that differs from the copyin section, and that neither array is still present afterwards. */ +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + +#pragma acc parallel copyin(a[2:4]) copyout(b[2:4]) + { + b[2] = a[2]; + b[3] = a[3]; + } + + for (i = 2; i < 4; i++) + { + if (a[i] != 2.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 1.0; + } + +#pragma acc parallel copyin(a[0:4]) copyout(b[0:4]) + { + b[0] = a[0]; + b[1] = a[1]; + b[2] = a[2]; + b[3] = a[3]; + } + + for (i = 0; i < 4; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + b[i] = 6.0; + } + +#pragma acc parallel copyin(a[0:4]) copyout(b[4:4]) + { + b[4] = a[0]; + b[5] = a[1]; + b[6] = a[2]; + b[7] = a[3]; + } + + for (i = 0; i < 4; i++) + { + if (a[i] != 9.0) + abort(); + } + + for (i = 4; i < 8; i++) + { + if (b[i] != 9.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c new file mode 100644 index 0000000..fd9df33 --- /dev/null +++
b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c @@ -0,0 +1,206 @@ +/* { dg-do run } */ + +#include <stdlib.h> + +int i; + +int main(void) +{ + int j, v; + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, 
j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; + +#pragma acc data copyin (i, j) + { +#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; + +#pragma acc data copyin(i, j) + { +#pragma acc parallel /* copyout */ present_or_copyout (v) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c new file mode 100644 index 0000000..6c410d0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +int +main(int argc, char **argv) +{ + int N = 5; + int a[N]; + + a[0] = 10; + a[1] = 10; + a[2] = 10; + a[3] = 10; + a[4] = 10; + + acc_init(acc_device_nvidia); + +#pragma acc parallel copy(a[0:N]) + { + a[0] = 5; + a[1] = 5; + a[2] = 5; + a[3] = 5; + a[4] = 5; + } + + acc_shutdown(acc_device_nvidia); + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.oacc-c/present-1.c b/libgomp/testsuite/libgomp.oacc-c/present-1.c new file mode 100644 index 0000000..0e284ad --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/present-1.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + c = (float *) malloc(N * sizeof (float)); + + d = acc_malloc(N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data present(a[0:N]) present(c[0:N]) present(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + d = acc_deviceptr (c); + acc_unmap_data (c); + acc_free (d); + + free (a); + free (b); + free (c); + + return 0; +} +/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/present-2.c b/libgomp/testsuite/libgomp.oacc-c/present-2.c new file mode 100644 index 0000000..41efa70 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/present-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <stdlib.h> + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) + { + +#pragma acc parallel present(a[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.oacc-c/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c/reduction-1.c new file mode 100644 index 0000000..0aa02ca --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/reduction-1.c @@ -0,0 +1,186 @@ +/* { dg-do run } */ +/* TODO: + <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. + { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */ + +/* Integer reductions. */ + +#include <stdlib.h> +#include <stdbool.h> + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + int vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult *= array[i]; + + if (result != vresult) + abort (); + +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. 
*/ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 0; + vresult = 0; + + /* '&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&:result) + for (i = 0; i < n; i++) + result &= array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult &= array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '|' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (|:result) + for (i = 0; i < n; i++) + result |= array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult |= array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '^' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (^:result) + for (i = 0; i < n; i++) + result ^= array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult ^= array[i]; + + if (result != vresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult && (result > array[i]); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions.
*/ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult || (result > array[i]); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c/reduction-2.c new file mode 100644 index 0000000..cac6ce4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/reduction-2.c @@ -0,0 +1,135 @@ +/* { dg-do run } */ +/* TODO: + <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. + { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */ + +/* float reductions. */ + +#include <stdlib.h> +#include <stdbool.h> +#include <math.h> + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + float vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult *= array[i]; + + if (fabs(result - vresult) > .0001) + abort (); +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ?
result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult && (result > array[i]); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult || (result > array[i]); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c/reduction-3.c new file mode 100644 index 0000000..9bcd7fe --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/reduction-3.c @@ -0,0 +1,135 @@ +/* { dg-do run } */ +/* TODO: + <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+ { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */ + +/* double reductions. */ + +#include <stdlib.h> +#include <stdbool.h> +#include <math.h> + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + double vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult *= array[i]; + + if (fabs(result - vresult) > .0001) + abort (); +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? 
vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult && (result > array[i]); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult || (result > array[i]); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c/reduction-4.c new file mode 100644 index 0000000..6cebcf5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/reduction-4.c @@ -0,0 +1,138 @@ +/* { dg-do run } */ +/* TODO: + <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. + { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */ + +/* complex reductions. */ + +#include <stdlib.h> +#include <stdbool.h> +#include <math.h> +#include <complex.h> + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + double complex vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; +#pragma acc end parallel + + /* Verify the reduction.
*/ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* Needs support for complex multiplication. */ + +// /* '*' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (*:result) +// for (i = 0; i < n; i++) +// result *= array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult *= array[i]; +// +// if (fabs(result - vresult) > .0001) +// abort (); +// result = 0; +// vresult = 0; + +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// #pragma acc end parallel +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (creal(result) > creal(array[i])); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult && (creal(result) > creal(array[i])); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions.
*/ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (creal(result) > creal(array[i])); +#pragma acc end parallel + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lvresult || (creal(result) > creal(array[i])); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.cu b/libgomp/testsuite/libgomp.oacc-c/subr.cu new file mode 100644 index 0000000..e86e0fc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/subr.cu @@ -0,0 +1,64 @@ + +extern "C" __global__ void +delay (clock_t * d_o, clock_t delay) +{ + clock_t start, ticks; + + start = clock (); + + ticks = 0; + + while (ticks < delay) + ticks = clock () - start; +} + +extern "C" __global__ void +delay2 (unsigned long *d_o, clock_t delay, unsigned long tid) +{ + clock_t start, ticks; + + start = clock (); + + ticks = 0; + + while (ticks < delay) + ticks = clock () - start; + + d_o[0] = tid; +} + +extern "C" __global__ void +sum (clock_t * d_o, int N) +{ + int i; + clock_t sum; + __shared__ clock_t ticks[32]; + + sum = 0; + + for (i = threadIdx.x; i < N; i += blockDim.x) + sum += d_o[i]; + + ticks[threadIdx.x] = sum; + + syncthreads (); + + for (i = 16; i >= 1; i >>= 1) + { + if (threadIdx.x < i) + ticks[threadIdx.x] += ticks[threadIdx.x + i]; + + syncthreads (); + } + + d_o[0] = ticks[0]; +} + +extern "C" __global__ void +mult (int n, float *x, float *y) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + + for (i = 0; i < n; i++) + y[i] = x[i] * x[i]; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.ptx b/libgomp/testsuite/libgomp.oacc-c/subr.ptx new file mode 100644 index 0000000..f398b15 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/subr.ptx @@ -0,0 +1,251 @@ + .version 1.4 + .target sm_10, map_f64_to_f32 + // compiled with /sj/opt/nvidia/cuda-5.5/open64/lib//be + // nvopencc 4.1 built on 2013-07-17 + +
//----------------------------------------------------------- + // Compiling /tmp/tmpxft_00007716_00000000-9_subr.cpp3.i (/tmp/ccBI#.6sfiTI) + //----------------------------------------------------------- + + //----------------------------------------------------------- + // Options: + //----------------------------------------------------------- + // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:64 + // -O3 (Optimization level) + // -g0 (Debug level) + // -m2 (Report advisories) + //----------------------------------------------------------- + + .file 1 "<command-line>" + .file 2 "/tmp/tmpxft_00007716_00000000-8_subr.cudafe2.gpu" + .file 3 "/usr/lib/gcc/x86_64-redhat-linux/4.4.5/include/stddef.h" + .file 4 "/opt/nvidia/cuda-5.5/bin/..//include/crt/device_runtime.h" + .file 5 "/opt/nvidia/cuda-5.5/bin/..//include/host_defines.h" + .file 6 "/opt/nvidia/cuda-5.5/bin/..//include/builtin_types.h" + .file 7 "/opt/nvidia/cuda-5.5/bin/..//include/device_types.h" + .file 8 "/opt/nvidia/cuda-5.5/bin/..//include/driver_types.h" + .file 9 "/opt/nvidia/cuda-5.5/bin/..//include/surface_types.h" + .file 10 "/opt/nvidia/cuda-5.5/bin/..//include/texture_types.h" + .file 11 "/opt/nvidia/cuda-5.5/bin/..//include/vector_types.h" + .file 12 "/opt/nvidia/cuda-5.5/bin/..//include/device_launch_parameters.h" + .file 13 "/opt/nvidia/cuda-5.5/bin/..//include/crt/storage_class.h" + .file 14 "/usr/include/bits/types.h" + .file 15 "/usr/include/time.h" + .file 16 "subr.cu" + .file 17 "/opt/nvidia/cuda-5.5/bin/..//include/common_functions.h" + .file 18 "/opt/nvidia/cuda-5.5/bin/..//include/math_functions.h" + .file 19 "/opt/nvidia/cuda-5.5/bin/..//include/math_constants.h" + .file 20 "/opt/nvidia/cuda-5.5/bin/..//include/device_functions.h" + .file 21 "/opt/nvidia/cuda-5.5/bin/..//include/sm_11_atomic_functions.h" + .file 22 "/opt/nvidia/cuda-5.5/bin/..//include/sm_12_atomic_functions.h" + .file 23 "/opt/nvidia/cuda-5.5/bin/..//include/sm_13_double_functions.h" + .file 24 
"/opt/nvidia/cuda-5.5/bin/..//include/sm_20_atomic_functions.h" + .file 25 "/opt/nvidia/cuda-5.5/bin/..//include/sm_32_atomic_functions.h" + .file 26 "/opt/nvidia/cuda-5.5/bin/..//include/sm_35_atomic_functions.h" + .file 27 "/opt/nvidia/cuda-5.5/bin/..//include/sm_20_intrinsics.h" + .file 28 "/opt/nvidia/cuda-5.5/bin/..//include/sm_30_intrinsics.h" + .file 29 "/opt/nvidia/cuda-5.5/bin/..//include/sm_32_intrinsics.h" + .file 30 "/opt/nvidia/cuda-5.5/bin/..//include/sm_35_intrinsics.h" + .file 31 "/opt/nvidia/cuda-5.5/bin/..//include/surface_functions.h" + .file 32 "/opt/nvidia/cuda-5.5/bin/..//include/texture_fetch_functions.h" + .file 33 "/opt/nvidia/cuda-5.5/bin/..//include/texture_indirect_functions.h" + .file 34 "/opt/nvidia/cuda-5.5/bin/..//include/surface_indirect_functions.h" + .file 35 "/opt/nvidia/cuda-5.5/bin/..//include/math_functions_dbl_ptx1.h" + + + .entry delay ( + .param .u64 __cudaparm_delay_d_o, + .param .s64 __cudaparm_delay_delay) + { + .reg .u32 %rv1; + .reg .u32 %r<6>; + .reg .u64 %rd<7>; + .reg .pred %p<4>; + .loc 16 3 0 +$LDWbegin_delay: + .loc 16 7 0 + mov.u32 %r1, %clock; + mov.s32 %r2, %r1; + ld.param.s64 %rd1, [__cudaparm_delay_delay]; + mov.u64 %rd2, 0; + setp.le.s64 %p1, %rd1, %rd2; + @%p1 bra $Lt_0_1282; + cvt.s64.s32 %rd3, %r2; +$Lt_0_1794: + .loc 16 12 0 + mov.u32 %r3, %clock; + mov.s32 %r4, %r3; + cvt.s64.s32 %rd4, %r4; + sub.s64 %rd5, %rd4, %rd3; + .loc 16 7 0 + ld.param.s64 %rd1, [__cudaparm_delay_delay]; + .loc 16 12 0 + setp.gt.s64 %p2, %rd1, %rd5; + @%p2 bra $Lt_0_1794; +$Lt_0_1282: + .loc 16 13 0 + exit; +$LDWend_delay: + } // delay + + .entry delay2 ( + .param .u64 __cudaparm_delay2_d_o, + .param .s64 __cudaparm_delay2_delay, + .param .u64 __cudaparm_delay2_tid) + { + .reg .u32 %rv1; + .reg .u32 %r<6>; + .reg .u64 %rd<9>; + .reg .pred %p<4>; + .loc 16 16 0 +$LDWbegin_delay2: + .loc 16 20 0 + mov.u32 %r1, %clock; + mov.s32 %r2, %r1; + ld.param.s64 %rd1, [__cudaparm_delay2_delay]; + mov.u64 %rd2, 0; + setp.le.s64 %p1, %rd1, 
%rd2; + @%p1 bra $Lt_1_1282; + cvt.s64.s32 %rd3, %r2; +$Lt_1_1794: + .loc 16 25 0 + mov.u32 %r3, %clock; + mov.s32 %r4, %r3; + cvt.s64.s32 %rd4, %r4; + sub.s64 %rd5, %rd4, %rd3; + .loc 16 20 0 + ld.param.s64 %rd1, [__cudaparm_delay2_delay]; + .loc 16 25 0 + setp.gt.s64 %p2, %rd1, %rd5; + @%p2 bra $Lt_1_1794; +$Lt_1_1282: + .loc 16 27 0 + ld.param.u64 %rd6, [__cudaparm_delay2_tid]; + ld.param.u64 %rd7, [__cudaparm_delay2_d_o]; + st.global.u64 [%rd7+0], %rd6; + .loc 16 28 0 + exit; +$LDWend_delay2: + } // delay2 + + .entry sum ( + .param .u64 __cudaparm_sum_d_o, + .param .s32 __cudaparm_sum_N) + { + .reg .u32 %r<9>; + .reg .u64 %rd<21>; + .reg .pred %p<6>; + .shared .align 8 .b8 __cuda___cuda_local_var_14805_37_non_const_ticks56[256]; + .loc 16 31 0 +$LDWbegin_sum: + .loc 16 39 0 + cvt.s32.u16 %r1, %tid.x; + mov.s32 %r2, %r1; + ld.param.u64 %rd1, [__cudaparm_sum_d_o]; + ld.param.s32 %r3, [__cudaparm_sum_N]; + setp.le.s32 %p1, %r3, %r1; + @%p1 bra $Lt_2_5634; + cvt.u32.u16 %r4, %ntid.x; + cvt.s64.u32 %rd2, %r4; + mul.wide.u32 %rd3, %r4, 8; + cvt.s64.s32 %rd4, %r1; + mul.wide.s32 %rd5, %r1, 8; + ld.param.u64 %rd1, [__cudaparm_sum_d_o]; + add.u64 %rd6, %rd1, %rd5; + mov.s64 %rd7, 0; +$Lt_2_3586: + //<loop> Loop body line 39, nesting depth: 1, estimated iterations: unknown + .loc 16 40 0 + ld.global.s64 %rd8, [%rd6+0]; + add.s64 %rd7, %rd8, %rd7; + add.u32 %r2, %r2, %r4; + add.u64 %rd6, %rd3, %rd6; + .loc 16 39 0 + ld.param.s32 %r3, [__cudaparm_sum_N]; + .loc 16 40 0 + setp.gt.s32 %p2, %r3, %r2; + @%p2 bra $Lt_2_3586; + bra.uni $Lt_2_3074; +$Lt_2_5634: + mov.s64 %rd7, 0; +$Lt_2_3074: + mov.u64 %rd9, __cuda___cuda_local_var_14805_37_non_const_ticks56; + .loc 16 42 0 + cvt.u32.u16 %r5, %tid.x; + cvt.u64.u32 %rd10, %r5; + mul.wide.u32 %rd11, %r5, 8; + add.u64 %rd12, %rd9, %rd11; + st.shared.s64 [%rd12+0], %rd7; + .loc 16 44 0 + bar.sync 0; + mov.s32 %r2, 16; +$Lt_2_4610: + //<loop> Loop body line 44, nesting depth: 1, estimated iterations: unknown + setp.le.u32 %p3, %r2, 
%r5; + @%p3 bra $Lt_2_4866; + .loc 16 49 0 + ld.shared.s64 %rd13, [%rd12+0]; + add.u32 %r6, %r2, %r5; + cvt.u64.u32 %rd14, %r6; + mul.wide.u32 %rd15, %r6, 8; + add.u64 %rd16, %rd9, %rd15; + ld.shared.s64 %rd17, [%rd16+0]; + add.s64 %rd18, %rd13, %rd17; + st.shared.s64 [%rd12+0], %rd18; +$Lt_2_4866: + .loc 16 51 0 + bar.sync 0; + .loc 16 46 0 + shr.s32 %r2, %r2, 1; + mov.u32 %r7, 0; + setp.gt.s32 %p4, %r2, %r7; + @%p4 bra $Lt_2_4610; + .loc 16 54 0 + ld.shared.s64 %rd19, [__cuda___cuda_local_var_14805_37_non_const_ticks56+0]; + st.global.s64 [%rd1+0], %rd19; + .loc 16 55 0 + exit; +$LDWend_sum: + } // sum + + .entry mult ( + .param .s32 __cudaparm_mult_n, + .param .u64 __cudaparm_mult_x, + .param .u64 __cudaparm_mult_y) + { + .reg .u32 %r<7>; + .reg .u64 %rd<4>; + .reg .f32 %f<4>; + .reg .pred %p<4>; + .loc 16 58 0 +$LDWbegin_mult: + ld.param.s32 %r1, [__cudaparm_mult_n]; + mov.u32 %r2, 0; + setp.le.s32 %p1, %r1, %r2; + @%p1 bra $Lt_3_1282; + ld.param.s32 %r1, [__cudaparm_mult_n]; + mov.s32 %r3, %r1; + ld.param.u64 %rd1, [__cudaparm_mult_x]; + ld.param.u64 %rd2, [__cudaparm_mult_y]; + mov.s32 %r4, 0; + mov.s32 %r5, %r3; +$Lt_3_1794: + //<loop> Loop body line 58, nesting depth: 1, estimated iterations: unknown + .loc 16 63 0 + ld.global.f32 %f1, [%rd1+0]; + mul.f32 %f2, %f1, %f1; + st.global.f32 [%rd2+0], %f2; + add.s32 %r4, %r4, 1; + add.u64 %rd2, %rd2, 4; + add.u64 %rd1, %rd1, 4; + .loc 16 58 0 + ld.param.s32 %r1, [__cudaparm_mult_n]; + .loc 16 63 0 + setp.ne.s32 %p2, %r1, %r4; + @%p2 bra $Lt_3_1794; +$Lt_3_1282: + .loc 16 64 0 + exit; +$LDWend_mult: + } // mult + diff --git a/libgomp/testsuite/libgomp.oacc-c/timer.h b/libgomp/testsuite/libgomp.oacc-c/timer.h new file mode 100644 index 0000000..aa6cb3b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/timer.h @@ -0,0 +1,103 @@ + +#include <stdio.h> +#include <cuda.h> + +static int _Tnum_timers; +static CUevent *_Tstart_events, *_Tstop_events; +static CUstream _Tstream; + +void +init_timers (int ntimers) +{ + int 
i; + CUresult r; + + _Tnum_timers = ntimers; + + _Tstart_events = malloc (_Tnum_timers * sizeof (CUevent)); + _Tstop_events = malloc (_Tnum_timers * sizeof (CUevent)); + + r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + for (i = 0; i < _Tnum_timers; i++) + { + r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventCreate failed: %d\n", r); + abort (); + } + + r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventCreate failed: %d\n", r); + abort (); + } + } +} + +void +fini_timers (void) +{ + int i; + + for (i = 0; i < _Tnum_timers; i++) + { + cuEventDestroy (_Tstart_events[i]); + cuEventDestroy (_Tstop_events[i]); + } + + cuStreamDestroy (_Tstream); + + free (_Tstart_events); + free (_Tstop_events); +} + +void +start_timer (int timer) +{ + CUresult r; + + r = cuEventRecord (_Tstart_events[timer], _Tstream); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventRecord failed: %d\n", r); + abort (); + } +} + +float +stop_timer (int timer) +{ + CUresult r; + float etime; + + r = cuEventRecord (_Tstop_events[timer], _Tstream); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventRecord failed: %d\n", r); + abort (); + } + + r = cuEventSynchronize (_Tstop_events[timer]); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventSynchronize failed: %d\n", r); + abort (); + } + + r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventElapsedTime failed: %d\n", r); + abort (); + } + + return etime; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/update-1.c b/libgomp/testsuite/libgomp.oacc-c/update-1.c new file mode 100644 index 0000000..4517273 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/update-1.c @@ -0,0 +1,280 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { 
*-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <openacc.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b, *c; + float *d_a, *d_b, *d_c; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + c = (float *) malloc(N * sizeof (float)); + + d_a = acc_malloc(N * sizeof (float)); + d_b = acc_malloc(N * sizeof (float)); + d_c = acc_malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + + acc_map_data(a, d_a, N * sizeof (float)); + acc_map_data(b, d_b, N * sizeof (float)); + acc_map_data(c, d_c, N * sizeof (float)); + +#pragma acc update device(a[0:N], b[0:N]) + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device(a[0:N], b[0:N]) + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 5.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device(a[0:N], b[0:N]) + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 5.0) + 
abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc update device(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 6.0) + abort(); + + if (b[i] != 6.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 2.0; + } + +#pragma acc update device(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort(); + + if (b[i] != 7.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc update device(a[0:N]) + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host(a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 9.0) + abort(); + + if (b[i] != 9.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + } + +#pragma acc update device(a[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + } + +#pragma acc update device(a[0:N >> 1]) + +#pragma acc parallel present(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host(a[0:N], b[0:N]) + + for (i = 0; i < (N >> 1); i++) + { + if (a[i] != 6.0) + abort(); + + if (b[i] != 
6.0) + abort(); + } + + for (i = (N >> 1); i < N; i++) + { + if (a[i] != 5.0) + abort(); + + if (b[i] != 5.0) + abort(); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort(); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 new file mode 100644 index 0000000..52b030b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 @@ -0,0 +1,10 @@ +! { dg-shouldfail "" { *-*-* } { "*" } { "" } } + +program main + implicit none + + !$acc parallel + call abort + !$acc end parallel + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 new file mode 100644 index 0000000..2ba2bcb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 @@ -0,0 +1,13 @@ +program main + implicit none + + integer :: argc + argc = command_argument_count () + + !$acc parallel copyin(argc) + if (argc .ne. 0) then + call abort + end if + !$acc end parallel + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 new file mode 100644 index 0000000..1a10f32 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 @@ -0,0 +1,45 @@ +! { dg-additional-options "-cpp" } +! TODO: Have to disable the acc_on_device builtin for we want to test the +! libgomp library function? The command line option +! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for +! Fortran. + +use openacc +implicit none + +! Host. + +if (.not. acc_on_device (acc_device_none)) call abort +if (.not. acc_on_device (acc_device_host)) call abort +if (acc_on_device (acc_device_not_host)) call abort +if (acc_on_device (acc_device_nvidia)) call abort + + +! Host via offloading fallback mode. + +!$acc parallel if(.false.) +if (.not. 
acc_on_device (acc_device_none)) call abort +if (.not. acc_on_device (acc_device_host)) call abort +if (acc_on_device (acc_device_not_host)) call abort +if (acc_on_device (acc_device_nvidia)) call abort +!$acc end parallel + + +#if !ACC_DEVICE_TYPE_host + +! Offloaded. + +!$acc parallel +if (acc_on_device (acc_device_none)) call abort +if (acc_on_device (acc_device_host)) call abort +if (.not. acc_on_device (acc_device_not_host)) call abort +#if ACC_DEVICE_TYPE_nvidia +if (.not. acc_on_device (acc_device_nvidia)) call abort +#else +if (acc_on_device (acc_device_nvidia)) call abort +#endif +!$acc end parallel + +#endif + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f new file mode 100644 index 0000000..a19045b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f @@ -0,0 +1,45 @@ +! { dg-additional-options "-cpp" } +! TODO: Have to disable the acc_on_device builtin for we want to test +! the libgomp library function? The command line option +! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not +! for Fortran. + + USE OPENACC + IMPLICIT NONE + +!Host. + + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT + + +!Host via offloading fallback mode. + +!$ACC PARALLEL IF(.FALSE.) + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +!$ACC END PARALLEL + + +#if !ACC_DEVICE_TYPE_host + +! Offloaded. + +!$ACC PARALLEL + IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_nvidia + IF (.NOT. 
ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#endif +!$ACC END PARALLEL + +#endif + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f new file mode 100644 index 0000000..c391776 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f @@ -0,0 +1,45 @@ +! { dg-additional-options "-cpp" } +! TODO: Have to disable the acc_on_device builtin for we want to test +! the libgomp library function? The command line option +! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not +! for Fortran. + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + +!Host. + + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT + + +!Host via offloading fallback mode. + +!$ACC PARALLEL IF(.FALSE.) + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +!$ACC END PARALLEL + + +#if !ACC_DEVICE_TYPE_host + +! Offloaded. + +!$ACC PARALLEL + IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_nvidia + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#endif +!$ACC END PARALLEL + +#endif + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 new file mode 100644 index 0000000..4c07bc2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 @@ -0,0 +1,27 @@ +! 
{ dg-do run } + +program collapse1 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. + a(:, :, :) = 0 + !$acc parallel + !$acc loop collapse(4 - 1) + do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + a(i, j, k) = i + j + k + end do + end do + end do + !$acc loop collapse(2) reduction(.or.:l) + do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do + !$acc end parallel + if (l) call abort +end program collapse1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 new file mode 100644 index 0000000..ca3b638 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 @@ -0,0 +1,25 @@ +! { dg-do run } + +program collapse2 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. + a(:, :, :) = 0 + !$acc parallel + !$acc loop collapse(4 - 1) + do 164 i = 1, 3 + do 164 j = 4, 6 + do 164 k = 5, 7 + a(i, j, k) = i + j + k +164 end do + !$acc loop collapse(2) reduction(.or.:l) +firstdo: do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do firstdo + !$acc end parallel + if (l) call abort +end program collapse2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 new file mode 100644 index 0000000..50e6100 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 @@ -0,0 +1,28 @@ +! 
{ dg-do run } + +program collapse3 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort +end program collapse3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 new file mode 100644 index 0000000..41b66db --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 @@ -0,0 +1,40 @@ +! { dg-do run } + +! collapse3.f90:test1 +program collapse4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 ! host reference is guarded by its own flag r + end do + end do + end do + if (l .neqv. r) call abort + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse4 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 new file mode 100644 index 0000000..8c20f04 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 @@ -0,0 +1,48 @@ +! { dg-do run } + +! 
collapse3.f90:test2 +program collapse5 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 3 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + m = i * 100 + j * 10 + k ! NOTE(review): m is never declared or read in this program + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 ! host reference is guarded by its own flag r + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse5 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 new file mode 100644 index 0000000..7404b91 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 @@ -0,0 +1,50 @@ +! { dg-do run } + +! collapse3.f90:test3 +program collapse6 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 3 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + v7 = 1 + v8 = 1 + v9 = 1 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse6 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 new file mode 100644 index 0000000..12efd8c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 @@ -0,0 +1,40 @@ +! { dg-do run } + +! collapse3.f90:test4 +program collapse7 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = 1, 7 + do j = -3, 5 + do k = 12, 19 + if (a(i, j, k) .ne. 
b(i, j, k)) call abort + end do + end do + end do +end program collapse7 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 new file mode 100644 index 0000000..04fbcfe --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 @@ -0,0 +1,47 @@ +! { dg-do run } + +! collapse3.f90:test5 +program collapse8 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 3 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse8 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp new file mode 100644 index 0000000..ef6d551 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp @@ -0,0 +1,108 @@ +# This whole file adapted from libgomp.fortran/fortran.exp. 
+ +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp +load_gcc_lib gfortran-dg.exp + +global shlib_ext +global ALWAYS_CFLAGS + +set shlib_ext [get_shlib_extension] +set lang_library_path "../libgfortran/.libs" +set lang_link_flags "-lgfortran" +if [info exists lang_include_flags] then { + unset lang_include_flags +} +set lang_test_file_found 0 +set quadmath_library_path "../libquadmath/.libs" + + +# Initialize dg. +dg-init + +# Turn on OpenACC. +# XXX (TEMPORARY): Remove the -flto once that's properly integrated. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto" + +if { $blddir != "" } { + set lang_source_re {^.*\.[fF](|90|95|03|08)$} + set lang_include_flags "-fintrinsic-modules-path=${blddir}" + # Look for a static libgfortran first. + if [file exists "${blddir}/${lang_library_path}/libgfortran.a"] { + set lang_test_file "${lang_library_path}/libgfortran.a" + set lang_test_file_found 1 + # We may have a shared only build, so look for a shared libgfortran. + } elseif [file exists "${blddir}/${lang_library_path}/libgfortran.${shlib_ext}"] { + set lang_test_file "${lang_library_path}/libgfortran.${shlib_ext}" + set lang_test_file_found 1 + } else { + puts "No libgfortran library found, will not execute fortran tests" + } +} elseif [info exists GFORTRAN_UNDER_TEST] { + set lang_test_file_found 1 + # Needs to exist for libgomp.exp. + set lang_test_file "" +} else { + puts "GFORTRAN_UNDER_TEST not defined, will not execute fortran tests" +} + +if { $lang_test_file_found } { + # Gather a list of all tests. + set tests [lsort [find $srcdir/$subdir *.\[fF\]{,90,95,03,08}]] + + if { $blddir != "" } { + if { [file exists "${blddir}/${quadmath_library_path}/libquadmath.a"] + || [file exists "${blddir}/${quadmath_library_path}/libquadmath.${shlib_ext}"] } { + lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/${quadmath_library_path}/" + # Allow for spec substitution. 
+ lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/${quadmath_library_path}/" + set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}:${blddir}/${quadmath_library_path}" + } else { + set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}" + } + } else { + set ld_library_path "$always_ld_library_path" + } + append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] + set_ld_library_path_env_vars + + # Todo: get list of accelerators from configure options --enable-accelerator. + set accels { "nonshm-host" "nvidia" } + + # Run on host (or fallback) accelerator. + lappend accels "host" + + # Test OpenACC with available accelerators. + foreach accel $accels { + set tagopt "-DACC_DEVICE_TYPE_$accel=1" + + # Todo: Determine shared memory or not using run-time test. + switch $accel { + host { + set acc_mem_shared 1 + } + nonshm-host { + set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1" + set acc_mem_shared 0 + } + nvidia { + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + # Todo: Verify that this works for both local and remote testing. + setenv ACC_DEVICE_TYPE $accel + + # For Fortran we're doing torture testing, as Fortran has far more tests + # with arrays etc. that testing just -O0 or -O2 is insufficient, that is + # typically not the case for C/C++. + gfortran-dg-runtest $tests "$tagopt" "" + } +} + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 new file mode 100644 index 0000000..51dc452 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 @@ -0,0 +1,13 @@ +use openacc + +if (acc_get_num_devices (acc_device_host) .ne. 1) call abort +call acc_set_device_type (acc_device_host) +if (acc_get_device_type () .ne. acc_device_host) call abort +call acc_set_device_num (0, acc_device_host) +if (acc_get_device_num (acc_device_host) .ne. 
0) call abort +call acc_shutdown (acc_device_host) + +call acc_init (acc_device_host) +call acc_shutdown (acc_device_host) + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 new file mode 100644 index 0000000..a54d6a7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 @@ -0,0 +1,82 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) ! bytes in one integer element + integer, parameter :: c_size = sizeof (c) ! bytes in one complex element + integer, parameter :: r_size = sizeof (r) ! bytes in one real element + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) 
call abort + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 new file mode 100644 index 0000000..a54d6a7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 @@ -0,0 +1,82 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) ! bytes in one integer element + integer, parameter :: c_size = sizeof (c) ! bytes in one complex element + integer, parameter :: r_size = sizeof (r) ! bytes in one real element + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) 
call abort + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f new file mode 100644 index 0000000..a9d70b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f @@ -0,0 +1,13 @@ + USE OPENACC + + IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT + CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT + CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + CALL ACC_INIT (ACC_DEVICE_HOST) + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f new file mode 100644 index 0000000..56d2cd2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f @@ -0,0 +1,13 @@ + INCLUDE "openacc_lib.h" + + IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT + CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_TYPE () .NE. 
ACC_DEVICE_HOST) CALL ABORT + CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + CALL ACC_INIT (ACC_DEVICE_HOST) + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 new file mode 100644 index 0000000..3a2b661 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +program main + use openacc + implicit none + + integer n + + if (acc_get_num_devices (acc_device_host) .ne. 1) call abort + + if (acc_get_num_devices (acc_device_none) .ne. 0) call abort + + call acc_init (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + call acc_set_device_type (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + n = 0 + + call acc_set_device_num (n, acc_device_host) + + if (acc_get_device_num (acc_device_host) .ne. 0) call abort + + if (.NOT. acc_async_test (n) ) call abort + + call acc_wait (n) + + call acc_wait_all () + + call acc_shutdown (acc_device_host) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 new file mode 100644 index 0000000..e68eb89 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program main + use openacc + implicit none + + integer n + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + n = 0 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort + + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) call abort + + end if + + call acc_shutdown (acc_device_nvidia) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 new file mode 100644 index 0000000..401ad66 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer n + + if (acc_get_num_devices (acc_device_host) .ne. 1) call abort + + if (acc_get_num_devices (acc_device_none) .ne. 0) call abort + + call acc_init (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + call acc_set_device_type (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + n = 0 + + call acc_set_device_num (n, acc_device_host) + + if (acc_get_device_num (acc_device_host) .ne. 0) call abort + + if (.NOT. acc_async_test (n) ) call abort + + call acc_wait (n) + + call acc_wait_all () + + call acc_shutdown (acc_device_host) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 new file mode 100644 index 0000000..422df53 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer n + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + n = 0 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort + + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) call abort + + end if + + call acc_shutdown (acc_device_nvidia) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 new file mode 100644 index 0000000..ad758b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 @@ -0,0 +1,83 @@ +! { dg-do run } + +program main + use openacc + use iso_c_binding + implicit none + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) ! bytes in one integer element + integer, parameter :: c_size = sizeof (c) ! bytes in one complex element + integer, parameter :: r_size = sizeof (r) ! bytes in one real element + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) 
call abort + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 new file mode 100644 index 0000000..ad758b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 @@ -0,0 +1,83 @@ +! { dg-do run } + +program main + use openacc + use iso_c_binding + implicit none + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) 
call abort + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 new file mode 100644 index 0000000..082dd8a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 @@ -0,0 +1,97 @@ +program map + integer, parameter :: n = 20, c = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a) copyin (b) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + !$acc parallel pcopy (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! 
PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a) pcopyin (b) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) +end program map + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f new file mode 100644 index 0000000..db3c6b1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f @@ -0,0 +1,9 @@ +! { dg-do run } + + program main + implicit none + include "openacc_lib.h" + + if (openacc_version .ne. 201306) call abort; + + end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 new file mode 100644 index 0000000..a14ecdd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 @@ -0,0 +1,9 @@ +! { dg-do run } + +program main + use openacc + implicit none + + if (openacc_version .ne. 
201306) call abort; + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 new file mode 100644 index 0000000..e316746 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 @@ -0,0 +1,15 @@ +program test + implicit none + + integer a(5) + + a = 10; + + !$acc parallel copy(a(1:5)) + a(1) = 5 + a(2) = 5 + a(3) = 5 + a(4) = 5 + a(5) = 5 + !$acc end parallel +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 new file mode 100644 index 0000000..1a1d4c7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 @@ -0,0 +1,229 @@ +! { dg-do run } + +program test + implicit none + integer, allocatable :: a1(:) + integer, allocatable :: b1(:) + integer, allocatable :: c1(:) + integer, allocatable :: b2(:,:) + integer, allocatable :: c3(:,:,:) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(1:5)) + a1(1) = 1 + a1(2) = 2 + a1(3) = 3 + a1(4) = 4 + a1(5) = 5 + !$acc end parallel + + if (a1(1) .ne. 1) call abort + if (a1(2) .ne. 2) call abort + if (a1(3) .ne. 3) call abort + if (a1(4) .ne. 4) call abort + if (a1(5) .ne. 5) call abort + + deallocate(a1) + + allocate (a1(0:4)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(0:4)) + a1(0) = 1 + a1(1) = 2 + a1(2) = 3 + a1(3) = 4 + a1(4) = 5 + !$acc end parallel + + if (a1(0) .ne. 1) call abort + if (a1(1) .ne. 2) call abort + if (a1(2) .ne. 3) call abort + if (a1(3) .ne. 4) call abort + if (a1(4) .ne. 5) call abort + + deallocate(a1) + + allocate (b2(5,5)) + if (.not.allocated (b2)) call abort() + + b2 = 11 + + !$acc parallel copy(b2(1:5,1:5)) + b2(1,1) = 1 + b2(2,2) = 2 + b2(3,3) = 3 + b2(4,4) = 4 + b2(5,5) = 5 + !$acc end parallel + + if (b2(1,1) .ne. 1) call abort + if (b2(2,2) .ne. 2) call abort + if (b2(3,3) .ne. 
3) call abort + if (b2(4,4) .ne. 4) call abort + if (b2(5,5) .ne. 5) call abort + + deallocate(b2) + + allocate (b2(0:4,0:4)) + if (.not.allocated (b2)) call abort() + + b2 = 11 + + !$acc parallel copy(b2(0:4,0:4)) + b2(0,0) = 1 + b2(1,1) = 2 + b2(2,2) = 3 + b2(3,3) = 4 + b2(4,4) = 5 + !$acc end parallel + + if (b2(0,0) .ne. 1) call abort + if (b2(1,1) .ne. 2) call abort + if (b2(2,2) .ne. 3) call abort + if (b2(3,3) .ne. 4) call abort + if (b2(4,4) .ne. 5) call abort + + deallocate(b2) + + allocate (c3(5,5,5)) + if (.not.allocated (c3)) call abort() + + c3 = 12 + + !$acc parallel copy(c3(1:5,1:5,1:5)) + c3(1,1,1) = 1 + c3(2,2,2) = 2 + c3(3,3,3) = 3 + c3(4,4,4) = 4 + c3(5,5,5) = 5 + !$acc end parallel + + if (c3(1,1,1) .ne. 1) call abort + if (c3(2,2,2) .ne. 2) call abort + if (c3(3,3,3) .ne. 3) call abort + if (c3(4,4,4) .ne. 4) call abort + if (c3(5,5,5) .ne. 5) call abort + + deallocate(c3) + + allocate (c3(0:4,0:4,0:4)) + if (.not.allocated (c3)) call abort() + + c3 = 12 + + !$acc parallel copy(c3(0:4,0:4,0:4)) + c3(0,0,0) = 1 + c3(1,1,1) = 2 + c3(2,2,2) = 3 + c3(3,3,3) = 4 + c3(4,4,4) = 5 + !$acc end parallel + + if (c3(0,0,0) .ne. 1) call abort + if (c3(1,1,1) .ne. 2) call abort + if (c3(2,2,2) .ne. 3) call abort + if (c3(3,3,3) .ne. 4) call abort + if (c3(4,4,4) .ne. 5) call abort + + deallocate(c3) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + allocate (b1(5)) + if (.not.allocated (b1)) call abort() + + allocate (c1(5)) + if (.not.allocated (c1)) call abort() + + a1 = 10 + b1 = 3 + c1 = 7 + + !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5)) + c1(1) = a1(1) + c1(2) = a1(2) + c1(3) = a1(3) + c1(4) = a1(4) + c1(5) = a1(5) + + b1(1) = c1(1) + b1(2) = c1(2) + b1(3) = c1(3) + b1(4) = c1(4) + b1(5) = c1(5) + !$acc end parallel + + if (b1(1) .ne. 10) call abort + if (b1(2) .ne. 10) call abort + if (b1(3) .ne. 10) call abort + if (b1(4) .ne. 10) call abort + if (b1(5) .ne. 
10) call abort + + deallocate(a1) + deallocate(b1) + deallocate(c1) + + allocate (a1(0:4)) + if (.not.allocated (a1)) call abort() + + allocate (b1(0:4)) + if (.not.allocated (b1)) call abort() + + allocate (c1(0:4)) + if (.not.allocated (c1)) call abort() + + a1 = 10 + b1 = 3 + c1 = 7 + + !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4)) + c1(0) = a1(0) + c1(1) = a1(1) + c1(2) = a1(2) + c1(3) = a1(3) + c1(4) = a1(4) + + b1(0) = c1(0) + b1(1) = c1(1) + b1(2) = c1(2) + b1(3) = c1(3) + b1(4) = c1(4) + !$acc end parallel + + if (b1(0) .ne. 10) call abort + if (b1(1) .ne. 10) call abort + if (b1(2) .ne. 10) call abort + if (b1(3) .ne. 10) call abort + if (b1(4) .ne. 10) call abort + + deallocate(a1) + deallocate(b1) + deallocate(c1) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(2:3)) + a1(2) = 2 + a1(3) = 3 + !$acc end parallel + + if (a1(1) .ne. 10) call abort + if (a1(2) .ne. 2) call abort + if (a1(3) .ne. 3) call abort + if (a1(4) .ne. 10) call abort + if (a1(5) .ne. 10) call abort + + deallocate(a1) + +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 new file mode 100644 index 0000000..7c7e078 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 @@ -0,0 +1,228 @@ +! { dg-do run } +! TODO: +! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. +! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } + +! Integer reductions + +program reduction_1 + implicit none + + integer, parameter :: n = 1000, vl = 32 + integer :: i, vresult, result + logical :: lresult, lvresult + integer, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! 
Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! '*' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(*:result) + do i = 1, n + result = result * array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'max' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(max:result) + do i = 1, n + result = max (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = max (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'min' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(min:result) + do i = 1, n + result = min (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = min (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'iand' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(iand:result) + do i = 1, n + result = iand (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = iand (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'ior' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(ior:result) + do i = 1, n + result = ior (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = ior (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'ieor' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(ieor:result) + do i = 1, n + result = ieor (result, array(i)) + end do + !$acc end parallel + + ! 
Verify the results + do i = 1, n + vresult = ieor (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.and.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.and.:lresult) + do i = 1, n + lresult = lresult .and. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .and. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.or.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.or.:lresult) + do i = 1, n + lresult = lresult .or. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .or. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.eqv.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.eqv.:lresult) + do i = 1, n + lresult = lresult .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .eqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.neqv.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.neqv.:lresult) + do i = 1, n + lresult = lresult .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .neqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort +end program reduction_1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 new file mode 100644 index 0000000..d0d34bb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 @@ -0,0 +1,173 @@ +! { dg-do run } +! TODO: +! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. +! 
{ dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } + +! real reductions + +program reduction_2 + implicit none + + integer, parameter :: n = 1000, vl = 32 + integer :: i + real, parameter :: e = .001 + real :: vresult, result + logical :: lresult, lvresult + real, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (abs (result - vresult) .ge. e) call abort + + result = 1 + vresult = 1 + + ! '*' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(*:result) + do i = 1, n + result = result * array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'max' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(max:result) + do i = 1, n + result = max (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = max (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'min' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(min:result) + do i = 1, n + result = min (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = min (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.and.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.and.:lresult) + do i = 1, n + lresult = lresult .and. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .and. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. 
+ lvresult = .false. + + ! '.or.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.or.:lresult) + do i = 1, n + lresult = lresult .or. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .or. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.eqv.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.eqv.:lresult) + do i = 1, n + lresult = lresult .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .eqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.neqv.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.neqv.:lresult) + do i = 1, n + lresult = lresult .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .neqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort +end program reduction_2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 new file mode 100644 index 0000000..b42ca61 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 @@ -0,0 +1,173 @@ +! { dg-do run } +! TODO: +! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. +! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } + +! double precision reductions + +program reduction_3 + implicit none + + integer, parameter :: n = 1000, vl = 32 + integer :: i + double precision, parameter :: e = .001 + double precision :: vresult, result + logical :: lresult, lvresult + double precision, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! 
'+' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (abs (result - vresult) .ge. e) call abort + + result = 1 + vresult = 1 + + ! '*' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(*:result) + do i = 1, n + result = result * array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'max' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(max:result) + do i = 1, n + result = max (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = max (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'min' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(min:result) + do i = 1, n + result = min (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = min (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.and.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.and.:lresult) + do i = 1, n + lresult = lresult .and. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .and. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.or.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.or.:lresult) + do i = 1, n + lresult = lresult .or. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .or. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. 
+ + ! '.eqv.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.eqv.:lresult) + do i = 1, n + lresult = lresult .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .eqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.neqv.' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(.neqv.:lresult) + do i = 1, n + lresult = lresult .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .neqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort +end program reduction_3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 new file mode 100644 index 0000000..45675c6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 @@ -0,0 +1,57 @@ +! { dg-do run } +! TODO: +! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. +! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } + +! complex reductions + +program reduction_4 + implicit none + + integer, parameter :: n = 1000, vl = 32 + integer :: i + complex :: vresult, result + complex, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (result .ne. vresult) call abort + + result = 1 + vresult = 1 + +! ! '*' reductions +! +! !$acc parallel vector_length(vl) +! !$acc loop reduction(*:result) +! do i = 1, n +! result = result * array(i) +! end do +! !$acc end parallel +! +! ! Verify the results +! do i = 1, n +! vresult = vresult * array(i) +! end do +! +! 
if (result.ne.vresult) call abort +end program reduction_4 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 new file mode 100644 index 0000000..e125548 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 @@ -0,0 +1,35 @@ +! { dg-do run } +! TODO: +! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>. +! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } + +! subroutine reduction + +program reduction + integer, parameter :: n = 40, c = 10 + integer :: i, vsum, sum + + call redsub (sum, n, c) + + vsum = 0 + + ! Verify the results + do i = 1, n + vsum = vsum + c + end do + + if (sum.ne.vsum) call abort () +end program reduction + +subroutine redsub(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel vector_length(n) copyin (n, c) + !$acc loop reduction(+:sum) + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 new file mode 100644 index 0000000..3390515 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 @@ -0,0 +1,32 @@ +! { dg-do run } +! { dg-options "-fno-inline" } + + interface + recursive function fact (x) + !$acc routine + integer, intent(in) :: x + integer :: fact + end function fact + end interface + integer, parameter :: n = 10 + integer :: a(n), i + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. 
fact(i)) call abort + end do +end +recursive function fact (x) result (res) + !$acc routine + integer, intent(in) :: x + integer :: res + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if +end function fact diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 new file mode 100644 index 0000000..3d418b6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 @@ -0,0 +1,29 @@ +! { dg-do run } +! { dg-options "-fno-inline" } + + module m1 + contains + recursive function fact (x) result (res) + !$acc routine + integer, intent(in) :: x + integer :: res + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if + end function fact + end module m1 + use m1 + integer, parameter :: n = 10 + integer :: a(n), i + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. fact(i)) call abort + end do +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 new file mode 100644 index 0000000..d233a63 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 @@ -0,0 +1,27 @@ +! { dg-do run } +! { dg-options "-fno-inline" } + + integer, parameter :: n = 10 + integer :: a(n), i + integer, external :: fact + !$acc routine (fact) + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. fact(i)) call abort + end do +end +recursive function fact (x) result (res) + !$acc routine + integer, intent(in) :: x + integer :: res + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if +end function fact diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 new file mode 100644 index 0000000..3e5fb09 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 @@ -0,0 +1,23 @@ +! 
{ dg-do run } +! { dg-options "-fno-inline" } + + integer, parameter :: n = 10 + integer :: a(n), i + do i = 1, n + a(i) = i + end do + !$acc parallel + !$acc loop + do i = 1, n + call incr(a(i)) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. (i + 1)) call abort + end do +end +subroutine incr (x) + !$acc routine + integer, intent(inout) :: x + x = x + 1 +end subroutine incr diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 new file mode 100644 index 0000000..b39414f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 @@ -0,0 +1,97 @@ +program subarrays + integer, parameter :: n = 20, c = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a(1:n)) copyin (b(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + !$acc parallel pcopy (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) +end program subarrays + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. 
b(i)) call abort + end do +end subroutine check diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 new file mode 100644 index 0000000..81799f6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 @@ -0,0 +1,100 @@ +program subarrays + integer, parameter :: n = 20, c = 10, low = 5, high = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + do i = low, high + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + do i = low, high + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a(low:high)) copyin (b(low:high)) + !$acc loop + do i = low, high + a(i) = b(i) + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + a(:) = 0 + + !$acc parallel pcopy (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a(low:high)) & + !$acc & pcopyin (b(low:high)) + !$acc loop + do i = low, high + a(i) = b(i) + end do + !$acc end parallel + + call check (a, b, n) +end program subarrays + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |