This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH 0/6, OpenACC, libgomp] Async re-work
- From: Thomas Schwinge <thomas at codesourcery dot com>
- To: Chung-Lin Tang <cltang at codesourcery dot com>
- Cc: <Catherine_Moore at mentor dot com>, <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 14 Dec 2018 15:04:30 +0100
- Subject: Re: [PATCH 0/6, OpenACC, libgomp] Async re-work
- References: <432c2e58-7bf6-1f7e-457f-32813207b282@mentor.com> <yxfpsgza8k0p.fsf@hertz.schwinge.homeip.net>
Hi Chung-Lin!
On Thu, 06 Dec 2018 21:42:14 +0100, I wrote:
> On Tue, 25 Sep 2018 21:09:49 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> > Also included in this patch is the code for the acc_get/set_default_async API functions in OpenACC 2.5.
> > It's a minor part of this patch, but since some code was merged together, I'm submitting it together here.
>
> As I requested, I'm reviewing those changes separately, and have backed
> out those changes in my working copy.
... as follows:
commit 79b89a5214dc2624a52f0593bbfad5cefed0c025
Author: Thomas Schwinge <thomas@codesourcery.com>
Date: Thu Dec 6 15:57:46 2018 +0100
into async re-work: revert default_async changes
---
include/gomp-constants.h | 1 -
libgomp/libgomp.map | 4 -
libgomp/oacc-async.c | 19 +-
libgomp/oacc-init.c | 2 -
libgomp/oacc-int.h | 3 -
libgomp/openacc.f90 | 22 +-
libgomp/openacc.h | 3 -
libgomp/openacc_lib.h | 13 -
.../libgomp.oacc-c-c++-common/asyncwait-2.c | 904 ---------------------
9 files changed, 2 insertions(+), 969 deletions(-)
diff --git include/gomp-constants.h include/gomp-constants.h
index acd25851bcc7..1021306ed661 100644
--- include/gomp-constants.h
+++ include/gomp-constants.h
@@ -160,7 +160,6 @@ enum gomp_map_kind
/* Asynchronous behavior. Keep in sync with
libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */
-#define GOMP_ASYNC_DEFAULT 0
#define GOMP_ASYNC_NOVAL -1
#define GOMP_ASYNC_SYNC -2
diff --git libgomp/libgomp.map libgomp/libgomp.map
index c5e1b876fccd..d2381da3bf07 100644
--- libgomp/libgomp.map
+++ libgomp/libgomp.map
@@ -464,12 +464,8 @@ OACC_2.5 {
acc_delete_finalize_async_32_h_;
acc_delete_finalize_async_64_h_;
acc_delete_finalize_async_array_h_;
- acc_get_default_async;
- acc_get_default_async_h_;
acc_memcpy_from_device_async;
acc_memcpy_to_device_async;
- acc_set_default_async;
- acc_set_default_async_h_;
acc_update_device_async;
acc_update_device_async_32_h_;
acc_update_device_async_64_h_;
diff --git libgomp/oacc-async.c libgomp/oacc-async.c
index 68aaf199a27e..553082fe3d4a 100644
--- libgomp/oacc-async.c
+++ libgomp/oacc-async.c
@@ -60,7 +60,7 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
/* The special value acc_async_noval (-1) maps to the thread-specific
default async stream. */
if (async == acc_async_noval)
- async = thr->default_async;
+ async = 0; //TODO thr->default_async;
if (async == acc_async_sync)
return NULL;
@@ -221,23 +221,6 @@ acc_wait_all_async (int async)
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
}
-int
-acc_get_default_async (void)
-{
- struct goacc_thread *thr = get_goacc_thread ();
- return thr->default_async;
-}
-
-void
-acc_set_default_async (int async)
-{
- if (async < acc_async_sync)
- gomp_fatal ("invalid async argument: %d", async);
-
- struct goacc_thread *thr = get_goacc_thread ();
- thr->default_async = async;
-}
-
static void
goacc_async_unmap_tgt (void *ptr)
{
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index 2c2f91ce3c2c..c40f48829078 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -426,8 +426,6 @@ goacc_attach_host_thread_to_device (int ord)
thr->target_tls
= acc_dev->openacc.create_thread_data_func (ord);
-
- thr->default_async = acc_async_default;
}
/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
diff --git libgomp/oacc-int.h libgomp/oacc-int.h
index 3354eb654ce9..97f3fc8a61ed 100644
--- libgomp/oacc-int.h
+++ libgomp/oacc-int.h
@@ -73,9 +73,6 @@ struct goacc_thread
/* Target-specific data (used by plugin). */
void *target_tls;
-
- /* Default OpenACC async queue for current thread, exported to plugin. */
- int default_async;
};
#if defined HAVE_TLS || defined USE_EMUTLS
diff --git libgomp/openacc.f90 libgomp/openacc.f90
index 7d31ee689479..7c809fe00738 100644
--- libgomp/openacc.f90
+++ libgomp/openacc.f90
@@ -51,10 +51,9 @@ module openacc_kinds
integer, parameter :: acc_handle_kind = int32
- public :: acc_async_default, acc_async_noval, acc_async_sync
+ public :: acc_async_noval, acc_async_sync
! Keep in sync with include/gomp-constants.h.
- integer (acc_handle_kind), parameter :: acc_async_default = 0
integer (acc_handle_kind), parameter :: acc_async_noval = -1
integer (acc_handle_kind), parameter :: acc_async_sync = -2
@@ -93,16 +92,6 @@ module openacc_internal
integer (acc_device_kind) d
end function
- subroutine acc_set_default_async_h (a)
- import
- integer a
- end subroutine
-
- function acc_get_default_async_h ()
- import
- integer acc_get_default_async_h
- end function
-
function acc_async_test_h (a)
logical acc_async_test_h
integer a
@@ -731,7 +720,6 @@ module openacc
public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type
public :: acc_set_device_num, acc_get_device_num, acc_async_test
- public :: acc_set_default_async, acc_get_default_async
public :: acc_async_test_all
public :: acc_wait, acc_async_wait, acc_wait_async
public :: acc_wait_all, acc_async_wait_all, acc_wait_all_async
@@ -764,14 +752,6 @@ module openacc
procedure :: acc_get_device_num_h
end interface
- interface acc_set_default_async
- procedure :: acc_set_default_async_h
- end interface
-
- interface acc_get_default_async
- procedure :: acc_get_default_async_h
- end interface
-
interface acc_async_test
procedure :: acc_async_test_h
end interface
diff --git libgomp/openacc.h libgomp/openacc.h
index ede59d76c862..f61bb77f9f3e 100644
--- libgomp/openacc.h
+++ libgomp/openacc.h
@@ -63,7 +63,6 @@ typedef enum acc_device_t {
typedef enum acc_async_t {
/* Keep in sync with include/gomp-constants.h. */
- acc_async_default = 0,
acc_async_noval = -1,
acc_async_sync = -2
} acc_async_t;
@@ -73,8 +72,6 @@ void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
-void acc_set_default_async (int) __GOACC_NOTHROW;
-int acc_get_default_async (void) __GOACC_NOTHROW;
int acc_async_test (int) __GOACC_NOTHROW;
int acc_async_test_all (void) __GOACC_NOTHROW;
void acc_wait (int) __GOACC_NOTHROW;
diff --git libgomp/openacc_lib.h libgomp/openacc_lib.h
index 75a693937967..820d987d72e2 100644
--- libgomp/openacc_lib.h
+++ libgomp/openacc_lib.h
@@ -46,7 +46,6 @@
integer, parameter :: acc_handle_kind = 4
! Keep in sync with include/gomp-constants.h.
- integer (acc_handle_kind), parameter :: acc_async_default = 0
integer (acc_handle_kind), parameter :: acc_async_noval = -1
integer (acc_handle_kind), parameter :: acc_async_sync = -2
@@ -90,18 +89,6 @@
end function
end interface
- interface acc_set_default_async
- subroutine acc_set_default_async_h (a)
- integer a
- end subroutine
- end interface
-
- interface acc_get_default_async
- function acc_get_default_async_h ()
- integer acc_get_default_async_h
- end function
- end interface
-
interface acc_async_test
function acc_async_test_h (a)
logical acc_async_test_h
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c
deleted file mode 100644
index 94205407d41d..000000000000
--- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c
+++ /dev/null
@@ -1,904 +0,0 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* { dg-additional-options "-lcuda" } */
-
-#include <openacc.h>
-#include <stdlib.h>
-#include <cuda.h>
-
-#include <stdio.h>
-#include <time.h>
-#include <sys/time.h>
-
-int
-main (int argc, char **argv)
-{
- CUresult r;
- CUstream stream1;
- int N = 128; //1024 * 1024;
- float *a, *b, *c, *d, *e;
- int i;
- int nbytes;
-
- srand (time (NULL));
- int s = rand () % 100;
-
- acc_init (acc_device_nvidia);
-
- nbytes = N * sizeof (float);
-
- a = (float *) malloc (nbytes);
- b = (float *) malloc (nbytes);
- c = (float *) malloc (nbytes);
- d = (float *) malloc (nbytes);
- e = (float *) malloc (nbytes);
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- }
-
- acc_set_default_async (s);
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc wait
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 3.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 2.0;
- b[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 2.0)
- abort ();
-
- if (b[i] != 2.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 9.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 2.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc parallel wait (s) async (s)
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 2.0)
- abort ();
-
- if (b[i] != 4.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
-
- if (e[i] != 11.0)
- abort ();
- }
-
-
- r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuStreamCreate failed: %d\n", r);
- abort ();
- }
-
- acc_set_cuda_stream (1, stream1);
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- b[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 5.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 7.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 7.0)
- abort ();
-
- if (b[i] != 49.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc parallel wait (s) async (s)
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 9.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
-
- if (e[i] != 17.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 4.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 4.0)
- abort ();
-
- if (b[i] != 16.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
- }
-
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
- {
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc parallel async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) async
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 25.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc wait
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 3.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 2.0;
- b[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 2.0)
- abort ();
-
- if (b[i] != 2.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 9.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 2.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc kernels wait (s) async (s)
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 2.0)
- abort ();
-
- if (b[i] != 4.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
-
- if (e[i] != 11.0)
- abort ();
- }
-
-
- r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuStreamCreate failed: %d\n", r);
- abort ();
- }
-
- acc_set_cuda_stream (1, stream1);
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- b[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 5.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 7.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 7.0)
- abort ();
-
- if (b[i] != 49.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
- }
-
-#pragma acc kernels wait (s) async (s)
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
- }
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 9.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
-
- if (d[i] != 1.0)
- abort ();
-
- if (e[i] != 17.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 4.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 4.0)
- abort ();
-
- if (b[i] != 16.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
- }
-
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- b[i] = 0.0;
- c[i] = 0.0;
- d[i] = 0.0;
- e[i] = 0.0;
- }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
- {
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
- }
-
-#pragma acc kernels async
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
- }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) async
-
-#pragma acc wait (s)
-
- }
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 25.0)
- abort ();
-
- if (c[i] != 4.0)
- abort ();
- }
-
- acc_shutdown (acc_device_nvidia);
-
- return 0;
-}
Grüße
Thomas