This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 0/6, OpenACC, libgomp] Async re-work


Hi Chung-Lin!

On Thu, 06 Dec 2018 21:42:14 +0100, I wrote:
> On Tue, 25 Sep 2018 21:09:49 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> > Also included in this patch is the code for the acc_get/set_default_async API functions in OpenACC 2.5.
> > It's a minor part of this patch, but since some code was merged together, I'm submitting it together here.
> 
> As I requested, I'm reviewing those changes separately, and have backed
> out those changes in my working copy.

... as follows:

commit 79b89a5214dc2624a52f0593bbfad5cefed0c025
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Thu Dec 6 15:57:46 2018 +0100

    into async re-work: revert default_async changes
---
 include/gomp-constants.h                           |   1 -
 libgomp/libgomp.map                                |   4 -
 libgomp/oacc-async.c                               |  19 +-
 libgomp/oacc-init.c                                |   2 -
 libgomp/oacc-int.h                                 |   3 -
 libgomp/openacc.f90                                |  22 +-
 libgomp/openacc.h                                  |   3 -
 libgomp/openacc_lib.h                              |  13 -
 .../libgomp.oacc-c-c++-common/asyncwait-2.c        | 904 ---------------------
 9 files changed, 2 insertions(+), 969 deletions(-)

diff --git include/gomp-constants.h include/gomp-constants.h
index acd25851bcc7..1021306ed661 100644
--- include/gomp-constants.h
+++ include/gomp-constants.h
@@ -160,7 +160,6 @@ enum gomp_map_kind
 /* Asynchronous behavior.  Keep in sync with
    libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
 
-#define GOMP_ASYNC_DEFAULT		0
 #define GOMP_ASYNC_NOVAL		-1
 #define GOMP_ASYNC_SYNC			-2
 
diff --git libgomp/libgomp.map libgomp/libgomp.map
index c5e1b876fccd..d2381da3bf07 100644
--- libgomp/libgomp.map
+++ libgomp/libgomp.map
@@ -464,12 +464,8 @@ OACC_2.5 {
 	acc_delete_finalize_async_32_h_;
 	acc_delete_finalize_async_64_h_;
 	acc_delete_finalize_async_array_h_;
-	acc_get_default_async;
-	acc_get_default_async_h_;
 	acc_memcpy_from_device_async;
 	acc_memcpy_to_device_async;
-	acc_set_default_async;
-	acc_set_default_async_h_;
 	acc_update_device_async;
 	acc_update_device_async_32_h_;
 	acc_update_device_async_64_h_;
diff --git libgomp/oacc-async.c libgomp/oacc-async.c
index 68aaf199a27e..553082fe3d4a 100644
--- libgomp/oacc-async.c
+++ libgomp/oacc-async.c
@@ -60,7 +60,7 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
   /* The special value acc_async_noval (-1) maps to the thread-specific
      default async stream.  */
   if (async == acc_async_noval)
-    async = thr->default_async;
+    async = 0; //TODO thr->default_async;
 
   if (async == acc_async_sync)
     return NULL;
@@ -221,23 +221,6 @@ acc_wait_all_async (int async)
   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
 }
 
-int
-acc_get_default_async (void)
-{
-  struct goacc_thread *thr = get_goacc_thread ();
-  return thr->default_async;
-}
-
-void
-acc_set_default_async (int async)
-{
-  if (async < acc_async_sync)
-    gomp_fatal ("invalid async argument: %d", async);
-
-  struct goacc_thread *thr = get_goacc_thread ();
-  thr->default_async = async;
-}
-
 static void
 goacc_async_unmap_tgt (void *ptr)
 {
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index 2c2f91ce3c2c..c40f48829078 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -426,8 +426,6 @@ goacc_attach_host_thread_to_device (int ord)
   
   thr->target_tls
     = acc_dev->openacc.create_thread_data_func (ord);
-
-  thr->default_async = acc_async_default;
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
diff --git libgomp/oacc-int.h libgomp/oacc-int.h
index 3354eb654ce9..97f3fc8a61ed 100644
--- libgomp/oacc-int.h
+++ libgomp/oacc-int.h
@@ -73,9 +73,6 @@ struct goacc_thread
 
   /* Target-specific data (used by plugin).  */
   void *target_tls;
-
-  /* Default OpenACC async queue for current thread, exported to plugin.  */
-  int default_async;
 };
 
 #if defined HAVE_TLS || defined USE_EMUTLS
diff --git libgomp/openacc.f90 libgomp/openacc.f90
index 7d31ee689479..7c809fe00738 100644
--- libgomp/openacc.f90
+++ libgomp/openacc.f90
@@ -51,10 +51,9 @@ module openacc_kinds
 
   integer, parameter :: acc_handle_kind = int32
 
-  public :: acc_async_default, acc_async_noval, acc_async_sync
+  public :: acc_async_noval, acc_async_sync
 
   ! Keep in sync with include/gomp-constants.h.
-  integer (acc_handle_kind), parameter :: acc_async_default = 0
   integer (acc_handle_kind), parameter :: acc_async_noval = -1
   integer (acc_handle_kind), parameter :: acc_async_sync = -2
 
@@ -93,16 +92,6 @@ module openacc_internal
       integer (acc_device_kind) d
     end function
 
-    subroutine acc_set_default_async_h (a)
-      import
-      integer a
-    end subroutine
-
-    function acc_get_default_async_h ()
-      import
-      integer acc_get_default_async_h
-    end function
-
     function acc_async_test_h (a)
       logical acc_async_test_h
       integer a
@@ -731,7 +720,6 @@ module openacc
 
   public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type
   public :: acc_set_device_num, acc_get_device_num, acc_async_test
-  public :: acc_set_default_async, acc_get_default_async
   public :: acc_async_test_all
   public :: acc_wait, acc_async_wait, acc_wait_async
   public :: acc_wait_all, acc_async_wait_all, acc_wait_all_async
@@ -764,14 +752,6 @@ module openacc
     procedure :: acc_get_device_num_h
   end interface
 
-  interface acc_set_default_async
-    procedure :: acc_set_default_async_h
-  end interface
-
-  interface acc_get_default_async
-    procedure :: acc_get_default_async_h
-  end interface
-
   interface acc_async_test
     procedure :: acc_async_test_h
   end interface
diff --git libgomp/openacc.h libgomp/openacc.h
index ede59d76c862..f61bb77f9f3e 100644
--- libgomp/openacc.h
+++ libgomp/openacc.h
@@ -63,7 +63,6 @@ typedef enum acc_device_t {
 
 typedef enum acc_async_t {
   /* Keep in sync with include/gomp-constants.h.  */
-  acc_async_default = 0,
   acc_async_noval = -1,
   acc_async_sync  = -2
 } acc_async_t;
@@ -73,8 +72,6 @@ void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
 acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
 void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
 int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
-void acc_set_default_async (int) __GOACC_NOTHROW;
-int acc_get_default_async (void) __GOACC_NOTHROW;
 int acc_async_test (int) __GOACC_NOTHROW;
 int acc_async_test_all (void) __GOACC_NOTHROW;
 void acc_wait (int) __GOACC_NOTHROW;
diff --git libgomp/openacc_lib.h libgomp/openacc_lib.h
index 75a693937967..820d987d72e2 100644
--- libgomp/openacc_lib.h
+++ libgomp/openacc_lib.h
@@ -46,7 +46,6 @@
       integer, parameter :: acc_handle_kind = 4
 
 !     Keep in sync with include/gomp-constants.h.
-      integer (acc_handle_kind), parameter :: acc_async_default = 0
       integer (acc_handle_kind), parameter :: acc_async_noval = -1
       integer (acc_handle_kind), parameter :: acc_async_sync = -2
 
@@ -90,18 +89,6 @@
         end function
       end interface
 
-      interface acc_set_default_async
-        subroutine acc_set_default_async_h (a)
-          integer a
-        end subroutine
-      end interface
-
-      interface acc_get_default_async
-        function acc_get_default_async_h ()
-          integer acc_get_default_async_h
-        end function
-      end interface
-
       interface acc_async_test
         function acc_async_test_h (a)
           logical acc_async_test_h
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c
deleted file mode 100644
index 94205407d41d..000000000000
--- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c
+++ /dev/null
@@ -1,904 +0,0 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* { dg-additional-options "-lcuda" } */
-
-#include <openacc.h>
-#include <stdlib.h>
-#include <cuda.h>
-
-#include <stdio.h>
-#include <time.h>
-#include <sys/time.h>
-
-int
-main (int argc, char **argv)
-{
-    CUresult r;
-    CUstream stream1;
-    int N = 128; //1024 * 1024;
-    float *a, *b, *c, *d, *e;
-    int i;
-    int nbytes;
-
-    srand (time (NULL));
-    int s = rand () % 100;
-
-    acc_init (acc_device_nvidia);
-
-    nbytes = N * sizeof (float);
-
-    a = (float *) malloc (nbytes);
-    b = (float *) malloc (nbytes);
-    c = (float *) malloc (nbytes);
-    d = (float *) malloc (nbytes);
-    e = (float *) malloc (nbytes);
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 3.0;
-        b[i] = 0.0;
-    }
-
-    acc_set_default_async (s);
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = a[ii];
-    }
-
-#pragma acc wait
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 3.0)
-            abort ();
-
-        if (b[i] != 3.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 2.0;
-        b[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 2.0)
-            abort ();
-
-        if (b[i] != 2.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 3.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 3.0)
-            abort ();
-
-        if (b[i] != 9.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 2.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc parallel wait (s) async (s)
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 2.0)
-            abort ();
-
-        if (b[i] != 4.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-
-        if (e[i] != 11.0)
-            abort ();
-    }
-
-
-    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
-    if (r != CUDA_SUCCESS)
-    {
-        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
-        abort ();
-    }
-
-    acc_set_cuda_stream (1, stream1);
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 5.0;
-        b[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 5.0)
-            abort ();
-
-        if (b[i] != 5.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 7.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 7.0)
-            abort ();
-
-        if (b[i] != 49.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 3.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc parallel wait (s) async (s)
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 3.0)
-            abort ();
-
-        if (b[i] != 9.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-
-        if (e[i] != 17.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 4.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 4.0)
-            abort ();
-
-        if (b[i] != 16.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-    }
-
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 5.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
-    {
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc parallel async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) async
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 5.0)
-            abort ();
-
-        if (b[i] != 25.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 3.0;
-        b[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = a[ii];
-    }
-
-#pragma acc wait
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 3.0)
-            abort ();
-
-        if (b[i] != 3.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 2.0;
-        b[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 2.0)
-            abort ();
-
-        if (b[i] != 2.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 3.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 3.0)
-            abort ();
-
-        if (b[i] != 9.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 2.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc kernels wait (s) async (s)
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 2.0)
-            abort ();
-
-        if (b[i] != 4.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-
-        if (e[i] != 11.0)
-            abort ();
-    }
-
-
-    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
-    if (r != CUDA_SUCCESS)
-    {
-        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
-        abort ();
-    }
-
-    acc_set_cuda_stream (1, stream1);
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 5.0;
-        b[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 5.0)
-            abort ();
-
-        if (b[i] != 5.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 7.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 7.0)
-            abort ();
-
-        if (b[i] != 49.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 3.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
-    }
-
-#pragma acc kernels wait (s) async (s)
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
-    }
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 3.0)
-            abort ();
-
-        if (b[i] != 9.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-
-        if (d[i] != 1.0)
-            abort ();
-
-        if (e[i] != 17.0)
-            abort ();
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 4.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 4.0)
-            abort ();
-
-        if (b[i] != 16.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-    }
-
-
-    for (i = 0; i < N; i++)
-    {
-        a[i] = 5.0;
-        b[i] = 0.0;
-        c[i] = 0.0;
-        d[i] = 0.0;
-        e[i] = 0.0;
-    }
-
-#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
-    {
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
-    }
-
-#pragma acc kernels async
-    {
-        int ii;
-
-        for (ii = 0; ii < N; ii++)
-            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
-    }
-
-#pragma acc update host (a[0:N], b[0:N], c[0:N]) async
-
-#pragma acc wait (s)
-
-    }
-
-    for (i = 0; i < N; i++)
-    {
-        if (a[i] != 5.0)
-            abort ();
-
-        if (b[i] != 25.0)
-            abort ();
-
-        if (c[i] != 4.0)
-            abort ();
-    }
-
-    acc_shutdown (acc_device_nvidia);
-
-    return 0;
-}


Grüße
 Thomas


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]