This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] declare directive [5/5]



diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index fe38dc6..663c27c 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -318,6 +318,7 @@ GOACC_2.0 {
   global:
 	GOACC_data_end;
 	GOACC_data_start;
+	GOACC_declare;
 	GOACC_enter_exit_data;
 	GOACC_parallel;
 	GOACC_update;
@@ -331,6 +332,7 @@ GOACC_2.0.GOMP_4_BRANCH {
 	GOACC_deviceptr;
 	GOACC_get_ganglocal_ptr;
 	GOACC_kernels;
+	GOACC_register_static;
 } GOACC_2.0;
 
 GOMP_PLUGIN_1.0 {
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 9f24dc3..e772f48 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -205,6 +205,8 @@ acc_shutdown_1 (acc_device_t d)
   if (!base_dev)
     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
 
+  goacc_deallocate_static (d);
+
   gomp_mutex_lock (&goacc_thread_lock);
 
   /* Free target-specific TLS data and close all devices.  */
@@ -373,7 +375,9 @@ goacc_attach_host_thread_to_device (int ord)
 void
 acc_init (acc_device_t d)
 {
-  if (!cached_base_dev)
+  bool init = !cached_base_dev;
+
+  if (init)
     gomp_init_targets_once ();
 
   gomp_mutex_lock (&acc_device_lock);
@@ -381,6 +385,9 @@ acc_init (acc_device_t d)
   cached_base_dev = acc_init_1 (d);
 
   gomp_mutex_unlock (&acc_device_lock);
+
+  if (init)
+    goacc_allocate_static (d);
   
   goacc_attach_host_thread_to_device (-1);
 }
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index 0ace737..8f4938e 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -98,6 +98,9 @@ void goacc_save_and_set_bind (acc_device_t);
 void goacc_restore_bind (void);
 void goacc_lazy_initialize (void);
 
+void goacc_allocate_static (acc_device_t);
+void goacc_deallocate_static (acc_device_t);
+
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
 # pragma GCC visibility pop
 #endif
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 513d0bc..70758bc 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -109,6 +109,68 @@ alloc_ganglocal_addrs (size_t mapnum, void **hostaddrs, size_t *sizes,
     }
 }
 
+static struct oacc_static
+{
+  void *addr;
+  size_t size;
+  unsigned short mask;
+  bool free;
+  struct oacc_static *next;
+} *oacc_statics;
+
+static bool alloc_done = false;
+
+void
+goacc_allocate_static (acc_device_t d)
+{
+  struct oacc_static *s;
+
+  if (alloc_done)
+    assert (0);
+
+  for (s = oacc_statics; s; s = s->next)
+    {
+      void *d;
+
+      switch (s->mask)
+	{
+	case GOMP_MAP_FORCE_ALLOC:
+	  break;
+
+	case GOMP_MAP_FORCE_TO:
+	  d = acc_deviceptr (s->addr);
+	  acc_memcpy_to_device (d, s->addr, s->size);
+	  break;
+
+	case GOMP_MAP_FORCE_DEVICEPTR:
+	case GOMP_MAP_DEVICE_RESIDENT:
+	case GOMP_MAP_LINK:
+	  break;
+
+	default:
+	  assert (0);
+	  break;
+	}
+    }
+
+  alloc_done = true;
+}
+
+void
+goacc_deallocate_static (acc_device_t d)
+{
+  struct oacc_static *s;
+  unsigned short mask = GOMP_MAP_FORCE_DEALLOC;
+
+  if (!alloc_done)
+    return;
+
+  for (s = oacc_statics; s; s = s->next)
+    GOACC_enter_exit_data (d, 1, &s->addr, &s->size, &mask, 0, 0);
+
+  alloc_done = false;
+}
+
 static void goacc_wait (int async, int num_waits, va_list ap);
 
 void
@@ -592,3 +654,82 @@ GOACC_get_thread_num (int gang, int worker, int vector)
 {
   return 0;
 }
+
+void
+GOACC_register_static (void *addr, int size, unsigned int mask)
+{
+  struct oacc_static *s;
+
+  s = (struct oacc_static *) malloc (sizeof (struct oacc_static));
+  s->addr = addr;
+  s->size = (size_t) size;
+  s->mask = mask;
+  s->free = false;
+  s->next = NULL;
+
+  if (oacc_statics)
+    s->next = oacc_statics;
+
+   oacc_statics = s;
+}
+
+#include <stdio.h>
+
+void
+GOACC_declare (int device, size_t mapnum,
+	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  int i;
+
+  for (i = 0; i < mapnum; i++)
+    {
+      unsigned char kind = kinds[i] & 0xff;
+
+      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
+	continue;
+
+      switch (kind)
+	{
+	  case GOMP_MAP_FORCE_ALLOC:
+	  case GOMP_MAP_FORCE_DEALLOC:
+	  case GOMP_MAP_FORCE_FROM:
+	  case GOMP_MAP_FORCE_TO:
+	  case GOMP_MAP_POINTER:
+	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+				   &kinds[i], 0, 0);
+	    break;
+
+	  case GOMP_MAP_FORCE_DEVICEPTR:
+	    break;
+
+	  case GOMP_MAP_ALLOC:
+	    if (!acc_is_present (hostaddrs[i], sizes[i]))
+	      {
+		GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+				       &kinds[i], 0, 0);
+	      }
+	    break;
+
+	  case GOMP_MAP_TO:
+	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+				   &kinds[i], 0, 0);
+
+	    break;
+
+	  case GOMP_MAP_FROM:
+	    kinds[i] = GOMP_MAP_FORCE_FROM;
+	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+				       &kinds[i], 0, 0);
+	    break;
+
+	  case GOMP_MAP_FORCE_PRESENT:
+	    if (!acc_is_present (hostaddrs[i], sizes[i]))
+	      gomp_fatal ("[%p,%zd] is not mapped", hostaddrs[i], sizes[i]);
+	    break;
+
+	  default:
+	    assert (0);
+	    break;
+	}
+    }
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/declare-1.C b/libgomp/testsuite/libgomp.oacc-c++/declare-1.C
new file mode 100644
index 0000000..268809b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/declare-1.C
@@ -0,0 +1,24 @@
+
+template<class T>
+T foo()
+{
+  T a;
+  #pragma acc declare create (a)
+
+  #pragma acc parallel
+  {
+    a = 5;
+  }
+
+  return a;
+}
+
+int
+main (void)
+{
+  int rc;
+
+  rc = foo<int>();
+
+  return rc;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-1.c
new file mode 100644
index 0000000..59cfe51
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int b[8];
+#pragma acc declare create (b)
+
+int d[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+#pragma acc declare copyin (d)
+
+int
+main (int argc, char **argv)
+{
+  const int N = 8;
+  int a[N];
+  int e[N];
+#pragma acc declare create (e)
+  int i;
+
+  for (i = 0; i < N; i++)
+    a[i] = i + 1;
+
+  if (!acc_is_present (&b, sizeof (b)))
+    abort ();
+
+  if (!acc_is_present (&d, sizeof (d)))
+    abort ();
+
+  if (!acc_is_present (&e, sizeof (e)))
+    abort ();
+
+#pragma acc parallel copyin (a[0:N])
+  {
+    for (i = 0; i < N; i++)
+      {
+        b[i] = a[i];
+        a[i] = b[i];
+      }
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != i + 1)
+	abort ();
+    }
+
+#pragma acc parallel copy (a[0:N])
+  {
+    for (i = 0; i < N; i++)
+      {
+        e[i] = a[i] + d[i];
+	a[i] = e[i];
+      }
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != (i + 1) * 2)
+	abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-2.c
new file mode 100644
index 0000000..2078a33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+
+#define N 16
+
+float c[N];
+#pragma acc declare device_resident (c)
+
+#pragma acc routine
+float
+subr2 (float a)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    c[i] = 2.0;
+
+  for (i = 0; i < N; i++)
+    a += c[i];
+
+  return a;
+}
+
+float b[N];
+#pragma acc declare copyin (b)
+
+#pragma acc routine
+float
+subr1 (float a)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    a += b[i];
+
+  return a;
+}
+
+int
+main (int argc, char **argv)
+{
+  float a;
+  int i;
+
+  for (i = 0; i < 16; i++)
+    b[i] = 1.0;
+
+  a = 0.0;
+
+  a = subr1 (a);
+
+  if (a != 16.0)
+    abort ();
+
+  a = 0.0;
+
+  a = subr2 (a);
+
+  if (a != 32.0)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
new file mode 100644
index 0000000..c3a2187
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+float *b;
+#pragma acc declare deviceptr (b)
+
+#pragma acc routine
+float *
+subr2 (void)
+{
+  return b;
+}
+
+float
+subr1 (float a)
+{
+  float b;
+#pragma acc declare present_or_copy (b)
+  float c;
+#pragma acc declare present_or_copyin (c)
+  float d;
+#pragma acc declare present_or_create (d)
+  float e;
+#pragma acc declare present_or_copyout (e)
+
+#pragma acc parallel copy (a)
+  {
+    b = a;
+    c = b;
+    d = c;
+    e = d;
+    a = e;
+  }
+
+  return a;
+}
+
+int
+main (int argc, char **argv)
+{
+  float a;
+  float *c;
+
+  a = 2.0;
+
+  a = subr1 (a);
+
+  if (a != 2.0)
+    abort ();
+
+  b = (float *) acc_malloc (sizeof (float));
+
+  c = subr2 ();
+
+  if (b != c)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-4.c
new file mode 100644
index 0000000..84ec64f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-4.c
@@ -0,0 +1,27 @@
+/* { dg-do run  { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+float b;
+#pragma acc declare link (b)
+
+int
+main (int argc, char **argv)
+{
+  float a;
+
+  a = 2.0;
+
+#pragma acc parallel copy (a)
+  {
+    b = a;
+    a = 1.0;
+    a = a + b;
+  }
+
+  if (a != 3.0)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
index 0bab5bd..4d58e70 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
@@ -1,5 +1,10 @@
 ! { dg-do run  { target openacc_nvidia_accel_selected } }
 
+module vars
+  integer z
+  !$acc declare create (z)
+end module vars
+
 subroutine subr6 (a, d)
   integer, parameter :: N = 8
   integer :: i
@@ -200,6 +205,7 @@ subroutine subr0 (a, b, c, d)
 end subroutine
 
 program main
+  use vars
   use openacc
   integer, parameter :: N = 8
   integer :: a(N)
@@ -212,6 +218,8 @@ program main
   c(:) = 4
   d(:) = 5
 
+  if (acc_is_present (z) .neqv. .true.) call abort
+
   call subr0 (a, b, c, d)
 
   call test (a, .false.)
@@ -226,4 +234,5 @@ program main
     if (d(i) .ne. 16) call abort
   end do
 
+
 end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-2.f90
new file mode 100644
index 0000000..9b75aa1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-2.f90
@@ -0,0 +1,14 @@
+! { dg-do run  { target openacc_nvidia_accel_selected } }
+
+module globalvars
+  integer a
+  !$acc declare create (a)
+end module globalvars
+
+program test
+  use globalvars
+  use openacc
+
+  if (acc_is_present (a) .neqv. .true.) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-3.f90
new file mode 100644
index 0000000..79fc011
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-3.f90
@@ -0,0 +1,65 @@
+! { dg-do run  { target openacc_nvidia_accel_selected } }
+
+module globalvars
+  real b
+  !$acc declare link (b)
+end module globalvars
+
+program test
+  use openacc
+
+  real a
+  real c
+  !$acc declare link (c)
+
+  if (acc_is_present (b) .neqv. .false.) call abort
+  if (acc_is_present (c) .neqv. .false.) call abort
+
+  a = 0.0
+  b = 1.0
+
+  !$acc parallel copy (a) copyin (b)
+    b = b + 4.0
+    a = b
+  !$acc end parallel
+
+  if (a .ne. 5.0) call abort
+
+  if (acc_is_present (b) .neqv. .false.) call abort
+
+  a = 0.0
+
+  !$acc parallel copy (a) create (b)
+    b = 4.0
+    a = b
+  !$acc end parallel
+
+  if (a .ne. 4.0) call abort
+
+  if (acc_is_present (b) .neqv. .false.) call abort
+
+  a = 0.0
+
+  !$acc parallel copy (a) copy (b)
+    b = 4.0
+    a = b
+  !$acc end parallel
+
+  if (a .ne. 4.0) call abort
+  if (b .ne. 4.0) call abort
+
+  if (acc_is_present (b) .neqv. .false.) call abort
+
+  a = 0.0
+
+  !$acc parallel copy (a) copy (b) copy (c)
+    b = 4.0
+    c = b
+    a = c
+  !$acc end parallel
+
+  if (a .ne. 4.0) call abort
+
+  if (acc_is_present (b) .neqv. .false.) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-4.f90
new file mode 100644
index 0000000..997c8ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-4.f90
@@ -0,0 +1,27 @@
+! { dg-do run  { target openacc_nvidia_accel_selected } }
+
+module vars
+  real b
+ !$acc declare create (b)
+end module vars
+
+program test
+  use vars
+  use openacc
+  real a
+
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  a = 2.0
+
+  !$acc parallel copy (a)
+    b = a
+    a = 1.0
+    a = a + b
+   !$acc end parallel
+
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  if (a .ne. 3.0) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-5.f90
new file mode 100644
index 0000000..d7c9bac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-5.f90
@@ -0,0 +1,28 @@
+! { dg-do run  { target openacc_nvidia_accel_selected } }
+
+module vars
+  implicit none
+  real b
+ !$acc declare device_resident (b)
+end module vars
+
+program test
+  use vars
+  use openacc
+  real a
+
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  a = 2.0
+
+  !$acc parallel copy (a)
+    b = a
+    a = 1.0
+    a = a + b
+   !$acc end parallel
+
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  if (a .ne. 3.0) call abort
+
+end program test

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]