This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] Extend libgomp's fortran test coverage of host_data


This patch ports libgomp.oacc-c-c++-common/host_data-1.c to fortran.
Fortunately, the existing fortran host_data infrastructure was already
in place, so I had to do was port over the calls to Nvidia's CUDA BLAS
library.

There are a couple of details that one needs to consider when using CUDA
BLAS in gfortran. First, if you want to use Nvidia's wrapper functions
written in C to set up the appropriate cuda device contexts, then use
the thunking variants of the functions described here
<http://docs.nvidia.com/cuda/cublas/#appendix-b-cublas-fortran-bindings>.
Otherwise, it's much easier to let gfortran's OpenACC runtime manage the
data mappings and use the host_data clause to pass those data pointers
to the CUDA BLAS library calls.

In terms of calling the actual CUDA BLAS functions, there's already good
documentation for that here
<https://gcc.gnu.org/onlinedocs/gfortran/Interoperability-with-C.html>.
Basically, those library calls need a function interface with a special
C binding. The function I tested in host_data-2.f90 is cublasSaxpy.
Other function interfaces will need to be created as necessary.

I've applied this patch to gomp-4_0-branch.

Cesar
2016-08-11  Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	* testsuite/libgomp.oacc-fortran/host_data-1.f90: Remove stale xfail.
	* testsuite/libgomp.oacc-fortran/host_data-2.f90: New test.


diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90
index 497b0f7..69a491d 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90
@@ -1,9 +1,6 @@
 ! { dg-do run }
 ! { dg-additional-options "-cpp" }
 
-! { dg-xfail-if "TODO" { *-*-* } }
-! { dg-excess-errors "TODO" }
-
 program test
   implicit none
 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90
new file mode 100644
index 0000000..68e14e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90
@@ -0,0 +1,85 @@
+! Test host_data interoperability with CUDA blas.
+
+! { dg-do run { target openacc_nvidia_accel_selected } }
+! { dg-additional-options "-lcublas" }
+
+program test
+  implicit none
+
+  integer, parameter :: N = 10
+  integer :: i
+  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
+  
+  interface
+     subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
+       use iso_c_binding
+       integer(kind=c_int), value :: N
+       real*4, value :: alpha
+       type(*), dimension(*) :: x
+       integer(kind=c_int), value :: incx
+       type(*), dimension(*) :: y
+       integer(kind=c_int), value :: incy
+     end subroutine cublassaxpy
+  end interface
+
+  a = 2.0
+
+  do i = 1, N
+     x(i) = 4.0 * i
+     y(i) = 3.0
+     x_ref(i) = x(i)
+     y_ref(i) = y(i)
+  end do
+
+  call saxpy (N, a, x_ref, y_ref)
+  
+  !$acc data copyin (x) copy (y)
+  !$acc host_data use_device (x, y)
+  call cublassaxpy(N, a, x, 1, y, 1)
+  !$acc end host_data
+  !$acc end data
+  
+  do i = 1, N
+     if (y(i) .ne. y_ref(i)) call abort
+  end do
+
+  !$acc data create (x) copyout (y)
+  !$acc parallel loop
+  do i = 1, N
+     y(i) = 3.0
+  end do
+  !$acc end parallel loop
+
+  !$acc host_data use_device (x, y)
+  call cublassaxpy(N, a, x, 1, y, 1)
+  !$acc end host_data
+  !$acc end data
+
+  do i = 1, N
+     if (y(i) .ne. y_ref(i)) call abort
+  end do
+
+  y(:) = 3.0
+  
+  !$acc data copyin (x) copyin (a) copy (y)
+  !$acc parallel present (x) pcopy (y) present (a)
+  call saxpy (N, a, x, y)
+  !$acc end parallel
+  !$acc end data
+
+  do i = 1, N
+     if (y(i) .ne. y_ref(i)) call abort
+  end do
+end program test
+
+subroutine saxpy (nn, aa, xx, yy)
+  integer :: nn
+  real*4 :: aa, xx(nn), yy(nn)
+  integer i
+  real*4 :: t
+  !$acc routine
+
+  do i = 1, nn
+    yy(i) = yy(i) + aa * xx(i)
+  end do
+end subroutine saxpy

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]