This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[og7] vector_length extension part 5: libgomp and tests


The attached patch is the last one in the vector length extension
series. It consists of some tweaks to the libgomp nvptx plugin to
accommodate larger vectors along with two test cases.

I only added two test cases because there's really not much interesting
going on with longer vector lengths. We should eventually add more test
cases to handle situations where the nvptx BE falls back to using a
shorter vector length. But right now GCC just makes those changes
silently. There is precedent for the nvptx BE to emit a warning when
vector length != 32, but that might be too verbose. On one hand, it
could be argued that the compiler should error if it cannot satisfy the
user's request. On the other hand, falling back to a smaller vector
length ensures correctness.

Thomas, do you have any thoughts on the warnings/errors or the lack thereof?

I'll apply this patch to openacc-gcc-7-branch once the reduction changes
have been approved.

Cesar
2018-03-02  Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	* plugin/plugin-nvptx.c (nvptx_exec): Adjust calculations of
	workers and vectors.
	* testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test.
	* testsuite/libgomp.oacc-fortran/gemm.f90: New test.


>From 37e83baa6ae7e867e6203d7554e8381660488421 Mon Sep 17 00:00:00 2001
From: Cesar Philippidis <cesar@codesourcery.com>
Date: Fri, 2 Mar 2018 06:54:25 -0800
Subject: [PATCH] runtime changes and tests

---
 libgomp/plugin/plugin-nvptx.c                      |  10 +-
 .../libgomp.oacc-c-c++-common/vred2d-128.c         |  55 +++++++++++
 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90    | 108 +++++++++++++++++++++
 3 files changed, 169 insertions(+), 4 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index bdc0c30e1f5..9b4768f0e59 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -734,8 +734,6 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
   int threads_per_block = threads_per_sm > block_size
     ? block_size : threads_per_sm;
 
-  threads_per_block /= warp_size;
-
   if (threads_per_sm > cpu_size)
     threads_per_sm = cpu_size;
 
@@ -802,6 +800,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 
   if (seen_zero)
     {
+      int vectors = dims[GOMP_DIM_VECTOR] > 0
+	? dims[GOMP_DIM_VECTOR] : warp_size;
+      int workers = threads_per_block / vectors;
+
       for (i = 0; i != GOMP_DIM_MAX; i++)
 	if (!dims[i])
 	  {
@@ -819,10 +821,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 		  : 2 * dev_size;
 		break;
 	      case GOMP_DIM_WORKER:
-		dims[i] = threads_per_block;
+		dims[i] = workers;
 		break;
 	      case GOMP_DIM_VECTOR:
-		dims[i] = warp_size;
+		dims[i] = vectors;
 		break;
 	      default:
 		abort ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
new file mode 100644
index 00000000000..0fbd30e77b9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
@@ -0,0 +1,55 @@
+/* Test large vector lengths.  */
+
+#include <assert.h>
+
+#define n 10000
+int a1[n], a2[n];
+
+#define gentest(name, outer, inner)		\
+  void name ()					\
+  {						\
+  long i, j, t1, t2, t3;			\
+  _Pragma(outer)				\
+  for (i = 0; i < n; i++)			\
+    {						\
+      t1 = 0;					\
+      t2 = 0;					\
+      _Pragma(inner)				\
+      for (j = i; j < n; j++)			\
+	{					\
+	  t1++;					\
+	  t2--;					\
+	}					\
+      a1[i] = t1;				\
+      a2[i] = t2;				\
+    }						\
+  for (i = 0; i < n; i++)			\
+    {						\
+      assert (a1[i] == n-i);			\
+      assert (a2[i] == -(n-i));			\
+    }						\
+  }						\
+  
+gentest (test1, "acc parallel loop gang vector_length (128)",
+	 "acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test2, "acc parallel loop gang vector_length (128)",
+	 "acc loop worker vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test3, "acc parallel loop gang worker vector_length (128)",
+	 "acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test4, "acc parallel loop",
+	 "acc loop reduction(+:t1) reduction(-:t2)")
+
+
+int
+main ()
+{
+  test1 ();
+  test2 ();
+  test3 ();
+  test4 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
new file mode 100644
index 00000000000..ad67dce5cad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
@@ -0,0 +1,108 @@
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-additional-options "-fopenacc-dim=-:-:128" }
+
+! Implicitly set vector_length to 128 using -fopenacc-dim.
+subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n))
+  do j = 1, n
+     !$acc loop
+     do i = 1, m
+        temp = 0.0
+        !$acc loop reduction(+:temp)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine openacc_sgemm
+
+! Explicitly set vector_length to 128 using a vector_length clause.
+subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128)
+  do j = 1, n
+     !$acc loop
+     do i = 1, m
+        temp = 0.0
+        !$acc loop reduction(+:temp)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine openacc_sgemm_128
+
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  do j = 1, n
+     do i = 1, m
+        temp = 0.0
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine host_sgemm
+
+program main
+  integer, parameter :: M = 100, N = 50, K = 2000
+  real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
+  real alpha, beta
+  integer i, j
+
+  a(:,:) = 1.0
+  b(:,:) = 0.25
+
+  c(:,:) = 0.0
+  d(:,:) = 0.0
+  e(:,:) = 0.0
+
+  alpha = 1.05
+  beta = 1.25
+
+  call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
+  call openacc_sgemm_128 (M, N, K, alpha, a, b, beta, d)
+  call host_sgemm (M, N, K, alpha, a, b, beta, e)
+
+  do i = 1, m
+     do j = 1, n
+        if (c(i,j) /= e(i,j)) call abort
+        if (d(i,j) /= e(i,j)) call abort
+     end do
+  end do
+end program main
-- 
2.14.3


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]