This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Handle global loop counters in fortran oacc kernels (was: openacc kernels directive -- initial support)


Hi!

On Sat, 15 Nov 2014 13:14:52 +0100, Tom de Vries <Tom_deVries@mentor.com> wrote:
> I'm submitting a patch series with initial support for the oacc kernels directive.

Committed to gomp-4_0-branch in r222286:

commit 0c33234340aa17536c2c86e0982c42070c89226b
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Apr 21 20:22:54 2015 +0000

    Handle global loop counters in fortran oacc kernels
    
    In Fortran, loop counters cannot have a scope limited to the kernels region,
    and their function scope inhibits parallelization.  At the technical level,
    it seems possible to do DCE and get rid of the dead code that is inhibiting
    parallelization (in other words, the code copying the loop iterator value out
    of the region), but probably some effort would be involved.
    
    Another possibility is to add an assign of the final value of the loop
    iteration variable after the loop to cut the dependency, though this will only
    work for loops where that value is known at compile time -- which is exactly
    what pass_scev_cprop does.
    
    	gcc/
    	* passes.def: Add pass_scev_cprop to pass_oacc_kernels.
    	* tree-ssa-loop.c (pass_scev_cprop::clone): New function.
    
    	gcc/testsuite/
    	* gcc.dg/pr41488.c: Update for new pass_scev_cprop.
    	* gcc.dg/tree-ssa/loop-17.c: Likewise.
    	* gcc.dg/tree-ssa/loop-39.c: Likewise.
    	* gcc.dg/tree-ssa/scev-7.c: Likewise.
    	* gfortran.dg/goacc/kernels-loop-2.f95: New test.
    	* gfortran.dg/goacc/kernels-loop.f95: New test.
    
    	libgomp/
    	* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test.
    	* testsuite/libgomp.oacc-fortran/kernels-loop.f95: New test.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222286 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |    3 ++
 gcc/passes.def                                     |    1 +
 gcc/testsuite/ChangeLog.gomp                       |    7 +++
 gcc/testsuite/gcc.dg/pr41488.c                     |    6 +--
 gcc/testsuite/gcc.dg/tree-ssa/loop-17.c            |    6 +--
 gcc/testsuite/gcc.dg/tree-ssa/loop-39.c            |    6 +--
 gcc/testsuite/gcc.dg/tree-ssa/scev-7.c             |    6 +--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 |   46 ++++++++++++++++++++
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   |   40 +++++++++++++++++
 gcc/tree-ssa-loop.c                                |    1 +
 libgomp/ChangeLog.gomp                             |    3 ++
 .../libgomp.oacc-fortran/kernels-loop-2.f95        |   32 ++++++++++++++
 .../libgomp.oacc-fortran/kernels-loop.f95          |   28 ++++++++++++
 13 files changed, 173 insertions(+), 12 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index bf0ee52..f14c3718 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,8 @@
 2015-04-21  Tom de Vries  <tom@codesourcery.com>
 
+	* passes.def: Add pass_scev_cprop to pass_oacc_kernels.
+	* tree-ssa-loop.c (pass_scev_cprop::clone): New function.
+
 	* passes.def: Add pass_parallelize_loops_oacc_kernels in pass group
 	pass_oacc_kernels.
 	* tree-parloops.c (create_parallel_loop, gen_parallel_loop): Add
diff --git gcc/passes.def gcc/passes.def
index 2d2e286..3e85808 100644
--- gcc/passes.def
+++ gcc/passes.def
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3.  If not see
 	      NEXT_PASS (pass_tree_loop_init);
 	      NEXT_PASS (pass_lim);
 	      NEXT_PASS (pass_copy_prop);
+	      NEXT_PASS (pass_scev_cprop);
       	      NEXT_PASS (pass_parallelize_loops_oacc_kernels);
 	      NEXT_PASS (pass_expand_omp_ssa);
 	      NEXT_PASS (pass_tree_loop_done);
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index 2c6abff..eed22e2 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,6 +1,13 @@
 2015-04-21  Tom de Vries  <tom@codesourcery.com>
 	    Thomas Schwinge  <thomas@codesourcery.com>
 
+	* gcc.dg/pr41488.c: Update for new pass_scev_cprop.
+	* gcc.dg/tree-ssa/loop-17.c: Likewise.
+	* gcc.dg/tree-ssa/loop-39.c: Likewise.
+	* gcc.dg/tree-ssa/scev-7.c: Likewise.
+	* gfortran.dg/goacc/kernels-loop-2.f95: New test.
+	* gfortran.dg/goacc/kernels-loop.f95: New test.
+
 	* c-c++-common/goacc/kernels-loop-2.c: New test.
 	* c-c++-common/goacc/kernels-loop.c: New test.
 	* c-c++-common/goacc/kernels-loop-n.c: New test.
diff --git gcc/testsuite/gcc.dg/pr41488.c gcc/testsuite/gcc.dg/pr41488.c
index c4bc428..1f306b4 100644
--- gcc/testsuite/gcc.dg/pr41488.c
+++ gcc/testsuite/gcc.dg/pr41488.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sccp-scev" } */
+/* { dg-options "-O2 -fdump-tree-sccp2-scev" } */
 
 struct struct_t
 {
@@ -14,5 +14,5 @@ void foo (struct struct_t* sp, int start, int end)
     sp->data[i+start] = 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/loop-17.c gcc/testsuite/gcc.dg/tree-ssa/loop-17.c
index 0e856d8..d51fe57 100644
--- gcc/testsuite/gcc.dg/tree-ssa/loop-17.c
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-17.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-sccp-details" } */
+/* { dg-options "-O -fdump-tree-sccp2-details" } */
 
 /* To determine the number of iterations in this loop we need to fold
    p_4 + 4B > p_4 + 8B to false.  This transformation has caused
@@ -15,5 +15,5 @@ int foo (int *p)
   return i;
 }
 
-/* { dg-final { scan-tree-dump "# of iterations 1, bounded by 1" "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump "# of iterations 1, bounded by 1" "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/loop-39.c gcc/testsuite/gcc.dg/tree-ssa/loop-39.c
index 1f6bba4..5c56f00 100644
--- gcc/testsuite/gcc.dg/tree-ssa/loop-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-39.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sccp-details" } */
+/* { dg-options "-O2 -fdump-tree-sccp2-details" } */
 
 int
 foo (unsigned int n)
@@ -22,5 +22,5 @@ foo (unsigned int n)
   return r + n;
 }
 
-/* { dg-final { scan-tree-dump "# of iterations \[^\n\r]*, bounded by 8" "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump "# of iterations \[^\n\r]*, bounded by 8" "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/scev-7.c gcc/testsuite/gcc.dg/tree-ssa/scev-7.c
index d6ceb20..0b3928f 100644
--- gcc/testsuite/gcc.dg/tree-ssa/scev-7.c
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sccp-scev" } */
+/* { dg-options "-O2 -fdump-tree-sccp2-scev" } */
 
 struct struct_t
 {
@@ -14,5 +14,5 @@ void foo (struct struct_t* sp, int start, int end)
     sp->data[i+start] = 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
new file mode 100644
index 0000000..bef69f8
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -0,0 +1,46 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+  implicit none
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i, ii
+
+  !$acc kernels copyout (a(0:n-1))
+  do i = 0, n - 1
+     a(i) = i * 2
+  end do
+  !$acc end kernels
+
+  !$acc kernels copyout (b(0:n-1))
+  do i = 0, n -1
+     b(i) = i * 4
+  end do
+  !$acc end kernels
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do ii = 0, n - 1
+     c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+     if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
new file mode 100644
index 0000000..be5f26d
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
@@ -0,0 +1,40 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+  implicit none
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i, ii
+
+  do i = 0, n - 1
+     a(i) = i * 2
+  end do
+
+  do i = 0, n -1
+     b(i) = i * 4
+  end do
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do ii = 0, n - 1
+     c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+     if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end program main
+
+! Check that only one loop is analyzed, and that it can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
index 2a96a39..0915cee 100644
--- gcc/tree-ssa-loop.c
+++ gcc/tree-ssa-loop.c
@@ -425,6 +425,7 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *) { return flag_tree_scev_cprop; }
   virtual unsigned int execute (function *) { return scev_const_prop (); }
+  opt_pass * clone () { return new pass_scev_cprop (m_ctxt); }
 
 }; // class pass_scev_cprop
 
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index f6968b8..bcb3340 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,6 +1,9 @@
 2015-04-21  Tom de Vries  <tom@codesourcery.com>
 	    Thomas Schwinge  <thomas@codesourcery.com>
 
+	* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test.
+	* testsuite/libgomp.oacc-fortran/kernels-loop.f95: New test.
+
 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test.
 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: New test.
 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: New test.
diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95
new file mode 100644
index 0000000..1fb40ee
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95
@@ -0,0 +1,32 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+  implicit none
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i, ii
+
+  !$acc kernels copyout (a(0:n-1))
+  do i = 0, n - 1
+     a(i) = i * 2
+  end do
+  !$acc end kernels
+
+  !$acc kernels copyout (b(0:n-1))
+  do i = 0, n -1
+     b(i) = i * 4
+  end do
+  !$acc end kernels
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do ii = 0, n - 1
+     c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+     if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end program main
diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop.f95
new file mode 100644
index 0000000..b02dd57
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop.f95
@@ -0,0 +1,28 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+  implicit none
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i, ii
+
+  do i = 0, n - 1
+     a(i) = i * 2
+  end do
+
+  do i = 0, n -1
+     b(i) = i * 4
+  end do
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do ii = 0, n - 1
+     c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+     if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end program main


Grüße,
 Thomas

Attachment: signature.asc
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]