[PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition

Thomas Schwinge thomas@codesourcery.com
Fri Jun 10 10:36:00 GMT 2016


Hi!

On Thu, 2 Jun 2016 18:25:17 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Jun 02, 2016 at 06:20:57PM +0200, Thomas Schwinge wrote:
> >     [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition

> LGTM.

Committed to trunk in r237291, gcc-6-branch in r237296, and
gomp-4_0-branch in r237300:

commit e2c7e1b8ad89532911b25af34875a75f24823e1a
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jun 10 09:22:51 2016 +0000

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested-function-1.c: New file.
    	* gcc.dg/goacc/nested-function-2.c: Likewise.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/cray-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
    	* gfortran.dg/goacc/cray.f95: Update.
    	* gfortran.dg/goacc/loop-1.f95: Likewise.
    	* gfortran.dg/goacc/loop-3.f95: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
    	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
    	libgomp/testsuite/
    	PR middle-end/71373
    	* libgomp.oacc-c/nested-function-1.c: New file.
    	* libgomp.oacc-c/nested-function-2.c: Likewise.
    	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237291 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  10 +
 gcc/gimplify.c                                     |   6 +
 gcc/omp-low.c                                      |   4 +-
 gcc/testsuite/ChangeLog                            |  20 ++
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c     | 100 +++++++++
 gcc/testsuite/gcc.dg/goacc/nested-function-2.c     |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 ++++
 .../gfortran.dg/goacc/combined-directives.f90      |   3 +-
 .../gfortran.dg/goacc/{cray.f95 => cray-2.f95}     |  11 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95           |   6 +-
 .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} |  36 +--
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95         |  30 +--
 .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} |  15 +-
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95         |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90        |  93 ++++++++
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  73 ------
 gcc/tree-nested.c                                  |  30 +++
 libgomp/ChangeLog                                  |  10 +
 .../testsuite/libgomp.oacc-c/nested-function-1.c   |  52 +++++
 .../testsuite/libgomp.oacc-c/nested-function-2.c   | 155 +++++++++++++
 .../libgomp.oacc-fortran/nested-function-1.f90     |  70 ++++++
 .../libgomp.oacc-fortran/nested-function-2.f90     | 173 +++++++++++++++
 .../libgomp.oacc-fortran/nested-function-3.f90     | 244 +++++++++++++++++++++
 24 files changed, 1113 insertions(+), 128 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index 6afbae7..9cab311 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,15 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
 
diff --git gcc/gimplify.c gcc/gimplify.c
index f12c6a1..7c19cf3 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index 91d5fcf..22e5909 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	    install_var_local (decl, ctx);
 	  break;
 
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2357,13 +2357,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
 	case OMP_CLAUSE__GRIDDIM_:
 	  break;
 
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index e15b009..325de75 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,4 +1,24 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	PR middle-end/71373
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
 
 	PR c/71381
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57..3fa800d 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..e17c0e2
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()
+{
+#define N 100
+  int nonlocal_arg;
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 0000000..70c9ec8
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447a..abb5e6b 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
similarity index 91%
copy from gcc/testsuite/gfortran.dg/goacc/cray.f95
copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95
index 705c18c..51b79b5 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -1,15 +1,16 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
 
-module test
+program test
+  call oacc1
 contains
   subroutine oacc1
     implicit none
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
@@ -52,4 +53,4 @@ contains
     !$acc update host (ptr)
     !$acc update self (ptr)
   end subroutine oacc1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c..d6d5317 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
similarity index 89%
copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
index a605f03..79665b9 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -1,8 +1,10 @@
-module test
-  implicit none
+! See also loop-1.f95.
+
+program test
+  call test1
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
@@ -171,4 +173,4 @@ subroutine test1
   enddo
 
 end subroutine test1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f03..5f81b7a 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
similarity index 90%
copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
index 2a866c7..9be74a8 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -1,10 +1,13 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
 
+program test
+  call test1
+contains
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +26,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +38,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +53,6 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c7..30930f4 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 0000000..2fcaa40
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798..0000000
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 25a92aa..6fc6326 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
diff --git libgomp/ChangeLog libgomp/ChangeLog
index 5c7f41a..cf551f4 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,4 +1,14 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	PR middle-end/71373
+	* libgomp.oacc-c/nested-function-1.c: New file.
+	* libgomp.oacc-c/nested-function-2.c: Likewise.
+	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
+2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
 	PR c/71381
 	* testsuite/libgomp.oacc-c-c++-common/cache-1.c: #include
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 0000000..fb2a3ac
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 0000000..2c3f3fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 0000000..fdbca44
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3
+         !$acc loop collapse(2)
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 0000000..4e28196
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! collapse(3) nest, bounds passed as dummy arguments
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.  ! out-of-range flag accumulated by the OpenACC reduction
+    r = .false.  ! out-of-range flag accumulated by the sequential reference loop
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r so the reference is independent of l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! OpenACC and sequential flags must agree
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! as test2, with strides v7, v8, v9
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.  ! out-of-range flag accumulated by the OpenACC reduction
+    r = .false.  ! out-of-range flag accumulated by the sequential reference loop
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r so the reference is independent of l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! OpenACC and sequential flags must agree
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 0000000..2f6485e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! collapse(3) nest, bounds listed in pcopyin
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.  ! out-of-range flag accumulated by the OpenACC reduction
+    r = .false.  ! out-of-range flag accumulated by the sequential reference loop
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r so the reference is independent of l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! OpenACC and sequential flags must agree
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! as test2, with strides v7, v8, v9
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.  ! out-of-range flag accumulated by the OpenACC reduction
+    r = .false.  ! out-of-range flag accumulated by the sequential reference loop
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r so the reference is independent of l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! OpenACC and sequential flags must agree
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! strided collapse(3) nest, bounds in pcopyin
+    integer :: i, j, k, m, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.  ! out-of-range flag accumulated by the OpenACC reduction
+    r = .false.  ! out-of-range flag accumulated by the sequential reference loop
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k  ! NOTE(review): m is never read in this subroutine
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! OpenACC and sequential flags must agree
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3

commit 507c9999e155b704d9a7244b1c9d467a95143b77
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jun 10 09:46:18 2016 +0000

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    Backport trunk r237291:
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested-function-1.c: New file.
    	* gcc.dg/goacc/nested-function-2.c: Likewise.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/cray-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
    	* gfortran.dg/goacc/cray.f95: Update.
    	* gfortran.dg/goacc/loop-1.f95: Likewise.
    	* gfortran.dg/goacc/loop-3.f95: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
    	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
    	libgomp/testsuite/
    	PR middle-end/71373
    	* libgomp.oacc-c/nested-function-1.c: New file.
    	* libgomp.oacc-c/nested-function-2.c: Likewise.
    	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-6-branch@237296 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  12 +
 gcc/gimplify.c                                     |   6 +
 gcc/omp-low.c                                      |   4 +-
 gcc/testsuite/ChangeLog                            |  22 ++
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c     | 100 +++++++++
 gcc/testsuite/gcc.dg/goacc/nested-function-2.c     |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 ++++
 .../gfortran.dg/goacc/combined-directives.f90      |   3 +-
 .../gfortran.dg/goacc/{cray.f95 => cray-2.f95}     |  11 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95           |   6 +-
 .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} |  36 +--
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95         |  30 +--
 .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} |  15 +-
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95         |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90        |  93 ++++++++
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  73 ------
 gcc/tree-nested.c                                  |  32 +++
 libgomp/ChangeLog                                  |  11 +
 .../testsuite/libgomp.oacc-c/nested-function-1.c   |  52 +++++
 .../testsuite/libgomp.oacc-c/nested-function-2.c   | 155 +++++++++++++
 .../libgomp.oacc-fortran/nested-function-1.f90     |  70 ++++++
 .../libgomp.oacc-fortran/nested-function-2.f90     | 173 +++++++++++++++
 .../libgomp.oacc-fortran/nested-function-3.f90     | 244 +++++++++++++++++++++
 24 files changed, 1120 insertions(+), 128 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index cbdcc42..bb9099d 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,17 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	Backport from trunk r237291:
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	Backport from trunk r237290:
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
diff --git gcc/gimplify.c gcc/gimplify.c
index 259c88b..846a75a 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8224,7 +8224,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index e570c22..5f1e6aa 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2186,7 +2186,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2357,7 +2357,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2365,6 +2364,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index 5e3f7e3..aa7c3a8 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,5 +1,27 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+	Backport from trunk r237291:
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
+
 	PR c/71381
 	Backport from trunk r237290:
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57..3fa800d 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..e17c0e2
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()
+{
+#define N 100
+  int nonlocal_arg;
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 0000000..70c9ec8
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447a..abb5e6b 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
similarity index 91%
copy from gcc/testsuite/gfortran.dg/goacc/cray.f95
copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95
index 705c18c..51b79b5 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -1,15 +1,16 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
 
-module test
+program test
+  call oacc1
 contains
   subroutine oacc1
     implicit none
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
@@ -52,4 +53,4 @@ contains
     !$acc update host (ptr)
     !$acc update self (ptr)
   end subroutine oacc1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c..d6d5317 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
similarity index 89%
copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
index a605f03..79665b9 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -1,8 +1,10 @@
-module test
-  implicit none
+! See also loop-1.f95.
+
+program test
+  call test1
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
@@ -171,4 +173,4 @@ subroutine test1
   enddo
 
 end subroutine test1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f03..5f81b7a 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
similarity index 90%
copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
index 2a866c7..9be74a8 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -1,10 +1,13 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
 
+program test
+  call test1
+contains
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +26,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +38,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +53,6 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c7..30930f4 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 0000000..2fcaa40
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798..0000000
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 8563687..327f663 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,22 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1806,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1896,22 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	default:
 	  gcc_unreachable ();
 	}
diff --git libgomp/ChangeLog libgomp/ChangeLog
index f6da5c4..620ddb4 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,5 +1,16 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* testsuite/libgomp.oacc-c/nested-function-1.c: New file.
+	* testsuite/libgomp.oacc-c/nested-function-2.c: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
 	PR c/71381
 	Backport from trunk r237290:
 	* testsuite/libgomp.oacc-c-c++-common/cache-1.c: #include
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 0000000..fb2a3ac
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 0000000..2c3f3fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 0000000..fdbca44
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3
+         !$acc loop collapse(2)
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 0000000..4e28196
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! collapse(3) loop nest, bounds passed as dummy arguments
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.  ! out-of-range flag, computed by the OpenACC reduction
+    r = .false.  ! out-of-range flag, computed by the sequential reference
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2  ! sequential reference: same loop nest, r/b in place of l/a
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r so the reference does not depend on l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both flags must agree
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! as test2, with strides v7, v8, v9
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.  ! out-of-range flag, computed by the OpenACC reduction
+    r = .false.  ! out-of-range flag, computed by the sequential reference
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! sequential reference: same loop nest, r/b in place of l/a
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r so the reference does not depend on l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both flags must agree
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 0000000..2f6485e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3 ! driver: calls each test subroutine once
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18) ! i = 2..6, j = -2..4, k = 13..18: exactly the ranges the tests accept
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1) ! same bounds, plus a step of 1 for each loop
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains ! the test subroutines are internal procedures of this program
+  subroutine test1 ! collapse(3) over loop nests written with statement labels and construct names
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll ! a: target array; the rest are loop indices
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3 ! outer loop terminated by statement label 115
+dokk:   do kk=1,3 ! middle loop carries the construct name "dokk"
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue ! terminates the "do 115" loop
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort ! every element must have been set to 1
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3 ! outer loop has both a construct name and a statement label
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol ! labelled END DO closing "dol"
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort ! every element must have been set to 2
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6) ! collapse(3) + .or. reduction; loop bounds are dummy arguments
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) ! a: OpenACC result, b: host reference
+    integer :: v1, v2, v3, v4, v5, v6 ! lower/upper bounds of the i, j and k loops
+    logical :: l, r ! out-of-range flags: l from the OpenACC nest, r from the host nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 ! flag i outside 2..6 or j outside -2..4
+            l = l.or.k.lt.13.or.k.gt.18 ! ... or k outside 13..18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1 ! count the visit while nothing is flagged
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2 ! host reference: same nest without OpenACC directives
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1 ! guard on the host flag r, not on the device result l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort ! both versions must agree on the flag
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort ! ... and on every visited element
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) ! as test2, with loop steps passed in as well
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) ! a: OpenACC result, b: host reference
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 ! v1..v6: loop bounds, v7..v9: loop steps
+    logical :: l, r ! out-of-range flags: l from the OpenACC nest, r from the host nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 ! flag i outside 2..6 or j outside -2..4
+            l = l.or.k.lt.13.or.k.gt.18 ! ... or k outside 13..18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1 ! count the visit while nothing is flagged
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7 ! host reference: same nest without OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1 ! guard on the host flag r, not on the device result l
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort ! both versions must agree on the flag
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort ! ... and on every visited element
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4 ! collapse(3) + .or. reduction; bounds and steps are local variables
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) ! a: OpenACC result, b: host reference
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 ! v1..v6: loop bounds, v7..v9: loop steps
+    logical :: l, r ! out-of-range flags: l from the OpenACC nest, r from the host nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2 ! i runs 2..6
+    v2 = 6
+    v3 = -2 ! j runs -2..4
+    v4 = 4
+    v5 = 13 ! k runs 13..18
+    v6 = 18
+    v7 = 1 ! unit step for all three loops
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 ! flag i outside 2..6 or j outside -2..4
+            l = l.or.k.lt.13.or.k.gt.18 ! ... or k outside 13..18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1 ! count the visit while nothing is flagged
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7 ! host reference: same nest without OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort ! both versions must agree on the flag
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort ! ... and on every visited element
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6) ! collapse(3) + .or. reduction; loop bounds are dummy arguments
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) ! a: OpenACC result, b: host reference
+    integer :: v1, v2, v3, v4, v5, v6 ! lower/upper bounds of the i, j and k loops
+    logical :: l, r ! out-of-range flags: l from the OpenACC nest, r from the host nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 ! flag i outside 2..6 or j outside -2..4
+            l = l.or.k.lt.13.or.k.gt.18 ! ... or k outside 13..18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1 ! count the visit while nothing is flagged
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2 ! host reference: same nest without OpenACC directives
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort ! both versions must agree on the flag
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort ! ... and on every visited element
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9) ! as test5, with loop steps and an extra scalar store
+    integer :: i, j, k, m, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) ! a: OpenACC result, b: host reference
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 ! v1..v6: loop bounds, v7..v9: loop steps
+    logical :: l, r ! out-of-range flags: l from the OpenACC nest, r from the host nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 ! flag i outside 2..6 or j outside -2..4
+            l = l.or.k.lt.13.or.k.gt.18 ! ... or k outside 13..18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1 ! count the visit while nothing is flagged
+            m = i * 100 + j * 10 + k ! m is only written here; it is never read in this subroutine
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7 ! host reference: same nest without OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort ! both versions must agree on the flag
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort ! ... and on every visited element
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3

commit ce7fd1fc5d97d8c73c72a45902da983d5793c374
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jun 10 10:12:36 2016 +0000

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    Backport trunk r237291:
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested-function-1.c: New file.
    	* gcc.dg/goacc/nested-function-2.c: Likewise.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/cray-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
    	* gfortran.dg/goacc/cray.f95: Update.
    	* gfortran.dg/goacc/loop-1.f95: Likewise.
    	* gfortran.dg/goacc/loop-3.f95: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
    	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
    	libgomp/testsuite/
    	PR middle-end/71373
    	* libgomp.oacc-c/nested-function-1.c: New file.
    	* libgomp.oacc-c/nested-function-2.c: Likewise.
    	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@237300 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  12 +
 gcc/gimplify.c                                     |   8 +-
 gcc/omp-low.c                                      |   4 +-
 gcc/testsuite/ChangeLog.gomp                       |  22 ++
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c     | 100 +++++++++
 gcc/testsuite/gcc.dg/goacc/nested-function-2.c     |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 ++++
 .../gfortran.dg/goacc/combined-directives.f90      |   1 +
 .../gfortran.dg/goacc/{cray.f95 => cray-2.f95}     |  11 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95           |   6 +-
 .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} |  36 +--
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95         |  30 +--
 .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} |  15 +-
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95         |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90        |  93 ++++++++
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  73 ------
 gcc/tree-nested.c                                  |  40 ++++
 libgomp/ChangeLog.gomp                             |  11 +
 .../testsuite/libgomp.oacc-c/nested-function-1.c   |  52 +++++
 .../testsuite/libgomp.oacc-c/nested-function-2.c   | 155 +++++++++++++
 .../libgomp.oacc-fortran/nested-function-1.f90     |  70 ++++++
 .../libgomp.oacc-fortran/nested-function-2.f90     | 173 +++++++++++++++
 .../libgomp.oacc-fortran/nested-function-3.f90     | 244 +++++++++++++++++++++
 24 files changed, 1128 insertions(+), 128 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 4477abf..b68538d 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,17 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	Backport from trunk r237291:
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	Backport from trunk r237290:
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
diff --git gcc/gimplify.c gcc/gimplify.c
index 37971c7..717b25f 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8265,10 +8265,16 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_DEVICE_TYPE:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
+	  break;
+
 	case OMP_CLAUSE_BIND:
 	case OMP_CLAUSE_NOHOST:
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index 40ac8c8..fedb195 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2218,7 +2218,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2235,6 +2234,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_BIND:
 	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2392,7 +2392,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2403,6 +2402,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_BIND:
 	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index eef9425..031774d 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,27 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+	Backport from trunk r237291:
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
+
 	PR c/71381
 	Backport from trunk r237290:
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index 2ef5b53..2342c9f 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -114,6 +114,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..e17c0e2
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()  /* Owns the nonlocal_* variables and defines the two nested functions.  */
+{
+#define N 100
+  int nonlocal_arg;  /* Clause argument used by nonlocal ().  */
+  int nonlocal_a[N];  /* Array written (and cached) by nonlocal ().  */
+  int nonlocal_i;  /* Outer loop index used by nonlocal ().  */
+  int nonlocal_j;  /* Inner loop index used by nonlocal ().  */
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()  /* Nested function using only its own local_* variables.  */
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)  /* First variant: gang(num:), bare wait, async with argument.  */
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;  /* Empty body: only the directive matters.  */
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)  /* Second variant: gang(static:), wait with arguments, bare async.  */
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()  /* Nested function using only main's nonlocal_* variables.  */
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)  /* First variant: gang(num:), bare wait, async with argument.  */
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;  /* Empty body: only the directive matters.  */
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)  /* Second variant: gang(static:), wait with arguments, bare async.  */
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 0000000..70c9ec8
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;  /* Read and written by the nested functions below.  */
+  void simple (void)  /* Plain acc loop; bound m, step k and factor l come from main.  */
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)  /* collapse (3) nest bounded by main's k, l and m.  */
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)  /* Same nest as collapse (), with a + reduction on main's j.  */
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+} // This brace therefore closes the "acc parallel" block, not foo.
+void bar (void) // Still inside foo's body, so this is a nested function.
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+} // Closes foo.
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void) // BAR's body is still open, so this is a nested function.
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+} // Closes BAR.
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 00b8822..ffa9371 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -162,6 +162,7 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" { xfail *-*-* } } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
similarity index 91%
copy from gcc/testsuite/gfortran.dg/goacc/cray.f95
copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95
index 705c18c..51b79b5 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -1,15 +1,16 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
 
-module test
+program test
+  call oacc1
 contains
   subroutine oacc1
     implicit none
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
@@ -52,4 +53,4 @@ contains
     !$acc update host (ptr)
     !$acc update self (ptr)
   end subroutine oacc1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c..d6d5317 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
similarity index 89%
copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
index a605f03..79665b9 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -1,8 +1,10 @@
-module test
-  implicit none
+! See also loop-1.f95.
+
+program test
+  call test1
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
@@ -171,4 +173,4 @@ subroutine test1
   enddo
 
 end subroutine test1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f03..5f81b7a 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
similarity index 90%
copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
index 2a866c7..9be74a8 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -1,10 +1,13 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
 
+program test
+  call test1
+contains
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +26,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +38,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +53,6 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c7..30930f4 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 0000000..2fcaa40
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798..0000000
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 1433c3e..984fa81 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,9 +1199,27 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_DEVICE_TYPE:
+	  /* TODO.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	case OMP_CLAUSE_BIND:
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
 	default:
 	  gcc_unreachable ();
@@ -1792,6 +1812,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1880,9 +1902,27 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_DEVICE_TYPE:
+	  /* TODO.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	case OMP_CLAUSE_BIND:
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
 	default:
 	  gcc_unreachable ();
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 17c26c5..af4e0d5 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,5 +1,16 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* testsuite/libgomp.oacc-c/nested-function-1.c: New file.
+	* testsuite/libgomp.oacc-c/nested-function-2.c: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
 	PR c/71381
 	* testsuite/libgomp.oacc-fortran/cache-1.f90: Remove file.
 
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 0000000..fb2a3ac
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 0000000..2c3f3fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 0000000..fdbca44
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3
+         !$acc loop collapse(2)
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 0000000..4e28196
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! unit-stride nest; loop bounds are dummy arguments
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)  ! a: acc result, b: reference
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r  ! l: out-of-range flag from the acc region; r: same flag from the reference nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2  ! reference nest: same iteration space, no OpenACC directives
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r, this nest's own flag
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both nests must agree on whether any index was out of range
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! as test2, with strides v7, v8, v9
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)  ! a: acc result, b: reference
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: out-of-range flag from the acc region; r: same flag from the reference nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! reference nest: same iteration space, no OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r, this nest's own flag
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both nests must agree on whether any index was out of range
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4  ! no arguments: bounds and strides come from the host program's p1..p9
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)  ! a: acc result, b: reference
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: out-of-range flag from the acc region; r: same flag from the reference nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1  ! p1..p9 are host-associated variables of the enclosing program
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! reference nest: same iteration space, no OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both nests must agree on whether any index was out of range
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 0000000..2f6485e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! unit-stride nest; bounds also listed in pcopyin
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)  ! a: acc result, b: reference
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r  ! l: out-of-range flag from the acc region; r: same flag from the reference nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2  ! reference nest: same iteration space, no OpenACC directives
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r, this nest's own flag
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both nests must agree on whether any index was out of range
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! as test2, with strides v7, v8, v9
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)  ! a: acc result, b: reference
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: out-of-range flag from the acc region; r: same flag from the reference nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! reference nest: same iteration space, no OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard on r, this nest's own flag
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both nests must agree on whether any index was out of range
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! strided nest that also assigns a scalar m
+    integer :: i, j, k, m, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)  ! m declared explicitly
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: out-of-range flag from the acc region; r: same flag from the reference nest
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k  ! m is only assigned here; it is never read in this subroutine
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! reference nest: same iteration space, no OpenACC directives
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! both nests must agree on whether any index was out of range
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3


Grüße
 Thomas
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20160610/8fb0da80/attachment.sig>


More information about the Gcc-patches mailing list