This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
| Other format: | [Raw text] | |
Hi! On Thu, 2 Jun 2016 18:25:17 +0200, Jakub Jelinek <jakub@redhat.com> wrote: > On Thu, Jun 02, 2016 at 06:20:57PM +0200, Thomas Schwinge wrote: > > [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition > LGTM. Committed to trunk in r237291, gcc-6-branch in r237296, and gomp-4_0-branch in r237300: commit e2c7e1b8ad89532911b25af34875a75f24823e1a Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Fri Jun 10 09:22:51 2016 +0000 [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition gcc/ * gimplify.c (gimplify_adjust_omp_clauses): Discard OMP_CLAUSE_TILE. * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning for OpenACC tile clauses. * gfortran.dg/goacc/combined-directives.f90: Likewise. gcc/ PR middle-end/71373 * tree-nested.c (convert_nonlocal_omp_clauses) (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. gcc/testsuite/ PR middle-end/71373 * gcc.dg/goacc/nested-function-1.c: New file. * gcc.dg/goacc/nested-function-2.c: Likewise. * gcc.dg/goacc/pr71373.c: Likewise. * gfortran.dg/goacc/cray-2.f95: Likewise. * gfortran.dg/goacc/loop-1-2.f95: Likewise. * gfortran.dg/goacc/loop-3-2.f95: Likewise. * gfortran.dg/goacc/cray.f95: Update. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-3.f95: Likewise. * gfortran.dg/goacc/subroutines.f90: Update, and rename to... * gfortran.dg/goacc/nested-function-1.f90: ... this new file. libgomp/testsuite/ PR middle-end/71373 * libgomp.oacc-c/nested-function-1.c: New file. * libgomp.oacc-c/nested-function-2.c: Likewise. * libgomp.oacc-fortran/nested-function-1.f90: Likewise. * libgomp.oacc-fortran/nested-function-2.f90: Likewise. * libgomp.oacc-fortran/nested-function-3.f90: Likewise. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237291 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 10 + gcc/gimplify.c | 6 + gcc/omp-low.c | 4 +- gcc/testsuite/ChangeLog | 20 ++ .../c-c++-common/goacc/combined-directives.c | 3 +- gcc/testsuite/gcc.dg/goacc/nested-function-1.c | 100 +++++++++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c | 45 ++++ gcc/testsuite/gcc.dg/goacc/pr71373.c | 41 ++++ .../gfortran.dg/goacc/combined-directives.f90 | 3 +- .../gfortran.dg/goacc/{cray.f95 => cray-2.f95} | 11 +- gcc/testsuite/gfortran.dg/goacc/cray.f95 | 6 +- .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} | 36 +-- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 | 30 +-- .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} | 15 +- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 | 11 +- .../gfortran.dg/goacc/nested-function-1.f90 | 93 ++++++++ gcc/testsuite/gfortran.dg/goacc/subroutines.f90 | 73 ------ gcc/tree-nested.c | 30 +++ libgomp/ChangeLog | 10 + .../testsuite/libgomp.oacc-c/nested-function-1.c | 52 +++++ .../testsuite/libgomp.oacc-c/nested-function-2.c | 155 +++++++++++++ .../libgomp.oacc-fortran/nested-function-1.f90 | 70 ++++++ .../libgomp.oacc-fortran/nested-function-2.f90 | 173 +++++++++++++++ .../libgomp.oacc-fortran/nested-function-3.f90 | 244 +++++++++++++++++++++ 24 files changed, 1113 insertions(+), 128 deletions(-) diff --git gcc/ChangeLog gcc/ChangeLog index 6afbae7..9cab311 100644 --- gcc/ChangeLog +++ gcc/ChangeLog @@ -1,5 +1,15 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + * tree-nested.c (convert_nonlocal_omp_clauses) + (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, + OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, + OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. + + * gimplify.c (gimplify_adjust_omp_clauses): Discard + OMP_CLAUSE_TILE. + * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. + * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE__CACHE_. 
diff --git gcc/gimplify.c gcc/gimplify.c index f12c6a1..7c19cf3 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + break; + case OMP_CLAUSE_TILE: + /* We're not yet making use of the information provided by OpenACC + tile clauses. Discard these here, to simplify later middle end + processing. */ + remove = true; break; default: diff --git gcc/omp-low.c gcc/omp-low.c index 91d5fcf..22e5909 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: @@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, install_var_local (decl, ctx); break; + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); @@ -2357,13 +2357,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: case OMP_CLAUSE__GRIDDIM_: break; + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog index e15b009..325de75 100644 --- gcc/testsuite/ChangeLog +++ gcc/testsuite/ChangeLog @@ -1,4 +1,24 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + PR middle-end/71373 + * gcc.dg/goacc/nested-function-1.c: New file. + * gcc.dg/goacc/nested-function-2.c: Likewise. + * gcc.dg/goacc/pr71373.c: Likewise. + * gfortran.dg/goacc/cray-2.f95: Likewise. + * gfortran.dg/goacc/loop-1-2.f95: Likewise. + * gfortran.dg/goacc/loop-3-2.f95: Likewise. + * gfortran.dg/goacc/cray.f95: Update. 
+ * gfortran.dg/goacc/loop-1.f95: Likewise. + * gfortran.dg/goacc/loop-3.f95: Likewise. + * gfortran.dg/goacc/subroutines.f90: Update, and rename to... + * gfortran.dg/goacc/nested-function-1.f90: ... this new file. + +2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + + * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning + for OpenACC tile clauses. + * gfortran.dg/goacc/combined-directives.f90: Likewise. PR c/71381 * c-c++-common/goacc/cache-1.c: Update. Move invalid usage tests diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c index c2a3c57..3fa800d 100644 --- gcc/testsuite/c-c++-common/goacc/combined-directives.c +++ gcc/testsuite/c-c++-common/goacc/combined-directives.c @@ -111,6 +111,7 @@ test () // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } } +// XFAILed: OpenACC tile clauses are discarded during gimplification. +// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } } // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } } // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c new file mode 100644 index 0000000..e17c0e2 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c @@ -0,0 +1,100 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ +/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran + version. 
*/ + +int main () +{ +#define N 100 + int nonlocal_arg; + int nonlocal_a[N]; + int nonlocal_i; + int nonlocal_j; + + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + + void local () + { + int local_i; + int local_arg; + int local_a[N]; + int local_j; + + for (int i = 0; i < N; ++i) + local_a[i] = 5; + local_arg = 5; + +#pragma acc kernels loop \ + gang(num:local_arg) worker(local_arg) vector(local_arg) \ + wait async(local_arg) + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:5]) + local_a[local_i] = 100; +#pragma acc loop seq tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; + } + +#pragma acc kernels loop \ + gang(static:local_arg) worker(local_arg) vector(local_arg) \ + wait(local_arg, local_arg + 1, local_arg + 2) async + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:4]) + local_a[local_i] = 100; +#pragma acc loop seq tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; + } + } + + void nonlocal () + { + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + +#pragma acc kernels loop \ + gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait async(nonlocal_arg) + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:3]) + nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(2) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(3) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + +#pragma acc kernels loop \ + gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:2]) + 
nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + } + + local (); + nonlocal (); + + return 0; +} diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c new file mode 100644 index 0000000..70c9ec8 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c @@ -0,0 +1,45 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ + +int +main (void) +{ + int j = 0, k = 6, l = 7, m = 8; + void simple (void) + { + int i; +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < m; i+= k) + j = (m + i - j) * l; + } + } + void collapse (void) + { + int x, y, z; +#pragma acc parallel + { +#pragma acc loop collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + void reduction (void) + { + int x, y, z; +#pragma acc parallel reduction (+:j) + { +#pragma acc loop reduction (+:j) collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + simple(); + collapse(); + reduction(); + return 0; +} diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c new file mode 100644 index 0000000..9381752 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/pr71373.c @@ -0,0 +1,41 @@ +/* Unintentional nested function usage. */ +/* Due to missing right braces '}', the following functions are parsed as + nested functions. This ran into an ICE. */ + +void foo (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + // Note right brace '}' commented out here. + //} +} +void bar (void) +{ +} + +// Adding right brace '}' here, to make this compile. +} + + +// ..., and the other way round: + +void BAR (void) +{ +// Note right brace '}' commented out here. 
+//} + +void FOO (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + } +} + +// Adding right brace '}' here, to make this compile. +} diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 index 42a447a..abb5e6b 100644 --- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 +++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 @@ -143,7 +143,8 @@ end subroutine test ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } } +! XFAILed: OpenACC tile clauses are discarded during gimplification. +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } } diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95 similarity index 91% copy from gcc/testsuite/gfortran.dg/goacc/cray.f95 copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95 index 705c18c..51b79b5 100644 --- gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95 @@ -1,15 +1,16 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray.f95. 
-module test +program test + call oacc1 contains subroutine oacc1 implicit none integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! { dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } @@ -52,4 +53,4 @@ contains !$acc update host (ptr) !$acc update self (ptr) end subroutine oacc1 -end module test +end program test diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95 index 705c18c..d6d5317 100644 --- gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ gcc/testsuite/gfortran.dg/goacc/cray.f95 @@ -1,5 +1,5 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray-2.f95. module test contains @@ -8,8 +8,8 @@ contains integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! { dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 similarity index 89% copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95 copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 index a605f03..79665b9 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 @@ -1,8 +1,10 @@ -module test - implicit none +! See also loop-1.f95. + +program test + call test1 contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. 
must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. 
must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! Both seq and independent are not allowed @@ -171,4 +173,4 @@ subroutine test1 enddo end subroutine test1 -end module test +end program test diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95 index a605f03..5f81b7a 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95 @@ -1,8 +1,10 @@ +! See also loop-1-2.f95. + module test implicit none contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! 
combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! Both seq and independent are not allowed diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 similarity index 90% copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95 copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 index 2a866c7..9be74a8 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 @@ -1,10 +1,13 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3.f95. +program test + call test1 +contains subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +26,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +38,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! 
{ dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +53,6 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - +end program test diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95 index 2a866c7..30930f4 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95 @@ -1,10 +1,10 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3-2.f95. subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +23,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +35,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +50,5 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 new file mode 100644 index 0000000..2fcaa40 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 @@ -0,0 +1,93 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. +! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version. 
+ +program main + integer, parameter :: N = 100 + integer :: nonlocal_arg + integer :: nonlocal_a(N) + integer :: nonlocal_i + integer :: nonlocal_j + + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + call local () + call nonlocal () + +contains + + subroutine local () + integer :: local_i + integer :: local_arg + integer :: local_a(N) + integer :: local_j + + local_a (:) = 5 + local_arg = 5 + + !$acc kernels loop & + !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait async(local_arg) + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 5)) + local_a(local_i) = 100 + !$acc loop seq tile(*) + do local_j = 1, N + enddo + !$acc loop auto independent tile(1) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait(local_arg, local_arg + 1, local_arg + 2) async + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 4)) + local_a(local_i) = 100 + !$acc loop seq tile(1) + do local_j = 1, N + enddo + !$acc loop auto independent tile(*) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine local + + subroutine nonlocal () + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + !$acc kernels loop & + !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait async(nonlocal_arg) + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(2) + do nonlocal_j = 1, N + enddo + !$acc loop auto independent tile(3) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(*) + do nonlocal_j = 1, N + enddo + !$acc loop auto 
independent tile(*) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine nonlocal +end program main diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90 deleted file mode 100644 index 6cab798..0000000 --- gcc/testsuite/gfortran.dg/goacc/subroutines.f90 +++ /dev/null @@ -1,73 +0,0 @@ -! Exercise how tree-nested.c handles gang, worker vector and seq. - -! { dg-do compile } - -program main - integer, parameter :: N = 100 - integer :: nonlocal_arg - integer :: nonlocal_a(N) - integer :: nonlocal_i - integer :: nonlocal_j - - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - call local () - call nonlocal () - -contains - - subroutine local () - integer :: local_i - integer :: local_arg - integer :: local_a(N) - integer :: local_j - - local_a (:) = 5 - local_arg = 5 - - !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:local_arg) worker(local_arg) & - !$acc vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine local - - subroutine nonlocal () - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine nonlocal -end program main diff --git gcc/tree-nested.c gcc/tree-nested.c index 25a92aa..6fc6326 100644 --- gcc/tree-nested.c +++ gcc/tree-nested.c @@ -1114,6 
+1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. */ + gcc_unreachable (); + default: gcc_unreachable (); } @@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. 
*/ + gcc_unreachable (); + default: gcc_unreachable (); } diff --git libgomp/ChangeLog libgomp/ChangeLog index 5c7f41a..cf551f4 100644 --- libgomp/ChangeLog +++ libgomp/ChangeLog @@ -1,4 +1,14 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + PR middle-end/71373 + * libgomp.oacc-c/nested-function-1.c: New file. + * libgomp.oacc-c/nested-function-2.c: Likewise. + * libgomp.oacc-fortran/nested-function-1.f90: Likewise. + * libgomp.oacc-fortran/nested-function-2.f90: Likewise. + * libgomp.oacc-fortran/nested-function-3.f90: Likewise. + +2016-06-10 Thomas Schwinge <thomas@codesourcery.com> PR c/71381 * testsuite/libgomp.oacc-c-c++-common/cache-1.c: #include diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c new file mode 100644 index 0000000..fb2a3ac --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c @@ -0,0 +1,52 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. 
*/ + +int +main (void) +{ + void test1 () + { + int i, j, k; + int a[4][7][8]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(4 - 1) + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + a[i][j][k] = i + j + k; + + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + if (a[i][j][k] != i + j + k) + __builtin_abort(); + } + + void test2 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort (); + } + + test1 (); + test2 (); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c new file mode 100644 index 0000000..2c3f3fe --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c @@ -0,0 +1,155 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. 
*/ + +int +main (void) +{ + int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1; + + void test1 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, '\0', sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort(); + } + + void test2 (int v1, int v2, int v3, int v4, int v5, int v6) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8, + int v9) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) 
+ { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test4 () + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8, + v9 = p9; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + test1 (); + test2 (p1, p2, p3, p4, p5, p6); + test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9); + test4 (); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 new file mode 100644 index 0000000..fdbca44 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 @@ -0,0 +1,70 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program collapse2 + call test1 + call test2 +contains + subroutine test1 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. 
+ a(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop worker vector collapse(4 - 1) + do 164 i = 1, 3 + do 164 j = 4, 6 + do 164 k = 5, 7 + a(i, j, k) = i + j + k +164 end do + !$acc loop worker vector reduction(.or.:l) collapse(2) +firstdo: do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do firstdo + !$acc end parallel + if (l) call abort + end subroutine test1 + + subroutine test2 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + a = 0 + !$acc parallel + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + do 115 k=1,3 + !$acc loop collapse(2) + dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc loop gang(static:1) collapse(1) + do k=1,3 + if (any(a(k,1:3,1:3).ne.1)) call abort + enddo + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + dol: do 120 l=1,3 + !$acc loop collapse(2) + doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc loop gang(static:1) collapse(1) + do l=1,3 + if (any(a(l,1:3,1:3).ne.2)) call abort + enddo + !$acc end parallel + end subroutine test2 + +end program collapse2 diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 new file mode 100644 index 0000000..4e28196 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 @@ -0,0 +1,173 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! 
{ dg-do run } + +program collapse3 + integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9 + p1 = 2 + p2 = 6 + p3 = -2 + p4 = 4 + p5 = 13 + p6 = 18 + p7 = 1 + p8 = 1 + p9 = 1 + call test1 + call test2 (p1, p2, p3, p4, p5, p6) + call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9) + call test4 +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = p1 + v2 = p2 + v3 = p3 + v4 = p4 + v5 = p5 + v6 = p6 + v7 = p7 + v8 = p8 + v9 = p9 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. 
b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + +end program collapse3 diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 new file mode 100644 index 0000000..2f6485e --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 @@ -0,0 +1,244 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program sub_collapse_3 + call test1 + call test2 (2, 6, -2, 4, 13, 18) + call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1) + call test4 + call test5 (2, 6, -2, 4, 13, 18) + call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1) +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. 
r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 2 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + v7 = 1 + v8 = 1 + v9 = 1 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + + subroutine test5(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test5 + + subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. 
+ r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + m = i * 100 + j * 10 + k + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test6 + +end program sub_collapse_3 commit 507c9999e155b704d9a7244b1c9d467a95143b77 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Fri Jun 10 09:46:18 2016 +0000 [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition Backport trunk r237291: gcc/ * gimplify.c (gimplify_adjust_omp_clauses): Discard OMP_CLAUSE_TILE. * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning for OpenACC tile clauses. * gfortran.dg/goacc/combined-directives.f90: Likewise. gcc/ PR middle-end/71373 * tree-nested.c (convert_nonlocal_omp_clauses) (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. gcc/testsuite/ PR middle-end/71373 * gcc.dg/goacc/nested-function-1.c: New file. * gcc.dg/goacc/nested-function-2.c: Likewise. * gcc.dg/goacc/pr71373.c: Likewise. * gfortran.dg/goacc/cray-2.f95: Likewise. * gfortran.dg/goacc/loop-1-2.f95: Likewise. * gfortran.dg/goacc/loop-3-2.f95: Likewise. * gfortran.dg/goacc/cray.f95: Update. 
* gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-3.f95: Likewise. * gfortran.dg/goacc/subroutines.f90: Update, and rename to... * gfortran.dg/goacc/nested-function-1.f90: ... this new file. libgomp/testsuite/ PR middle-end/71373 * libgomp.oacc-c/nested-function-1.c: New file. * libgomp.oacc-c/nested-function-2.c: Likewise. * libgomp.oacc-fortran/nested-function-1.f90: Likewise. * libgomp.oacc-fortran/nested-function-2.f90: Likewise. * libgomp.oacc-fortran/nested-function-3.f90: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-6-branch@237296 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 12 + gcc/gimplify.c | 6 + gcc/omp-low.c | 4 +- gcc/testsuite/ChangeLog | 22 ++ .../c-c++-common/goacc/combined-directives.c | 3 +- gcc/testsuite/gcc.dg/goacc/nested-function-1.c | 100 +++++++++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c | 45 ++++ gcc/testsuite/gcc.dg/goacc/pr71373.c | 41 ++++ .../gfortran.dg/goacc/combined-directives.f90 | 3 +- .../gfortran.dg/goacc/{cray.f95 => cray-2.f95} | 11 +- gcc/testsuite/gfortran.dg/goacc/cray.f95 | 6 +- .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} | 36 +-- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 | 30 +-- .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} | 15 +- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 | 11 +- .../gfortran.dg/goacc/nested-function-1.f90 | 93 ++++++++ gcc/testsuite/gfortran.dg/goacc/subroutines.f90 | 73 ------ gcc/tree-nested.c | 32 +++ libgomp/ChangeLog | 11 + .../testsuite/libgomp.oacc-c/nested-function-1.c | 52 +++++ .../testsuite/libgomp.oacc-c/nested-function-2.c | 155 +++++++++++++ .../libgomp.oacc-fortran/nested-function-1.f90 | 70 ++++++ .../libgomp.oacc-fortran/nested-function-2.f90 | 173 +++++++++++++++ .../libgomp.oacc-fortran/nested-function-3.f90 | 244 +++++++++++++++++++++ 24 files changed, 1120 insertions(+), 128 deletions(-) diff --git gcc/ChangeLog gcc/ChangeLog index cbdcc42..bb9099d 100644 --- gcc/ChangeLog +++ gcc/ChangeLog @@ -1,5 +1,17 @@ 
2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + Backport from trunk r237291: + * tree-nested.c (convert_nonlocal_omp_clauses) + (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, + OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, + OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. + + Backport from trunk r237291: + * gimplify.c (gimplify_adjust_omp_clauses): Discard + OMP_CLAUSE_TILE. + * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. + Backport from trunk r237290: * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE__CACHE_. diff --git gcc/gimplify.c gcc/gimplify.c index 259c88b..846a75a 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -8224,7 +8224,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + break; + case OMP_CLAUSE_TILE: + /* We're not yet making use of the information provided by OpenACC + tile clauses. Discard these here, to simplify later middle end + processing. 
*/ + remove = true; break; default: diff --git gcc/omp-low.c gcc/omp-low.c index e570c22..5f1e6aa 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -2186,7 +2186,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: @@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, break; case OMP_CLAUSE_DEVICE_RESIDENT: + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); @@ -2357,7 +2357,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: @@ -2365,6 +2364,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, break; case OMP_CLAUSE_DEVICE_RESIDENT: + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog index 5e3f7e3..aa7c3a8 100644 --- gcc/testsuite/ChangeLog +++ gcc/testsuite/ChangeLog @@ -1,5 +1,27 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + Backport from trunk r237291: + 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + * gcc.dg/goacc/nested-function-1.c: New file. + * gcc.dg/goacc/nested-function-2.c: Likewise. + * gcc.dg/goacc/pr71373.c: Likewise. + * gfortran.dg/goacc/cray-2.f95: Likewise. + * gfortran.dg/goacc/loop-1-2.f95: Likewise. + * gfortran.dg/goacc/loop-3-2.f95: Likewise. + * gfortran.dg/goacc/cray.f95: Update. + * gfortran.dg/goacc/loop-1.f95: Likewise. + * gfortran.dg/goacc/loop-3.f95: Likewise. + * gfortran.dg/goacc/subroutines.f90: Update, and rename to... + * gfortran.dg/goacc/nested-function-1.f90: ... this new file. 
+ + Backport from trunk r237291: + * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning + for OpenACC tile clauses. + * gfortran.dg/goacc/combined-directives.f90: Likewise. + PR c/71381 Backport from trunk r237290: * c-c++-common/goacc/cache-1.c: Update. Move invalid usage tests diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c index c2a3c57..3fa800d 100644 --- gcc/testsuite/c-c++-common/goacc/combined-directives.c +++ gcc/testsuite/c-c++-common/goacc/combined-directives.c @@ -111,6 +111,7 @@ test () // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } } +// XFAILed: OpenACC tile clauses are discarded during gimplification. +// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } } // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } } // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c new file mode 100644 index 0000000..e17c0e2 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c @@ -0,0 +1,100 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ +/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran + version. 
*/ + +int main () +{ +#define N 100 + int nonlocal_arg; + int nonlocal_a[N]; + int nonlocal_i; + int nonlocal_j; + + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + + void local () + { + int local_i; + int local_arg; + int local_a[N]; + int local_j; + + for (int i = 0; i < N; ++i) + local_a[i] = 5; + local_arg = 5; + +#pragma acc kernels loop \ + gang(num:local_arg) worker(local_arg) vector(local_arg) \ + wait async(local_arg) + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:5]) + local_a[local_i] = 100; +#pragma acc loop seq tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; + } + +#pragma acc kernels loop \ + gang(static:local_arg) worker(local_arg) vector(local_arg) \ + wait(local_arg, local_arg + 1, local_arg + 2) async + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:4]) + local_a[local_i] = 100; +#pragma acc loop seq tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; + } + } + + void nonlocal () + { + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + +#pragma acc kernels loop \ + gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait async(nonlocal_arg) + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:3]) + nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(2) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(3) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + +#pragma acc kernels loop \ + gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:2]) + 
nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + } + + local (); + nonlocal (); + + return 0; +} diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c new file mode 100644 index 0000000..70c9ec8 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c @@ -0,0 +1,45 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ + +int +main (void) +{ + int j = 0, k = 6, l = 7, m = 8; + void simple (void) + { + int i; +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < m; i+= k) + j = (m + i - j) * l; + } + } + void collapse (void) + { + int x, y, z; +#pragma acc parallel + { +#pragma acc loop collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + void reduction (void) + { + int x, y, z; +#pragma acc parallel reduction (+:j) + { +#pragma acc loop reduction (+:j) collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + simple(); + collapse(); + reduction(); + return 0; +} diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c new file mode 100644 index 0000000..9381752 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/pr71373.c @@ -0,0 +1,41 @@ +/* Unintentional nested function usage. */ +/* Due to missing right braces '}', the following functions are parsed as + nested functions. This ran into an ICE. */ + +void foo (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + // Note right brace '}' commented out here. + //} +} +void bar (void) +{ +} + +// Adding right brace '}' here, to make this compile. +} + + +// ..., and the other way round: + +void BAR (void) +{ +// Note right brace '}' commented out here. 
+//} + +void FOO (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + } +} + +// Adding right brace '}' here, to make this compile. +} diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 index 42a447a..abb5e6b 100644 --- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 +++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 @@ -143,7 +143,8 @@ end subroutine test ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } } +! XFAILed: OpenACC tile clauses are discarded during gimplification. +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } } diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95 similarity index 91% copy from gcc/testsuite/gfortran.dg/goacc/cray.f95 copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95 index 705c18c..51b79b5 100644 --- gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95 @@ -1,15 +1,16 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray.f95. 
-module test +program test + call oacc1 contains subroutine oacc1 implicit none integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! { dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } @@ -52,4 +53,4 @@ contains !$acc update host (ptr) !$acc update self (ptr) end subroutine oacc1 -end module test +end program test diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95 index 705c18c..d6d5317 100644 --- gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ gcc/testsuite/gfortran.dg/goacc/cray.f95 @@ -1,5 +1,5 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray-2.f95. module test contains @@ -8,8 +8,8 @@ contains integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! { dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 similarity index 89% copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95 copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 index a605f03..79665b9 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 @@ -1,8 +1,10 @@ -module test - implicit none +! See also loop-1.f95. + +program test + call test1 contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. 
must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. 
must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! Both seq and independent are not allowed @@ -171,4 +173,4 @@ subroutine test1 enddo end subroutine test1 -end module test +end program test diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95 index a605f03..5f81b7a 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95 @@ -1,8 +1,10 @@ +! See also loop-1-2.f95. + module test implicit none contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! 
combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! Both seq and independent are not allowed diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 similarity index 90% copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95 copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 index 2a866c7..9be74a8 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 @@ -1,10 +1,13 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3.f95. +program test + call test1 +contains subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +26,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +38,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! 
{ dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +53,6 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - +end program test diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95 index 2a866c7..30930f4 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95 @@ -1,10 +1,10 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3-2.f95. subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +23,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +35,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +50,5 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 new file mode 100644 index 0000000..2fcaa40 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 @@ -0,0 +1,93 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. +! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version. 
+ +program main + integer, parameter :: N = 100 + integer :: nonlocal_arg + integer :: nonlocal_a(N) + integer :: nonlocal_i + integer :: nonlocal_j + + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + call local () + call nonlocal () + +contains + + subroutine local () + integer :: local_i + integer :: local_arg + integer :: local_a(N) + integer :: local_j + + local_a (:) = 5 + local_arg = 5 + + !$acc kernels loop & + !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait async(local_arg) + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 5)) + local_a(local_i) = 100 + !$acc loop seq tile(*) + do local_j = 1, N + enddo + !$acc loop auto independent tile(1) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait(local_arg, local_arg + 1, local_arg + 2) async + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 4)) + local_a(local_i) = 100 + !$acc loop seq tile(1) + do local_j = 1, N + enddo + !$acc loop auto independent tile(*) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine local + + subroutine nonlocal () + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + !$acc kernels loop & + !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait async(nonlocal_arg) + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(2) + do nonlocal_j = 1, N + enddo + !$acc loop auto independent tile(3) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(*) + do nonlocal_j = 1, N + enddo + !$acc loop auto 
independent tile(*) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine nonlocal +end program main diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90 deleted file mode 100644 index 6cab798..0000000 --- gcc/testsuite/gfortran.dg/goacc/subroutines.f90 +++ /dev/null @@ -1,73 +0,0 @@ -! Exercise how tree-nested.c handles gang, worker vector and seq. - -! { dg-do compile } - -program main - integer, parameter :: N = 100 - integer :: nonlocal_arg - integer :: nonlocal_a(N) - integer :: nonlocal_i - integer :: nonlocal_j - - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - call local () - call nonlocal () - -contains - - subroutine local () - integer :: local_i - integer :: local_arg - integer :: local_a(N) - integer :: local_j - - local_a (:) = 5 - local_arg = 5 - - !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:local_arg) worker(local_arg) & - !$acc vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine local - - subroutine nonlocal () - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine nonlocal -end program main diff --git gcc/tree-nested.c gcc/tree-nested.c index 8563687..327f663 100644 --- gcc/tree-nested.c +++ gcc/tree-nested.c @@ -1114,6 
+1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1197,8 +1199,22 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE_DEVICE_RESIDENT: default: gcc_unreachable (); } @@ -1790,6 +1806,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1878,8 +1896,22 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. 
*/ + gcc_unreachable (); + + case OMP_CLAUSE_DEVICE_RESIDENT: default: gcc_unreachable (); } diff --git libgomp/ChangeLog libgomp/ChangeLog index f6da5c4..620ddb4 100644 --- libgomp/ChangeLog +++ libgomp/ChangeLog @@ -1,5 +1,16 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + Backport from trunk r237291: + 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + * libgomp.oacc-c/nested-function-1.c: New file. + * libgomp.oacc-c/nested-function-2.c: Likewise. + * libgomp.oacc-fortran/nested-function-1.f90: Likewise. + * libgomp.oacc-fortran/nested-function-2.f90: Likewise. + * libgomp.oacc-fortran/nested-function-3.f90: Likewise. + PR c/71381 Backport from trunk r237290: * testsuite/libgomp.oacc-c-c++-common/cache-1.c: #include diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c new file mode 100644 index 0000000..fb2a3ac --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c @@ -0,0 +1,52 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. 
*/ + +int +main (void) +{ + void test1 () + { + int i, j, k; + int a[4][7][8]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(4 - 1) + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + a[i][j][k] = i + j + k; + + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + if (a[i][j][k] != i + j + k) + __builtin_abort(); + } + + void test2 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort (); + } + + test1 (); + test2 (); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c new file mode 100644 index 0000000..2c3f3fe --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c @@ -0,0 +1,155 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. 
*/ + +int +main (void) +{ + int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1; + + void test1 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, '\0', sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort(); + } + + void test2 (int v1, int v2, int v3, int v4, int v5, int v6) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8, + int v9) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) 
+ { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test4 () + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8, + v9 = p9; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + test1 (); + test2 (p1, p2, p3, p4, p5, p6); + test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9); + test4 (); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 new file mode 100644 index 0000000..fdbca44 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 @@ -0,0 +1,70 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program collapse2 + call test1 + call test2 +contains + subroutine test1 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. 
+ a(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop worker vector collapse(4 - 1) + do 164 i = 1, 3 + do 164 j = 4, 6 + do 164 k = 5, 7 + a(i, j, k) = i + j + k +164 end do + !$acc loop worker vector reduction(.or.:l) collapse(2) +firstdo: do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do firstdo + !$acc end parallel + if (l) call abort + end subroutine test1 + + subroutine test2 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + a = 0 + !$acc parallel + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + do 115 k=1,3 + !$acc loop collapse(2) + dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc loop gang(static:1) collapse(1) + do k=1,3 + if (any(a(k,1:3,1:3).ne.1)) call abort + enddo + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + dol: do 120 l=1,3 + !$acc loop collapse(2) + doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc loop gang(static:1) collapse(1) + do l=1,3 + if (any(a(l,1:3,1:3).ne.2)) call abort + enddo + !$acc end parallel + end subroutine test2 + +end program collapse2 diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 new file mode 100644 index 0000000..4e28196 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 @@ -0,0 +1,173 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! 
{ dg-do run } + +program collapse3 + integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9 + p1 = 2 + p2 = 6 + p3 = -2 + p4 = 4 + p5 = 13 + p6 = 18 + p7 = 1 + p8 = 1 + p9 = 1 + call test1 + call test2 (p1, p2, p3, p4, p5, p6) + call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9) + call test4 +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = p1 + v2 = p2 + v3 = p3 + v4 = p4 + v5 = p5 + v6 = p6 + v7 = p7 + v8 = p8 + v9 = p9 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. 
b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + +end program collapse3 diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 new file mode 100644 index 0000000..2f6485e --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 @@ -0,0 +1,244 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program sub_collapse_3 + call test1 + call test2 (2, 6, -2, 4, 13, 18) + call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1) + call test4 + call test5 (2, 6, -2, 4, 13, 18) + call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1) +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. 
r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 2 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + v7 = 1 + v8 = 1 + v9 = 1 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + + subroutine test5(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test5 + + subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. 
+ r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + m = i * 100 + j * 10 + k + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test6 + +end program sub_collapse_3 commit ce7fd1fc5d97d8c73c72a45902da983d5793c374 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Fri Jun 10 10:12:36 2016 +0000 [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition Backport trunk r237291: gcc/ * gimplify.c (gimplify_adjust_omp_clauses): Discard OMP_CLAUSE_TILE. * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning for OpenACC tile clauses. * gfortran.dg/goacc/combined-directives.f90: Likewise. gcc/ PR middle-end/71373 * tree-nested.c (convert_nonlocal_omp_clauses) (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. gcc/testsuite/ PR middle-end/71373 * gcc.dg/goacc/nested-function-1.c: New file. * gcc.dg/goacc/nested-function-2.c: Likewise. * gcc.dg/goacc/pr71373.c: Likewise. * gfortran.dg/goacc/cray-2.f95: Likewise. * gfortran.dg/goacc/loop-1-2.f95: Likewise. * gfortran.dg/goacc/loop-3-2.f95: Likewise. * gfortran.dg/goacc/cray.f95: Update. 
* gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-3.f95: Likewise. * gfortran.dg/goacc/subroutines.f90: Update, and rename to... * gfortran.dg/goacc/nested-function-1.f90: ... this new file. libgomp/testsuite/ PR middle-end/71373 * libgomp.oacc-c/nested-function-1.c: New file. * libgomp.oacc-c/nested-function-2.c: Likewise. * libgomp.oacc-fortran/nested-function-1.f90: Likewise. * libgomp.oacc-fortran/nested-function-2.f90: Likewise. * libgomp.oacc-fortran/nested-function-3.f90: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@237300 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 12 + gcc/gimplify.c | 8 +- gcc/omp-low.c | 4 +- gcc/testsuite/ChangeLog.gomp | 22 ++ .../c-c++-common/goacc/combined-directives.c | 3 +- gcc/testsuite/gcc.dg/goacc/nested-function-1.c | 100 +++++++++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c | 45 ++++ gcc/testsuite/gcc.dg/goacc/pr71373.c | 41 ++++ .../gfortran.dg/goacc/combined-directives.f90 | 1 + .../gfortran.dg/goacc/{cray.f95 => cray-2.f95} | 11 +- gcc/testsuite/gfortran.dg/goacc/cray.f95 | 6 +- .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} | 36 +-- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 | 30 +-- .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} | 15 +- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 | 11 +- .../gfortran.dg/goacc/nested-function-1.f90 | 93 ++++++++ gcc/testsuite/gfortran.dg/goacc/subroutines.f90 | 73 ------ gcc/tree-nested.c | 40 ++++ libgomp/ChangeLog.gomp | 11 + .../testsuite/libgomp.oacc-c/nested-function-1.c | 52 +++++ .../testsuite/libgomp.oacc-c/nested-function-2.c | 155 +++++++++++++ .../libgomp.oacc-fortran/nested-function-1.f90 | 70 ++++++ .../libgomp.oacc-fortran/nested-function-2.f90 | 173 +++++++++++++++ .../libgomp.oacc-fortran/nested-function-3.f90 | 244 +++++++++++++++++++++ 24 files changed, 1128 insertions(+), 128 deletions(-) diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index 4477abf..b68538d 100644 --- gcc/ChangeLog.gomp +++ 
gcc/ChangeLog.gomp @@ -1,5 +1,17 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + Backport from trunk r237291: + * tree-nested.c (convert_nonlocal_omp_clauses) + (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, + OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, + OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. + + Backport from trunk r237291: + * gimplify.c (gimplify_adjust_omp_clauses): Discard + OMP_CLAUSE_TILE. + * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. + Backport from trunk r237290: * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE__CACHE_. diff --git gcc/gimplify.c gcc/gimplify.c index 37971c7..717b25f 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -8265,10 +8265,16 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_DEVICE_TYPE: break; + case OMP_CLAUSE_TILE: + /* We're not yet making use of the information provided by OpenACC + tile clauses. Discard these here, to simplify later middle end + processing. 
*/ + remove = true; + break; + case OMP_CLAUSE_BIND: case OMP_CLAUSE_NOHOST: default: diff --git gcc/omp-low.c gcc/omp-low.c index 40ac8c8..fedb195 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -2218,7 +2218,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: @@ -2235,6 +2234,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_BIND: case OMP_CLAUSE_DEVICE_RESIDENT: case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); @@ -2392,7 +2392,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: @@ -2403,6 +2402,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_BIND: case OMP_CLAUSE_DEVICE_RESIDENT: case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp index eef9425..031774d 100644 --- gcc/testsuite/ChangeLog.gomp +++ gcc/testsuite/ChangeLog.gomp @@ -1,5 +1,27 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + Backport from trunk r237291: + 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + * gcc.dg/goacc/nested-function-1.c: New file. + * gcc.dg/goacc/nested-function-2.c: Likewise. + * gcc.dg/goacc/pr71373.c: Likewise. + * gfortran.dg/goacc/cray-2.f95: Likewise. + * gfortran.dg/goacc/loop-1-2.f95: Likewise. + * gfortran.dg/goacc/loop-3-2.f95: Likewise. + * gfortran.dg/goacc/cray.f95: Update. + * gfortran.dg/goacc/loop-1.f95: Likewise. + * gfortran.dg/goacc/loop-3.f95: Likewise. + * gfortran.dg/goacc/subroutines.f90: Update, and rename to... 
+ * gfortran.dg/goacc/nested-function-1.f90: ... this new file. + + Backport from trunk r237291: + * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning + for OpenACC tile clauses. + * gfortran.dg/goacc/combined-directives.f90: Likewise. + PR c/71381 Backport from trunk r237290: * c-c++-common/goacc/cache-1.c: Update. Move invalid usage tests diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c index 2ef5b53..2342c9f 100644 --- gcc/testsuite/c-c++-common/goacc/combined-directives.c +++ gcc/testsuite/c-c++-common/goacc/combined-directives.c @@ -114,6 +114,7 @@ test () // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } } +// XFAILed: OpenACC tile clauses are discarded during gimplification. +// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } } // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } } // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c new file mode 100644 index 0000000..e17c0e2 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c @@ -0,0 +1,100 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ +/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran + version. 
*/ + +int main () +{ +#define N 100 + int nonlocal_arg; + int nonlocal_a[N]; + int nonlocal_i; + int nonlocal_j; + + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + + void local () + { + int local_i; + int local_arg; + int local_a[N]; + int local_j; + + for (int i = 0; i < N; ++i) + local_a[i] = 5; + local_arg = 5; + +#pragma acc kernels loop \ + gang(num:local_arg) worker(local_arg) vector(local_arg) \ + wait async(local_arg) + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:5]) + local_a[local_i] = 100; +#pragma acc loop seq tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; + } + +#pragma acc kernels loop \ + gang(static:local_arg) worker(local_arg) vector(local_arg) \ + wait(local_arg, local_arg + 1, local_arg + 2) async + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:4]) + local_a[local_i] = 100; +#pragma acc loop seq tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; + } + } + + void nonlocal () + { + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + +#pragma acc kernels loop \ + gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait async(nonlocal_arg) + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:3]) + nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(2) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(3) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + +#pragma acc kernels loop \ + gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:2]) + 
nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + } + + local (); + nonlocal (); + + return 0; +} diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c new file mode 100644 index 0000000..70c9ec8 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c @@ -0,0 +1,45 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ + +int +main (void) +{ + int j = 0, k = 6, l = 7, m = 8; + void simple (void) + { + int i; +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < m; i+= k) + j = (m + i - j) * l; + } + } + void collapse (void) + { + int x, y, z; +#pragma acc parallel + { +#pragma acc loop collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + void reduction (void) + { + int x, y, z; +#pragma acc parallel reduction (+:j) + { +#pragma acc loop reduction (+:j) collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + simple(); + collapse(); + reduction(); + return 0; +} diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c new file mode 100644 index 0000000..9381752 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/pr71373.c @@ -0,0 +1,41 @@ +/* Unintentional nested function usage. */ +/* Due to missing right braces '}', the following functions are parsed as + nested functions. This ran into an ICE. */ + +void foo (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + // Note right brace '}' commented out here. + //} +} +void bar (void) +{ +} + +// Adding right brace '}' here, to make this compile. +} + + +// ..., and the other way round: + +void BAR (void) +{ +// Note right brace '}' commented out here. 
+//} + +void FOO (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + } +} + +// Adding right brace '}' here, to make this compile. +} diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 index 00b8822..ffa9371 100644 --- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 +++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 @@ -162,6 +162,7 @@ end subroutine test ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" { xfail *-*-* } } } +! XFAILed: OpenACC tile clauses are discarded during gimplification. ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95 similarity index 91% copy from gcc/testsuite/gfortran.dg/goacc/cray.f95 copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95 index 705c18c..51b79b5 100644 --- gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95 @@ -1,15 +1,16 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray.f95. -module test +program test + call oacc1 contains subroutine oacc1 implicit none integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! 
{ dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } @@ -52,4 +53,4 @@ contains !$acc update host (ptr) !$acc update self (ptr) end subroutine oacc1 -end module test +end program test diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95 index 705c18c..d6d5317 100644 --- gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ gcc/testsuite/gfortran.dg/goacc/cray.f95 @@ -1,5 +1,5 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray-2.f95. module test contains @@ -8,8 +8,8 @@ contains integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! { dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 similarity index 89% copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95 copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 index a605f03..79665b9 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 @@ -1,8 +1,10 @@ -module test - implicit none +! See also loop-1.f95. + +program test + call test1 contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! 
{ dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! 
Both seq and independent are not allowed @@ -171,4 +173,4 @@ subroutine test1 enddo end subroutine test1 -end module test +end program test diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95 index a605f03..5f81b7a 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95 @@ -1,8 +1,10 @@ +! See also loop-1-2.f95. + module test implicit none contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! 
combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! Both seq and independent are not allowed diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 similarity index 90% copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95 copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 index 2a866c7..9be74a8 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 @@ -1,10 +1,13 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3.f95. +program test + call test1 +contains subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +26,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +38,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! 
{ dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +53,6 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - +end program test diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95 index 2a866c7..30930f4 100644 --- gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95 @@ -1,10 +1,10 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3-2.f95. subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +23,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +35,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +50,5 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 new file mode 100644 index 0000000..2fcaa40 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 @@ -0,0 +1,93 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. +! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version. 
+ +program main + integer, parameter :: N = 100 + integer :: nonlocal_arg + integer :: nonlocal_a(N) + integer :: nonlocal_i + integer :: nonlocal_j + + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + call local () + call nonlocal () + +contains + + subroutine local () + integer :: local_i + integer :: local_arg + integer :: local_a(N) + integer :: local_j + + local_a (:) = 5 + local_arg = 5 + + !$acc kernels loop & + !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait async(local_arg) + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 5)) + local_a(local_i) = 100 + !$acc loop seq tile(*) + do local_j = 1, N + enddo + !$acc loop auto independent tile(1) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait(local_arg, local_arg + 1, local_arg + 2) async + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 4)) + local_a(local_i) = 100 + !$acc loop seq tile(1) + do local_j = 1, N + enddo + !$acc loop auto independent tile(*) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine local + + subroutine nonlocal () + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + !$acc kernels loop & + !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait async(nonlocal_arg) + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(2) + do nonlocal_j = 1, N + enddo + !$acc loop auto independent tile(3) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(*) + do nonlocal_j = 1, N + enddo + !$acc loop auto 
independent tile(*) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine nonlocal +end program main diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90 deleted file mode 100644 index 6cab798..0000000 --- gcc/testsuite/gfortran.dg/goacc/subroutines.f90 +++ /dev/null @@ -1,73 +0,0 @@ -! Exercise how tree-nested.c handles gang, worker vector and seq. - -! { dg-do compile } - -program main - integer, parameter :: N = 100 - integer :: nonlocal_arg - integer :: nonlocal_a(N) - integer :: nonlocal_i - integer :: nonlocal_j - - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - call local () - call nonlocal () - -contains - - subroutine local () - integer :: local_i - integer :: local_arg - integer :: local_a(N) - integer :: local_j - - local_a (:) = 5 - local_arg = 5 - - !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:local_arg) worker(local_arg) & - !$acc vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine local - - subroutine nonlocal () - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine nonlocal -end program main diff --git gcc/tree-nested.c gcc/tree-nested.c index 1433c3e..984fa81 100644 --- gcc/tree-nested.c +++ gcc/tree-nested.c @@ -1114,6 
+1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1197,9 +1199,27 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_DEVICE_TYPE: + /* TODO. */ + gcc_unreachable (); + + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. */ + gcc_unreachable (); + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_DEVICE_RESIDENT: case OMP_CLAUSE_NOHOST: default: gcc_unreachable (); @@ -1792,6 +1812,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1880,9 +1902,27 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_DEVICE_TYPE: + /* TODO. */ + gcc_unreachable (); + + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. 
*/ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. */ + gcc_unreachable (); + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_DEVICE_RESIDENT: case OMP_CLAUSE_NOHOST: default: gcc_unreachable (); diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp index 17c26c5..af4e0d5 100644 --- libgomp/ChangeLog.gomp +++ libgomp/ChangeLog.gomp @@ -1,5 +1,16 @@ 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + PR middle-end/71373 + Backport from trunk r237291: + 2016-06-10 Thomas Schwinge <thomas@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + * libgomp.oacc-c/nested-function-1.c: New file. + * libgomp.oacc-c/nested-function-2.c: Likewise. + * libgomp.oacc-fortran/nested-function-1.f90: Likewise. + * libgomp.oacc-fortran/nested-function-2.f90: Likewise. + * libgomp.oacc-fortran/nested-function-3.f90: Likewise. + PR c/71381 * testsuite/libgomp.oacc-fortran/cache-1.f90: Remove file. diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c new file mode 100644 index 0000000..fb2a3ac --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c @@ -0,0 +1,52 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. 
*/ + +int +main (void) +{ + void test1 () + { + int i, j, k; + int a[4][7][8]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(4 - 1) + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + a[i][j][k] = i + j + k; + + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + if (a[i][j][k] != i + j + k) + __builtin_abort(); + } + + void test2 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort (); + } + + test1 (); + test2 (); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c new file mode 100644 index 0000000..2c3f3fe --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c @@ -0,0 +1,155 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. 
*/ + +int +main (void) +{ + int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1; + + void test1 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, '\0', sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort(); + } + + void test2 (int v1, int v2, int v3, int v4, int v5, int v6) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8, + int v9) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) 
+ { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test4 () + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8, + v9 = p9; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + test1 (); + test2 (p1, p2, p3, p4, p5, p6); + test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9); + test4 (); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 new file mode 100644 index 0000000..fdbca44 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 @@ -0,0 +1,70 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program collapse2 + call test1 + call test2 +contains + subroutine test1 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. 
+ a(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop worker vector collapse(4 - 1) + do 164 i = 1, 3 + do 164 j = 4, 6 + do 164 k = 5, 7 + a(i, j, k) = i + j + k +164 end do + !$acc loop worker vector reduction(.or.:l) collapse(2) +firstdo: do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do firstdo + !$acc end parallel + if (l) call abort + end subroutine test1 + + subroutine test2 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + a = 0 + !$acc parallel + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + do 115 k=1,3 + !$acc loop collapse(2) + dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc loop gang(static:1) collapse(1) + do k=1,3 + if (any(a(k,1:3,1:3).ne.1)) call abort + enddo + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + dol: do 120 l=1,3 + !$acc loop collapse(2) + doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc loop gang(static:1) collapse(1) + do l=1,3 + if (any(a(l,1:3,1:3).ne.2)) call abort + enddo + !$acc end parallel + end subroutine test2 + +end program collapse2 diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 new file mode 100644 index 0000000..4e28196 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 @@ -0,0 +1,173 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! 
{ dg-do run } + +program collapse3 + integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9 + p1 = 2 + p2 = 6 + p3 = -2 + p4 = 4 + p5 = 13 + p6 = 18 + p7 = 1 + p8 = 1 + p9 = 1 + call test1 + call test2 (p1, p2, p3, p4, p5, p6) + call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9) + call test4 +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = p1 + v2 = p2 + v3 = p3 + v4 = p4 + v5 = p5 + v6 = p6 + v7 = p7 + v8 = p8 + v9 = p9 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. 
b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + +end program collapse3 diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 new file mode 100644 index 0000000..2f6485e --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 @@ -0,0 +1,244 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program sub_collapse_3 + call test1 + call test2 (2, 6, -2, 4, 13, 18) + call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1) + call test4 + call test5 (2, 6, -2, 4, 13, 18) + call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1) +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. 
r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 2 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + v7 = 1 + v8 = 1 + v9 = 1 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + + subroutine test5(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test5 + + subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. 
+ r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + m = i * 100 + j * 10 + k + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test6 + +end program sub_collapse_3 Grüße Thomas
Attachment:
signature.asc
Description: PGP signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |