[PATCH, 15/16] Add libgomp.oacc-c-c++-common/kernels-*.c

Tom de Vries Tom_deVries@mentor.com
Wed Mar 9 09:18:00 GMT 2016


On 09/11/15 21:10, Tom de Vries wrote:
> On 09/11/15 16:35, Tom de Vries wrote:
>> Hi,
>>
>> this patch series for stage1 trunk adds support to:
>> - parallelize oacc kernels regions using parloops, and
>> - map the loops onto the oacc gang dimension.
>>
>> The patch series contains these patches:
>>
>>       1    Insert new exit block only when needed in
>>          transform_to_exit_first_loop_alt
>>       2    Make create_parallel_loop return void
>>       3    Ignore reduction clause on kernels directive
>>       4    Implement -foffload-alias
>>       5    Add in_oacc_kernels_region in struct loop
>>       6    Add pass_oacc_kernels
>>       7    Add pass_dominator_oacc_kernels
>>       8    Add pass_ch_oacc_kernels
>>       9    Add pass_parallelize_loops_oacc_kernels
>>      10    Add pass_oacc_kernels pass group in passes.def
>>      11    Update testcases after adding kernels pass group
>>      12    Handle acc loop directive
>>      13    Add c-c++-common/goacc/kernels-*.c
>>      14    Add gfortran.dg/goacc/kernels-*.f95
>>      15    Add libgomp.oacc-c-c++-common/kernels-*.c
>>      16    Add libgomp.oacc-fortran/kernels-*.f95
>>
>> The first 9 patches are more or less independent, but patches 10-16 are
>> intended to be committed at the same time.
>>
>> Bootstrapped and reg-tested on x86_64.
>>
>> Built and reg-tested with nvidia accelerator, in combination with a
>> patch that enables accelerator testing (which is submitted at
>> https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).
>>
>> I'll post the individual patches in reply to this message.
>
> This patch adds C/C++ oacc kernels execution tests.
>

Retested on current trunk.

Committed, minus the kernels-parallel-loop-data-enter-exit.f95 test.

Thanks,
- Tom

> 0015-Add-libgomp.oacc-c-c-common-kernels-.c.patch
>
>
> Add libgomp.oacc-c-c++-common/kernels-*.c
>
> 2015-11-09  Tom de Vries  <tom@codesourcery.com>
>
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
> 	Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
> 	Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
> 	Same.
> 	* testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c: Same.
> ---
>   .../libgomp.oacc-c-c++-common/kernels-loop-2.c     | 47 ++++++++++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-loop-3.c     | 34 +++++++++++++
>   .../kernels-loop-and-seq-2.c                       | 36 ++++++++++++++
>   .../kernels-loop-and-seq-3.c                       | 37 ++++++++++++++
>   .../kernels-loop-and-seq-4.c                       | 36 ++++++++++++++
>   .../kernels-loop-and-seq-5.c                       | 37 ++++++++++++++
>   .../kernels-loop-and-seq-6.c                       | 36 ++++++++++++++
>   .../kernels-loop-and-seq.c                         | 37 ++++++++++++++
>   .../kernels-loop-collapse.c                        | 40 ++++++++++++++++
>   .../kernels-loop-data-2.c                          | 56 ++++++++++++++++++++++
>   .../kernels-loop-data-enter-exit-2.c               | 54 +++++++++++++++++++++
>   .../kernels-loop-data-enter-exit.c                 | 51 ++++++++++++++++++++
>   .../kernels-loop-data-update.c                     | 53 ++++++++++++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-loop-data.c  | 50 +++++++++++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-loop-g.c     |  5 ++
>   .../kernels-loop-mod-not-zero.c                    | 41 ++++++++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-loop-n.c     | 47 ++++++++++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-loop-nest.c  | 26 ++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-loop.c       | 41 ++++++++++++++++
>   .../kernels-parallel-loop-data-enter-exit.c        | 52 ++++++++++++++++++++
>   .../libgomp.oacc-c-c++-common/kernels-reduction.c  | 37 ++++++++++++++
>   21 files changed, 853 insertions(+)
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c
>   create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c
>
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c
> new file mode 100644
> index 0000000..13e57bd
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc kernels copyout (a[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      a[i] = i * 2;
> +  }
> +
> +#pragma acc kernels copyout (b[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      b[i] = i * 4;
> +  }
> +
> +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c
> new file mode 100644
> index 0000000..f61a74a
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c
> @@ -0,0 +1,34 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int i;
> +
> +  unsigned int *__restrict c;
> +
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    c[i] = i * 2;
> +
> +#pragma acc kernels copy (c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = c[ii] + ii + 1;
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != i * 2 + i + 1)
> +      abort ();
> +
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c
> new file mode 100644
> index 0000000..2e4100f
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 32
> +
> +unsigned int
> +foo (int n, unsigned int *a)
> +{
> +#pragma acc kernels copy (a[0:N])
> +  {
> +    a[0] = a[0] + 1;
> +
> +    for (int i = 0; i < n; i++)
> +      a[i] = 1;
> +  }
> +
> +  return a[0];
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int a[N];
> +  unsigned res, i;
> +
> +  for (i = 0; i < N; ++i)
> +    a[i] = i % 4;
> +
> +  res = foo (N, a);
> +  if (res != 1)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c
> new file mode 100644
> index 0000000..b3e736b
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c
> @@ -0,0 +1,37 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 32
> +
> +unsigned int
> +foo (int n, unsigned int *a)
> +{
> +
> +#pragma acc kernels copy (a[0:N])
> +  {
> +    for (int i = 0; i < n; i++)
> +      a[i] = 1;
> +
> +    a[0] = 2;
> +  }
> +
> +  return a[0];
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int a[N];
> +  unsigned res, i;
> +
> +  for (i = 0; i < N; ++i)
> +    a[i] = i % 4;
> +
> +  res = foo (N, a);
> +  if (res != 2)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c
> new file mode 100644
> index 0000000..8b9affa
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 32
> +
> +unsigned int
> +foo (int n, unsigned int *a)
> +{
> +#pragma acc kernels copy (a[0:N])
> +  {
> +    a[0] = 2;
> +
> +    for (int i = 0; i < n; i++)
> +      a[i] = 1;
> +  }
> +
> +  return a[0];
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int a[N];
> +  unsigned res, i;
> +
> +  for (i = 0; i < N; ++i)
> +    a[i] = i % 4;
> +
> +  res = foo (N, a);
> +  if (res != 1)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c
> new file mode 100644
> index 0000000..83d4e7f
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c
> @@ -0,0 +1,37 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 32
> +
> +unsigned int
> +foo (int n, unsigned int *a)
> +{
> +  int r;
> +#pragma acc kernels copyout(r) copy (a[0:N])
> +  {
> +    r = a[0];
> +
> +    for (int i = 0; i < n; i++)
> +      a[i] = 1;
> +  }
> +
> +  return r;
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int a[N];
> +  unsigned res, i;
> +
> +  for (i = 0; i < N; ++i)
> +    a[i] = i % 4;
> +
> +  res = foo (N, a);
> +  if (res != 0)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c
> new file mode 100644
> index 0000000..01d5e5e
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 32
> +
> +unsigned int
> +foo (int n, unsigned int *a)
> +{
> +#pragma acc kernels copy (a[0:N])
> +  {
> +    int r = a[0];
> +
> +    for (int i = 0; i < n; i++)
> +      a[i] = 1 + r;
> +  }
> +
> +  return a[0];
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int a[N];
> +  unsigned res, i;
> +
> +  for (i = 0; i < N; ++i)
> +    a[i] = i % 4;
> +
> +  res = foo (N, a);
> +  if (res != 1)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c
> new file mode 100644
> index 0000000..61d1283
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c
> @@ -0,0 +1,37 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 32
> +
> +unsigned int
> +foo (int n, unsigned int *a)
> +{
> +
> +#pragma acc kernels copy (a[0:N])
> +  {
> +    for (int i = 0; i < n; i++)
> +      a[i] = 1;
> +
> +    a[0] = a[0] + 1;
> +  }
> +
> +  return a[0];
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int a[N];
> +  unsigned res, i;
> +
> +  for (i = 0; i < N; ++i)
> +    a[i] = i % 4;
> +
> +  res = foo (N, a);
> +  if (res != 2)
> +    abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c
> new file mode 100644
> index 0000000..f7f04cb
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c
> @@ -0,0 +1,40 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 100
> +
> +int a[N][N];
> +
> +void __attribute__((noinline, noclone))
> +foo (int m, int n)
> +{
> +  int i, j;
> +  #pragma acc kernels
> +  {
> +#pragma acc loop collapse(2)
> +    for (i = 0; i < m; i++)
> +      for (j = 0; j < n; j++)
> +	a[i][j] = 1;
> +  }
> +}
> +
> +int
> +main (void)
> +{
> +  int i, j;
> +
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      a[i][j] = 0;
> +
> +  foo (N, N);
> +
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      if (a[i][j] != 1)
> +	abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c
> new file mode 100644
> index 0000000..b889ef9
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c
> @@ -0,0 +1,56 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc data copyout (a[0:N])
> +  {
> +#pragma acc kernels present (a[0:N])
> +    {
> +      for (COUNTERTYPE i = 0; i < N; i++)
> +	a[i] = i * 2;
> +    }
> +  }
> +
> +#pragma acc data copyout (b[0:N])
> +  {
> +#pragma acc kernels present (b[0:N])
> +    {
> +      for (COUNTERTYPE i = 0; i < N; i++)
> +	b[i] = i * 4;
> +    }
> +  }
> +
> +#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  {
> +#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
> +    {
> +      for (COUNTERTYPE ii = 0; ii < N; ii++)
> +	c[ii] = a[ii] + b[ii];
> +    }
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c
> new file mode 100644
> index 0000000..d508a44
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c
> @@ -0,0 +1,54 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc enter data create (a[0:N])
> +#pragma acc kernels present (a[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      a[i] = i * 2;
> +  }
> +#pragma acc exit data copyout (a[0:N])
> +
> +#pragma acc enter data create (b[0:N])
> +#pragma acc kernels present (b[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      b[i] = i * 4;
> +  }
> +#pragma acc exit data copyout (b[0:N])
> +
> +
> +#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N])
> +#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +#pragma acc exit data copyout (c[0:N])
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c
> new file mode 100644
> index 0000000..11d82f7
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
> +
> +#pragma acc kernels present (a[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      a[i] = i * 2;
> +  }
> +
> +#pragma acc kernels present (b[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      b[i] = i * 4;
> +  }
> +
> +#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c
> new file mode 100644
> index 0000000..a7d4e84
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c
> @@ -0,0 +1,53 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
> +
> +#pragma acc kernels present (a[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      a[i] = i * 2;
> +  }
> +
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      b[i] = i * 4;
> +  }
> +
> +#pragma acc update device (b[0:N])
> +
> +#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +#pragma acc exit data copyout (a[0:N], c[0:N])
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> +
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c
> new file mode 100644
> index 0000000..607d7de
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c
> @@ -0,0 +1,50 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc data copyout (a[0:N], b[0:N], c[0:N])
> +  {
> +#pragma acc kernels present (a[0:N])
> +    {
> +      for (COUNTERTYPE i = 0; i < N; i++)
> +	a[i] = i * 2;
> +    }
> +
> +#pragma acc kernels present (b[0:N])
> +    {
> +      for (COUNTERTYPE i = 0; i < N; i++)
> +	b[i] = i * 4;
> +    }
> +
> +#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
> +    {
> +      for (COUNTERTYPE ii = 0; ii < N; ii++)
> +	c[ii] = a[ii] + b[ii];
> +    }
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c
> new file mode 100644
> index 0000000..96b6e4e
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +/* { dg-additional-options "-g" } */
> +
> +#include "kernels-loop.c"
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c
> new file mode 100644
> index 0000000..1433cb2
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c
> @@ -0,0 +1,41 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N ((1024 * 512) + 1)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    a[i] = i * 2;
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    b[i] = i * 4;
> +
> +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c
> new file mode 100644
> index 0000000..fd0d5b1
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N ((1024 * 512) + 1)
> +#define COUNTERTYPE unsigned int
> +
> +static int __attribute__((noinline,noclone))
> +foo (COUNTERTYPE n)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
> +
> +  for (COUNTERTYPE i = 0; i < n; i++)
> +    a[i] = i * 2;
> +
> +  for (COUNTERTYPE i = 0; i < n; i++)
> +    b[i] = i * 4;
> +
> +#pragma acc kernels copyin (a[0:n], b[0:n]) copyout (c[0:n])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < n; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < n; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> +
> +int
> +main (void)
> +{
> +  return foo (N);
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c
> new file mode 100644
> index 0000000..21d2599
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N 1000
> +
> +int
> +main (void)
> +{
> +  int x[N][N];
> +
> +#pragma acc kernels copyout (x)
> +  {
> +    for (int ii = 0; ii < N; ii++)
> +      for (int jj = 0; jj < N; jj++)
> +	x[ii][jj] = ii + jj + 3;
> +  }
> +
> +  for (int i = 0; i < N; i++)
> +    for (int j = 0; j < N; j++)
> +      if (x[i][j] != i + j + 3)
> +	abort ();
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c
> new file mode 100644
> index 0000000..3762e5a
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c
> @@ -0,0 +1,41 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    a[i] = i * 2;
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    b[i] = i * 4;
> +
> +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c
> new file mode 100644
> index 0000000..767f6c8
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define N (1024 * 512)
> +#define COUNTERTYPE unsigned int
> +
> +int
> +main (void)
> +{
> +  unsigned int *__restrict a;
> +  unsigned int *__restrict b;
> +  unsigned int *__restrict c;
> +
> +  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
> +
> +#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
> +
> +#pragma acc kernels present (a[0:N])
> +  {
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      a[i] = i * 2;
> +  }
> +
> +#pragma acc parallel present (b[0:N])
> +  {
> +#pragma acc loop
> +    for (COUNTERTYPE i = 0; i < N; i++)
> +      b[i] = i * 4;
> +  }
> +
> +#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
> +  {
> +    for (COUNTERTYPE ii = 0; ii < N; ii++)
> +      c[ii] = a[ii] + b[ii];
> +  }
> +
> +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
> +
> +  for (COUNTERTYPE i = 0; i < N; i++)
> +    if (c[i] != a[i] + b[i])
> +      abort ();
> +
> +  free (a);
> +  free (b);
> +  free (c);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c
> new file mode 100644
> index 0000000..511e25f
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c
> @@ -0,0 +1,37 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=32" } */
> +
> +#include <stdlib.h>
> +
> +#define n 10000
> +
> +unsigned int a[n];
> +
> +void  __attribute__((noinline,noclone))
> +foo (void)
> +{
> +  int i;
> +  unsigned int sum = 1;
> +
> +#pragma acc kernels copyin (a[0:n]) copy (sum)
> +  {
> +    for (i = 0; i < n; ++i)
> +      sum += a[i];
> +  }
> +
> +  if (sum != 5001)
> +    abort ();
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +
> +  for (i = 0; i < n; ++i)
> +    a[i] = i % 2;
> +
> +  foo ();
> +
> +  return 0;
> +}
>



More information about the Gcc-patches mailing list