This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4, committed] Revert "Use marked_independent in oacc kernels region"


Hi,

this patch reverts the independent clause support in the oacc kernels region.

The independent clause support is broken, in a subtle way. We currently set the marked_independent field in struct loop for loops with the independent clause in a kernels region. So that property holds for all the loads and stores present at source level. But, at omp-lowering, we introduce new loads and stores. Those new loads and stores are supposed to be eliminated from the loop by the kernels pass group. But in general, we can't guarantee that that happens. So, at parloops, we cannot assume based on marked_independent that in fact all loads and stores in the loop body are independent.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Use marked_independent in oacc kernels region"

2015-10-20  Tom de Vries  <tom@codesourcery.com>

	Revert:
	2015-07-14  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (parallelize_loops): Use marked_independent flag in
	oacc kernels region.

	* c-c++-common/goacc/kernels-independent.c: New test.

	* testsuite/libgomp.oacc-c-c++-common/kernels-independent.c: New test.
---
 .../c-c++-common/goacc/kernels-independent.c       | 41 --------------------
 gcc/tree-parloops.c                                | 21 ++--------
 .../kernels-independent.c                          | 45 ----------------------
 3 files changed, 3 insertions(+), 104 deletions(-)
 delete mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-independent.c
 delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c b/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
deleted file mode 100644
index 1f36323..0000000
--- a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
-/* { dg-additional-options "-fdump-tree-optimized" } */
-
-#include <stdlib.h>
-
-#define N (1024 * 512)
-#define COUNTERTYPE unsigned int
-
-void
-foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
-{
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    a[i] = i * 2;
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    b[i] = i * 4;
-
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
-  {
-    #pragma acc loop independent
-    for (COUNTERTYPE ii = 0; ii < N; ii++)
-      c[ii] = a[ii] + b[ii];
-  }
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    if (c[i] != a[i] + b[i])
-      abort ();
-}
-
-/* Check that only one loop is analyzed, and that it can be parallelized.  */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } } */
-/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
-
-/* Check that the loop has been split off into a function.  */
-/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops_oacc_kernels" } } */
-
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 05827d1..b4039ad 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -3258,24 +3258,9 @@ parallelize_loops (bool oacc_kernels_p)
       if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
 	continue;
 
-      if (!flag_loop_parallelize_all)
-	{
-	  bool independent = (oacc_kernels_p
-			      && loop->marked_independent);
-
-	  if (independent)
-	    {
-	      if (dump_file
-		  && (dump_flags & TDF_DETAILS))
-		fprintf (dump_file,
-			 "  SUCCESS: may be parallelized, marked independent\n");
-	    }
-	  else
-	    independent = loop_parallel_p (loop, &parloop_obstack);
-
-	  if (!independent)
-	    continue;
-	}
+      if (!flag_loop_parallelize_all
+	  && !loop_parallel_p (loop, &parloop_obstack))
+	continue;
 
       if (oacc_kernels_p
 	&& !oacc_entry_exit_ok (loop, &reduction_list))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
deleted file mode 100644
index d169a5f..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
-#include <stdlib.h>
-
-#define N (1024 * 512)
-#define COUNTERTYPE unsigned int
-
-void __attribute__((noinline,noclone))
-foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
-{
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    a[i] = i * 2;
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    b[i] = i * 4;
-
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
-  {
-    #pragma acc loop independent
-    for (COUNTERTYPE ii = 0; ii < N; ii++)
-      c[ii] = a[ii] + b[ii];
-  }
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    if (c[i] != a[i] + b[i])
-      abort ();
-}
-
-int
-main (void)
-{
-  unsigned int *__restrict a;
-  unsigned int *__restrict b;
-  unsigned int *__restrict c;
-
-  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
-  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
-  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
-
-  foo (a, b, c);
-
-  return 0;
-}
-- 
1.9.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]