This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] Don't mark OpenACC auto loops as independent inside acc parallel regions


The OpenACC 2.5 spec updated the behavior of acc loops inside acc
parallel regions such that loops with seq or auto clauses are no longer
implicitly independent. Back in OpenACC 2.0, all loops inside acc
parallel regions were implicitly independent. Oddly enough, if the user
just places an acc loop without any clauses, it is still implicitly
independent. E.g.

  #pragma acc loop

implies

  #pragma acc loop independent

which is not equal to

  #pragma acc loop auto

I suppose the auto clause is used to explicitly have the compiler
"automatically" detect loop dependencies and partition the loop
accordingly. This patch, which I've applied to gomp-4_0-branch, makes
GCC comply with this new behavior.

Cesar
2017-05-03  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* omp-low.c (lower_oacc_head_mark): Don't mark OpenACC auto loops as
	independent inside acc parallel regions.

	gcc/testsuite/
	* c-c++-common/goacc/loop-auto-1.c: Adjust test case to conform to
	the new behavior of the auto clause in OpenACC 2.5.
	* c-c++-common/goacc/loop-auto-2.c: Likewise.
	* gcc.dg/goacc/loop-processing-1.c: Likewise.
	* c-c++-common/goacc/loop-auto-3.c: New test.
	* gfortran.dg/goacc/loop-auto-1.f90: New test.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust test case
	to conform to the new behavior of the auto clause in OpenACC 2.5.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index cf299c12..9e9a363 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6638,9 +6638,10 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
       tag |= OLF_GANG_STATIC;
     }
 
-  /* In a parallel region, loops are implicitly INDEPENDENT.  */
+  /* In a parallel region, loops without auto and seq clauses are
+     implicitly INDEPENDENT.  */
   omp_context *tgt = enclosing_target_ctx (ctx);
-  if (!tgt || is_oacc_parallel (tgt))
+  if ((!tgt || is_oacc_parallel (tgt)) && !(tag & (OLF_SEQ | OLF_AUTO)))
     tag |= OLF_INDEPENDENT;
 
   if (tag & OLF_TILE)
diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
index 124befc..dcad07f 100644
--- a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
@@ -10,7 +10,7 @@ void Foo ()
 #pragma acc loop seq
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++) {}
       }
 
@@ -20,7 +20,7 @@ void Foo ()
 #pragma acc loop auto
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  {
 #pragma acc loop vector
@@ -51,7 +51,7 @@ void Foo ()
 #pragma acc loop vector
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	    for (int kx = 0; kx < 10; kx++) {}
 	  }
 
@@ -64,27 +64,27 @@ void Foo ()
 
       }
     
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int ix = 0; ix < 10; ix++)
       {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	    for (int kx = 0; kx < 10; kx++) {}
 	  }
       }
 
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int ix = 0; ix < 10; ix++)
       {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	    for (int kx = 0; kx < 10; kx++)
 	      {
-#pragma acc loop auto
+#pragma acc loop auto independent
 		for (int lx = 0; lx < 10; lx++) {}
 	      }
 	  }
@@ -101,7 +101,7 @@ void Gang (void)
 #pragma acc loop seq
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++) {}
       }
 
@@ -111,7 +111,7 @@ void Gang (void)
 #pragma acc loop auto
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  {
 #pragma acc loop vector
@@ -142,7 +142,7 @@ void Gang (void)
 #pragma acc loop vector
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	    for (int kx = 0; kx < 10; kx++) {}
 	  }
 
@@ -176,7 +176,7 @@ void Worker (void)
 #pragma acc loop seq
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++) {}
       }
 
@@ -186,7 +186,7 @@ void Worker (void)
 #pragma acc loop auto
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  {
 #pragma acc loop vector
@@ -194,20 +194,20 @@ void Worker (void)
 	  }
       }
 
-#pragma acc loop auto
+#pragma acc loop
     for (int ix = 0; ix < 10; ix++)
       {
-#pragma acc loop auto
+#pragma acc loop
 	for (int jx = 0; jx < 10; jx++) {}
       }
 
-#pragma acc loop auto
+#pragma acc loop
     for (int ix = 0; ix < 10; ix++)
       {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto
+#pragma acc loop
 	    for (int kx = 0; kx < 10; kx++) {}
 	  }
       }
@@ -222,17 +222,17 @@ void Vector (void)
 #pragma acc loop seq
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++) {}
       }
 
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int ix = 0; ix < 10; ix++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
     for (int ix = 0; ix < 10; ix++)
       {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int jx = 0; jx < 10; jx++) {}
       }
 }
@@ -240,6 +240,6 @@ void Vector (void)
 #pragma acc routine seq
 void Seq (void)
 {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
     for (int ix = 0; ix < 10; ix++) {}
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c
index af3f0bd..5aa36e9 100644
--- a/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c
@@ -72,12 +72,12 @@ void Bad ()
 #pragma acc loop tile(*) gang vector
     for (int ix = 0; ix < 10; ix++)
       {
-	#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+	#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  ;
       }
 
-#pragma acc loop tile(*) auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop tile(*) auto independent /* { dg-warning "insufficient partitioning" } */
     for (int ix = 0; ix < 10; ix++)
       {
 	#pragma acc loop worker
diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c
new file mode 100644
index 0000000..42f8759
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c
@@ -0,0 +1,81 @@
+/* Ensure that the auto clause falls back to seq parallelism when the
+   OpenACC loop is not explicitly independent.  */
+
+/* { dg-compile } */
+/* { dg-additional-options "-fopt-info-note-omp" } */
+
+void
+test ()
+{
+  int i, j, k, l, n = 100;
+  
+#pragma acc parallel loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */
+  for (i = 0; i < n; i++)
+#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop gang>" } */
+    for (j = 0; j < n; j++)
+#pragma acc loop worker vector /* { dg-message "Detected parallelism <acc loop worker vector>" } */
+      for (k = 0; k < n; k++)
+	;
+
+#pragma acc parallel loop auto independent /* { dg-message "Detected parallelism <acc loop gang worker>" } */
+  for (i = 0; i < n; i++)
+#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */
+    for (j = 0; j < n; j++)
+#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */
+      for (k = 0; k < n; k++)
+#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop vector>" } */
+	for (l = 0; l < n; l++)
+	  ;
+
+#pragma acc parallel loop gang /* { dg-message "Detected parallelism <acc loop gang>" } */
+  for (i = 0; i < n; i++)
+#pragma acc loop worker /* { dg-message "Detected parallelism <acc loop worker>" } */
+    for (j = 0; j < n; j++)
+#pragma acc loop vector /* { dg-message "Detected parallelism <acc loop vector>" } */
+      for (k = 0; k < n; k++)
+	{
+#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop seq>" } */
+	  for (l = 0; l < n; l++)
+	    ;
+#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */
+	  for (l = 0; l < n; l++)
+	    ;
+	}
+
+#pragma acc parallel loop /* { dg-message "Detected parallelism <acc loop seq>" } */
+  for (i = 0; i < n; i++)
+    {
+#pragma acc loop gang worker /* { dg-message "Detected parallelism <acc loop gang worker>" } */
+      for (j = 0; j < n; j++)
+#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */
+	for (k = 0; k < n; k++)
+	  {
+#pragma acc loop vector /* { dg-message "Detected parallelism <acc loop vector>" } */
+	    for (l = 0; l < n; l++)
+	      ;
+#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop vector>" } */
+	    for (l = 0; l < n; l++)
+	      ;
+	  }
+#pragma acc loop worker /* { dg-message "Detected parallelism <acc loop worker>" } */
+      for (j = 0; j < n; j++)
+#pragma acc loop vector /* { dg-message "Detected parallelism <acc loop vector>" } */
+	for (k = 0; k < n; k++)
+	  ;
+    }
+
+#pragma acc parallel loop /* { dg-message "Detected parallelism <acc loop gang>" } */
+  for (i = 0; i < n; i++)
+#pragma acc loop /* { dg-message "Detected parallelism <acc loop worker>" } */
+    for (j = 0; j < n; j++)
+#pragma acc loop /* { dg-message "Detected parallelism <acc loop seq>" } */
+      for (k = 0; k < n; k++)
+#pragma acc loop /* { dg-message "Detected parallelism <acc loop vector>" } */
+	  for (l = 0; l < n; l++)
+	    ;
+}
+
+/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 37 } */
+/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 45 } */
+/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 71 } */
+
diff --git a/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c b/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c
index 85e73b1..805bbb9 100644
--- a/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c
+++ b/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c
@@ -9,7 +9,7 @@ void vector_1 (int *ary, int size)
   {
 #pragma acc loop gang
     for (int jx = 0; jx < 1; jx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
       for (int ix = 0; ix < size; ix++)
 	ary[ix] = place ();
   }
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 b/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90
new file mode 100644
index 0000000..354d6fc7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90
@@ -0,0 +1,91 @@
+! Ensure that the auto clause falls back to seq parallelism when the
+!  OpenACC loop is not explicitly independent.
+
+! { dg-compile }
+! { dg-additional-options "-fopt-info-note-omp" }
+
+program test
+  implicit none
+  integer, parameter :: n = 100
+  integer i, j, k, l
+  
+  !$acc parallel loop auto ! { dg-message "Detected parallelism <acc loop seq>" }
+  do i = 1, n
+     !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop gang>" }
+     do j = 1, n
+        !$acc loop worker vector ! { dg-message "Detected parallelism <acc loop worker vector>" }
+        do k = 1, n
+        end do
+     end do
+  end do
+ 
+  !$acc parallel loop auto independent ! { dg-message "Detected parallelism <acc loop gang worker>" }
+  do i = 1, n
+     !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" }
+     do j = 1, n
+        !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" }
+        do k = 1, n
+           !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop vector>" }
+           do l = 1, n
+           end do
+        end do
+     end do
+  end do
+
+  !$acc parallel loop gang ! { dg-message "Detected parallelism <acc loop gang>" }
+  do i = 1, n
+     !$acc loop worker ! { dg-message "Detected parallelism <acc loop worker>" }
+     do j = 1, n
+        !$acc loop vector ! { dg-message "Detected parallelism <acc loop vector>" }
+        do k = 1, n
+           !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop seq>" }
+           do l = 1, n
+           end do
+           !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" }
+           do l = 1, n
+           end do
+        end do
+     end do
+  end do
+  
+
+  !$acc parallel loop ! { dg-message "Detected parallelism <acc loop seq>" }
+  do i = 1, n
+     !$acc loop gang worker ! { dg-message "Detected parallelism <acc loop gang worker>" }
+     do j = 1, n
+        !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" }
+	do k = 1, n
+          !$acc loop vector ! { dg-message "Detected parallelism <acc loop vector>" }
+          do l = 1, n
+          end do
+       end do
+       !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop vector>" }
+       do l = 1, n
+       end do
+    end do
+    !$acc loop worker ! { dg-message "Detected parallelism <acc loop worker>" }
+    do j = 1, n
+       !$acc loop vector ! { dg-message "Detected parallelism <acc loop vector>" }
+       do k = 1, n
+       end do
+    end do
+  end do
+
+  !$acc parallel loop ! { dg-message "Detected parallelism <acc loop gang>" }
+  do i = 1, n
+     !$acc loop ! { dg-message "Detected parallelism <acc loop worker>" }
+     do j = 1, n
+        !$acc loop ! { dg-message "Detected parallelism <acc loop seq>" }
+        do k = 1, n
+           !$acc loop ! { dg-message "Detected parallelism <acc loop vector>" }
+           do l = 1, n
+           end do
+        end do
+     end do
+  end do
+end program test
+
+! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 41 }
+! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 52 }
+! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 78 }
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 87ac1b1..4c1c091 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -107,7 +107,7 @@ int vector_1 (int *ary, int size)
   {
 #pragma acc loop gang
     for (int jx = 0; jx < 1; jx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
       for (int ix = 0; ix < size; ix++)
 	ary[ix] = place ();
   }
@@ -123,7 +123,7 @@ int vector_2 (int *ary, int size)
   {
 #pragma acc loop worker
     for (int jx = 0; jx < size  / 64; jx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
       for (int ix = 0; ix < 64; ix++)
 	ary[ix + jx * 64] = place ();
   }
@@ -139,7 +139,7 @@ int worker_1 (int *ary, int size)
   {
 #pragma acc loop gang
     for (int kx = 0; kx < 1; kx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
       for (int jx = 0; jx <  size  / 64; jx++)
 #pragma acc loop vector
 	for (int ix = 0; ix < 64; ix++)
@@ -155,7 +155,7 @@ int gang_1 (int *ary, int size)
   
 #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } } */
   {
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int jx = 0; jx <  size  / 64; jx++)
 #pragma acc loop worker
       for (int ix = 0; ix < 64; ix++)
@@ -171,11 +171,11 @@ int gang_2 (int *ary, int size)
   
 #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
   {
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int kx = 0; kx < size / (32 * 32); kx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
       for (int jx = 0; jx <  32; jx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int ix = 0; ix < 32; ix++)
 	  ary[ix + jx * 32 + kx * 32 * 32] = place ();
   }
@@ -189,9 +189,9 @@ int gang_3 (int *ary, int size)
   
 #pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
   {
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int jx = 0; jx <  size  / 64; jx++)
-#pragma acc loop auto
+#pragma acc loop auto independent
       for (int ix = 0; ix < 64; ix++)
 	ary[ix + jx * 64] = place ();
   }
@@ -205,7 +205,7 @@ int gang_4 (int *ary, int size)
   
 #pragma acc parallel vector_length(32) copy(ary[0:size]) firstprivate (size)
   {
-#pragma acc loop auto
+#pragma acc loop auto independent
     for (int jx = 0; jx <  size; jx++)
       ary[jx] = place ();
   }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]