This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] auto partitioning


I've committed this patch to gomp4 branch.

It implements handling of the 'auto' clause on a loop. Such loops can be implicitly partitioned if they are (explicitly or implicitly) 'independent'. This patch walks the loop structure after explicit partitioning has been handled, and attempts to allocate a partitioning for such auto loops. If there's no available partitioning, a diagnostic is emitted.

Auto partitioning caused a failure of a collapse testcase. I considered this a latent bug and forced that testcase to retain the original behaviour of a 'seq' loop.

nathan
2015-10-19  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* omp-low.c (oacc_loop_auto_partitions): New.
	(oacc_loop_partition): Call it.

	gcc/testsuite/
	* gfortran.dg/goacc/routine-4.f90: Add diagnostic.
	* gfortran.dg/goacc/routine-5.f90: Add diagnostic.
	* c-c++-common/goacc-gomp/nesting-1.c: Add vector clause to routine.
	* c-c++-common/goacc/routine-6.c: Add diagnostic.
	* c-c++-common/goacc/routine-7.c: Add diagnostic.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Force
	serialization.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 228960)
+++ gcc/omp-low.c	(working copy)
@@ -16244,6 +16244,50 @@ oacc_loop_fixed_partitions (oacc_loop *l
   return has_auto;
 }
 
+/* Walk the OpenACC loop hierarchy, assigning a partitioning to loops
+   that are both auto and independent.  OUTER_MASK is the partitioning
+   this loop is contained within.  Return the cumulative partitioning
+   used by this loop, siblings and children.  */
+
+static unsigned
+oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
+{
+  unsigned inner_mask = 0;
+  bool noisy = true;
+
+#ifdef ACCEL_COMPILER
+  /* Quiet for now.  When device_type is supported, the device compiler
+     should be noisy if the loop parameters are device_type-specific.  */
+  noisy = false;
+#endif
+
+  if (loop->child)
+    inner_mask |= oacc_loop_auto_partitions (loop->child,
+					     outer_mask | loop->mask);
+
+  if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
+    {
+      unsigned this_mask = 0;
+      
+      /* Pick the innermost free partitioning.  */
+      this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
+      this_mask = (this_mask & -this_mask) >> 1;
+      this_mask &= ~outer_mask;
+
+      if (!this_mask && noisy)
+	warning_at (loop->loc, 0,
+		    "insufficient parallelism available to partition loop");
+
+      loop->mask = this_mask;
+    }
+  inner_mask |= loop->mask;
+  
+  if (loop->sibling)
+    inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
+  
+  return inner_mask;
+}
+
 /* Walk the OpenACC loop heirarchy to check and assign partitioning
    axes.  */
 
@@ -16255,7 +16299,8 @@ oacc_loop_partition (oacc_loop *loop, in
   if (fn_level >= 0)
     outer_mask = GOMP_DIM_MASK (fn_level) - 1;
 
-  oacc_loop_fixed_partitions (loop, outer_mask);
+  if (oacc_loop_fixed_partitions (loop, outer_mask))
+    oacc_loop_auto_partitions (loop, outer_mask);
 }
 
 /* Default launch dimension validator.  Force everything to 1.  A
Index: gcc/testsuite/gfortran.dg/goacc/routine-4.f90
===================================================================
--- gcc/testsuite/gfortran.dg/goacc/routine-4.f90	(revision 228960)
+++ gcc/testsuite/gfortran.dg/goacc/routine-4.f90	(working copy)
@@ -44,7 +44,7 @@ program main
   !
 
   !$acc parallel copy (a)
-  !$acc loop
+  !$acc loop ! { dg-warning "insufficient parallelism" }
   do i = 1, N
      call gang (a)
   end do
Index: gcc/testsuite/gfortran.dg/goacc/routine-5.f90
===================================================================
--- gcc/testsuite/gfortran.dg/goacc/routine-5.f90	(revision 228960)
+++ gcc/testsuite/gfortran.dg/goacc/routine-5.f90	(working copy)
@@ -87,7 +87,7 @@ subroutine seq (a)
   integer, intent (inout) :: a(N)
   integer :: i
 
-  !$acc loop
+  !$acc loop ! { dg-warning "insufficient parallelism" }
   do i = 1, N
      a(i) = a(i) - a(i)
   end do
Index: gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
===================================================================
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c	(revision 228960)
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c	(working copy)
@@ -20,7 +20,7 @@ f_acc_kernels (void)
   }
 }
 
-#pragma acc routine
+#pragma acc routine vector
 void
 f_acc_loop (void)
 {
Index: gcc/testsuite/c-c++-common/goacc/routine-7.c
===================================================================
--- gcc/testsuite/c-c++-common/goacc/routine-7.c	(revision 228960)
+++ gcc/testsuite/c-c++-common/goacc/routine-7.c	(working copy)
@@ -74,7 +74,7 @@ vector (int red)
 int
 seq (int red)
 {
-#pragma acc loop reduction (+:red)
+#pragma acc loop reduction (+:red) // { dg-warning "insufficient parallelism" }
   for (int i = 0; i < 10; i++)
     red ++;
 
Index: gcc/testsuite/c-c++-common/goacc/routine-6.c
===================================================================
--- gcc/testsuite/c-c++-common/goacc/routine-6.c	(revision 228960)
+++ gcc/testsuite/c-c++-common/goacc/routine-6.c	(working copy)
@@ -36,7 +36,7 @@ main ()
 #pragma acc parallel copy (red)
   {
     /* Independent/seq loop tests.  */
-#pragma acc loop reduction (+:red)
+#pragma acc loop reduction (+:red) // { dg-warning "insufficient parallelism" }
     for (int i = 0; i < 10; i++)
       red += gang ();
 
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c	(revision 228960)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c	(working copy)
@@ -9,7 +9,7 @@ main (void)
   int m1 = 4, m2 = -5, m3 = 17;
 
   #pragma acc parallel copy(l)
-  #pragma acc loop collapse(3) reduction(+:l)
+  #pragma acc loop seq collapse(3) reduction(+:l)
     for (i = -2; i < m1; i++)
       for (j = m2; j < -2; j++)
 	{

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]