This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] backport fix for PR70626


This patch backports the change in the way that 'acc parallel loop'
reductions are handled in trunk. Previously, the reduction clause was
associated only with the split acc loop. Now the reduction clause is
associated with both the loop and the parallel region. That's beneficial
because the gimplifier adds implicit copy clauses if necessary for any
reduction variable attached to a parallel construct.

I had to update reduction-2.f95 because of the way that gomp4 implements
device_type, which tends to rearrange the ordering of the clauses. Also,
libgomp.oacc-c++/template-reduction.C is broken in gomp4, so I had to
xfail it. Apparently, it exposed an async bug. My forthcoming patch,
which uses firstprivate pointers for subarrays, should fix it.

This patch has been committed to gomp-4_0-branch.

Cesar
2016-05-09  Cesar Philippidis  <cesar@codesourcery.com>

	Backport trunk r235651:
	2016-04-29  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/c-family/
	PR middle-end/70626
	* c-common.h (c_oacc_split_loop_clauses): Add boolean argument.
	* c-omp.c (c_oacc_split_loop_clauses): Use it to duplicate
	reduction clauses in acc parallel loops.

	gcc/c/
	PR middle-end/70626
	* c-parser.c (c_parser_oacc_loop): Update call to
	c_oacc_split_loop_clauses.
	(c_parser_oacc_kernels_parallel): Don't augment mask with
	OACC_LOOP_CLAUSE_MASK.

	gcc/cp/
	PR middle-end/70626
	* parser.c (cp_parser_oacc_loop): Update call to
	c_oacc_split_loop_clauses.
	(cp_parser_oacc_kernels_parallel): Don't augment mask with
	OACC_LOOP_CLAUSE_MASK.

	gcc/fortran/
	PR middle-end/70626
	* trans-openmp.c (gfc_trans_oacc_combined_directive): Duplicate
	the reduction clause in both parallel and loop directives.

	gcc/testsuite/
	PR middle-end/70626
	* c-c++-common/goacc/combined-reduction.c: New test.
	* gfortran.dg/goacc/reduction-2.f95: Add check for kernels reductions.

	libgomp/
	PR middle-end/70626
	* testsuite/libgomp.oacc-c++/template-reduction.C: Adjust test.
	* testsuite/libgomp.oacc-c-c++-common/combined-reduction.c: New test.
	* testsuite/libgomp.oacc-fortran/combined-reduction.f90: New test.

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index ef3493e..daa77f9 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1285,7 +1285,7 @@ extern bool c_omp_check_loop_iv (tree, tree, walk_tree_lh);
 extern bool c_omp_check_loop_iv_exprs (location_t, tree, tree, tree, tree,
 				       walk_tree_lh);
 extern tree c_finish_oacc_wait (location_t, tree, tree);
-extern tree c_oacc_split_loop_clauses (tree, tree *);
+extern tree c_oacc_split_loop_clauses (tree, tree *, bool);
 extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask,
 				 tree, tree *);
 extern tree c_omp_declare_simd_clauses_to_numbers (tree, tree);
diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index 4d3f7dc..614ee1f 100644
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -861,9 +861,10 @@ c_omp_check_loop_iv_exprs (location_t stmt_loc, tree declv, tree decl,
    #pragma acc parallel loop  */
 
 tree
-c_oacc_split_loop_clauses (tree clauses, tree *not_loop_clauses)
+c_oacc_split_loop_clauses (tree clauses, tree *not_loop_clauses,
+			   bool is_parallel)
 {
-  tree next, loop_clauses;
+  tree next, loop_clauses, nc;
 
   loop_clauses = *not_loop_clauses = NULL_TREE;
   for (; clauses ; clauses = next)
@@ -882,7 +883,23 @@ c_oacc_split_loop_clauses (tree clauses, tree *not_loop_clauses)
 	case OMP_CLAUSE_SEQ:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_PRIVATE:
+	  OMP_CLAUSE_CHAIN (clauses) = loop_clauses;
+	  loop_clauses = clauses;
+	  break;
+
+	  /* Reductions must be duplicated on both constructs.  */
 	case OMP_CLAUSE_REDUCTION:
+	  if (is_parallel)
+	    {
+	      nc = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
+				     OMP_CLAUSE_REDUCTION);
+	      OMP_CLAUSE_DECL (nc) = OMP_CLAUSE_DECL (clauses);
+	      OMP_CLAUSE_REDUCTION_CODE (nc)
+		= OMP_CLAUSE_REDUCTION_CODE (clauses);
+	      OMP_CLAUSE_CHAIN (nc) = *not_loop_clauses;
+	      *not_loop_clauses = nc;
+	    }
+
 	  OMP_CLAUSE_CHAIN (clauses) = loop_clauses;
 	  loop_clauses = clauses;
 	  break;
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 48fa26a..0f2d871 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -14012,6 +14012,8 @@ static tree
 c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name,
 		    omp_clause_mask mask, tree *cclauses, bool *if_p)
 {
+  bool is_parallel = ((mask >> PRAGMA_OACC_CLAUSE_REDUCTION) & 1) == 1;
+
   strcat (p_name, " loop");
   mask |= OACC_LOOP_CLAUSE_MASK;
 
@@ -14020,7 +14022,7 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name,
 					    cclauses == NULL);
   if (cclauses)
     {
-      clauses = c_oacc_split_loop_clauses (clauses, cclauses);
+      clauses = c_oacc_split_loop_clauses (clauses, cclauses, is_parallel);
       if (*cclauses)
 	*cclauses = c_finish_omp_clauses (*cclauses, C_ORT_ACC);
       if (clauses)
@@ -14128,8 +14130,6 @@ c_parser_oacc_kernels_parallel (location_t loc, c_parser *parser,
       if (strcmp (p, "loop") == 0)
 	{
 	  c_parser_consume_token (parser);
-	  mask |= OACC_LOOP_CLAUSE_MASK;
-
 	  tree block = c_begin_omp_parallel ();
 	  tree clauses;
 	  c_parser_oacc_loop (loc, parser, p_name, mask, &clauses, if_p);
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index f4b2603..f43c962 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -35581,6 +35581,8 @@ static tree
 cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
 		     omp_clause_mask mask, tree *cclauses, bool *if_p)
 {
+  bool is_parallel = ((mask >> PRAGMA_OACC_CLAUSE_REDUCTION) & 1) == 1;
+
   strcat (p_name, " loop");
   mask |= OACC_LOOP_CLAUSE_MASK;
 
@@ -35589,7 +35591,7 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
 					     cclauses == NULL);
   if (cclauses)
     {
-      clauses = c_oacc_split_loop_clauses (clauses, cclauses);
+      clauses = c_oacc_split_loop_clauses (clauses, cclauses, is_parallel);
       if (*cclauses)
 	*cclauses = finish_omp_clauses (*cclauses, C_ORT_ACC);
       if (clauses)
@@ -35697,8 +35699,6 @@ cp_parser_oacc_kernels_parallel (cp_parser *parser, cp_token *pragma_tok,
       if (strcmp (p, "loop") == 0)
 	{
 	  cp_lexer_consume_token (parser->lexer);
-	  mask |= OACC_LOOP_CLAUSE_MASK;
-
 	  tree block = begin_omp_parallel ();
 	  tree clauses;
 	  cp_parser_oacc_loop (parser, pragma_tok, p_name, mask, &clauses,
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 765e08b..9945365 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6703,7 +6703,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	      OMP_CLAUSE_DECL (c) = t;
 	    }
 	  if (TREE_CODE (t) == COMPONENT_REF
-	      && (ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP
+	      && allow_fields
 	      && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_)
 	    {
 	      if (type_dependent_expression_p (t))
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 6a50a7b..2327b13 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -2948,6 +2948,7 @@ bool gfc_check_intrinsic_standard (const gfc_intrinsic_sym*, const char**,
 				      bool, locus);
 
 /* match.c -- FIXME */
+gfc_omp_namelist *gfc_copy_omp_namelist (gfc_omp_namelist *);
 void gfc_free_iterator (gfc_iterator *, int);
 void gfc_free_forall_iterator (gfc_forall_iterator *);
 void gfc_free_alloc_list (gfc_alloc *);
diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index 2490f85..30d571d 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -4744,6 +4744,28 @@ gfc_free_omp_namelist (gfc_omp_namelist *name)
     }
 }
 
+/* Duplicate an omp namelist.  */
+
+gfc_omp_namelist *
+gfc_copy_omp_namelist (gfc_omp_namelist *name)
+{
+  gfc_omp_namelist *nl = NULL, *t;
+
+  for (; name; name = name->next)
+    {
+      t = gfc_get_omp_namelist ();
+      t->sym = name->sym;
+      t->expr = gfc_copy_expr (name->expr);;
+      t->u = name->u;
+      t->udr = NULL;
+      t->where = name->where;
+      t->next = nl;
+      nl = t;
+    }
+
+  return nl;
+}
+
 
 /* Match a NAMELIST statement.  */
 
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index bbb5364..1a91901 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -3642,7 +3642,8 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
 
 static void
 gfc_filter_oacc_combined_clauses (gfc_omp_clauses **orig_clauses,
-				  gfc_omp_clauses **loop_clauses)
+				  gfc_omp_clauses **loop_clauses,
+				  enum tree_code construct_code)
 {
   if (*orig_clauses == NULL)
     {
@@ -3683,8 +3684,9 @@ gfc_filter_oacc_combined_clauses (gfc_omp_clauses **orig_clauses,
   (*loop_clauses)->tile_list = (*orig_clauses)->tile_list;
   (*orig_clauses)->tile_list = NULL;
   (*loop_clauses)->lists[OMP_LIST_REDUCTION]
-    = (*orig_clauses)->lists[OMP_LIST_REDUCTION];
-  (*orig_clauses)->lists[OMP_LIST_REDUCTION] = NULL;
+    = gfc_copy_omp_namelist ((*orig_clauses)->lists[OMP_LIST_REDUCTION]);
+  if (construct_code == OACC_KERNELS)
+    (*orig_clauses)->lists[OMP_LIST_REDUCTION] = NULL;
 #if 0
   (*loop_clauses)->lists[OMP_LIST_PRIVATE]
     = (*orig_clauses)->lists[OMP_LIST_PRIVATE];
@@ -3693,7 +3695,8 @@ gfc_filter_oacc_combined_clauses (gfc_omp_clauses **orig_clauses,
   (*loop_clauses)->device_types = (*orig_clauses)->device_types;
 
   gfc_filter_oacc_combined_clauses (&(*orig_clauses)->dtype_clauses,
-				    &(*loop_clauses)->dtype_clauses);
+				    &(*loop_clauses)->dtype_clauses,
+				    construct_code);
 }
 
 /* Combined OpenACC parallel loop and kernels loop. */
@@ -3723,7 +3726,8 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
 
   gfc_start_block (&block);
 
-  gfc_filter_oacc_combined_clauses (&code->ext.omp_clauses, &loop_clauses);
+  gfc_filter_oacc_combined_clauses (&code->ext.omp_clauses, &loop_clauses,
+				    construct_code);
   oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
 					code->loc);
 
diff --git a/gcc/testsuite/c-c++-common/goacc/combined-reduction.c b/gcc/testsuite/c-c++-common/goacc/combined-reduction.c
new file mode 100644
index 0000000..ecf23f5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/combined-reduction.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenacc -fdump-tree-gimple" } */
+
+#include <assert.h>
+
+int
+main ()
+{
+  int i, v1 = 0, n = 100;
+
+#pragma acc parallel loop reduction(+:v1)
+  for (i = 0; i < n; i++)
+    v1++;
+
+  assert (v1 == n);
+
+#pragma acc kernels loop reduction(+:v1)
+  for (i = 0; i < n; i++)
+    v1++;
+
+  assert (v1 == n);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "omp target oacc_parallel reduction.+:v1. map.tofrom:v1" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_kernels map.force_tofrom:n .len: 4.. map.force_tofrom:v1 .len: 4.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/pr70688.c b/gcc/testsuite/c-c++-common/goacc/pr70688.c
new file mode 100644
index 0000000..37c3885
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/pr70688.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+
+const int n = 100;
+
+int
+private_reduction ()
+{
+  int i, r;
+
+  #pragma acc parallel
+  #pragma acc loop private (r) reduction (+:r)
+  for (i = 0; i < 100; i++)
+    r += 10;
+
+  return r;
+}
+
+int
+parallel_reduction ()
+{
+  int sum = 0;
+  int dummy = 0;
+
+#pragma acc data copy (dummy)
+  {
+#pragma acc parallel num_gangs (10) copy (sum) reduction (+:sum) /* { dg-warning "region is gang partitioned" } */
+    {
+      int v = 5;
+      sum += 10 + v;
+    }
+  }
+
+  return sum;
+}
+
+int
+main ()
+{
+  int i, s = 0;
+
+#pragma acc parallel num_gangs (10) copy (s) reduction (+:s) /* { dg-warning "region is gang partitioned" } */
+  for (i = 0; i < n; i++)
+    s += i+1;
+
+#pragma acc parallel num_gangs (10) reduction (+:s) copy (s) /* { dg-warning "region is gang partitioned" } */
+  for (i = 0; i < n; i++)
+    s += i+1;
+
+  return 0;
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 08d25ca..00b8822 100644
--- a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -165,5 +165,5 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" { xfail *-*-* } } }
+! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. reduction..:y." 2 "gimple" { xfail *-*-* } } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95
index 91a4b2c..d083c7b 100644
--- a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95
@@ -1,3 +1,4 @@
+! { dg-do compile } 
 ! { dg-additional-options "-fdump-tree-gimple" }
 
 subroutine foo ()
@@ -14,5 +15,8 @@ subroutine foo ()
   !$acc end kernels loop
 end subroutine
 
-! { dg-final { scan-tree-dump-times "target oacc_parallel firstprivate.a." 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "target oacc_parallel reduction..:a. map.tofrom.a." 1 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop reduction..:a. private.p." 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "target oacc_kernels map.force_tofrom:a .len: 4.." 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "acc loop reduction..:a. private.k." 1 "gimple" } }
+
diff --git a/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C b/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
index fb5924c..e2a0ca0 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
@@ -1,3 +1,7 @@
+// TODO: async_sum is currently failing on nvptx.
+
+// { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } }
+
 const int n = 100;
 
 // Check explicit template copy map
@@ -7,7 +11,7 @@ sum (T array[])
 {
    T s = 0;
 
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (array[0:n])
   for (int i = 0; i < n; i++)
     s += array[i];
 
@@ -25,7 +29,7 @@ sum ()
   for (int i = 0; i < n; i++)
     array[i] = i+1;
 
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s)
   for (int i = 0; i < n; i++)
     s += array[i];
 
@@ -43,7 +47,7 @@ async_sum (T array[])
    for (int i = 0; i < n; i++)
      array[i] = i+1;
 
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) async wait (1)
   for (int i = 0; i < n; i++)
     s += array[i];
 
@@ -59,7 +63,7 @@ async_sum (int c)
 {
    T s = 0;
 
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1)
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) firstprivate (c) async wait (1)
   for (int i = 0; i < n; i++)
     s += i+c;
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/combined-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/combined-reduction.c
new file mode 100644
index 0000000..b5ce4ed
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/combined-reduction.c
@@ -0,0 +1,23 @@
+/* Test a combined acc parallel loop reduction.  */
+
+/* { dg-do run } */
+
+#include <assert.h>
+
+int
+main ()
+{
+  int i, v1 = 0, v2 = 0, n = 100;
+
+#pragma acc parallel loop reduction(+:v1, v2)
+  for (i = 0; i < n; i++)
+    {
+      v1++;
+      v2++;
+    }
+
+  assert (v1 == n);
+  assert (v2 == n);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/combined-reduction.f90 b/libgomp/testsuite/libgomp.oacc-fortran/combined-reduction.f90
new file mode 100644
index 0000000..d3a61b5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/combined-reduction.f90
@@ -0,0 +1,19 @@
+! Test a combined acc parallel loop reduction.
+
+! { dg-do run }
+
+program test
+  implicit none
+  integer i, n, var
+
+  n = 100
+  var = 0
+
+  !$acc parallel loop reduction(+:var)
+  do i = 1, 100
+     var = var + 1
+  end do
+  !$acc end parallel loop
+
+  if (var .ne. n) call abort
+end program test

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]