This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] reduction bug fix


There is a reduction bug exposed in the following parallel block.

#pragma acc parallel copy(b[0:3][0:3]) copy(l)
    {
#pragma acc loop collapse(2) reduction(+:l)
	for (int i = 0; i < 2; i++)
	  for (int j = 0; j < 2; j++)
	    if (b[i][j] != 16)
		  l += 1;
    }

Because i and j are local, the collapsed loop is lowered into something
as follows

#pragma acc parallel ...
{
  int i
  {
     int j
     {
        #pragma acc loop ...

This is a problem because initialize_reduction_data originally expected
a GIMPLE_BIND at the very beginning of a parallel block. I also made the
assumption that collapse would create a single bind.

Looking at this some more, I may need revise initialize_reduction_data
to scan for multiple acc loops within a parallel block. E.g.,

#pragma acc parallel
{
#pragma acc loop reduction (+:foo)
  {
  }
...
#pragma acc loop reduction (-:bar)
  {
  }
}

I'll address this issue in a follow up patch.

This patch also includes a runtime test case. I won't apply it to
gomp-4_0-branch just yet. But I wanted to demonstrate a test case
nonetheless. Also, note that part of this patch also changes a comment.
I found some typos in the original comment, so I took the opportunity to
fix them, I hope.

Is this OK for gomp-4_0-branch?

Thanks,
Cesar
2014-10-02  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* omp-low.c (lower_reduction_clauses): Clarify comment.
	(process_reduction_data): Scan for nonempty bind statements at
	the beginning of parallel blocks.

	libgomp/
	* testsuite/libgomp.oacc-c/collapse-4.c: New test.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 3bb6a24..5c452c6 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -4434,10 +4434,10 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
 	  else
 	    {
 	  /* The atomic add at the end of the sum creates unnecessary
-	     write contention on accelerators.  To work around that,
-	     create an array or vector_length and assign an element to
-	     each thread.  Later, in lower_omp_for (for openacc), the
-	     values of array will be combined.  */
+	     write contention on accelerators.  To work around this,
+	     create an array to store the partial reductions. Later, in
+	     lower_omp_for (for openacc), the values of array will be
+	     combined.  */
 
 	  tree t = NULL_TREE, array, nthreads;
 	  tree type = get_base_type (var);
@@ -10140,11 +10140,20 @@ process_reduction_data (gimple_seq *body, gimple_seq *in_stmt_seqp,
   gimple stmt;
 
   /* A collapse clause may have inserted a new bind block.  */
-  stmt = gimple_seq_first (*body);
-  if (stmt && gimple_code (stmt) == GIMPLE_BIND)
+  gsi = gsi_start (*body);
+  while (!gsi_end_p (gsi))
     {
-      inner = gimple_bind_body (gimple_seq_first (*body));
-      body = &inner;
+      stmt = gsi_stmt (gsi);
+      if (gimple_code (stmt) == GIMPLE_BIND)
+	{
+	  inner = gimple_bind_body (stmt);
+	  body = &inner;
+	  gsi = gsi_start (*body);
+	}
+      else if (gimple_code (stmt) == GIMPLE_OMP_FOR)
+	break;
+      else
+	gsi_next (&gsi);
     }
 
   for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
diff --git a/libgomp/testsuite/libgomp.oacc-c/collapse-4.c b/libgomp/testsuite/libgomp.oacc-c/collapse-4.c
new file mode 100644
index 0000000..b08115a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/collapse-4.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=c99" } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+  int i2, l = 0, r = 0;
+  int a[3][3][3];
+  int b[3][3];
+
+printf("&a %p\n", &a[0][0][0]);
+printf("&i2 %p\n", &i2);
+printf("&l %p\n", &l);
+printf("&r %p\n", &r);
+
+  memset (a, '\0', sizeof (a));
+  memset (b, '\0', sizeof (b));
+
+
+#pragma acc parallel copy(b[0:3][0:3]) copy(l)
+    {
+#pragma acc loop collapse(2) reduction(+:l)
+	for (int i = 0; i < 2; i++)
+	  for (int j = 0; j < 2; j++)
+	    if (b[i][j] != 16)
+		  l += 1;
+    }
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]