This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[nvptx, PR85204] Fix neutering of bb with only cond jump


Hi,

When compiling the test-case in the patch, the following ptx code is generated:
...
$L4:
  @ %r91 bra.uni $L24;
  selp.u32 %r95,1,0,%r80;
  st.shared.u32 [__worker_bcast],%r95;
 $L25:
 $L24:
  @ %r92 bra $L25;
...

There's an eternal loop starting at the last insn, and unsurprisingly the test-case hangs.

The last insn is a vector neutering branch, which should have been inserted after the worker neutering branch (the first insn).

In other words, we want:
...
 $L4:
   @ %r91 bra.uni $L24;
+  @ %r92 bra $L25;
   selp.u32 %r95,1,0,%r80;
   st.shared.u32 [__worker_bcast],%r95;
  $L25:
  $L24:
-  @ %r92 bra $L25;
...

This minimal stage4 patch fixes this problem. [ I filed a PR85223 "[nvptx] nvptx_single needs rewrite" for a stage1 rewrite of nvptx_single. ]

Build x86_64 with nvptx accelerator, and tested libgomp.

Committed to stage4 trunk.

Thanks,
- Tom
[nvptx] Fix neutering of bb with only cond jump

2018-04-05  Tom de Vries  <tom@codesourcery.com>

	PR target/85204
	* config/nvptx/nvptx.c (nvptx_single): Fix neutering of bb with only
	cond jump.

	* testsuite/libgomp.oacc-c-c++-common/broadcast-1.c: New test.

---
 gcc/config/nvptx/nvptx.c                           |  6 ++-
 .../libgomp.oacc-c-c++-common/broadcast-1.c        | 49 ++++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index b2b150f..a9a3053 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4048,6 +4048,7 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
   /* Insert the vector test inside the worker test.  */
   unsigned mode;
   rtx_insn *before = tail;
+  rtx_insn *neuter_start = NULL;
   for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
     if (GOMP_DIM_MASK (mode) & skip_mask)
       {
@@ -4065,7 +4066,10 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
 	  br = gen_br_true (pred, label);
 	else
 	  br = gen_br_true_uni (pred, label);
-	emit_insn_before (br, head);
+	if (neuter_start)
+	  neuter_start = emit_insn_after (br, neuter_start);
+	else
+	  neuter_start = emit_insn_before (br, head);
 
 	LABEL_NUSES (label)++;
 	if (tail_branch)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c
new file mode 100644
index 0000000..ca0d37b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c
@@ -0,0 +1,49 @@
+/* Ensure that worker-vector state conditional expressions are
+   properly handled by the nvptx backend.  */
+
+#include <assert.h>
+#include <math.h>
+
+
+#define N 1024
+
+int A[N][N] ;
+
+void test(int x)
+{
+#pragma acc parallel  num_gangs(16) num_workers(4) vector_length(32) copyout(A)
+  {
+#pragma acc loop gang
+    for(int j=0;j<N;j++)
+      {
+	if (x==1)
+	  {
+#pragma acc loop worker vector
+	    for(int i=0;i<N;i++)
+	      A[i][j] = 1;
+	  }
+	else
+	  {
+#pragma acc loop worker vector
+	    for(int i=0;i<N;i++)
+	      A[i][j] = -1;
+	  }
+      }
+  }
+}
+
+
+int main(void)
+{
+  test (0);
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      assert (A[i][j] == -1);
+
+  test (1);
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      assert (A[i][j] == 1);
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]