This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, 2/2][nvptx, PR83589] Workaround for branch-around-nothing JIT bug
- From: Thomas Schwinge <thomas at codesourcery dot com>
- To: <gcc-patches at gcc dot gnu dot org>
- Cc: Tom de Vries <vries at gcc dot gnu dot org>
- Date: Thu, 17 May 2018 08:34:31 +0200
- Subject: Re: [PATCH, 2/2][nvptx, PR83589] Workaround for branch-around-nothing JIT bug
- References: <34fb1d00-dc5d-04f2-d601-ee6fe710ac3b@mentor.com> <20180124110305.GZ2063@tucnak> <4909fc79-df36-16b1-78d0-e9cd9da4080e@mentor.com>
Hi!
On Wed, 24 Jan 2018 14:56:28 +0100, Tom de Vries <Tom_deVries@mentor.com> wrote:
> On 01/24/2018 12:03 PM, Jakub Jelinek wrote:
> > On Wed, Jan 24, 2018 at 11:41:45AM +0100, Tom de Vries wrote:
> >> +/* Insert a dummy ptx insn when encountering a branch to a label with no ptx
> >> + insn inbetween the branch and the label. This works around a JIT bug
> >> + observed at driver version 384.111, at -O0 for sm_50. */
> [...] committed as attached.
I pushed this to openacc-gcc-7-branch, including its later bug fix
"[nvptx] Fix prevent_branch_around_nothing":
commit 4cad9fa6b7a85e01da260d0b9e20de30d53f1881
Author: Tom de Vries <tom@codesourcery.com>
Date: Wed Jan 24 13:52:12 2018 +0000
[nvptx, PR83589] Workaround for branch-around-nothing JIT bug
gcc/
PR target/83589
* config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_2): Define to 1.
(nvptx_pc_set, nvptx_condjump_label): New function. Copy from jump.c.
Add strict parameter.
(prevent_branch_around_nothing): Insert dummy insn between branch to
label and label with no ptx insn inbetween.
* config/nvptx/nvptx.md (define_insn "fake_nop"): New insn.
PR target/84954
* config/nvptx/nvptx.c (prevent_branch_around_nothing): Also update
seen_label if seen_label is already set.
libgomp/
PR target/83589
* testsuite/libgomp.oacc-c-c++-common/pr83589.c: New test.
(cherry picked from trunk r257016 and r258674)
---
gcc/ChangeLog.openacc | 16 ++++
gcc/config/nvptx/nvptx.c | 93 ++++++++++++++++++++++
gcc/config/nvptx/nvptx.md | 9 +++
libgomp/ChangeLog.openacc | 5 ++
.../testsuite/libgomp.oacc-c-c++-common/pr83589.c | 21 +++++
5 files changed, 144 insertions(+)
diff --git gcc/ChangeLog.openacc gcc/ChangeLog.openacc
index 172f1fc..4d13080 100644
--- gcc/ChangeLog.openacc
+++ gcc/ChangeLog.openacc
@@ -1,3 +1,19 @@
+2018-03-20 Tom de Vries <tom@codesourcery.com>
+
+ PR target/84954
+ * config/nvptx/nvptx.c (prevent_branch_around_nothing): Also update
+ seen_label if seen_label is already set.
+
+2018-01-24 Tom de Vries <tom@codesourcery.com>
+
+ PR target/83589
+ * config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_2): Define to 1.
+ (nvptx_pc_set, nvptx_condjump_label): New function. Copy from jump.c.
+ Add strict parameter.
+ (prevent_branch_around_nothing): Insert dummy insn between branch to
+ label and label with no ptx insn inbetween.
+ * config/nvptx/nvptx.md (define_insn "fake_nop"): New insn.
+
2018-05-09 Tom de Vries <tom@codesourcery.com>
backport from trunk:
diff --git gcc/config/nvptx/nvptx.c gcc/config/nvptx/nvptx.c
index d659ab4..f636d8d 100644
--- gcc/config/nvptx/nvptx.c
+++ gcc/config/nvptx/nvptx.c
@@ -77,6 +77,7 @@
#include "target-def.h"
#define WORKAROUND_PTXJIT_BUG 1
+#define WORKAROUND_PTXJIT_BUG_2 1
#define WORKAROUND_PTXJIT_BUG_3 1
/* Define dimension sizes for known hardware. */
@@ -4636,6 +4637,94 @@ populate_offload_attrs (offload_attrs *oa)
oa->max_workers = oa->num_workers;
}
+#if WORKAROUND_PTXJIT_BUG_2
+/* Variant of pc_set that only requires JUMP_P (INSN) if STRICT. This variant
+ is needed in the nvptx target because the branches generated for
+ parititioning are NONJUMP_INSN_P, not JUMP_P. */
+
+static rtx
+nvptx_pc_set (const rtx_insn *insn, bool strict = true)
+{
+ rtx pat;
+ if ((strict && !JUMP_P (insn))
+ || (!strict && !INSN_P (insn)))
+ return NULL_RTX;
+ pat = PATTERN (insn);
+
+ /* The set is allowed to appear either as the insn pattern or
+ the first set in a PARALLEL. */
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == PC)
+ return pat;
+
+ return NULL_RTX;
+}
+
+/* Variant of condjump_label that only requires JUMP_P (INSN) if STRICT. */
+
+static rtx
+nvptx_condjump_label (const rtx_insn *insn, bool strict = true)
+{
+ rtx x = nvptx_pc_set (insn, strict);
+
+ if (!x)
+ return NULL_RTX;
+ x = SET_SRC (x);
+ if (GET_CODE (x) == LABEL_REF)
+ return x;
+ if (GET_CODE (x) != IF_THEN_ELSE)
+ return NULL_RTX;
+ if (XEXP (x, 2) == pc_rtx && GET_CODE (XEXP (x, 1)) == LABEL_REF)
+ return XEXP (x, 1);
+ if (XEXP (x, 1) == pc_rtx && GET_CODE (XEXP (x, 2)) == LABEL_REF)
+ return XEXP (x, 2);
+ return NULL_RTX;
+}
+
+/* Insert a dummy ptx insn when encountering a branch to a label with no ptx
+ insn inbetween the branch and the label. This works around a JIT bug
+ observed at driver version 384.111, at -O0 for sm_50. */
+
+static void
+prevent_branch_around_nothing (void)
+{
+ rtx_insn *seen_label = NULL;
+ for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn) && condjump_p (insn))
+ {
+ seen_label = label_ref_label (nvptx_condjump_label (insn, false));
+ continue;
+ }
+
+ if (seen_label == NULL)
+ continue;
+
+ if (NOTE_P (insn) || DEBUG_INSN_P (insn))
+ continue;
+
+ if (INSN_P (insn))
+ switch (recog_memoized (insn))
+ {
+ case CODE_FOR_nvptx_fork:
+ case CODE_FOR_nvptx_forked:
+ case CODE_FOR_nvptx_joining:
+ case CODE_FOR_nvptx_join:
+ continue;
+ default:
+ seen_label = NULL;
+ continue;
+ }
+
+ if (LABEL_P (insn) && insn == seen_label)
+ emit_insn_before (gen_fake_nop (), insn);
+
+ seen_label = NULL;
+ }
+ }
+#endif
+
#ifdef WORKAROUND_PTXJIT_BUG_3
/* Insert two membar.cta insns inbetween two subsequent bar.sync insns. This
works around a hang observed at driver version 390.48 for sm_50. */
@@ -4754,6 +4843,10 @@ nvptx_reorg (void)
if (TARGET_UNIFORM_SIMT)
nvptx_reorg_uniform_simt ();
+#if WORKAROUND_PTXJIT_BUG_2
+ prevent_branch_around_nothing ();
+#endif
+
#ifdef WORKAROUND_PTXJIT_BUG_3
workaround_barsyncs ();
#endif
diff --git gcc/config/nvptx/nvptx.md gcc/config/nvptx/nvptx.md
index ab63cf8..9e3001c 100644
--- gcc/config/nvptx/nvptx.md
+++ gcc/config/nvptx/nvptx.md
@@ -981,6 +981,15 @@
""
"exit;")
+(define_insn "fake_nop"
+ [(const_int 2)]
+ ""
+ "{
+ .reg .u32 %%nop_src;
+ .reg .u32 %%nop_dst;
+ mov.u32 %%nop_dst, %%nop_src;
+ }")
+
(define_insn "return"
[(return)]
""
diff --git libgomp/ChangeLog.openacc libgomp/ChangeLog.openacc
index d1cc107..add3b24 100644
--- libgomp/ChangeLog.openacc
+++ libgomp/ChangeLog.openacc
@@ -1,3 +1,8 @@
+2018-01-24 Tom de Vries <tom@codesourcery.com>
+
+ PR target/83589
+ * testsuite/libgomp.oacc-c-c++-common/pr83589.c: New test.
+
2018-05-09 Cesar Philippidis <cesar@codesourcery.com>
* libgomp.oacc-fortran/deviceptr-1.f90: Remove xfail for -O2 and -O3.
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c
new file mode 100644
index 0000000..a6ed5cf
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var GOMP_NVPTX_JIT "-O0" } */
+
+#define n 32
+
+int
+main (void)
+{
+ int arr_a[n];
+
+#pragma acc parallel copyout(arr_a) num_gangs(1) num_workers(1) vector_length(32)
+ {
+ #pragma acc loop vector
+ for (int m = 0; m < 32; m++)
+ ;
+
+ #pragma acc loop vector
+ for (int m = 0; m < 32; m++)
+ arr_a[m] = 0;
+ }
+}
Grüße
Thomas