[Bug target/84952] [nvptx] bar.sync generated in divergent code
vries at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Mon Mar 19 15:58:00 GMT 2018
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84952
--- Comment #1 from Tom de Vries <vries at gcc dot gnu.org> ---
For stage1, I have the following patch set:
...
git log --pretty=%s --reverse HEAD^^^..HEAD | cat -n
1 Fix branch-around-nothing
2 Fix bar.sync position
3 Verify bar.sync position
...
where patch 1 is a fix for PR84025, patch 2 is a fix for this PR, and patch 3
adds verification of the bar.sync position for this PR.
Using this patch set, we generate (edited for readability):
...
// BEGIN PREAMBLE
// Module-level directives: PTX ISA version, target architecture, and the
// address size used by the rest of this listing.
.version 3.1
.target sm_30
.address_size 64
// END PREAMBLE
// BEGIN FUNCTION DECL: main$_omp_fn$0
// Forward declaration of the kernel entry point; it takes a single 64-bit
// parameter, %in_ar0.
.entry main$_omp_fn$0 (.param .u64 %in_ar0);
//:FUNC_MAP "main$_omp_fn$0", 0x1, 0x20, 0x20
// BEGIN VAR DEF: __worker_bcast
// 8-byte, 8-byte-aligned buffer in the .shared state space.  The kernel
// definition in this listing stores one 64-bit value into it before
// "bar.sync 0" and reloads that value afterwards.
.shared .align 8 .u8 __worker_bcast[8];
// BEGIN FUNCTION DEF: main$_omp_fn$0
// Kernel entry point.  Outline of what the instructions below do:
//   1. Threads with %tid.x == 0 and %tid.y == 0 store the incoming
//      parameter value into __worker_bcast.
//   2. Every path then reaches "bar.sync 0".
//   3. Threads with %tid.x == 0 reload the value from __worker_bcast and,
//      when %tid.y <= 9, store %tid.y as a 32-bit element at index %tid.y
//      of the array whose base address is read through the reloaded pointer.
//   4. Every path then reaches "bar.sync 1" and returns.
// Both bar.sync instructions are placed after the labels at which the
// skipping branches rejoin, i.e. outside the conditionally executed regions.
.entry main$_omp_fn$0 (.param .u64 %in_ar0)
{
.reg .u64 %ar0;
// Copy the kernel parameter into a register.
ld.param.u64 %ar0,[%in_ar0];
.reg .u32 %r24;
.reg .u64 %r25;
.reg .pred %r26;
.reg .u64 %r27;
.reg .u64 %r28;
.reg .u64 %r29;
.reg .u64 %r30;
.reg .u64 %r31;
.reg .u64 %r32;
.reg .pred %r33;
.reg .pred %r34;
// %r34 = (%tid.y != 0)
{
.reg .u32 %y;
mov.u32 %y,%tid.y;
setp.ne.u32 %r34,%y,0;
}
// %r33 = (%tid.x != 0)
{
.reg .u32 %x;
mov.u32 %x,%tid.x;
setp.ne.u32 %r33,%x,0;
}
// Threads with %tid.y != 0 skip the store below; this branch is emitted
// with the .uni suffix.
@ %r34 bra.uni $L6;
// Threads with %tid.x != 0 skip the store as well (plain bra, no .uni).
@ %r33 bra $L7;
// Reached only when %tid.x == 0 and %tid.y == 0: write the parameter value
// to __worker_bcast through a generic address obtained with cvta.shared.
mov.u64 %r25,%ar0;
// fork 2;
cvta.shared.u64 %r32,__worker_bcast;
st.u64 [%r32],%r25;
// Both skip paths rejoin here, ahead of the barrier, so "bar.sync 0" is
// executed on every path through the code above.
$L7:
$L6:
bar.sync 0;
// forked 2;
// Threads with %tid.x != 0 go straight to the second barrier.
@ %r33 bra $L5;
// Reload the 64-bit value that was stored to __worker_bcast.
cvta.shared.u64 %r31,__worker_bcast;
ld.u64 %r25,[%r31];
// %r24 = %tid.y; %r26 = (%r24 <= 9), signed compare.
mov.u32 %r24,%tid.y;
setp.le.s32 %r26,%r24,9;
@ %r26 bra $L2;
bra $L3;
$L2:
// %r27 = 64-bit value loaded from the address held in %r25.
ld.u64 %r27,[%r25];
// %r29 = sign-extended %r24 shifted left by 2 (a byte offset of 4 * %r24).
cvt.s64.s32 %r28,%r24;
shl.b64 %r29,%r28,2;
// Store %r24 as a 32-bit value at address %r27 + %r29.
add.u64 %r30,%r27,%r29;
st.u32 [%r30],%r24;
$L3:
// joining 2;
// The skip path from "@ %r33 bra $L5" rejoins here, ahead of the barrier,
// so "bar.sync 1" is executed on every path.
$L5:
bar.sync 1;
// join 2;
ret;
}
...
More information about the Gcc-bugs
mailing list