[Bug target/84954] New: [nvptx] prevent_branch_around_nothing doesn't trigger often enough

vries at gcc dot gnu.org gcc-bugzilla@gcc.gnu.org
Mon Mar 19 16:50:00 GMT 2018


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84954

            Bug ID: 84954
           Summary: [nvptx] prevent_branch_around_nothing doesn't trigger
                    often enough
           Product: gcc
           Version: 8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vries at gcc dot gnu.org
  Target Milestone: ---

[ Encountered in PR84952 - [nvptx] bar.sync generated in divergent code ]

Consider testcase workers.c:
...
int
main (void)
{
  int a[10];
#pragma acc parallel loop worker
  for (int i = 0; i < 10; i++)
    a[i] = i;

  return 0;
}
...

At -O2, we generate (edited for readability):
...
// BEGIN PREAMBLE
.version 3.1
.target sm_30
.address_size 64
// END PREAMBLE

// BEGIN FUNCTION DECL: main$_omp_fn$0
.entry main$_omp_fn$0 (.param .u64 %in_ar0);

//:FUNC_MAP "main$_omp_fn$0", 0x1, 0x20, 0x20

// BEGIN VAR DEF: __worker_bcast
.shared .align 8 .u8 __worker_bcast[8];

// BEGIN FUNCTION DEF: main$_omp_fn$0
.entry main$_omp_fn$0 (.param .u64 %in_ar0)
{
  .reg .u64 %ar0;
  ld.param.u64 %ar0,[%in_ar0];
  .reg .u32 %r24;
  .reg .u64 %r25;
  .reg .pred %r26;
  .reg .u64 %r27;
  .reg .u64 %r28;
  .reg .u64 %r29;
  .reg .u64 %r30;
  .reg .u64 %r31;
  .reg .u64 %r32;
  .reg .pred %r33;
  .reg .pred %r34;

  {
    .reg .u32 %y;
    mov.u32 %y,%tid.y;
    setp.ne.u32 %r34,%y,0;
  }

  {
    .reg .u32 %x;
    mov.u32 %x,%tid.x;
    setp.ne.u32 %r33,%x,0;
  }

  @ %r34 bra.uni $L6;
  @ %r33 bra $L7;
  mov.u64 %r25,%ar0;
  // fork 2;
  cvta.shared.u64 %r32,__worker_bcast;
  st.u64 [%r32],%r25;
 $L7:
 $L6:

  @ %r33 bra $L5;
  // forked 2;
  bar.sync 0;
  cvta.shared.u64 %r31,__worker_bcast;
  ld.u64 %r25,[%r31];
  mov.u32 %r24,%tid.y;
  setp.le.s32 %r26,%r24,9;
  @ %r26 bra $L2;
  bra $L3;
 $L2:
  ld.u64 %r27,[%r25];
  cvt.s64.s32 %r28,%r24;
  shl.b64 %r29,%r28,2;
  add.u64 %r30,%r27,%r29;
  st.u32 [%r30],%r24;
 $L3:
  bar.sync 1;
  // joining 2;
 $L5:

  @ %r34 bra.uni $L8;
  @ %r33 bra $L9;
  // join 2;
 $L9:
 $L8:

  ret;
}
...

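In the final block, the conditional branch "@ %r33 bra $L9" jumps over nothing
but the "// join 2" comment: there is no PTX instruction between the branch and
its target label $L9. prevent_branch_around_nothing should be generating a nop
next to the "join 2", but it does not trigger here.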


More information about the Gcc-bugs mailing list