[Bug c++/92672] New: OpenMP shared clause not respected without pointer to the variable
me at cimba dot li
gcc-bugzilla@gcc.gnu.org
Tue Nov 26 11:41:00 GMT 2019
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92672
Bug ID: 92672
Summary: OpenMP shared clause not respected without pointer to
the variable
Product: gcc
Version: 9.2.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: me at cimba dot li
Target Milestone: ---
It seems that if the address of a stack variable is never taken, it is treated
by child OpenMP tasks with firstprivate semantics, even though it is declared
as shared.
The following example compiles to different code (that behaves differently) if
"&i;" is removed:
#include <iostream>
#include <omp.h>
// Reproducer: a child task prints i after the generating task has stored 2
// into it. The lock orders the two: the task body cannot get past its
// omp_set_lock() call until the generating task has released the lock.
int main()
{
omp_lock_t lock;
omp_init_lock(&lock);
int i = 1;
// Discarded address-of expression: its only effect is that the address of i
// is taken somewhere in the function.
&i;
#pragma omp parallel shared(i)
#pragma omp single
{
// Take the lock before creating the task so the task body blocks below.
omp_set_lock(&lock);
#pragma omp task shared(i)
{
// enter task, then suspend until i = 2
omp_set_lock(&lock);
// i is declared shared on the task, so this read is meant to see the
// store "i = 2" made by the generating task before it released the lock.
std::cout << i;
omp_unset_lock(&lock);
}
i = 2;
// unset lock after i is set to 2 and child task is created
// child task is possibly started and suspended at this point
omp_unset_lock(&lock);
}
omp_destroy_lock(&lock);
std::cout << std::endl;
return 0;
}
- With "&i", the code prints 2. The task’s verbose assembly is as follows:
.LFE2010:
.size main._omp_fn.0, .-main._omp_fn.0
# main._omp_fn.1 as emitted for the build that contains "&i;".
# The function receives one argument in %rdi, annotated .omp_data_i.
.type main._omp_fn.1, @function
main._omp_fn.1:
.LFB2011:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
.cfi_lsda 0x3,.LLSDA2011
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $16, %rsp #,
# Save the incoming .omp_data_i argument in the stack frame.
movq %rdi, -8(%rbp) # .omp_data_i, .omp_data_i
# test.cc:18: omp_set_lock(&lock);
movq -8(%rbp), %rax # .omp_data_i, tmp86
movq (%rax), %rax # .omp_data_i_2(D)->lock, _3
movq %rax, %rdi # _3,
call omp_set_lock #
# test.cc:19: std::cout << i;
# The field at offset 8 of .omp_data_i is loaded as a 64-bit value and then
# dereferenced to obtain i; both loads come after the omp_set_lock call above.
movq -8(%rbp), %rax # .omp_data_i, tmp87
movq 8(%rax), %rax # .omp_data_i_2(D)->i, _6
movl (%rax), %eax # *_6, i.0_7
# test.cc:19: std::cout << i;
movl %eax, %esi # i.0_7,
movl $_ZSt4cout, %edi #,
call _ZNSolsEi #
# test.cc:20: omp_unset_lock(&lock);
movq -8(%rbp), %rax # .omp_data_i, tmp88
movq (%rax), %rax # .omp_data_i_2(D)->lock, _9
movq %rax, %rdi # _9,
call omp_unset_lock #
# test.cc:15: #pragma omp task shared(i)
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
- Without "&i", the code prints 1. The task’s verbose assembly is as follows:
.LFE2010:
.size main._omp_fn.0, .-main._omp_fn.0
# main._omp_fn.1 as emitted for the build without "&i;".
# The function receives one argument in %rdi, annotated .omp_data_i.
.type main._omp_fn.1, @function
main._omp_fn.1:
.LFB2011:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
.cfi_lsda 0x3,.LLSDA2011
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $32, %rsp #,
# Save the incoming .omp_data_i argument in the stack frame.
movq %rdi, -24(%rbp) # .omp_data_i, .omp_data_i
# test.cc:15: #pragma omp task shared(i)
# The 32-bit field at offset 8 of .omp_data_i is copied into the local slot
# -4(%rbp) here, before the omp_set_lock call below.
movq -24(%rbp), %rax # .omp_data_i, tmp84
movl 8(%rax), %eax # .omp_data_i_2(D)->i, tmp85
movl %eax, -4(%rbp) # tmp85, i
# test.cc:18: omp_set_lock(&lock);
movq -24(%rbp), %rax # .omp_data_i, tmp86
movq (%rax), %rax # .omp_data_i_2(D)->lock, _4
movq %rax, %rdi # _4,
call omp_set_lock #
# test.cc:19: std::cout << i;
# The printed value is read from the local copy at -4(%rbp), not from
# .omp_data_i.
movl -4(%rbp), %eax # i, tmp87
movl %eax, %esi # tmp87,
movl $_ZSt4cout, %edi #,
call _ZNSolsEi #
# test.cc:20: omp_unset_lock(&lock);
movq -24(%rbp), %rax # .omp_data_i, tmp88
movq (%rax), %rax # .omp_data_i_2(D)->lock, _8
movq %rax, %rdi # _8,
call omp_unset_lock #
# test.cc:15: #pragma omp task shared(i)
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
Basically the task input data, a structure called .omp_data_i, contains i and
the lock (sorry if the assembly is not the best way to look at this).
- With "&i", the structure holds a pointer to i, which is dereferenced after
omp_set_lock is called.
- Without "&i", i is passed by value in this structure and is copied into a
local stack slot before omp_set_lock is called. Therefore, flushes implied by
the lock functions never affect the access to i, and the task sees the outdated
value after acquiring the lock.
I have also tried:
- setting i as volatile, which has no impact
- adding flushes:
- both as OpenMP pragmas and volatile __asm__("mfence":::"memory"); on x86
- both after setting i in the generating task, and before accessing i in
the child task
More information about the Gcc-bugs
mailing list