[gomp] Fix COPYIN when copy is done by reference
Jakub Jelinek
jakub@redhat.com
Thu Oct 27 16:33:00 GMT 2005
Hi!
As shown on attached copyin-2.[cC] and threadprivate3.f90 testcases,
if COPYIN is done by reference, i.e. __omp_fn.* argument contains
a pointer into master thread's __thread variable, we need to insert
a barrier at the end of rec input, otherwise the master thread could
change its thread local var before the other threads get a chance to
copy it over.
Ok for gomp?
2005-10-27 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (expand_rec_input_clauses): If any COPYIN copying is done
by reference, emit GOMP_barrier () call at the end of the COPYIN
sequence.
libgomp/
* testsuite/libgomp.dg/copyin-2.c: New test.
* testsuite/libgomp.c++/copyin-2.C: New test.
* testsuite/libgomp.fortran/threadprivate3.f90: New test.
* testsuite/libgomp.fortran/threadprivate2.f90: New test.
* testsuite/libgomp.fortran/sharing2.f90: New test.
--- gcc/omp-low.c.jj 2005-10-24 08:56:41.000000000 +0200
+++ gcc/omp-low.c 2005-10-27 18:04:11.000000000 +0200
@@ -1088,6 +1088,7 @@ expand_rec_input_clauses (tree clauses,
{
tree_stmt_iterator diter;
tree c, dtor, copyin_seq, x;
+ bool copyin_by_ref = false;
int pass;
/* Resolve private references for Fortran. Note that C++ disallows
@@ -1193,6 +1194,7 @@ expand_rec_input_clauses (tree clauses,
x = build_receiver_ref (var, by_ref, ctx);
x = lang_hooks.decls.omp_clause_assign_op (new_var, x);
append_to_statement_list (x, ©in_seq);
+ copyin_by_ref |= by_ref;
break;
case OMP_CLAUSE_REDUCTION:
@@ -1224,6 +1226,16 @@ expand_rec_input_clauses (tree clauses,
x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
gimplify_and_add (x, ilist);
}
+
+ /* If any copyin variable is passed by reference, we must ensure the
+ master thread doesn't modify it before it is copied over in all
+ threads. */
+ if (copyin_by_ref)
+ {
+ x = built_in_decls[BUILT_IN_GOMP_BARRIER];
+ x = build_function_call_expr (x, NULL);
+ gimplify_and_add (x, ilist);
+ }
}
/* Generate code to implement the LASTPRIVATE clauses. This is used for
--- libgomp/testsuite/libgomp.dg/copyin-2.c.jj 2005-10-27 17:14:52.000000000 +0200
+++ libgomp/testsuite/libgomp.dg/copyin-2.c 2005-10-27 17:21:56.000000000 +0200
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+struct { int t; char buf[64]; } thr = { 32, "" };
+#pragma omp threadprivate (thr)
+
+int
+main (void)
+{
+ int l = 0;
+
+ omp_set_dynamic (0);
+ omp_set_num_threads (6);
+
+#pragma omp parallel copyin (thr) reduction (||:l)
+ {
+ l = thr.t != 32;
+ thr.t = omp_get_thread_num () + 11;
+ }
+
+ if (l || thr.t != 11)
+ abort ();
+
+#pragma omp parallel reduction (||:l)
+ l = thr.t != omp_get_thread_num () + 11;
+
+ if (l)
+ abort ();
+ return 0;
+}
--- libgomp/testsuite/libgomp.c++/copyin-2.C.jj 2005-10-27 17:14:52.000000000 +0200
+++ libgomp/testsuite/libgomp.c++/copyin-2.C 2005-10-27 18:11:54.000000000 +0200
@@ -0,0 +1,33 @@
+// { dg-do run }
+
+#include <omp.h>
+
+extern "C" void abort (void);
+
+struct S { int t; char buf[64]; } thr = { 32, "" };
+#pragma omp threadprivate (thr)
+
+int
+main (void)
+{
+ int l = 0;
+
+ omp_set_dynamic (0);
+ omp_set_num_threads (6);
+
+#pragma omp parallel copyin (thr) reduction (||:l)
+ {
+ l = thr.t != 32;
+ thr.t = omp_get_thread_num () + 11;
+ }
+
+ if (l || thr.t != 11)
+ abort ();
+
+#pragma omp parallel reduction (||:l)
+ l = thr.t != omp_get_thread_num () + 11;
+
+ if (l)
+ abort ();
+ return 0;
+}
--- libgomp/testsuite/libgomp.fortran/threadprivate2.f90.jj 2005-10-27 14:26:11.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/threadprivate2.f90 2005-10-27 15:59:24.000000000 +0200
@@ -0,0 +1,93 @@
+! { dg-do run }
+
+module threadprivate2
+ integer, dimension(:,:), allocatable :: foo
+!$omp threadprivate (foo)
+end module threadprivate2
+
+ use omp_lib
+ use threadprivate2
+
+ integer, dimension(:), pointer :: bar1
+ integer, dimension(2), target :: bar2
+ common /thrc/ bar1, bar2
+!$omp threadprivate (/thrc/)
+
+ integer, dimension(:), pointer, save :: bar3 => NULL()
+!$omp threadprivate (bar3)
+
+ logical :: l
+ type tt
+ integer :: a
+ integer :: b = 32
+ end type tt
+ type (tt), save :: baz
+!$omp threadprivate (baz)
+
+ l = .false.
+ call omp_set_dynamic (.false.)
+ call omp_set_num_threads (4)
+
+!$omp parallel num_threads (4) reduction (.or.:l)
+ l = allocated (foo)
+ allocate (foo (6 + omp_get_thread_num (), 3))
+ l = l.or..not.allocated (foo)
+ l = l.or.size (foo).ne.(18 + 3 * omp_get_thread_num ())
+ foo = omp_get_thread_num () + 1
+
+ bar2 = omp_get_thread_num ()
+ l = l.or.associated (bar3)
+ bar1 => bar2
+ l = l.or..not.associated (bar1)
+ l = l.or..not.associated (bar1, bar2)
+ l = l.or.any (bar1.ne.omp_get_thread_num ())
+ nullify (bar1)
+ l = l.or.associated (bar1)
+ allocate (bar3 (4))
+ l = l.or..not.associated (bar3)
+ bar3 = omp_get_thread_num () - 2
+
+ l = l.or.(baz%b.ne.32)
+ baz%a = omp_get_thread_num () * 2
+ baz%b = omp_get_thread_num () * 2 + 1
+!$omp end parallel
+
+ if (l) call abort
+ if (.not.allocated (foo)) call abort
+ if (size (foo).ne.18) call abort
+ if (any (foo.ne.1)) call abort
+
+ if (associated (bar1)) call abort
+ if (.not.associated (bar3)) call abort
+ if (any (bar3 .ne. -2)) call abort
+ deallocate (bar3)
+ if (associated (bar3)) call abort
+
+!$omp parallel num_threads (4) reduction (.or.:l)
+ l = l.or..not.allocated (foo)
+ l = l.or.size (foo).ne.(18 + 3 * omp_get_thread_num ())
+ l = l.or.any (foo.ne.(omp_get_thread_num () + 1))
+ if (omp_get_thread_num () .ne. 0) then
+ deallocate (foo)
+ l = l.or.allocated (foo)
+ end if
+
+ l = l.or.associated (bar1)
+ if (omp_get_thread_num () .ne. 0) then
+ l = l.or..not.associated (bar3)
+ l = l.or.any (bar3 .ne. omp_get_thread_num () - 2)
+ deallocate (bar3)
+ end if
+ l = l.or.associated (bar3)
+
+ l = l.or.(baz%a.ne.(omp_get_thread_num () * 2))
+ l = l.or.(baz%b.ne.(omp_get_thread_num () * 2 + 1))
+!$omp end parallel
+
+ if (l) call abort
+ if (.not.allocated (foo)) call abort
+ if (size (foo).ne.18) call abort
+ if (any (foo.ne.1)) call abort
+ deallocate (foo)
+ if (allocated (foo)) call abort
+end
--- libgomp/testsuite/libgomp.fortran/threadprivate3.f90.jj 2005-10-27 15:58:20.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/threadprivate3.f90 2005-10-27 17:18:07.000000000 +0200
@@ -0,0 +1,105 @@
+! { dg-do run }
+
+module threadprivate3
+ integer, dimension(:,:), pointer :: foo => NULL()
+!$omp threadprivate (foo)
+end module threadprivate3
+
+ use omp_lib
+ use threadprivate3
+
+ integer, dimension(:), pointer :: bar1
+ integer, dimension(2), target :: bar2, var
+ common /thrc/ bar1, bar2
+!$omp threadprivate (/thrc/)
+
+ integer, dimension(:), pointer, save :: bar3 => NULL()
+!$omp threadprivate (bar3)
+
+ logical :: l
+ type tt
+ integer :: a
+ integer :: b = 32
+ end type tt
+ type (tt), save :: baz
+!$omp threadprivate (baz)
+
+ l = .false.
+ call omp_set_dynamic (.false.)
+ call omp_set_num_threads (4)
+ var = 6
+
+!$omp parallel num_threads (4) reduction (.or.:l)
+ bar2 = omp_get_thread_num ()
+ l = associated (bar3)
+ bar1 => bar2
+ l = l.or..not.associated (bar1)
+ l = l.or..not.associated (bar1, bar2)
+ l = l.or.any (bar1.ne.omp_get_thread_num ())
+ nullify (bar1)
+ l = l.or.associated (bar1)
+ allocate (bar3 (4))
+ l = l.or..not.associated (bar3)
+ bar3 = omp_get_thread_num () - 2
+ if (omp_get_thread_num () .ne. 0) then
+ deallocate (bar3)
+ if (associated (bar3)) call abort
+ else
+ bar1 => var
+ end if
+ bar2 = omp_get_thread_num () * 6 + 130
+
+ l = l.or.(baz%b.ne.32)
+ baz%a = omp_get_thread_num () * 2
+ baz%b = omp_get_thread_num () * 2 + 1
+!$omp end parallel
+
+ if (l) call abort
+ if (.not.associated (bar1)) call abort
+ if (any (bar1.ne.6)) call abort
+ if (.not.associated (bar3)) call abort
+ if (any (bar3 .ne. -2)) call abort
+ deallocate (bar3)
+ if (associated (bar3)) call abort
+
+ allocate (bar3 (10))
+ bar3 = 17
+
+!$omp parallel copyin (bar1, bar2, bar3, baz) num_threads (4) &
+!$omp& reduction (.or.:l)
+ l = l.or..not.associated (bar1)
+ l = l.or.any (bar1.ne.6)
+ l = l.or.any (bar2.ne.130)
+ l = l.or..not.associated (bar3)
+ l = l.or.size (bar3).ne.10
+ l = l.or.any (bar3.ne.17)
+ allocate (bar1 (4))
+ bar1 = omp_get_thread_num ()
+ bar2 = omp_get_thread_num () + 8
+
+ l = l.or.(baz%a.ne.0)
+ l = l.or.(baz%b.ne.1)
+ baz%a = omp_get_thread_num () * 3 + 4
+ baz%b = omp_get_thread_num () * 3 + 5
+
+!$omp barrier
+ if (omp_get_thread_num () .eq. 0) then
+ deallocate (bar3)
+ end if
+ bar3 => bar2
+!$omp barrier
+
+ l = l.or..not.associated (bar1)
+ l = l.or..not.associated (bar3)
+ l = l.or.any (bar1.ne.omp_get_thread_num ())
+ l = l.or.size (bar1).ne.4
+ l = l.or.any (bar2.ne.omp_get_thread_num () + 8)
+ l = l.or.any (bar3.ne.omp_get_thread_num () + 8)
+ l = l.or.size (bar3).ne.2
+
+ l = l.or.(baz%a .ne. omp_get_thread_num () * 3 + 4)
+ l = l.or.(baz%b .ne. omp_get_thread_num () * 3 + 5)
+!$omp end parallel
+
+ if (l) call abort
+end
--- libgomp/testsuite/libgomp.fortran/sharing2.f90.jj 2005-10-27 10:07:43.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/sharing2.f90 2005-10-27 15:43:35.000000000 +0200
@@ -0,0 +1,32 @@
+! { dg-do run }
+
+ use omp_lib
+ integer :: i, j, k, m, n
+ logical :: l
+ equivalence (i, m)
+ equivalence (j, n)
+ i = 4
+ j = 8
+ l = .false.
+!$omp parallel private (k) firstprivate (i) shared (j) num_threads (2) &
+!$omp& reduction (.or.:l)
+ l = l .or. i .ne. 4
+ l = l .or. j .ne. 8
+!$omp barrier
+ k = omp_get_thread_num ()
+ if (k .eq. 0) then
+ i = 14
+ j = 15
+ end if
+!$omp barrier
+ if (k .eq. 1) then
+ if (i .ne. 4 .or. j .ne. 15) l = .true.
+ i = 24
+ j = 25
+ end if
+!$omp barrier
+ if (j .ne. 25 .or. i .ne. (k * 10 + 14)) l = .true.
+!$omp end parallel
+ if (l) call abort
+ if (j .ne. 25) call abort
+end
Jakub
More information about the Gcc-patches
mailing list