[gomp] Fix COPYIN when copy is done by reference

Jakub Jelinek jakub@redhat.com
Thu Oct 27 16:33:00 GMT 2005


Hi!

As shown by the attached copyin-2.[cC] and threadprivate3.f90 testcases,
if COPYIN is done by reference, i.e. the __omp_fn.* argument contains
a pointer into the master thread's __thread variable, we need to insert
a barrier at the end of the receiver-side input clause expansion
(expand_rec_input_clauses); otherwise the master thread could
change its thread-local variable before the other threads get a chance
to copy it over.

Ok for gomp?

2005-10-27  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (expand_rec_input_clauses): If any COPYIN copying is done
	by reference, emit GOMP_barrier () call at the end of the COPYIN
	sequence.
libgomp/
	* testsuite/libgomp.dg/copyin-2.c: New test.
	* testsuite/libgomp.c++/copyin-2.C: New test.
	* testsuite/libgomp.fortran/threadprivate3.f90: New test.

	* testsuite/libgomp.fortran/threadprivate2.f90: New test.
	* testsuite/libgomp.fortran/sharing2.f90: New test.

--- gcc/omp-low.c.jj	2005-10-24 08:56:41.000000000 +0200
+++ gcc/omp-low.c	2005-10-27 18:04:11.000000000 +0200
@@ -1088,6 +1088,7 @@ expand_rec_input_clauses (tree clauses, 
 {
   tree_stmt_iterator diter;
   tree c, dtor, copyin_seq, x;
+  bool copyin_by_ref = false;
   int pass;
 
   /* Resolve private references for Fortran.  Note that C++ disallows
@@ -1193,6 +1194,7 @@ expand_rec_input_clauses (tree clauses, 
 	      x = build_receiver_ref (var, by_ref, ctx);
 	      x = lang_hooks.decls.omp_clause_assign_op (new_var, x);
 	      append_to_statement_list (x, &copyin_seq);
+	      copyin_by_ref |= by_ref;
 	      break;
 
 	    case OMP_CLAUSE_REDUCTION:
@@ -1224,6 +1226,16 @@ expand_rec_input_clauses (tree clauses, 
       x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
       gimplify_and_add (x, ilist);
     }
+
+  /* If any copyin variable is passed by reference, we must ensure the
+     master thread doesn't modify it before it is copied over in all
+     threads.  */
+  if (copyin_by_ref)
+    {
+      x = built_in_decls[BUILT_IN_GOMP_BARRIER];
+      x = build_function_call_expr (x, NULL);
+      gimplify_and_add (x, ilist);
+    }
 }
 
 /* Generate code to implement the LASTPRIVATE clauses.  This is used for
--- libgomp/testsuite/libgomp.dg/copyin-2.c.jj	2005-10-27 17:14:52.000000000 +0200
+++ libgomp/testsuite/libgomp.dg/copyin-2.c	2005-10-27 17:21:56.000000000 +0200
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+struct { int t; char buf[64]; } thr = { 32, "" };
+#pragma omp threadprivate (thr)
+
+int
+main (void)
+{
+  int l = 0;
+
+  omp_set_dynamic (0);
+  omp_set_num_threads (6);
+
+#pragma omp parallel copyin (thr) reduction (||:l)
+  {
+    l = thr.t != 32;
+    thr.t = omp_get_thread_num () + 11;
+  }
+
+  if (l || thr.t != 11)
+    abort ();
+
+#pragma omp parallel reduction (||:l)
+  l = thr.t != omp_get_thread_num () + 11;
+
+  if (l)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/copyin-2.C.jj	2005-10-27 17:14:52.000000000 +0200
+++ libgomp/testsuite/libgomp.c++/copyin-2.C	2005-10-27 18:11:54.000000000 +0200
@@ -0,0 +1,33 @@
+// { dg-do run }
+
+#include <omp.h>
+
+extern "C" void abort (void);
+
+struct S { int t; char buf[64]; } thr = { 32, "" };
+#pragma omp threadprivate (thr)
+
+int
+main (void)
+{
+  int l = 0;
+
+  omp_set_dynamic (0);
+  omp_set_num_threads (6);
+
+#pragma omp parallel copyin (thr) reduction (||:l)
+  {
+    l = thr.t != 32;
+    thr.t = omp_get_thread_num () + 11;
+  }
+
+  if (l || thr.t != 11)
+    abort ();
+
+#pragma omp parallel reduction (||:l)
+  l = thr.t != omp_get_thread_num () + 11;
+
+  if (l)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.fortran/threadprivate2.f90.jj	2005-10-27 14:26:11.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/threadprivate2.f90	2005-10-27 15:59:24.000000000 +0200
@@ -0,0 +1,93 @@
+! { dg-do run }
+
+module threadprivate2
+  integer, dimension(:,:), allocatable :: foo
+!$omp threadprivate (foo)
+end module threadprivate2
+
+  use omp_lib
+  use threadprivate2
+
+  integer, dimension(:), pointer :: bar1
+  integer, dimension(2), target :: bar2
+  common /thrc/ bar1, bar2
+!$omp threadprivate (/thrc/)
+
+  integer, dimension(:), pointer, save :: bar3 => NULL()
+!$omp threadprivate (bar3)
+
+  logical :: l
+  type tt
+    integer :: a
+    integer :: b = 32
+  end type tt
+  type (tt), save :: baz
+!$omp threadprivate (baz)
+
+  l = .false.
+  call omp_set_dynamic (.false.)
+  call omp_set_num_threads (4)
+
+!$omp parallel num_threads (4) reduction (.or.:l)
+  l = allocated (foo)
+  allocate (foo (6 + omp_get_thread_num (), 3))
+  l = l.or..not.allocated (foo)
+  l = l.or.size (foo).ne.(18 + 3 * omp_get_thread_num ())
+  foo = omp_get_thread_num () + 1
+
+  bar2 = omp_get_thread_num ()
+  l = l.or.associated (bar3)
+  bar1 => bar2
+  l = l.or..not.associated (bar1)
+  l = l.or..not.associated (bar1, bar2)
+  l = l.or.any (bar1.ne.omp_get_thread_num ())
+  nullify (bar1)
+  l = l.or.associated (bar1)
+  allocate (bar3 (4))
+  l = l.or..not.associated (bar3)
+  bar3 = omp_get_thread_num () - 2
+
+  l = l.or.(baz%b.ne.32)
+  baz%a = omp_get_thread_num () * 2
+  baz%b = omp_get_thread_num () * 2 + 1
+!$omp end parallel
+
+  if (l) call abort
+  if (.not.allocated (foo)) call abort
+  if (size (foo).ne.18) call abort
+  if (any (foo.ne.1)) call abort
+
+  if (associated (bar1)) call abort
+  if (.not.associated (bar3)) call abort
+  if (any (bar3 .ne. -2)) call abort
+  deallocate (bar3)
+  if (associated (bar3)) call abort
+
+!$omp parallel num_threads (4) reduction (.or.:l)
+  l = l.or..not.allocated (foo)
+  l = l.or.size (foo).ne.(18 + 3 * omp_get_thread_num ())
+  l = l.or.any (foo.ne.(omp_get_thread_num () + 1))
+  if (omp_get_thread_num () .ne. 0) then
+    deallocate (foo)
+    l = l.or.allocated (foo)
+  end if
+
+  l = l.or.associated (bar1)
+  if (omp_get_thread_num () .ne. 0) then
+    l = l.or..not.associated (bar3)
+    l = l.or.any (bar3 .ne. omp_get_thread_num () - 2)
+    deallocate (bar3)
+  end if
+  l = l.or.associated (bar3)
+
+  l = l.or.(baz%a.ne.(omp_get_thread_num () * 2))
+  l = l.or.(baz%b.ne.(omp_get_thread_num () * 2 + 1))
+!$omp end parallel
+
+  if (l) call abort
+  if (.not.allocated (foo)) call abort
+  if (size (foo).ne.18) call abort
+  if (any (foo.ne.1)) call abort
+  deallocate (foo)
+  if (allocated (foo)) call abort
+end
--- libgomp/testsuite/libgomp.fortran/threadprivate3.f90.jj	2005-10-27 15:58:20.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/threadprivate3.f90	2005-10-27 17:18:07.000000000 +0200
@@ -0,0 +1,105 @@
+! { dg-do run }
+
+module threadprivate3
+  integer, dimension(:,:), pointer :: foo => NULL()
+!$omp threadprivate (foo)
+end module threadprivate3
+
+  use omp_lib
+  use threadprivate3
+
+  integer, dimension(:), pointer :: bar1
+  integer, dimension(2), target :: bar2, var
+  common /thrc/ bar1, bar2
+!$omp threadprivate (/thrc/)
+
+  integer, dimension(:), pointer, save :: bar3 => NULL()
+!$omp threadprivate (bar3)
+
+  logical :: l
+  type tt
+    integer :: a
+    integer :: b = 32
+  end type tt
+  type (tt), save :: baz
+!$omp threadprivate (baz)
+
+  l = .false.
+  call omp_set_dynamic (.false.)
+  call omp_set_num_threads (4)
+  var = 6
+
+!$omp parallel num_threads (4) reduction (.or.:l)
+  bar2 = omp_get_thread_num ()
+  l = associated (bar3)
+  bar1 => bar2
+  l = l.or..not.associated (bar1)
+  l = l.or..not.associated (bar1, bar2)
+  l = l.or.any (bar1.ne.omp_get_thread_num ())
+  nullify (bar1)
+  l = l.or.associated (bar1)
+  allocate (bar3 (4))
+  l = l.or..not.associated (bar3)
+  bar3 = omp_get_thread_num () - 2
+  if (omp_get_thread_num () .ne. 0) then
+    deallocate (bar3)
+    if (associated (bar3)) call abort
+  else
+    bar1 => var
+  end if
+  bar2 = omp_get_thread_num () * 6 + 130
+
+  l = l.or.(baz%b.ne.32)
+  baz%a = omp_get_thread_num () * 2
+  baz%b = omp_get_thread_num () * 2 + 1
+!$omp end parallel
+
+  if (l) call abort
+  if (.not.associated (bar1)) call abort
+  if (any (bar1.ne.6)) call abort
+  if (.not.associated (bar3)) call abort
+  if (any (bar3 .ne. -2)) call abort
+  deallocate (bar3)
+  if (associated (bar3)) call abort
+
+  allocate (bar3 (10))
+  bar3 = 17
+
+!$omp parallel copyin (bar1, bar2, bar3, baz) num_threads (4) &
+!$omp& reduction (.or.:l)
+  l = l.or..not.associated (bar1)
+  l = l.or.any (bar1.ne.6)
+  l = l.or.any (bar2.ne.130)
+  l = l.or..not.associated (bar3)
+  l = l.or.size (bar3).ne.10
+  l = l.or.any (bar3.ne.17)
+  allocate (bar1 (4))
+  bar1 = omp_get_thread_num ()
+  bar2 = omp_get_thread_num () + 8
+
+  l = l.or.(baz%a.ne.0)
+  l = l.or.(baz%b.ne.1)
+  baz%a = omp_get_thread_num () * 3 + 4
+  baz%b = omp_get_thread_num () * 3 + 5
+
+!$omp barrier
+  if (omp_get_thread_num () .eq. 0) then
+    deallocate (bar3)
+  end if
+  bar3 => bar2
+!$omp barrier
+
+  l = l.or..not.associated (bar1)
+  l = l.or..not.associated (bar3)
+  l = l.or.any (bar1.ne.omp_get_thread_num ())
+  l = l.or.size (bar1).ne.4
+  l = l.or.any (bar2.ne.omp_get_thread_num () + 8)
+  l = l.or.any (bar3.ne.omp_get_thread_num () + 8)
+  l = l.or.size (bar3).ne.2
+
+  l = l.or.(baz%a .ne. omp_get_thread_num () * 3 + 4)
+  l = l.or.(baz%b .ne. omp_get_thread_num () * 3 + 5)
+!$omp end parallel
+
+  if (l) call abort
+end
--- libgomp/testsuite/libgomp.fortran/sharing2.f90.jj	2005-10-27 10:07:43.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/sharing2.f90	2005-10-27 15:43:35.000000000 +0200
@@ -0,0 +1,32 @@
+! { dg-do run }
+
+  use omp_lib
+  integer :: i, j, k, m, n
+  logical :: l
+  equivalence (i, m)
+  equivalence (j, n)
+  i = 4
+  j = 8
+  l = .false.
+!$omp parallel private (k) firstprivate (i) shared (j) num_threads (2) &
+!$omp& reduction (.or.:l)
+  l = l .or. i .ne. 4
+  l = l .or. j .ne. 8
+!$omp barrier
+  k = omp_get_thread_num ()
+  if (k .eq. 0) then
+    i = 14
+    j = 15
+  end if
+!$omp barrier
+  if (k .eq. 1) then
+    if (i .ne. 4 .or. j .ne. 15) l = .true.
+    i = 24
+    j = 25
+  end if
+!$omp barrier
+  if (j .ne. 25 .or. i .ne. (k * 10 + 14)) l = .true.
+!$omp end parallel
+  if (l) call abort
+  if (j .ne. 25) call abort
+end

	Jakub



More information about the Gcc-patches mailing list