This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[6/6] OpenMP 4.0 library testsuite


Hi!

2013-10-08  Jakub Jelinek  <jakub@redhat.com>
	    Richard Henderson  <rth@redhat.com>

libgomp/
	* testsuite/libgomp.c/atomic-14.c: Add parens to make it valid.
	* testsuite/libgomp.c/affinity-1.c: New test.
	* testsuite/libgomp.c/atomic-15.c: New test.
	* testsuite/libgomp.c/atomic-16.c: New test.
	* testsuite/libgomp.c/atomic-17.c: New test.
	* testsuite/libgomp.c/cancel-for-1.c: New test.
	* testsuite/libgomp.c/cancel-for-2.c: New test.
	* testsuite/libgomp.c/cancel-parallel-1.c: New test.
	* testsuite/libgomp.c/cancel-parallel-2.c: New test.
	* testsuite/libgomp.c/cancel-parallel-3.c: New test.
	* testsuite/libgomp.c/cancel-sections-1.c: New test.
	* testsuite/libgomp.c/cancel-taskgroup-1.c: New test.
	* testsuite/libgomp.c/cancel-taskgroup-2.c: New test.
	* testsuite/libgomp.c/depend-1.c: New test.
	* testsuite/libgomp.c/depend-2.c: New test.
	* testsuite/libgomp.c/depend-3.c: New test.
	* testsuite/libgomp.c/depend-4.c: New test.
	* testsuite/libgomp.c/for-1.c: New test.
	* testsuite/libgomp.c/for-1.h: New file.
	* testsuite/libgomp.c/for-2.c: New test.
	* testsuite/libgomp.c/for-2.h: New file.
	* testsuite/libgomp.c/for-3.c: New test.
	* testsuite/libgomp.c/pr58392.c: New test.
	* testsuite/libgomp.c/simd-1.c: New test.
	* testsuite/libgomp.c/simd-2.c: New test.
	* testsuite/libgomp.c/simd-3.c: New test.
	* testsuite/libgomp.c/simd-4.c: New test.
	* testsuite/libgomp.c/simd-5.c: New test.
	* testsuite/libgomp.c/simd-6.c: New test.
	* testsuite/libgomp.c/target-1.c: New test.
	* testsuite/libgomp.c/target-2.c: New test.
	* testsuite/libgomp.c/target-3.c: New test.
	* testsuite/libgomp.c/target-4.c: New test.
	* testsuite/libgomp.c/target-5.c: New test.
	* testsuite/libgomp.c/target-6.c: New test.
	* testsuite/libgomp.c/target-7.c: New test.
	* testsuite/libgomp.c/taskgroup-1.c: New test.
	* testsuite/libgomp.c/thread-limit-1.c: New test.
	* testsuite/libgomp.c/thread-limit-2.c: New test.
	* testsuite/libgomp.c/thread-limit-3.c: New test.
	* testsuite/libgomp.c/udr-1.c: New test.
	* testsuite/libgomp.c/udr-2.c: New test.
	* testsuite/libgomp.c/udr-3.c: New test.
	* testsuite/libgomp.c++/affinity-1.C: New test.
	* testsuite/libgomp.c++/atomic-10.C: New test.
	* testsuite/libgomp.c++/atomic-11.C: New test.
	* testsuite/libgomp.c++/atomic-12.C: New test.
	* testsuite/libgomp.c++/atomic-13.C: New test.
	* testsuite/libgomp.c++/atomic-14.C: New test.
	* testsuite/libgomp.c++/atomic-15.C: New test.
	* testsuite/libgomp.c++/cancel-for-1.C: New test.
	* testsuite/libgomp.c++/cancel-for-2.C: New test.
	* testsuite/libgomp.c++/cancel-parallel-1.C: New test.
	* testsuite/libgomp.c++/cancel-parallel-2.C: New test.
	* testsuite/libgomp.c++/cancel-parallel-3.C: New test.
	* testsuite/libgomp.c++/cancel-sections-1.C: New test.
	* testsuite/libgomp.c++/cancel-taskgroup-1.C: New test.
	* testsuite/libgomp.c++/cancel-taskgroup-2.C: New test.
	* testsuite/libgomp.c++/cancel-taskgroup-3.C: New test.
	* testsuite/libgomp.c++/cancel-test.h: New file.
	* testsuite/libgomp.c++/for-9.C: New test.
	* testsuite/libgomp.c++/for-10.C: New test.
	* testsuite/libgomp.c++/for-11.C: New test.
	* testsuite/libgomp.c++/simd-1.C: New test.
	* testsuite/libgomp.c++/simd-2.C: New test.
	* testsuite/libgomp.c++/simd-3.C: New test.
	* testsuite/libgomp.c++/simd-4.C: New test.
	* testsuite/libgomp.c++/simd-5.C: New test.
	* testsuite/libgomp.c++/simd-6.C: New test.
	* testsuite/libgomp.c++/simd-7.C: New test.
	* testsuite/libgomp.c++/simd-8.C: New test.
	* testsuite/libgomp.c++/target-1.C: New test.
	* testsuite/libgomp.c++/target-2.C: New test.
	* testsuite/libgomp.c++/target-2-aux.cc: New file.
	* testsuite/libgomp.c++/target-3.C: New test.
	* testsuite/libgomp.c++/taskgroup-1.C: New test.
	* testsuite/libgomp.c++/udr-1.C: New test.
	* testsuite/libgomp.c++/udr-2.C: New test.
	* testsuite/libgomp.c++/udr-3.C: New test.
	* testsuite/libgomp.c++/udr-4.C: New test.
	* testsuite/libgomp.c++/udr-5.C: New test.
	* testsuite/libgomp.c++/udr-6.C: New test.
	* testsuite/libgomp.c++/udr-7.C: New test.
	* testsuite/libgomp.c++/udr-8.C: New test.
	* testsuite/libgomp.c++/udr-9.C: New test.

--- libgomp/testsuite/libgomp.c++/affinity-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/affinity-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,4 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_PROC_BIND "true" }
+
+#include "../libgomp.c/affinity-1.c"
--- libgomp/testsuite/libgomp.c++/atomic-10.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/atomic-10.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,99 @@
+// { dg-do run }
+
+extern "C" void abort (void);
+int x = 6;
+
+int
+main ()  /* Runs a fixed sequence of "omp atomic" update/read/capture statements on the global x and aborts on the first unexpected value.  */
+{
+  int v, l = 2, s = 1;
+  #pragma omp atomic
+    x = -3 + x;  /* x: 6 -> 3 */
+  #pragma omp atomic read
+    v = x;
+  if (v != 3)
+    abort ();
+  #pragma omp atomic update
+    x = 3 * 2 * 1 + x;  /* x: 3 -> 9 */
+  #pragma omp atomic read
+    v = x;
+  if (v != 9)
+    abort ();
+  #pragma omp atomic capture
+    v = x = x | 16;  /* 9 | 16 = 25 */
+  if (v != 25)
+    abort ();
+  #pragma omp atomic capture
+    v = x = x + 14 * 2 / 4;  /* 25 + 7 = 32 */
+  if (v != 32)
+    abort ();
+  #pragma omp atomic capture
+    v = x = 5 | x;  /* 5 | 32 = 37 */
+  if (v != 37)
+    abort ();
+  #pragma omp atomic capture
+    v = x = 40 + 12 - 2 - 7 - x;  /* 43 - 37 = 6 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 3 + x; }  /* v gets the old value 6; x becomes 9 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = -1 * -1 * -1 * -1 - x; }  /* v gets the old value 9; x becomes 1 - 9 = -8 */
+  if (v != 9)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != -8)
+    abort ();
+  #pragma omp atomic capture
+    { x = 2 * 2 - x; v = x; }  /* x becomes 4 - (-8) = 12; v gets the new value */
+  if (v != 12)
+    abort ();
+  #pragma omp atomic capture
+    { x = 7 & x; v = x; }  /* 7 & 12 = 4; v gets the new value */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 6; }  /* v gets the old value 4; x is overwritten with 6 */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 7 * 8 + 23; }  /* new x does not depend on the old one: 79 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 79)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 23 + 6 * 4; }  /* x becomes 47 */
+  if (v != 79)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = l ? 17 : 12; }  /* l is 2 (nonzero), so x becomes 17 */
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = l = s++ + 3; }  /* s++ yields 1: l and x become 4, s becomes 2 */
+  if (v != 17 || l != 4 || s != 2)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 4)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/atomic-11.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/atomic-11.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,108 @@
+// { dg-do run }
+
+extern "C" void abort (void);
+
+template <typename T>
+void
+foo ()  /* Template variant: runs a fixed sequence of "omp atomic" update/read/capture statements on x with type T and aborts on the first unexpected value.  */
+{
+  extern T x;  /* block-scope declaration; the definition "int x = 6" follows this template */
+  T v, l = 2, s = 1;
+  #pragma omp atomic
+    x = -3 + x;  /* x: 6 -> 3 */
+  #pragma omp atomic read
+    v = x;
+  if (v != 3)
+    abort ();
+  #pragma omp atomic update
+    x = 3 * 2 * 1 + x;  /* x: 3 -> 9 */
+  #pragma omp atomic read
+    v = x;
+  if (v != 9)
+    abort ();
+  #pragma omp atomic capture
+    v = x = x | 16;  /* 9 | 16 = 25 */
+  if (v != 25)
+    abort ();
+  #pragma omp atomic capture
+    v = x = x + 14 * 2 / 4;  /* 25 + 7 = 32 */
+  if (v != 32)
+    abort ();
+  #pragma omp atomic capture
+    v = x = 5 | x;  /* 5 | 32 = 37 */
+  if (v != 37)
+    abort ();
+  #pragma omp atomic capture
+    v = x = 40 + 12 - 2 - 7 - x;  /* 43 - 37 = 6 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 3 + x; }  /* v gets the old value 6; x becomes 9 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = -1 * -1 * -1 * -1 - x; }  /* v gets the old value 9; x becomes 1 - 9 = -8 */
+  if (v != 9)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != -8)
+    abort ();
+  #pragma omp atomic capture
+    { x = 2 * 2 - x; v = x; }  /* x becomes 4 - (-8) = 12; v gets the new value */
+  if (v != 12)
+    abort ();
+  #pragma omp atomic capture
+    { x = 7 & x; v = x; }  /* 7 & 12 = 4; v gets the new value */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 6; }  /* v gets the old value 4; x is overwritten with 6 */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 7 * 8 + 23; }  /* new x does not depend on the old one: 79 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 79)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 23 + 6 * 4; }  /* x becomes 47 */
+  if (v != 79)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = l ? 17 : 12; }  /* l is 2 (nonzero), so x becomes 17 */
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = l = s++ + 3; }  /* s++ yields 1: l and x become 4, s becomes 2 */
+  if (v != 17 || l != 4 || s != 2)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 4)
+    abort ();
+}
+
+int x = 6;
+
+int
+main ()  /* Instantiates and runs the atomic test for T = int.  */
+{
+  foo <int> ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/atomic-12.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/atomic-12.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,58 @@
+// { dg-do run }
+
+extern "C" void abort ();
+int x = 6, cnt;
+
+int
+foo ()  /* Returns the number of earlier calls and increments the global counter cnt.  */
+{
+  return cnt++;
+}
+
+int
+main ()  /* Uses atomic statements whose operands contain foo () calls; checks via cnt how many calls ran and via v which value x took.  */
+{
+  int v, *p;
+  p = &x;  /* every p[...] below evaluates its index to 0, so it names x */
+  #pragma omp atomic update
+    p[foo (), 0] = 16 + 6 - p[foo (), 0];  /* two foo () calls; x = 22 - 6 = 16 */
+  #pragma omp atomic read
+    v = x;
+  if (cnt != 2 || v != 16)
+    abort ();
+  #pragma omp atomic capture
+    v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3;  /* four foo () calls; x = 16 + 3 = 19 */
+  if (cnt != 6 || v != 19)
+    abort ();
+  #pragma omp atomic capture
+    v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0];  /* three foo () calls; x = 6 + 0 + 19 = 25 */
+  if (cnt != 9 || v != 25)
+    abort ();
+  #pragma omp atomic capture
+    {
+      v = p[foo () & 0]; p[foo () & 0] = (foo (), 1) * 9 - p[foo () & 0];  /* four foo () calls; v = old 25, x = 9 - 25 = -16 */
+    }
+  if (cnt != 13 || v != 25)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != -16)
+    abort ();
+  #pragma omp atomic capture
+    {
+      p[0 & foo ()] = 16 - 2 + 3 + p[0 & foo ()]; v = p[0 & foo ()];  /* three foo () calls; x = 17 + (-16) = 1, v = new value */
+    }
+  if (cnt != 16 || v != 1)
+    abort ();
+  #pragma omp atomic capture
+    {
+      v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 13 : foo () + 6;  /* condition is 7 (nonzero): x = 13 and the foo () in the else arm is not called */
+    }
+  if (cnt != 19 || v != 1)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 13)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/atomic-13.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/atomic-13.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,68 @@
+// { dg-do run }
+
+extern "C" void abort ();
+int cnt;
+
+int
+foo ()  /* Returns the number of earlier calls and increments the global counter cnt.  */
+{
+  return cnt++;
+}
+
+template <typename T>
+void
+bar ()  /* Template variant: atomic statements whose operands contain foo () calls; checks via cnt how many calls ran and via v which value x took.  */
+{
+  extern T x;  /* block-scope declaration; the definition "int x = 6" follows this template */
+  T v, *p;
+  p = &x;  /* every p[...] below evaluates its index to 0, so it names x */
+  #pragma omp atomic update
+    p[foo (), 0] = 16 + 6 - p[foo (), 0];  /* two foo () calls; x = 22 - 6 = 16 */
+  #pragma omp atomic read
+    v = x;
+  if (cnt != 2 || v != 16)
+    abort ();
+  #pragma omp atomic capture
+    v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3;  /* four foo () calls; x = 16 + 3 = 19 */
+  if (cnt != 6 || v != 19)
+    abort ();
+  #pragma omp atomic capture
+    v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0];  /* three foo () calls; x = 6 + 0 + 19 = 25 */
+  if (cnt != 9 || v != 25)
+    abort ();
+  #pragma omp atomic capture
+    {
+      v = p[foo () & 0]; p[foo () & 0] = (foo (), 1) * 9 - p[foo () & 0];  /* four foo () calls; v = old 25, x = 9 - 25 = -16 */
+    }
+  if (cnt != 13 || v != 25)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != -16)
+    abort ();
+  #pragma omp atomic capture
+    {
+      p[0 & foo ()] = 16 - 2 + 3 + p[0 & foo ()]; v = p[0 & foo ()];  /* three foo () calls; x = 17 + (-16) = 1, v = new value */
+    }
+  if (cnt != 16 || v != 1)
+    abort ();
+  #pragma omp atomic capture
+    {
+      v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 13 : foo () + 6;  /* condition is 7 (nonzero): x = 13 and the foo () in the else arm is not called */
+    }
+  if (cnt != 19 || v != 1)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 13)
+    abort ();
+}
+
+int x = 6;
+
+int
+main ()  /* Instantiates and runs the atomic test for T = int.  */
+{
+  bar <int> ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/atomic-14.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/atomic-14.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,99 @@
+// { dg-do run }
+
+extern "C" void abort (void);
+int x = 6;
+
+int
+main ()  /* Runs a fixed sequence of "omp atomic ... seq_cst" update/read/capture statements on the global x and aborts on the first unexpected value.  */
+{
+  int v, l = 2, s = 1;
+  #pragma omp atomic seq_cst
+    x = -3 + x;  /* x: 6 -> 3 */
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 3)
+    abort ();
+  #pragma omp atomic update seq_cst
+    x = 3 * 2 * 1 + x;  /* x: 3 -> 9 */
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 9)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = x | 16;  /* 9 | 16 = 25 */
+  if (v != 25)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = x + 14 * 2 / 4;  /* 25 + 7 = 32 */
+  if (v != 32)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = 5 | x;  /* 5 | 32 = 37 */
+  if (v != 37)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = 40 + 12 - 2 - 7 - x;  /* 43 - 37 = 6 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 3 + x; }  /* v gets the old value 6; x becomes 9 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = -1 * -1 * -1 * -1 - x; }  /* v gets the old value 9; x becomes 1 - 9 = -8 */
+  if (v != 9)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != -8)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { x = 2 * 2 - x; v = x; }  /* x becomes 4 - (-8) = 12; v gets the new value */
+  if (v != 12)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { x = 7 & x; v = x; }  /* 7 & 12 = 4; v gets the new value */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 6; }  /* v gets the old value 4; x is overwritten with 6 */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 7 * 8 + 23; }  /* new x does not depend on the old one: 79 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 79)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 23 + 6 * 4; }  /* x becomes 47 */
+  if (v != 79)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = l ? 17 : 12; }  /* l is 2 (nonzero), so x becomes 17 */
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = l = s++ + 3; }  /* s++ yields 1: l and x become 4, s becomes 2 */
+  if (v != 17 || l != 4 || s != 2)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 4)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/atomic-15.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/atomic-15.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,108 @@
+// { dg-do run }
+
+extern "C" void abort (void);
+
+template <typename T>
+void
+foo ()  /* Template variant: runs a fixed sequence of "omp atomic ... seq_cst" update/read/capture statements on x with type T and aborts on the first unexpected value.  */
+{
+  extern T x;  /* block-scope declaration; the definition "int x = 6" follows this template */
+  T v, l = 2, s = 1;
+  #pragma omp atomic seq_cst
+    x = -3 + x;  /* x: 6 -> 3 */
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 3)
+    abort ();
+  #pragma omp atomic update seq_cst
+    x = 3 * 2 * 1 + x;  /* x: 3 -> 9 */
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 9)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = x | 16;  /* 9 | 16 = 25 */
+  if (v != 25)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = x + 14 * 2 / 4;  /* 25 + 7 = 32 */
+  if (v != 32)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = 5 | x;  /* 5 | 32 = 37 */
+  if (v != 37)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = 40 + 12 - 2 - 7 - x;  /* 43 - 37 = 6 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 3 + x; }  /* v gets the old value 6; x becomes 9 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = -1 * -1 * -1 * -1 - x; }  /* v gets the old value 9; x becomes 1 - 9 = -8 */
+  if (v != 9)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != -8)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { x = 2 * 2 - x; v = x; }  /* x becomes 4 - (-8) = 12; v gets the new value */
+  if (v != 12)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { x = 7 & x; v = x; }  /* 7 & 12 = 4; v gets the new value */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 6; }  /* v gets the old value 4; x is overwritten with 6 */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 7 * 8 + 23; }  /* new x does not depend on the old one: 79 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 79)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 23 + 6 * 4; }  /* x becomes 47 */
+  if (v != 79)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = l ? 17 : 12; }  /* l is 2 (nonzero), so x becomes 17 */
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = l = s++ + 3; }  /* s++ yields 1: l and x become 4, s becomes 2 */
+  if (v != 17 || l != 4 || s != 2)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 4)
+    abort ();
+}
+
+int x = 6;
+
+int
+main ()  /* Instantiates and runs the seq_cst atomic test for T = int.  */
+{
+  foo <int> ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/cancel-for-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-for-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,29 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <omp.h>
+#include "cancel-test.h"
+
+int
+main ()  /* Cancels a worksharing loop from inside its body while S objects are alive in several scopes, then checks that every S that was constructed was also destroyed.  */
+{
+  {
+    S c;
+    #pragma omp parallel num_threads (32)
+    {
+      S a, b;
+      int i;
+      #pragma omp for private (b) firstprivate (c)
+      for (i = 0; i < 1000; ++i)
+	{
+	  S d;  /* loop-body local that is alive at the cancel directive */
+	  #pragma omp cancel for
+	  if (omp_get_cancellation ())  /* the test expects the cancel above to skip this when cancellation is enabled */
+	    abort ();
+	  b.bump ();
+	  c.bump ();
+	}
+    }
+  }
+  S::verify ();  /* called after the enclosing block so that c has been destroyed too */
+}
--- libgomp/testsuite/libgomp.c++/cancel-for-2.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-for-2.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,126 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <omp.h>
+#include "cancel-test.h"
+
+__attribute__((noinline, noclone)) int
+foo (int *x)  /* Runs two parallel regions of four worksharing loops each; loop k is cancelled when x[k] is nonzero, and x[4] gates the "cancel parallel" directives of the second region.  Aborts if v or w end with unexpected totals; returns 0.  */
+{
+  S a, b, c, d, e;
+  int v = 0, w = 0;
+  #pragma omp parallel num_threads (32) shared (v, w) private (c, d) firstprivate (e)
+  {
+    S g;
+    int i;
+    c.bump ();
+    e.bump ();
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[0])
+	abort ();  /* main () passes x[0] = 1, so the test expects this to be unreachable */
+      }
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[1])
+	#pragma omp atomic
+	v++;
+      }
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[2])
+	#pragma omp atomic
+	w += 8;
+      }
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[3])
+	#pragma omp atomic
+	v += 2;
+      }
+  }
+  if (v != 3000 || w != 0)  /* with main ()'s x: 1000 * 1 + 1000 * 2 from the two uncancelled loops; w must stay 0 because x[2] = 1 */
+    abort ();
+  #pragma omp parallel num_threads (32) shared (v, w) private (c, d) firstprivate (e)
+  {
+    S g, h;
+    int i;
+    c.bump ();
+    e.bump ();
+    /* None of these cancel directives should actually cancel anything,
+       but the compiler shouldn't know that and thus should use cancellable
+       barriers at the end of all the workshares.  */
+    #pragma omp cancel parallel if (omp_get_thread_num () == 1 && x[4])
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[0])
+	abort ();  /* expected unreachable for the same reason as in the first region */
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 2 && x[4])
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[1])
+	#pragma omp atomic
+	v++;
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 3 && x[4])
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[2])
+	#pragma omp atomic
+	w += 8;
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 4 && x[4])
+    #pragma omp for private (d, g) firstprivate (b)
+    for (i = 0; i < 1000; ++i)
+      {
+	b.bump ();
+	d.bump ();
+	g.bump ();
+	#pragma omp cancel for if (x[3])
+	#pragma omp atomic
+	v += 2;
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 5 && x[4])
+  }
+  if (v != 6000 || w != 0)  /* the same four loops added another 3000 to v */
+    abort ();
+  return 0;
+}
+
+int
+main ()  /* Runs foo () only when the runtime reports cancellation as enabled, then checks the S constructor/destructor balance.  */
+{
+  int x[] = { 1, 0, 1, 0, 0 };  /* x[0..3]: per-loop "cancel for" conditions; x[4]: condition for the "cancel parallel" directives */
+  if (omp_get_cancellation ())
+    foo (x);
+  S::verify ();
+}
--- libgomp/testsuite/libgomp.c++/cancel-parallel-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-parallel-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,18 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <omp.h>
+#include "cancel-test.h"
+
+int
+main ()  /* Cancels a parallel region while each thread holds a live S object, then checks the S constructor/destructor balance.  */
+{
+  #pragma omp parallel num_threads (32)
+  {
+    S a;
+    #pragma omp cancel parallel
+    if (omp_get_cancellation ())  /* the test expects the cancel above to skip this when cancellation is enabled */
+      abort ();
+  }
+  S::verify ();
+}
--- libgomp/testsuite/libgomp.c++/cancel-parallel-2.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-parallel-2.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,57 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <unistd.h>
+#include <omp.h>
+#include "cancel-test.h"
+
+static void
+foo (int *x)  /* Each thread selects an action from x[its thread number]: 4 cancels the region, 3 spawns three sleeping tasks, 2 sleeps and falls through to 1, 1 is a cancellation point; any other value goes straight to the barrier.  */
+{
+  S a, b, c;
+  #pragma omp parallel firstprivate(x, c) num_threads (32) private (b)
+  {
+    S d;
+    b.bump ();
+    c.bump ();
+    int thr = omp_get_thread_num ();
+    switch (x[thr])  /* x has 32 entries in main (), matching num_threads (32) */
+      {
+      case 4:
+	#pragma omp cancel parallel
+	break;
+      case 3:
+	#pragma omp task
+	usleep (1000);
+	#pragma omp task
+	usleep (2000);
+	#pragma omp task
+	usleep (4000);
+	break;
+      case 2:
+	usleep (1000);
+	/* FALLTHRU */
+      case 1:
+	#pragma omp cancellation point parallel
+	break;
+      }
+    #pragma omp barrier
+    if (omp_get_cancellation ())  /* the test expects no thread to get past the barrier when cancellation is enabled */
+      abort ();
+  }
+}
+
+int
+main ()  /* Runs foo () once with a fixed action table and 32 times with random tables, then checks the S constructor/destructor balance.  */
+{
+  int i, j, x[32] = { 0, 1, 2, 4, 2, 2, 1, 0 };  /* the remaining entries are zero-initialized */
+  foo (x);
+  for (i = 0; i < 32; i++)
+    {
+      for (j = 0; j < 32; j++)
+	x[j] = rand () & 3;  /* random action in 0..3 */
+      x[rand () & 31] = 4;  /* one randomly chosen slot gets the cancel action */
+      foo (x);
+    }
+  S::verify ();
+}
--- libgomp/testsuite/libgomp.c++/cancel-parallel-3.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-parallel-3.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,50 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <omp.h>
+#include <unistd.h>
+#include "cancel-test.h"
+
+static inline void
+do_some_work (void)  /* Placeholder work item: an empty asm statement with a "memory" clobber.  */
+{
+  asm volatile ("" : : : "memory");
+}
+
+void
+foo ()  /* In a 16-thread region, thread 1 sleeps 2 s and then hits a cancellation point, while thread 0 runs the nowait loops, sleeps 1 s and cancels the region.  */
+{
+  S a, b, c;
+  omp_set_dynamic (0);
+  omp_set_schedule (omp_sched_static, 1);  /* this is the schedule picked up by the schedule (runtime) loops below */
+  #pragma omp parallel num_threads (16) private (b) firstprivate (c)
+  {
+    S d;
+    int i, j;
+    b.bump ();
+    c.bump ();
+    do_some_work ();
+    #pragma omp barrier
+    if (omp_get_thread_num () == 1)
+      {
+	sleep (2);  /* longer than thread 0's sleep (1) before its cancel */
+	#pragma omp cancellation point parallel
+      }
+    for (j = 3; j <= 16; j++)  /* 14 nowait loops with trip counts 3 through 16 */
+      #pragma omp for schedule (runtime) nowait
+      for (i = 0; i < j; i++)
+	do_some_work ();
+    if (omp_get_thread_num () == 0)
+      {
+	sleep (1);
+	#pragma omp cancel parallel
+      }
+  }
+}
+
+int
+main ()  /* Runs foo () and then checks the S constructor/destructor balance.  */
+{
+  foo ();
+  S::verify ();
+}
--- libgomp/testsuite/libgomp.c++/cancel-sections-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-sections-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,43 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <omp.h>
+#include "cancel-test.h"
+
+int
+main ()  /* Every section cancels the sections construct while holding a live S object; checks the S constructor/destructor balance afterwards.  */
+{
+  if (!omp_get_cancellation ())  /* the abort () calls below are unconditional, so skip the test when cancellation is disabled */
+    return 0;
+  #pragma omp parallel num_threads (32)
+  {
+    S a;
+    #pragma omp sections
+      {
+	{
+	  S b;
+	  #pragma omp cancel sections
+	  abort ();  /* the test expects the cancel above to skip this */
+	}
+      #pragma omp section
+	{
+	  S c;
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      #pragma omp section
+	{
+	  S d;
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      #pragma omp section
+	{
+	  S e;
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      }
+  }
+  S::verify ();
+}
--- libgomp/testsuite/libgomp.c++/cancel-taskgroup-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-taskgroup-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,4 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include "../libgomp.c/cancel-taskgroup-1.c"
--- libgomp/testsuite/libgomp.c++/cancel-taskgroup-2.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-taskgroup-2.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,4 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include "../libgomp.c/cancel-taskgroup-2.c"
--- libgomp/testsuite/libgomp.c++/cancel-taskgroup-3.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-taskgroup-3.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,58 @@
+// { dg-do run }
+// { dg-set-target-env-var OMP_CANCELLATION "true" }
+
+#include <unistd.h>
+#include <omp.h>
+#include "cancel-test.h"
+
+void
+foo ()  /* Cancels taskgroups from inside tasks that hold private and firstprivate S objects: first one task per thread, then 50 tasks created by a single thread.  */
+{
+  S a, b, c, d, e, f;
+  #pragma omp parallel private (c, d) firstprivate (e, f)
+  #pragma omp taskgroup
+  {
+    c.bump ();
+    e.bump ();
+    #pragma omp task firstprivate (b, f) private (d)
+    {
+      S h;
+      b.bump ();
+      d.bump ();
+      f.bump ();
+      #pragma omp cancel taskgroup
+      if (omp_get_cancellation ())  /* the test expects the cancel above to skip this when cancellation is enabled */
+	abort ();
+    }
+  }
+  #pragma omp parallel private (c, d) firstprivate (e, f)
+  {
+    #pragma omp barrier
+    #pragma omp single
+    #pragma omp taskgroup
+    {
+      int i;
+      c.bump ();
+      e.bump ();
+      for (i = 0; i < 50; i++)
+	#pragma omp task firstprivate (b, f) private (d)
+	{
+	  S h;
+	  b.bump ();
+	  d.bump ();
+	  f.bump ();
+	  #pragma omp cancellation point taskgroup
+	  usleep (30);
+	  #pragma omp cancel taskgroup if (i > 5)
+	}
+    }
+    usleep (10);
+  }
+}
+
+int
+main ()  /* Runs foo () and then checks the S constructor/destructor balance.  */
+{
+  foo ();
+  S::verify ();
+}
--- libgomp/testsuite/libgomp.c++/cancel-test.h	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/cancel-test.h	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,47 @@
+#include <stdlib.h>
+#include <omp.h>
+
+struct S  /* Instance-counting helper: every constructor increments S::s, the destructor decrements it, and verify () aborts if the two do not balance.  */
+{
+  static int s;  /* constructions minus destructions so far */
+  int v;  /* payload modified by bump () */
+  S ()  /* NOTE(review): does not initialize v, so bump () on a default-constructed S reads an indeterminate value -- confirm this is acceptable */
+  {
+    #pragma omp atomic
+    s++;
+  }
+
+  S (int x)
+  {
+    #pragma omp atomic
+    s++;
+    v = x;
+  }
+
+  ~S ()
+  {
+    #pragma omp atomic
+    s--;
+  }
+
+  S (const S &x)  /* copy construction counts as a new instance and copies v */
+  {
+    #pragma omp atomic
+    s++;
+    v = x.v;
+  }
+
+  static void
+  verify ()
+  {
+    if (s) abort ();  /* nonzero means some construction was not matched by a destruction (or vice versa) */
+  }
+
+  void
+  bump ()
+  {
+    v++;
+  }
+};
+
+int S::s = 0;
--- libgomp/testsuite/libgomp.c++/for-10.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/for-10.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,44 @@
+extern "C" void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F simd
+#define G simd
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F parallel for simd
+#define G pf_simd
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F for simd
+#define G f_simd
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+int
+main ()  /* Aborts if any of the test functions returns nonzero.  */
+{
+  if (test_simd_normal ()  /* name matches N(test) with G = simd; presumably emitted by the for-2.h inclusion -- confirm against that header */
+      || test_pf_simd_static ()  /* test_pf_simd_* and test_f_simd_* presumably come from the two for-1.h inclusions (G = pf_simd, G = f_simd) -- confirm */
+      || test_pf_simd_static32 ()
+      || test_pf_simd_auto ()
+      || test_pf_simd_guided32 ()
+      || test_pf_simd_runtime ()
+      || test_f_simd_static ()
+      || test_f_simd_static32 ()
+      || test_f_simd_auto ()
+      || test_f_simd_guided32 ()
+      || test_f_simd_runtime ())
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/for-11.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/for-11.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,108 @@
+extern "C" void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#pragma omp declare target
+
+#define F distribute
+#define G d
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F distribute
+#define G d_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F distribute simd
+#define G ds
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F distribute simd
+#define G ds_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F distribute parallel for
+#define G dpf
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F distribute parallel for dist_schedule(static, 128)
+#define G dpf_ds128
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F distribute parallel for simd
+#define G dpfs
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F distribute parallel for simd dist_schedule(static, 128)
+#define G dpfs_ds128
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#pragma omp end declare target
+
+int
+main ()  /* Calls every test function inside a "target teams" region, ORs the results into err and aborts if err is nonzero.  */
+{
+  int err = 0;  /* combined across teams by the reduction(|:err) clause */
+  #pragma omp target teams reduction(|:err)
+    {
+      err |= test_d_normal ();  /* the test_*_normal names match N(test) for G = d, d_ds128, ds, ds_ds128; presumably emitted by the for-2.h inclusions -- confirm */
+      err |= test_d_ds128_normal ();
+      err |= test_ds_normal ();
+      err |= test_ds_ds128_normal ();
+      err |= test_dpf_static ();  /* the remaining names presumably come from the for-1.h inclusions (G = dpf, dpf_ds128, dpfs, dpfs_ds128) -- confirm */
+      err |= test_dpf_static32 ();
+      err |= test_dpf_auto ();
+      err |= test_dpf_guided32 ();
+      err |= test_dpf_runtime ();
+      err |= test_dpf_ds128_static ();
+      err |= test_dpf_ds128_static32 ();
+      err |= test_dpf_ds128_auto ();
+      err |= test_dpf_ds128_guided32 ();
+      err |= test_dpf_ds128_runtime ();
+      err |= test_dpfs_static ();
+      err |= test_dpfs_static32 ();
+      err |= test_dpfs_auto ();
+      err |= test_dpfs_guided32 ();
+      err |= test_dpfs_runtime ();
+      err |= test_dpfs_ds128_static ();
+      err |= test_dpfs_ds128_static32 ();
+      err |= test_dpfs_ds128_auto ();
+      err |= test_dpfs_ds128_guided32 ();
+      err |= test_dpfs_ds128_runtime ();
+    }
+  if (err)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/for-9.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/for-9.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,33 @@
+extern "C" void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F parallel for
+#define G pf
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F for
+#define G f
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+int
+main ()  /* Aborts if any of the test functions returns nonzero.  */
+{
+  if (test_pf_static ()  /* test_pf_* and test_f_* presumably come from the two for-1.h inclusions (G = pf, G = f) -- confirm against that header */
+      || test_pf_static32 ()
+      || test_pf_auto ()
+      || test_pf_guided32 ()
+      || test_pf_runtime ()
+      || test_f_static ()
+      || test_f_static32 ()
+      || test_f_auto ()
+      || test_f_guided32 ()
+      || test_f_runtime ())
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/simd-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,79 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+int b[1024] __attribute__((aligned (32))) = { 1 };
+int k, m;
+struct U { U (); ~U (); int u; };
+struct V  /* Helper whose assignment operator is observable: it stores the source value plus one.  */
+{
+  V () : v (8) {}
+  ~V ()
+  {
+    if (v > 38 + 4 + 3 * 1024 + 1)  /* bound for this test: p[i] <= 38, k <= 4 + 3 * 1024, plus 1 from operator= */
+      abort ();
+  }
+  V &operator= (const V &x) { v = x.v + 1; return *this; }  /* deliberately not a plain copy */
+  int v;
+};
+
+__attribute__((noinline, noclone))
+U::U () : u (6)  /* Out-of-line constructor; initializes u to 6.  */
+{
+}
+
+__attribute__((noinline, noclone))
+U::~U ()  /* Out-of-line destructor; aborts if u holds a value larger than this test can store.  */
+{
+  if (u > 38 + 4 + 3 * 1023)  /* bound for this test: p[i] <= 38 and k <= 4 + 3 * 1023 when foo () stores u.u */
+    abort ();
+}
+
+__attribute__((noinline, noclone)) int
+foo (int *p)  /* Runs one simd loop over 1024 elements with aligned, linear, reduction and lastprivate clauses; checks the lastprivate objects and returns the reduction sum s.  */
+{
+  int i, s = 0;
+  U u;
+  V v;
+  #pragma omp simd aligned(a, p : 32) linear(k: m + 1) \
+		   reduction(+:s) lastprivate(u, v)
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] *= p[i];
+      u.u = p[i] + k;  /* uses k before this iteration's increment */
+      k += m + 1;  /* same step as the linear(k: m + 1) clause */
+      v.v = p[i] + k;  /* uses k after the increment */
+      s += p[i] + k;
+    }
+  if (u.u != 36 + 4 + 3 * 1023 || v.v != 36 + 4 + 3 * 1024 + 1)  /* last iteration with main ()'s data: p[1023] = 36, k starts at 4, step 3; the extra 1 in v.v presumably comes from the lastprivate copy-back going through V::operator= */
+    abort ();
+  return s;
+}
+
+int
+main ()  /* Fills a and b, runs foo (b) and verifies a, b, k and the returned sum; the whole test is compiled out when int is narrower than 4 bytes.  */
+{
+#if __SIZEOF_INT__ >= 4
+  int i;
+  k = 4;
+  m = 2;  /* makes the loop step m + 1 equal to 3 */
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = i - 512;
+      b[i] = (i - 51) % 39;
+    }
+  int s = foo (b);
+  for (i = 0; i < 1024; i++)
+    {
+      if (b[i] != (i - 51) % 39  /* b must be unchanged */
+	  || a[i] != (i - 512) * b[i])  /* a[i] must have been multiplied by b[i] exactly once */
+	abort ();
+    }
+  if (k != 4 + 3 * 1024 || s != 1596127)  /* k advanced by 3 in each of the 1024 iterations */
+    abort ();
+#endif
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/simd-2.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-2.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,36 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+__UINTPTR_TYPE__ arr[1027];
+
+__attribute__((noinline, noclone)) void
+foo ()  // For each of the 1027 iterations, stores the address of the private v into arr[].
+{
+  int i, v;
+  #pragma omp simd private (v) safelen(16)
+  for (i = 0; i < 1027; i++)
+    arr[i] = (__UINTPTR_TYPE__) &v;
+}
+
+int
+main ()  // Counts the distinct addresses foo () recorded; a 17th distinct one aborts.
+{
+  int i, j, cnt = 0;
+  __UINTPTR_TYPE__ arr2[16];  // distinct values seen so far; the first cnt entries are valid
+  foo ();
+  for (i = 0; i < 1027; i++)
+    {
+      for (j = 0; j < cnt; j++)
+	if (arr[i] == arr2[j])
+	  break;
+      if (j != cnt)  // arr[i] is already recorded in arr2[]
+	continue;
+      if (cnt == 16)  // arr2[] is full, so this would be a 17th distinct address
+	abort ();
+      arr2[cnt++] = arr[i];
+    }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/simd-3.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-3.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,131 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+int b[1024] __attribute__((aligned (32))) = { 1 };
+unsigned char c[1024] __attribute__((aligned (32))) = { 1 };
+int k, m;
+__UINTPTR_TYPE__ u, u2, u3;
+
+__attribute__((noinline, noclone)) int
+foo (int *p)  // Two simd loops over the constant ranges [0, 512) and [512, 1024); returns s + s2 + t + t2.
+{
+  int i, s = 0, s2 = 0, t, t2;
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
+		   lastprivate (t2)
+  for (i = 0; i < 512; i++)
+    {
+      a[i] *= p[i];
+      t2 = k + p[i];  // uses k before this iteration's increment
+      k += m + 1;
+      s += p[i] + k;
+      c[i]++;
+    }
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
+		   lastprivate (t, u, u2, u3)
+  for (i = 512; i < 1024; i++)
+    {
+      a[i] *= p[i];
+      k += m + 1;
+      t = k + p[i];  // uses k after this iteration's increment
+      u = (__UINTPTR_TYPE__) &k;  // u, u2 and u3 only record the addresses of k, s2 and t
+      u2 = (__UINTPTR_TYPE__) &s2;
+      u3 = (__UINTPTR_TYPE__) &t;
+      s2 += t;
+      c[i]++;
+    }
+  return s + s2 + t + t2;
+}
+
+__attribute__((noinline, noclone)) long int
+bar (int *p, long int n, long int o)  // Same two loops as foo, but over the runtime ranges [0, n) and [n, o) with long int counters.
+{
+  long int i, s = 0, s2 = 0, t, t2;
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
+		   lastprivate (t2)
+  for (i = 0; i < n; i++)
+    {
+      a[i] *= p[i];
+      t2 = k + p[i];  // uses k before this iteration's increment
+      k += m + 1;
+      s += p[i] + k;
+      c[i]++;
+    }
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
+		   lastprivate (t, u, u2, u3)
+  for (i = n; i < o; i++)
+    {
+      a[i] *= p[i];
+      k += m + 1;
+      t = k + p[i];  // uses k after this iteration's increment
+      u = (__UINTPTR_TYPE__) &k;  // u, u2 and u3 only record the addresses of k, s2 and t
+      u2 = (__UINTPTR_TYPE__) &s2;
+      u3 = (__UINTPTR_TYPE__) &t;
+      s2 += t;
+      c[i]++;
+    }
+  return s + s2 + t + t2;
+}
+
+int
+main ()  // Runs foo (b), bar (b, 512, 1024) and bar (b, 511, 1021), checking a[], b[], c[], k and the sum after each.
+{
+#if __SIZEOF_INT__ >= 4
+  int i;
+  k = 4;
+  m = 2;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = i - 512;
+      b[i] = (i - 51) % 39;
+      c[i] = (unsigned char) i;
+    }
+  int s = foo (b);
+  for (i = 0; i < 1024; i++)
+    {
+      if (b[i] != (i - 51) % 39
+	  || a[i] != (i - 512) * b[i]
+	  || c[i] != (unsigned char) (i + 1))  // c[i] was incremented once so far
+	abort ();
+      a[i] = i - 512;  // reset a[] for the next run
+    }
+  if (k != 4 + 3 * 1024
+      || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))  // the two added terms correspond to t2 and t in foo's return value
+    abort ();
+  k = 4;
+  s = bar (b, 512, 1024);  // same split point as foo, so the same expectations apply
+  for (i = 0; i < 1024; i++)
+    {
+      if (b[i] != (i - 51) % 39
+	  || a[i] != (i - 512) * b[i]
+	  || c[i] != (unsigned char) (i + 2))  // second increment of c[i]
+	abort ();
+      a[i] = i - 512;
+    }
+  if (k != 4 + 3 * 1024
+      || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
+    abort ();
+  k = 4;
+  s = bar (b, 511, 1021);  // this run covers only indices [0, 1021)
+  for (i = 0; i < 1021; i++)
+    {
+      if (b[i] != (i - 51) % 39
+	  || a[i] != (i - 512) * b[i]
+	  || c[i] != (unsigned char) (i + 3))  // third increment of c[i]
+	abort ();
+      a[i] = i - 512;
+    }
+  for (i = 1021; i < 1024; i++)  // the last three elements must be untouched by the third run
+    if (b[i] != (i - 51) % 39
+	|| a[i] != i - 512
+	|| c[i] != (unsigned char) (i + 2))
+      abort ();
+  if (k != 4 + 3 * 1021
+      || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020]))
+    abort ();
+#endif
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/simd-4.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-4.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,45 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S  // Class-type reduction operand: one int, zero-initialized by the constructor.
+{
+  int s;
+  S () : s (0) {}
+  ~S () {}  // user-provided (empty) destructor
+};
+#pragma omp declare reduction (+:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+
+__attribute__((noinline, noclone)) int
+foo ()  // Sums a[] three ways in one simd loop (into s, t and u) and returns s.s.
+{
+  int i, u = 0;
+  S s, t;
+  #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)  // all three reductions must produce the same total
+    abort ();
+  return s.s;
+}
+
+int
+main ()  // Fills a[] and checks that foo () returns the sum of its elements.
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  int s = foo ();
+  if (s != 19456)  // 32 * 496 + 128 * 28, the sum of all a[i]
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/simd-5.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-5.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,47 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S  // Class-type reduction operand: one int, zero-initialized by the constructor.
+{
+  int s;
+  S () : s (0) {}
+  ~S () {}  // user-provided (empty) destructor
+};
+#pragma omp declare reduction (+:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+
+__attribute__((noinline, noclone)) int
+foo ()  // Like the simd-4.C loop, but with safelen(1) and an extra iteration counter q; returns s.s.
+{
+  int i, u = 0, q = 0;
+  S s, t;
+  #pragma omp simd aligned(a : 32) reduction(+:s, q) reduction(foo:t, u) \
+	      safelen(1)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+      q++;  // counts iterations
+    }
+  if (t.s != s.s || u != s.s || q != 1024)  // equal totals, and exactly 1024 iterations counted
+    abort ();
+  return s.s;
+}
+
+int
+main ()  // Fills a[] and checks that foo () returns the sum of its elements.
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  int s = foo ();
+  if (s != 19456)  // 32 * 496 + 128 * 28, the sum of all a[i]
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/simd-6.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-6.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,70 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S  // Reduction operand with both a default and an int constructor.
+{
+  int s;
+  S () : s (0) {}
+  S (int x) : s (x) {}  // matches the "omp_priv (0)" initializers declared for S in this file
+  ~S () {}
+};
+#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) \
+		    initializer (omp_priv (0))
+#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) \
+		    initializer (omp_priv (0))
+#pragma omp declare reduction (foo:int:omp_out += omp_in) \
+		    initializer (omp_priv = 0)
+
+__attribute__((noinline, noclone)) S
+foo (S s)  // Reduces into the by-value parameter s, the local t and the reference u; returns t.
+{
+  int i, v = 0, &u = v;  // u is a reference to v and is what the reduction clause names
+  S t;
+  #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)  // all three reductions must produce the same total
+    abort ();
+  return t;
+}
+
+__attribute__((noinline, noclone)) int
+bar (S &s, S &t)  // Same loop as foo, but the S operands are reference parameters; returns s.s.
+{
+  int i, v = 0, &u = v;  // u is a reference to v and is what the reduction clause names
+  #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)  // all three reductions must produce the same total
+    abort ();
+  return s.s;
+}
+
+int
+main ()  // Checks foo () against the known sum of a[], then bar () against foo ().
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  S q;
+  int s = foo (q).s;
+  if (s != 19456)  // 32 * 496 + 128 * 28, the sum of all a[i]
+    abort ();
+  S r, v;
+  if (bar (r, v) != s)  // r and v start at 0, so bar must return the same total
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/simd-7.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-7.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,72 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S  // Reduction operand with both a default and an int constructor.
+{
+  int s;
+  S () : s (0) {}
+  S (int x) : s (x) {}  // matches the "omp_priv (0)" initializers declared for S in this file
+  ~S () {}
+};
+#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) \
+		    initializer (omp_priv (0))
+#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) \
+		    initializer (omp_priv (0))
+#pragma omp declare reduction (foo:int:omp_out += omp_in) \
+		    initializer (omp_priv = 0)
+
+__attribute__((noinline, noclone)) S
+foo (S s)  // safelen(1) variant: reduces into the by-value s, the local t and the reference u; returns t.
+{
+  int i, v = 0, &u = v;  // u is a reference to v and is what the reduction clause names
+  S t;
+  #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) \
+		   safelen(1)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)  // all three reductions must produce the same total
+    abort ();
+  return t;
+}
+
+__attribute__((noinline, noclone)) int
+bar (S &s, S &t)  // safelen(1) variant with the S operands passed by reference; returns s.s.
+{
+  int i, v = 0, &u = v;  // u is a reference to v and is what the reduction clause names
+  #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) \
+		   safelen(1)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)  // all three reductions must produce the same total
+    abort ();
+  return s.s;
+}
+
+int
+main ()  // Checks foo () against the known sum of a[], then bar () against foo ().
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  S q;
+  int s = foo (q).s;
+  if (s != 19456)  // 32 * 496 + 128 * 28, the sum of all a[i]
+    abort ();
+  S r, v;
+  if (bar (r, v) != s)  // r and v start at 0, so bar must return the same total
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/simd-8.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/simd-8.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,47 @@
+// PR libgomp/58482
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+extern "C" void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S  // Class-type reduction operand: one int, zero-initialized by the constructor.
+{
+  int s;
+  S () : s (0) {}
+  ~S () {}  // user-provided (empty) destructor
+};
+#pragma omp declare reduction (+:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+
+__attribute__((noinline, noclone)) int
+foo ()  // Sums a[] into s, t and u under a combined "parallel for simd" construct; returns s.s.
+{
+  int i, u = 0;
+  S s, t;
+  #pragma omp parallel for simd aligned(a : 32) reduction(+:s) \
+				reduction(foo:t, u)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)  // all three reductions must produce the same total
+    abort ();
+  return s.s;
+}
+
+int
+main ()  // Fills a[] and checks that foo () returns the sum of its elements.
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  int s = foo ();
+  if (s != 19456)  // 32 * 496 + 128 * 28, the sum of all a[i]
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/target-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/target-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1 @@
+#include "../libgomp.c/target-1.c"
--- libgomp/testsuite/libgomp.c++/target-2-aux.cc	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/target-2-aux.cc	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,5 @@
+double f[1024];
+double (&fr) [1024] = f;  // reference to the array f; declared extern in target-2.C
+double gbuf[1024];
+double *g = gbuf;
+double *&gr = g;  // reference to the pointer g; declared extern in target-2.C
--- libgomp/testsuite/libgomp.c++/target-2.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/target-2.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,58 @@
+// { dg-options "-O2 -fopenmp" }
+// { dg-additional-sources "target-2-aux.cc" }
+
+extern "C" void abort (void);
+
+void
+fn1 (double *x, double *y, int z)  // Fills the first z elements of x and y with fixed patterns.
+{
+  int i;
+  for (i = 0; i < z; i++)
+    {
+      x[i] = i & 31;  // repeats 0..31
+      y[i] = (i & 63) - 30;  // repeats -30..33
+    }
+}
+
+double b[1024];
+double (&br) [1024] = b;  // reference to the array b
+double cbuf[1024];
+double *c = cbuf;
+double *&cr = c;  // reference to the pointer c
+extern double (&fr) [1024];  // defined in target-2-aux.cc
+extern double *&gr;  // defined in target-2-aux.cc
+
+double
+fn2 (int x, double (&dr) [1024], double *&er)  // Maps sections of eight reference variables into a target region and returns a sum over x elements.
+{
+  double s = 0;
+  double h[1024];
+  double (&hr) [1024] = h;  // local reference to a local array
+  double ibuf[1024];
+  double *i = ibuf;
+  double *&ir = i;  // local reference to a local pointer
+  int j;
+  fn1 (hr + 2 * x, ir + 2 * x, x);  // fill only the sections [2 * x, 3 * x) that are mapped below
+  #pragma omp target map(to: br[:x], cr[0:x], dr[x:x], er[x:x]) \
+		     map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x])
+    #pragma omp parallel for reduction(+:s)
+      for (j = 0; j < x; j++)
+	s += br[j] * cr[j] + dr[x + j] + er[x + j]
+	     + fr[j] + gr[j] + hr[2 * x + j] + ir[2 * x + j];
+  return s;
+}
+
+int
+main ()  // Fills the sections fn2 reads, then checks fn2's result for x == 128.
+{
+  double d[1024];
+  double ebuf[1024];
+  double *e = ebuf;
+  fn1 (br, cr, 128);
+  fn1 (d + 128, e + 128, 128);  // fn2 reads dr and er at offsets [x, 2 * x)
+  fn1 (fr, gr, 128);
+  double h = fn2 (128, d, e);
+  if (h != 20416.0)  // expected sum for the patterns written by fn1
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/target-3.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/target-3.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1 @@
+#include "../libgomp.c/target-2.c"
--- libgomp/testsuite/libgomp.c++/taskgroup-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/taskgroup-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1 @@
+#include "../libgomp.c/taskgroup-1.c"
--- libgomp/testsuite/libgomp.c++/udr-1.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-1.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,82 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+struct S  // Reduction operand whose constructors and combiners leave distinguishable values in s.
+{
+  int s;
+  void foo (S &x) { s += x.s; }  // combiner: accumulate x.s
+  void foo (S &x, bool y) { s += x.s; if (y) abort (); }  // same, but aborts when y is true
+  S (const S &x) { s = x.s + 1; }  // copy construction adds 1
+  S (const S &x, bool y) { s = x.s + 2; if (y) abort (); }  // adds 2; aborts when y is true
+  S () { s = 6; }
+  ~S ();
+};
+
+S::~S ()  // Aborts if s < 6, then stores -1 into s.
+{
+  if (s < 6) abort ();
+  s = -1;
+  /* Ensure the above store is not DSEd.  */
+  asm volatile ("" : : "r" (&s) : "memory");
+}
+
+void
+bar (S &x)  // Initializer helper: requires x.s == 6 on entry and sets it to 15.
+{
+  if (x.s != 6) abort ();
+  x.s = 15;
+}
+
+#pragma omp declare reduction (foo: S: omp_out.foo (omp_in)) \
+	initializer (omp_priv (omp_orig, false))
+#pragma omp declare reduction (foo: char, int, short: omp_out += omp_in - 4) \
+	initializer (omp_priv (4))
+#pragma omp declare reduction (+: S: omp_out.foo (omp_in, false)) \
+	initializer (omp_priv (omp_orig))
+
+namespace N  // declares another reduction named foo for S, initialized through ::bar
+{
+  #pragma omp declare reduction (foo: S: omp_out.foo (omp_in)) \
+	initializer (::bar (omp_priv))
+  namespace M {}  // empty; main names it in "::N::M::operator +"
+}
+
+int
+main ()  // Runs one parallel region using many differently qualified reduction names and checks the results.
+{
+  S a, b, c, s, t, u;
+  if (a.s != 6 || b.s != 6 || c.s != 6
+      || s.s != 6 || t.s != 6 || u.s != 6) abort ();  // all six start at the default-constructed value
+  s.s = 9; t.s = 10; u.s = 11;
+  int d = 0, e = 0, f = 0, g = 0, h = 30, v = 2, q = 0;
+  #pragma omp declare reduction (foo: S: omp_out.foo (omp_in, true)) \
+	initializer (omp_priv = omp_orig)
+  {
+    #pragma omp declare reduction (foo: S: omp_out.foo (omp_in, false)) \
+	initializer (omp_priv = omp_orig)
+    #pragma omp parallel num_threads (4) reduction (N::operator +: q) \
+	reduction (operator +: a, d) reduction (::operator +: b, e) \
+	reduction (+: c, f) reduction (::N::M::operator +: g) \
+	reduction (::N::min: h) reduction (foo: s) reduction (N::foo: t) \
+	reduction (::foo: u) reduction (::foo: v)
+    {
+      if (a.s != 7 || b.s != 7 || c.s != 7
+	  || s.s != 10 || t.s != 15 || u.s != 13
+	  || v != 4 || d || e || f || g || h != __INT_MAX__) abort ();  // values expected in the private copies on entry to the region
+      asm volatile ("" : "+m" (a.s), "+m" (b.s));  // empty asm statements taking the variables as in/out operands (presumably to keep the checks and updates from being folded -- confirm)
+      asm volatile ("" : "+m" (c.s), "+r" (d));
+      asm volatile ("" : "+r" (e), "+r" (f));
+      asm volatile ("" : "+r" (g), "+r" (h));
+      asm volatile ("" : "+m" (s.s), "+m" (t.s));
+      asm volatile ("" : "+m" (u.s), "+r" (v));
+      a.s++; b.s++; c.s++; d++; e++; f++; g++; h = t.s;
+      s.s++; t.s++; u.s++; v++; q++;  // q is incremented once per execution of the region body
+    }
+  }
+  if (a.s != 6 + q * 8 || b.s != 6 + q * 8 || c.s != 6 + q * 8
+      || d != q || e != q || f != q || g != q || h != 15
+      || s.s != 9 + q * 11 || t.s != 10 + q * 16 || u.s != 11 + q * 14
+      || v != 2 + q)  // every expectation is the original value plus q times one body's contribution
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/udr-2.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-2.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,88 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+namespace NS  // Holds U and S, with reductions declared at class scope and at namespace scope.
+{
+  struct U  // members are declared but not defined in this file
+  {
+    void foo (U &, bool);
+    U ();
+  };
+  struct S
+  {
+    int s;
+    #pragma omp declare reduction (foo : U, S : omp_out.foo (omp_in, false))
+    #pragma omp declare reduction (foo : int : omp_out += omp_in) \
+	initializer (omp_priv = int ())
+    void baz (int v)  // runs a parallel region using the class-scope "foo" reductions on s and v
+    {
+      S s;
+      int q = 0;
+      if (s.s != 6 || v != 0) abort ();
+      s.s = 20;
+      #pragma omp parallel num_threads (4) reduction (foo : s, v) \
+	reduction (::NS::U::operator + : q)
+      {
+	if (s.s != 6 || q != 0 || v != 0) abort ();  // expected initial values of the private copies
+	asm volatile ("" : "+m" (s.s), "+r" (q), "+r" (v));
+	s.s++; q++; v++;
+      }
+      if (s.s != 20 + q * 7 || q != v) abort ();  // each body execution contributes 6 + 1 to s.s and 1 to q and v
+    }
+    void foo (S &x) { s += x.s; }  // combiner: accumulate x.s
+    void foo (S &x, bool y) { s += x.s; if (y) abort (); }  // same, but aborts when y is true
+    S (const S &x) { s = x.s + 1; }  // copy construction adds 1
+    S (const S &x, bool y) { s = x.s + 2; if (y) abort (); }
+    S () { s = 6; }
+    S (int x) { s = x; }
+    ~S ();
+  };
+  #pragma omp declare reduction (bar : S : omp_out.foo (omp_in)) \
+	initializer (omp_priv (8))
+}
+
+NS::S::~S ()  // Aborts if s < 6, then stores -1 into s.
+{
+  if (s < 6) abort ();
+  s = -1;
+  /* Ensure the above store is not DSEd.  */
+  asm volatile ("" : : "r" (&s) : "memory");
+}
+
+struct T : public NS::S  // Derived class whose baz () names the reduction "foo" unqualified.
+{
+  void baz ()
+  {
+    S s;
+    int q = 0;
+    if (s.s != 6) abort ();
+    #pragma omp parallel num_threads (4) reduction (foo:s) \
+	reduction (+: q)
+    {
+      if (s.s != 6 || q != 0) abort ();  // expected initial values of the private copies
+      asm volatile ("" : "+m" (s.s), "+r" (q));
+      s.s += 2; q++;
+    }
+    if (s.s != 6 + q * 8) abort ();  // each body execution contributes 6 + 2
+  }
+};
+
+int
+main ()  // Runs both baz () members, then a region that names the namespace-scope reduction "bar" unqualified.
+{
+  NS::S s;
+  s.baz (0);
+  T t;
+  t.baz ();
+  int q = 0;
+  if (s.s != 6) abort ();
+  // Test ADL
+  #pragma omp parallel num_threads (4) reduction (bar:s) reduction (+:q)
+  {
+    if (s.s != 8 || q != 0) abort ();  // 8 comes from the "omp_priv (8)" initializer of bar
+    asm volatile ("" : "+m" (s.s), "+r" (q));
+    s.s += 4; q++;
+  }
+  if (s.s != 6 + q * 12) abort ();  // each body execution contributes 8 + 4
+}
--- libgomp/testsuite/libgomp.c++/udr-3.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-3.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,149 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+void
+dblinit (double *p)  // Global initializer helper: stores 2.0 through p.
+{
+  *p = 2.0;
+}
+
+namespace NS  // Template versions of U and S, with reductions declared inside the class template.
+{
+  template <int N>
+  struct U  // members are declared but not defined in this file
+  {
+    void foo (U &, bool);
+    U ();
+  };
+  template <int N>
+  struct S
+  {
+    int s;
+    #pragma omp declare reduction (foo : U<0>, S : omp_out.foo (omp_in, false))
+    #pragma omp declare reduction (foo : int : omp_out += omp_in) \
+	initializer (omp_priv = N + 2)
+    #pragma omp declare reduction (foo : double : omp_out += omp_in) \
+	initializer (dblinit (&omp_priv))
+    void baz (int v)  // runs a parallel region using the class-scope "foo" reductions on s, v and d
+    {
+      S s;
+      int q = 0;
+      if (s.s != 6 || v != 0) abort ();
+      s.s = 20;
+      double d = 4.0;
+      #pragma omp parallel num_threads (4) reduction (foo : s, v, d) \
+	reduction (::NS::U<N>::operator + : q)
+      {
+	if (s.s != 6 || q != 0 || v != N + 2 || d != 2.0) abort ();  // d == 2.0 matches the global dblinit
+	asm volatile ("" : "+m" (s.s), "+r" (q), "+r" (v));
+	s.s++; q++; v++;
+      }
+      if (s.s != 20 + q * 7 || (N + 3) * q != v || d != 4.0 + 2.0 * q)  // each body execution contributes 7 to s.s, N + 3 to v and 2.0 to d
+	abort ();
+    }
+    void foo (S &x) { s += x.s; }  // combiner: accumulate x.s
+    void foo (S &x, bool y) { s += x.s; if (y) abort (); }  // same, but aborts when y is true
+    S (const S &x) { s = x.s + 1; }  // copy construction adds 1
+    S (const S &x, bool y) { s = x.s + 2; if (y) abort (); }
+    S () { s = 6; }
+    S (int x) { s = x; }
+    ~S ();
+  };
+  #pragma omp declare reduction (bar : S<1> : omp_out.foo (omp_in)) \
+	initializer (omp_priv (8))
+}
+
+template <int N>
+NS::S<N>::~S ()  // Aborts if s < 6, then stores -1 into s.
+{
+  if (s < 6) abort ();
+  s = -1;
+  /* Ensure the above store is not DSEd.  */
+  asm volatile ("" : : "r" (&s) : "memory");
+}
+
+template <int N>
+struct T : public NS::S<N>  // Derived template whose baz () names the reduction "foo" unqualified.
+{
+  void baz ()
+  {
+    NS::S<N> s;
+    int q = 0;
+    if (s.s != 6) abort ();
+    #pragma omp parallel num_threads (4) reduction (foo:s) \
+	reduction (+: q)
+    {
+      if (s.s != 6 || q != 0) abort ();  // expected initial values of the private copies
+      asm volatile ("" : "+m" (s.s), "+r" (q));
+      s.s += 2; q++;
+    }
+    if (s.s != 6 + q * 8) abort ();  // each body execution contributes 6 + 2
+  }
+};
+
+struct W  // second type named in the block-scope "+" reductions declared inside V::baz
+{
+  int v;
+  W () : v (6) {}
+  ~W () {}
+};
+
+template <typename T, typename D>
+struct V  // Declares reductions on its template parameters and, inside baz (), nested block-scope reductions.
+{
+  #pragma omp declare reduction (baz: T: omp_out.s += omp_in.s) \
+	initializer (omp_priv (11))
+  #pragma omp declare reduction (baz: D: omp_out += omp_in) \
+	initializer (dblinit (&omp_priv))
+  static void dblinit (D *x) { *x = 3.0; }  // member with the same name as the global dblinit, but storing 3.0
+  void baz ()
+  {
+    T t;
+    V v;
+    int q = 0;
+    D d = 4.0;
+    if (t.s != 6 || v.v != 4) abort ();
+    #pragma omp declare reduction (+ : V, W : omp_out.v -= omp_in.v) \
+	initializer (omp_priv (12))
+    {
+      #pragma omp declare reduction (+ : W, V : omp_out.v += omp_in.v) \
+	initializer (omp_priv (9))
+      #pragma omp parallel num_threads (4) reduction (+: v, q) \
+	reduction (baz: t, d)
+      {
+	if (t.s != 11 || v.v != 9 || q != 0 || d != 3.0) abort ();  // v.v == 9 matches the inner "+" declaration; d == 3.0 matches the member dblinit
+	asm volatile ("" : "+m" (t.s), "+m" (v.v), "+r" (q));
+	t.s += 2; v.v += 3; q++;
+      }
+      if (t.s != 6 + 13 * q || v.v != 4 + 12 * q || d != 4.0 + 3.0 * q)  // each body execution contributes 13, 12 and 3.0; v.v grows, matching the inner "+=" combiner
+	abort ();
+    }
+  }
+  int v;
+  V () : v (4) {}
+  V (int x) : v (x) {}
+  ~V () {}
+};
+
+int
+main ()  // Instantiates and runs each template test, plus a region that names "bar" unqualified.
+{
+  NS::S<0> u;
+  u.baz (0);
+  T<2> t;
+  t.baz ();
+  NS::S<1> s;
+  int q = 0;
+  if (s.s != 6) abort ();
+  // Test ADL
+  #pragma omp parallel num_threads (4) reduction (bar:s) reduction (+:q)
+  {
+    if (s.s != 8 || q != 0) abort ();  // 8 comes from the "omp_priv (8)" initializer of bar
+    asm volatile ("" : "+m" (s.s), "+r" (q));
+    s.s += 4; q++;
+  }
+  if (s.s != 6 + q * 12) abort ();  // each body execution contributes 8 + 4
+  V <NS::S <0>, double> v;
+  v.baz ();
+}
--- libgomp/testsuite/libgomp.c++/udr-4.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-4.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,32 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+struct S  // Class-type reduction operand: one int, zero-initialized by the constructor.
+{
+  int s;
+  S () : s (0) {}
+  ~S () {}  // user-provided (empty) destructor
+};
+
+#pragma omp declare reduction (+:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+
+int
+main ()  // Uses reductions declared without an initializer clause and checks the private copies start at 0.
+{
+  int i, u = 0, q = 0;
+  S s, t;
+  if (s.s != 0 || t.s != 0) abort ();
+  #pragma omp parallel reduction(+:s, q) reduction(foo:t, u)
+  {
+    if (s.s != 0 || t.s != 0 || u != 0 || q != 0) abort ();  // every private copy must start at 0
+    s.s = 6;
+    t.s = 8;
+    u = 9;
+    q++;
+  }
+  if (s.s != 6 * q || t.s != 8 * q || u != 9 * q) abort ();  // q is incremented once per execution of the region body
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/udr-5.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-5.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,49 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+struct S  // Declares a class-scope reduction "bar" for S and a block-scope "bar" for int inside foo ().
+{
+  void foo ()
+  {
+    S s;
+    int j = 0;
+    #pragma omp declare reduction (bar : int : omp_out += omp_in)
+    #pragma omp parallel reduction (bar : s) reduction(S::operator+ : j)
+    s.a = 4, j = 1;  // this single statement is the whole parallel region body
+    if (s.a != 4 * j) abort ();
+  }
+  #pragma omp declare reduction (bar : S : baz (omp_out, omp_in))
+  static void baz (S &x, S &y) { x.a += y.a; }  // combiner called by the class-scope reduction above
+  S () : a (0) {}
+  int a;
+};
+
+template <int N>
+struct T  // Template counterpart of S: reduces a T with "bar" and an S with the qualified name "S::bar".
+{
+  void foo ()
+  {
+    S s;
+    T t;
+    int j = 0;
+    #pragma omp declare reduction (bar : int : omp_out += omp_in)
+    #pragma omp parallel reduction (bar : t) reduction (S::bar : s) \
+			 reduction(T<N>::operator+ : j)
+    s.a = 4, t.a = 5, j = 1;  // this single statement is the whole parallel region body
+    if (s.a != 4 * j || t.a != 5 * j) abort ();
+  }
+  #pragma omp declare reduction (bar : T<N> : baz (omp_out, omp_in))
+  static void baz (T &x, T &y) { x.a += y.a; }  // combiner called by the class-scope reduction above
+  T () : a (N) {}
+  int a;
+};
+
+int
+main ()  // Runs the non-template test and the template test instantiated with N == 0.
+{
+  S s;
+  s.foo ();
+  T<0> t;  // N == 0, so T::a starts at 0
+  t.foo ();
+}
--- libgomp/testsuite/libgomp.c++/udr-6.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-6.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,70 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+struct A { int a; A () : a (6) {} };  // four leaf classes, each with a distinct default value
+struct B { int b; B () : b (5) {} };
+struct C { int c; C () : c (4) {} };
+struct D { int d; D () : d (3) {} };
+struct E : A, B {};
+struct F : C, D {};
+struct G : E, F {};  // G derives from both B (through E) and F, the two types with "+" reductions declared in this file
+void foo (B &);
+void foo (F &);
+#pragma omp declare reduction (+:F:omp_out.c += omp_in.c) \
+		    initializer(foo (omp_priv))
+#pragma omp declare reduction (+:B:omp_out.b += omp_in.b) \
+		    initializer(foo (omp_priv))
+
+void
+foo (B &x)  // Initializer helper: requires x.b == 5 on entry and sets it to 9.
+{
+  if (x.b != 5)
+    abort ();
+  x.b = 9;
+}
+
+template <typename T>
+void bar (T &x, T &y, int z)  // Combiner helper: aborts when z is nonzero, otherwise adds y.a to x.a.
+{
+  if (z)
+    abort ();
+  x.a += y.a;
+}
+
+namespace N1  // A and its "+" reduction are declared in the same namespace
+{
+  struct A { int a; A () : a (0) {} };
+  #pragma omp declare reduction (+:A:bar (omp_out, omp_in, 0))
+};
+namespace N2  // declares a competing "+" reduction for N1::A whose combiner would abort (z == 1)
+{
+  struct B : N1::A { };
+  #pragma omp declare reduction (+:N1::A:bar (omp_out, omp_in, 1))
+};
+
+int
+main ()  // Reduces objects of derived types G and N2::B with "+" and checks which members were combined.
+{
+  G g;
+  int i = 0;
+  #pragma omp parallel reduction(+:g, i)
+    {
+      if (g.a != 6 || g.b != 9 || g.c != 4 || g.d != 3)  // b == 9 shows foo (B &) initialized the private copy
+	abort ();
+      g.a = 1, g.b = 2, g.c = 3, g.d = 4, i = 1;
+    }
+  if (g.a != 6 || g.b != 5 + 2 * i || g.c != 4 || g.d != 3)  // only b is expected to have been combined
+    abort ();
+  N2::B b;
+  i = 0;
+  #pragma omp parallel reduction (+:b, i)
+    {
+      if (b.a != 0)
+	abort ();
+      b.a = 4;
+      i = 1;
+    }
+  if (b.a != 4 * i)  // reaching this check means the combiner was called with z == 0
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/udr-7.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-7.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,72 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+struct S  // Reduction operand whose copy constructor is observable (adds 1).
+{
+  int s;
+  void foo (S &x) { s += x.s; }  // combiner: accumulate x.s
+  S (const S &x) { s = x.s + 1; }  // copy construction adds 1
+  S () { s = 6; }
+  ~S () {}
+};
+
+void
+bar (S &x, S &y)  // Initializer helper: requires both arguments to hold 6, then sets x.s to 8.
+{
+  if (x.s != 6 || y.s != 6)
+    abort ();
+  x.s = 8;
+}
+
+#pragma omp declare reduction (foo: S: omp_out.foo (omp_in)) \
+	initializer (omp_priv (omp_orig))
+#pragma omp declare reduction (bar : S: omp_out.foo (omp_in)) \
+	initializer (bar (omp_priv, omp_orig))
+
+S
+baz (S x)  // By-value overload: reduces the parameter x and the local r, then returns r.
+{
+  S r;
+  int i = 0;
+  if (x.s != 7 || r.s != 6)  // x is a copy of main's s, so the copy constructor has already added 1
+    abort ();
+  #pragma omp parallel reduction (foo: x) reduction (bar: r) \
+		       reduction (+: i)
+  {
+    if (x.s != 8 || r.s != 8)  // expected initial values of the private copies
+      abort ();
+    x.s = 12;
+    r.s = 14;
+    i = 1;
+  }
+  if (x.s != 7 + 12 * i || r.s != 6 + 14 * i)  // each body execution contributes 12 to x.s and 14 to r.s
+    abort ();
+  return r;
+}
+
+void
+baz (S &x, S &y)  // By-reference overload: reduces both reference parameters.
+{
+  int i = 0, &j = i;  // j aliases i and is what the final check reads
+  #pragma omp parallel reduction (foo: x) reduction (bar: y) \
+		       reduction (+: i)
+  {
+    if (x.s != 7 || y.s != 8)  // expected initial values of the private copies
+      abort ();
+    x.s = 12;
+    y.s = 14;
+    i = 1;
+  }
+  if (x.s != 6 + 12 * j || y.s != 6 + 14 * j)  // each body execution contributes 12 to x.s and 14 to y.s
+    abort ();
+}
+
+int
+main ()  // Calls both baz overloads with default-constructed S objects.
+{
+  S s;
+  baz (s);  // by-value overload; the returned S is discarded
+  S t, u;
+  baz (t, u);  // by-reference overload
+}
--- libgomp/testsuite/libgomp.c++/udr-8.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-8.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,39 @@
+// { dg-do run }
+
+extern "C" void abort ();
+
+struct S;  // only forward-declared while the two reductions below are declared
+void foo (S *, S *);
+void bar (S &, S &);
+#pragma omp declare reduction (+:S:foo (&omp_out, &omp_in))
+#pragma omp declare reduction (*:S:bar (omp_out, omp_in))
+struct S { int s; S () : s (0) {} };  // completed after the reductions that name it
+
+void
+foo (S *x, S *y)  // Pointer-based combiner for the "+" reduction: adds y->s to x->s.
+{
+  x->s += y->s;
+}
+
+void
+bar (S &x, S &y)  // Reference-based combiner for the "*" reduction; it adds, it does not multiply.
+{
+  x.s += y.s;
+}
+
+int
+main ()  // Reduces s with "+" and t with "*" and checks both against the count in i.
+{
+  S s, t;
+  int i = 0;
+  #pragma omp parallel reduction (+:s, i) reduction (*:t)
+  {
+    if (s.s != 0 || t.s != 0)  // both private copies must start at 0
+      abort ();
+    s.s = 2;
+    t.s = 3;
+    i = 1;
+  }
+  if (s.s != 2 * i || t.s != 3 * i)  // each body execution contributes 2 to s.s and 3 to t.s
+    abort ();
+}
--- libgomp/testsuite/libgomp.c++/udr-9.C	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c++/udr-9.C	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,3 @@
+// { dg-do run }
+
+#include "../libgomp.c/udr-1.c"
--- libgomp/testsuite/libgomp.c/affinity-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/affinity-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,1147 @@
+/* Affinity tests.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 3, or (at your option) any later
+   version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_PROC_BIND "false" } */
+/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include "config.h"
+#include <alloca.h>
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef DO_FORK
+#include <signal.h>	/* SIGABRT.  */
+#include <sys/wait.h>	/* waitpid and the W* status macros used by main.  */
+#endif
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+#include <sched.h>
+#include <pthread.h>
+#ifdef INTERPOSE_GETAFFINITY
+#include <dlfcn.h>
+#endif
+#endif
+/* A place: LEN consecutive CPUs beginning at CPU number START.  */
+struct place
+{
+  int start, len;
+};
+struct places	/* One OMP_PLACES setting and the places expected from it.  */
+{
+  char name[40];	/* String stored in OMP_PLACES.  */
+  int count;	/* Number of entries used in PLACES.  */
+  struct place places[8];
+} places_array[] = {
+  { "", 1, { { -1, -1 } } },	/* start == -1: print_affinity does no checking.  */
+  { "{0}:8", 8,
+    { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
+      { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
+  { "{7,6}:2:-3", 2, { { 6, 2 }, { 3, 2 } } },
+  { "{6,7}:4:-2,!{2,3}", 3, { { 6, 2 }, { 4, 2 }, { 0, 2 } } },
+  { "{1}:7:1", 7,
+    { { 1, 1 }, { 2, 1 }, { 3, 1 },
+      { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
+  { "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}", 5,
+    { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }
+};
+
+unsigned long contig_cpucount;	/* Consecutive CPUs from CPU 0 seen in an affinity set.  */
+unsigned long min_cpusetsize;	/* The cpusetsize in use when that count was recorded.  */
+
+#if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \
+    && defined (CPU_ALLOC_SIZE)
+/* Interposer recording how many CPUs, counting from CPU 0, are in the set.  */
+#if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY)
+int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *);
+
+int
+pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
+{
+  int ret;
+  unsigned long i, max;
+  if (orig_getaffinity_np == NULL)	/* First call: look up the next definition.  */
+    {
+      orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *))
+			    dlsym (RTLD_NEXT, "pthread_getaffinity_np");
+      if (orig_getaffinity_np == NULL)
+	exit (0);	/* Nothing to forward to: leave the test with status 0.  */
+    }
+  ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
+  if (ret != 0)
+    return ret;
+  if (contig_cpucount == 0)	/* No nonzero count recorded so far.  */
+    {
+      max = 8 * cpusetsize;	/* Number of bits in the set.  */
+      for (i = 0; i < max; i++)
+	if (!CPU_ISSET_S (i, cpusetsize, cpuset))
+	  break;	/* I is the index of the first CPU not in the set.  */
+      contig_cpucount = i;
+      min_cpusetsize = cpusetsize;
+    }
+  return ret;
+}
+#endif
+/* Print the calling thread's CPU set; unless P.start is -1, compare it with P.  */
+void
+print_affinity (struct place p)
+{
+  static unsigned long size;	/* CPU set size in bytes; computed on first use.  */
+  if (size == 0)
+    {
+      if (min_cpusetsize)
+	size = min_cpusetsize;
+      else
+	{
+	  size = sysconf (_SC_NPROCESSORS_CONF);
+	  size = CPU_ALLOC_SIZE (size);
+	  if (size < sizeof (cpu_set_t))
+	    size = sizeof (cpu_set_t);
+	}
+    }
+  cpu_set_t *cpusetp = (cpu_set_t *) alloca (size);
+  if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0)
+    {
+      unsigned long i, len, max = 8 * size;	/* LEN: length of the current run of set CPUs.  */
+      int notfirst = 0, unexpected = 1;	/* Cleared only if the first run starts at P.start.  */
+
+      printf (" bound to {");
+      for (i = 0, len = 0; i < max; i++)
+	if (CPU_ISSET_S (i, size, cpusetp))
+	  {
+	    if (len == 0)	/* A new run of set CPUs starts at I.  */
+	      {
+		if (notfirst)
+		  {
+		    unexpected = 1;	/* A second run cannot match a single place.  */
+		    printf (",");
+		  }
+		else if (i == (unsigned long) p.start)
+		  unexpected = 0;
+		notfirst = 1;
+		printf ("%lu", i);
+	      }
+	    ++len;
+	  }
+	else
+	  {
+	    if (len && len != (unsigned long) p.len)	/* A run just ended: check its length.  */
+	      unexpected = 1;
+	    if (len > 1)
+	      printf (":%lu", len);
+	    len = 0;
+	  }
+      if (len && len != (unsigned long) p.len)	/* Same check for a run reaching the last bit.  */
+	unexpected = 1;
+      if (len > 1)
+	printf (":%lu", len);
+      printf ("}");
+      if (p.start != -1 && unexpected)
+	{
+	  printf (", expected {%d", p.start);
+	  if (p.len != 1)
+	    printf (":%d", p.len);
+	  printf ("} instead");
+	}
+      else if (p.start != -1)
+	printf (", verified");
+    }
+}
+#else
+void
+print_affinity (struct place p)
+{
+  (void) p.start;	/* Stub for the #else branch: P is read and discarded.  */
+  (void) p.len;	/* Nothing is printed.  */
+}
+#endif
+
+
+int
+main ()
+{
+  char *env_proc_bind = getenv ("OMP_PROC_BIND");
+  int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0;
+  int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0;
+  int test_spread_master_close
+    = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0;
+  char *env_places = getenv ("OMP_PLACES");
+  int test_places = 0;
+
+#ifdef DO_FORK
+  if (env_places == NULL && contig_cpucount >= 8 && test_false
+      && getenv ("GOMP_AFFINITY") == NULL)
+    {
+      int i, j, status;
+      pid_t pid;
+      for (j = 0; j < 2; j++)
+	{
+	  if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1)
+	      < 0)
+	    break;
+	  for (i = sizeof (places_array) / sizeof (places_array[0]) - 1;
+	       i; --i)
+	    {
+	      if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0)
+		break;
+	      pid = fork ();
+	      if (pid == -1)
+		break;
+	      if (pid == 0)
+		{
+		  execl ("/proc/self/exe", "affinity-1.exe", NULL);
+		  _exit (1);
+		}
+	      if (waitpid (pid, &status, 0) < 0)
+		break;
+	      if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT)
+		abort ();
+	      else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0)
+		break;
+	    }
+	  if (i)
+	    break;
+	}
+    }
+#endif
+
+  int first = 1;
+  if (env_proc_bind)
+    {
+      printf ("OMP_PROC_BIND='%s'", env_proc_bind);
+      first = 0;
+    }
+  if (env_places)
+    printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places);
+  printf ("\n");
+
+  if (env_places && contig_cpucount >= 8
+      && (test_true || test_spread_master_close))
+    {
+      for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1;
+	   test_places; --test_places)
+	if (strcmp (env_places, places_array[test_places].name) == 0)
+	  break;
+    }
+
+#define verify(if_true, if_s_m_c) \
+  if (test_false && omp_get_proc_bind () != omp_proc_bind_false)	\
+    abort ();								\
+  if (test_true && omp_get_proc_bind () != if_true)			\
+    abort ();								\
+  if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c)	\
+    abort ();
+
+  verify (omp_proc_bind_true, omp_proc_bind_spread);
+
+  printf ("Initial thread");
+  print_affinity (places_array[test_places].places[0]);
+  printf ("\n");
+  omp_set_nested (1);
+  omp_set_dynamic (0);
+
+  #pragma omp parallel if (0)
+  {
+    verify (omp_proc_bind_true, omp_proc_bind_master);
+    #pragma omp parallel if (0)
+    {
+      verify (omp_proc_bind_true, omp_proc_bind_close);
+      #pragma omp parallel if (0)
+      {
+	verify (omp_proc_bind_true, omp_proc_bind_close);
+      }
+      #pragma omp parallel if (0) proc_bind (spread)
+      {
+	verify (omp_proc_bind_spread, omp_proc_bind_spread);
+      }
+    }
+    #pragma omp parallel if (0) proc_bind (master)
+    {
+      verify (omp_proc_bind_master, omp_proc_bind_close);
+      #pragma omp parallel if (0)
+      {
+	verify (omp_proc_bind_master, omp_proc_bind_close);
+      }
+      #pragma omp parallel if (0) proc_bind (spread)
+      {
+	verify (omp_proc_bind_spread, omp_proc_bind_spread);
+      }
+    }
+  }
+
+  /* True/spread */
+  #pragma omp parallel num_threads (4)
+  {
+    verify (omp_proc_bind_true, omp_proc_bind_master);
+    #pragma omp critical
+    {
+      struct place p = places_array[0].places[0];
+      int thr = omp_get_thread_num ();
+      printf ("#1 thread %d", thr);
+      if (omp_get_num_threads () == 4 && test_spread_master_close)
+	switch (places_array[test_places].count)
+	  {
+	  case 8:
+	    /* T = 4, P = 8, each subpartition has 2 places.  */
+	  case 7:
+	    /* T = 4, P = 7, each subpartition has 2 places, except
+	       the last one, which has just one place.  */
+	    p = places_array[test_places].places[2 * thr];
+	    break;
+	  case 5:
+	    /* T = 4, P = 5, first subpartition has 2 places, the
+	       rest just one.  */
+	    p = places_array[test_places].places[thr ? 1 + thr : 0];
+	    break;
+	  case 3:
+	    /* T = 4, P = 3, unit sized subpartitions, first gets
+	       thr0 and thr3, second thr1, third thr2.  */
+	    p = places_array[test_places].places[thr == 3 ? 0 : thr];
+	    break;
+	  case 2:
+	    /* T = 4, P = 2, unit sized subpartitions, each with
+	       2 threads.  */
+	    p = places_array[test_places].places[thr / 2];
+	    break;
+	  }
+      print_affinity (p);
+      printf ("\n");
+    }
+    #pragma omp barrier
+    if (omp_get_thread_num () == 3)
+      {
+	/* True/spread, true/master.  */
+	#pragma omp parallel num_threads (3)
+	{
+	  verify (omp_proc_bind_true, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#1,#1 thread 3,%d", thr);
+	    if (omp_get_num_threads () == 3 && test_spread_master_close)
+	      /* Outer is spread, inner master, so just bind to the
+		 place of the master thread, which is thr 3 above.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		case 7:
+		  p = places_array[test_places].places[6];
+		  break;
+		case 5:
+		  p = places_array[test_places].places[4];
+		  break;
+		case 3:
+		  p = places_array[test_places].places[0];
+		  break;
+		case 2:
+		  p = places_array[test_places].places[1];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* True/spread, spread.  */
+	#pragma omp parallel num_threads (5) proc_bind (spread)
+	{
+	  verify (omp_proc_bind_spread, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#1,#2 thread 3,%d", thr);
+	    if (omp_get_num_threads () == 5 && test_spread_master_close)
+	      /* Outer is spread, inner spread.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 5, P = 2, unit sized subpartitions.  */
+		  p = places_array[test_places].places[thr == 4 ? 6
+						       : 6 + thr / 2];
+		  break;
+		/* The rest are T = 5, P = 1.  */
+		case 7:
+		  p = places_array[test_places].places[6];
+		  break;
+		case 5:
+		  p = places_array[test_places].places[4];
+		  break;
+		case 3:
+		  p = places_array[test_places].places[0];
+		  break;
+		case 2:
+		  p = places_array[test_places].places[1];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	  #pragma omp barrier
+	  if (omp_get_thread_num () == 3)
+	    {
+	      /* True/spread, spread, close.  */
+	      #pragma omp parallel num_threads (5) proc_bind (close)
+	      {
+		verify (omp_proc_bind_close, omp_proc_bind_close);
+		#pragma omp critical
+		{
+		  struct place p = places_array[0].places[0];
+		  int thr = omp_get_thread_num ();
+		  printf ("#1,#2,#1 thread 3,3,%d", thr);
+		  if (omp_get_num_threads () == 5 && test_spread_master_close)
+		    /* Outer is spread, inner spread, innermost close.  */
+		    switch (places_array[test_places].count)
+		      {
+		      /* All are T = 5, P = 1.  */
+		      case 8:
+			p = places_array[test_places].places[7];
+			break;
+		      case 7:
+			p = places_array[test_places].places[6];
+			break;
+		      case 5:
+			p = places_array[test_places].places[4];
+			break;
+		      case 3:
+			p = places_array[test_places].places[0];
+			break;
+		      case 2:
+			p = places_array[test_places].places[1];
+			break;
+		      }
+		  print_affinity (p);
+		  printf ("\n");
+		}
+	      }
+	    }
+	}
+	/* True/spread, master.  */
+	#pragma omp parallel num_threads (4) proc_bind(master)
+	{
+	  verify (omp_proc_bind_master, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#1,#3 thread 3,%d", thr);
+	    if (omp_get_num_threads () == 4 && test_spread_master_close)
+	      /* Outer is spread, inner master, so just bind to the
+		 place of the master thread, which is thr 3 above.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		case 7:
+		  p = places_array[test_places].places[6];
+		  break;
+		case 5:
+		  p = places_array[test_places].places[4];
+		  break;
+		case 3:
+		  p = places_array[test_places].places[0];
+		  break;
+		case 2:
+		  p = places_array[test_places].places[1];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* True/spread, close.  */
+	#pragma omp parallel num_threads (6) proc_bind (close)
+	{
+	  verify (omp_proc_bind_close, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#1,#4 thread 3,%d", thr);
+	    if (omp_get_num_threads () == 6 && test_spread_master_close)
+	      /* Outer is spread, inner close.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 6, P = 2, unit sized subpartitions.  */
+		  p = places_array[test_places].places[6 + thr / 3];
+		  break;
+		/* The rest are T = 6, P = 1.  */
+		case 7:
+		  p = places_array[test_places].places[6];
+		  break;
+		case 5:
+		  p = places_array[test_places].places[4];
+		  break;
+		case 3:
+		  p = places_array[test_places].places[0];
+		  break;
+		case 2:
+		  p = places_array[test_places].places[1];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+      }
+  }
+
+  /* Spread.  */
+  #pragma omp parallel num_threads (5) proc_bind(spread)
+  {
+    verify (omp_proc_bind_spread, omp_proc_bind_master);
+    #pragma omp critical
+    {
+      struct place p = places_array[0].places[0];
+      int thr = omp_get_thread_num ();
+      printf ("#2 thread %d", thr);
+      if (omp_get_num_threads () == 5
+	  && (test_spread_master_close || test_true))
+	switch (places_array[test_places].count)
+	  {
+	  case 8:
+	    /* T = 5, P = 8, first 3 subpartitions have 2 places, last
+	       2 one place.  */
+	    p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr];
+	    break;
+	  case 7:
+	    /* T = 5, P = 7, first 2 subpartitions have 2 places, last
+	       3 one place.  */
+	    p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr];
+	    break;
+	  case 5:
+	    /* T = 5, P = 5, unit sized subpartitions, each one with one
+	       thread.  */
+	    p = places_array[test_places].places[thr];
+	    break;
+	  case 3:
+	    /* T = 5, P = 3, unit sized subpartitions, first gets
+	       thr0 and thr3, second thr1 and thr4, third thr2.  */
+	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
+	    break;
+	  case 2:
+	    /* T = 5, P = 2, unit sized subpartitions, first with
+	       thr{0,1,4} and second with thr{2,3}.  */
+	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
+	    break;
+	  }
+      print_affinity (p);
+      printf ("\n");
+    }
+    #pragma omp barrier
+    if (omp_get_thread_num () == 3)
+      {
+	int pp = 0;
+	switch (places_array[test_places].count)
+	  {
+	  case 8: pp = 6; break;
+	  case 7: pp = 5; break;
+	  case 5: pp = 3; break;
+	  case 2: pp = 1; break;
+	  }
+	/* Spread, spread/master.  */
+	#pragma omp parallel num_threads (3) firstprivate (pp)
+	{
+	  verify (omp_proc_bind_spread, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#2,#1 thread 3,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is spread, inner spread resp. master, but we have
+		 just unit sized partitions.  */
+	      p = places_array[test_places].places[pp];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Spread, spread.  */
+	#pragma omp parallel num_threads (5) proc_bind (spread) \
+			     firstprivate (pp)
+	{
+	  verify (omp_proc_bind_spread, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#2,#2 thread 3,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is spread, inner spread, but we have
+		 just unit sized partitions.  */
+	      p = places_array[test_places].places[pp];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Spread, master.  */
+	#pragma omp parallel num_threads (4) proc_bind(master) \
+			     firstprivate(pp)
+	{
+	  verify (omp_proc_bind_master, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#2,#3 thread 3,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is spread, inner master, but we have
+		 just unit sized partitions.  */
+	      p = places_array[test_places].places[pp];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Spread, close.  */
+	#pragma omp parallel num_threads (6) proc_bind (close) \
+			     firstprivate (pp)
+	{
+	  verify (omp_proc_bind_close, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#2,#4 thread 3,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is spread, inner close, but we have
+		 just unit sized partitions.  */
+	      p = places_array[test_places].places[pp];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+      }
+  }
+
+  /* Master.  */
+  #pragma omp parallel num_threads (3) proc_bind(master)
+  {
+    verify (omp_proc_bind_master, omp_proc_bind_master);
+    #pragma omp critical
+    {
+      struct place p = places_array[0].places[0];
+      int thr = omp_get_thread_num ();
+      printf ("#3 thread %d", thr);
+      if (test_spread_master_close || test_true)
+	p = places_array[test_places].places[0];
+      print_affinity (p);
+      printf ("\n");
+    }
+    #pragma omp barrier
+    if (omp_get_thread_num () == 2)
+      {
+	/* Master, master.  */
+	#pragma omp parallel num_threads (4)
+	{
+	  verify (omp_proc_bind_master, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#3,#1 thread 2,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is master, inner is master.  */
+	      p = places_array[test_places].places[0];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Master, spread.  */
+	#pragma omp parallel num_threads (4) proc_bind (spread)
+	{
+	  verify (omp_proc_bind_spread, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#3,#2 thread 2,%d", thr);
+	    if (omp_get_num_threads () == 4
+		&& (test_spread_master_close || test_true))
+	      /* Outer is master, inner is spread.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 4, P = 8, each subpartition has 2 places.  */
+		case 7:
+		  /* T = 4, P = 7, each subpartition has 2 places, except
+		     the last one, which has just one place.  */
+		  p = places_array[test_places].places[2 * thr];
+		  break;
+		case 5:
+		  /* T = 4, P = 5, first subpartition has 2 places, the
+		     rest just one.  */
+		  p = places_array[test_places].places[thr ? 1 + thr : 0];
+		  break;
+		case 3:
+		  /* T = 4, P = 3, unit sized subpartitions, first gets
+		     thr0 and thr3, second thr1, third thr2.  */
+		  p = places_array[test_places].places[thr == 3 ? 0 : thr];
+		  break;
+		case 2:
+		  /* T = 4, P = 2, unit sized subpartitions, each with
+		     2 threads.  */
+		  p = places_array[test_places].places[thr / 2];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	  #pragma omp barrier
+	  if (omp_get_thread_num () == 0)
+	    {
+	      /* Master, spread, close.  */
+	      #pragma omp parallel num_threads (5) proc_bind (close)
+	      {
+		verify (omp_proc_bind_close, omp_proc_bind_close);
+		#pragma omp critical
+		{
+		  struct place p = places_array[0].places[0];
+		  int thr = omp_get_thread_num ();
+		  printf ("#3,#2,#1 thread 2,0,%d", thr);
+		  if (omp_get_num_threads () == 5
+		      && (test_spread_master_close || test_true))
+		    /* Outer is master, inner spread, innermost close.  */
+		    switch (places_array[test_places].count)
+		      {
+		      /* First 3 are T = 5, P = 2.  */
+		      case 8:
+		      case 7:
+		      case 5:
+			p = places_array[test_places].places[(thr & 2) / 2];
+			break;
+		      /* All the rest are T = 5, P = 1.  */
+		      case 3:
+		      case 2:
+			p = places_array[test_places].places[0];
+			break;
+		      }
+		  print_affinity (p);
+		  printf ("\n");
+		}
+	      }
+	    }
+	  #pragma omp barrier
+	  if (omp_get_thread_num () == 3)
+	    {
+	      /* Master, spread, close.  */
+	      #pragma omp parallel num_threads (5) proc_bind (close)
+	      {
+		verify (omp_proc_bind_close, omp_proc_bind_close);
+		#pragma omp critical
+		{
+		  struct place p = places_array[0].places[0];
+		  int thr = omp_get_thread_num ();
+		  printf ("#3,#2,#2 thread 2,3,%d", thr);
+		  if (omp_get_num_threads () == 5
+		      && (test_spread_master_close || test_true))
+		    /* Outer is master, inner spread, innermost close.  */
+		    switch (places_array[test_places].count)
+		      {
+		      case 8:
+			/* T = 5, P = 2.  */
+			p = places_array[test_places].places[6
+							     + (thr & 2) / 2];
+			break;
+		      /* All the rest are T = 5, P = 1.  */
+		      case 7:
+			p = places_array[test_places].places[6];
+			break;
+		      case 5:
+			p = places_array[test_places].places[4];
+			break;
+		      case 3:
+			p = places_array[test_places].places[0];
+			break;
+		      case 2:
+			p = places_array[test_places].places[1];
+			break;
+		      }
+		  print_affinity (p);
+		  printf ("\n");
+		}
+	      }
+	    }
+	}
+	/* Master, master.  */
+	#pragma omp parallel num_threads (4) proc_bind(master)
+	{
+	  verify (omp_proc_bind_master, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#3,#3 thread 2,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is master, inner master.  */
+	      p = places_array[test_places].places[0];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Master, close.  */
+	#pragma omp parallel num_threads (6) proc_bind (close)
+	{
+	  verify (omp_proc_bind_close, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#3,#4 thread 2,%d", thr);
+	    if (omp_get_num_threads () == 6
+		&& (test_spread_master_close || test_true))
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 6, P = 8.  */
+		case 7:
+		  /* T = 6, P = 7.  */
+		  p = places_array[test_places].places[thr];
+		  break;
+		case 5:
+		  /* T = 6, P = 5.  thr{0,5} go into the first place.  */
+		  p = places_array[test_places].places[thr == 5 ? 0 : thr];
+		  break;
+		case 3:
+		  /* T = 6, P = 3, two threads into each place.  */
+		  p = places_array[test_places].places[thr / 2];
+		  break;
+		case 2:
+		  /* T = 6, P = 2, 3 threads into each place.  */
+		  p = places_array[test_places].places[thr / 3];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+      }
+  }
+
+  #pragma omp parallel num_threads (5) proc_bind(close)
+  {
+    verify (omp_proc_bind_close, omp_proc_bind_master);
+    #pragma omp critical
+    {
+      struct place p = places_array[0].places[0];
+      int thr = omp_get_thread_num ();
+      printf ("#4 thread %d", thr);
+      if (omp_get_num_threads () == 5
+	  && (test_spread_master_close || test_true))
+	switch (places_array[test_places].count)
+	  {
+	  case 8:
+	    /* T = 5, P = 8.  */
+	  case 7:
+	    /* T = 5, P = 7.  */
+	  case 5:
+	    /* T = 5, P = 5.  */
+	    p = places_array[test_places].places[thr];
+	    break;
+	  case 3:
+	    /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second,
+	       thr2 in third.  */
+	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
+	    break;
+	  case 2:
+	    /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second.  */
+	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
+	    break;
+	  }
+      print_affinity (p);
+      printf ("\n");
+    }
+    #pragma omp barrier
+    if (omp_get_thread_num () == 2)
+      {
+	int pp = 0;
+	switch (places_array[test_places].count)
+	  {
+	  case 8:
+	  case 7:
+	  case 5:
+	  case 3:
+	    pp = 2;
+	    break;
+	  case 2:
+	    pp = 1;
+	    break;
+	  }
+	/* Close, close/master.  */
+	#pragma omp parallel num_threads (4) firstprivate (pp)
+	{
+	  verify (omp_proc_bind_close, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#4,#1 thread 2,%d", thr);
+	    if (test_spread_master_close)
+	      /* Outer is close, inner is master.  */
+	      p = places_array[test_places].places[pp];
+	    else if (omp_get_num_threads () == 4 && test_true)
+	      /* Outer is close, inner is close.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 4, P = 8.  */
+		case 7:
+		  /* T = 4, P = 7.  */
+		  p = places_array[test_places].places[2 + thr];
+		  break;
+		case 5:
+		  /* T = 4, P = 5.  There is wrap-around for thr3.  */
+		  p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr];
+		  break;
+		case 3:
+		  /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2
+		     into p1.  */
+		  p = places_array[test_places].places[(2 + thr) % 3];
+		  break;
+		case 2:
+		  /* T = 4, P = 2, 2 threads into each place.  */
+		  p = places_array[test_places].places[1 - thr / 2];
+		  break;
+		}
+
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Close, spread.  */
+	#pragma omp parallel num_threads (4) proc_bind (spread)
+	{
+	  verify (omp_proc_bind_spread, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#4,#2 thread 2,%d", thr);
+	    if (omp_get_num_threads () == 4
+		&& (test_spread_master_close || test_true))
+	      /* Outer is close, inner is spread.  */
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 4, P = 8, each subpartition has 2 places.  */
+		case 7:
+		  /* T = 4, P = 7, each subpartition has 2 places, except
+		     the last one, which has just one place.  */
+		  p = places_array[test_places].places[thr == 3 ? 0
+						       : 2 + 2 * thr];
+		  break;
+		case 5:
+		  /* T = 4, P = 5, first subpartition has 2 places, the
+		     rest just one.  */
+		  p = places_array[test_places].places[thr == 3 ? 0
+						       : 2 + thr];
+		  break;
+		case 3:
+		  /* T = 4, P = 3, unit sized subpartitions, third gets
+		     thr0 and thr3, first thr1, second thr2.  */
+		  p = places_array[test_places].places[thr == 0 ? 2 : thr - 1];
+		  break;
+		case 2:
+		  /* T = 4, P = 2, unit sized subpartitions, each with
+		     2 threads.  */
+		  p = places_array[test_places].places[1 - thr / 2];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	  #pragma omp barrier
+	  if (omp_get_thread_num () == 0)
+	    {
+	      /* Close, spread, close.  */
+	      #pragma omp parallel num_threads (5) proc_bind (close)
+	      {
+		verify (omp_proc_bind_close, omp_proc_bind_close);
+		#pragma omp critical
+		{
+		  struct place p = places_array[0].places[0];
+		  int thr = omp_get_thread_num ();
+		  printf ("#4,#2,#1 thread 2,0,%d", thr);
+		  if (omp_get_num_threads () == 5
+		      && (test_spread_master_close || test_true))
+		    /* Outer is close, inner spread, innermost close.  */
+		    switch (places_array[test_places].count)
+		      {
+		      case 8:
+		      case 7:
+			/* T = 5, P = 2.  */
+			p = places_array[test_places].places[2
+							     + (thr & 2) / 2];
+			break;
+		      /* All the rest are T = 5, P = 1.  */
+		      case 5:
+		      case 3:
+			p = places_array[test_places].places[2];
+			break;
+		      case 2:
+			p = places_array[test_places].places[1];
+			break;
+		      }
+		  print_affinity (p);
+		  printf ("\n");
+		}
+	      }
+	    }
+	  #pragma omp barrier
+	  if (omp_get_thread_num () == 2)
+	    {
+	      /* Close, spread, close.  */
+	      #pragma omp parallel num_threads (5) proc_bind (close)
+	      {
+		verify (omp_proc_bind_close, omp_proc_bind_close);
+		#pragma omp critical
+		{
+		  struct place p = places_array[0].places[0];
+		  int thr = omp_get_thread_num ();
+		  printf ("#4,#2,#2 thread 2,2,%d", thr);
+		  if (omp_get_num_threads () == 5
+		      && (test_spread_master_close || test_true))
+		    /* Outer is close, inner spread, innermost close.  */
+		    switch (places_array[test_places].count)
+		      {
+		      case 8:
+			/* T = 5, P = 2.  */
+			p = places_array[test_places].places[6
+							     + (thr & 2) / 2];
+			break;
+		      /* All the rest are T = 5, P = 1.  */
+		      case 7:
+			p = places_array[test_places].places[6];
+			break;
+		      case 5:
+			p = places_array[test_places].places[4];
+			break;
+		      case 3:
+			p = places_array[test_places].places[1];
+			break;
+		      case 2:
+			p = places_array[test_places].places[0];
+			break;
+		      }
+		  print_affinity (p);
+		  printf ("\n");
+		}
+	      }
+	    }
+	  #pragma omp barrier
+	  if (omp_get_thread_num () == 3)
+	    {
+	      /* Close, spread, close.  */
+	      #pragma omp parallel num_threads (5) proc_bind (close)
+	      {
+		verify (omp_proc_bind_close, omp_proc_bind_close);
+		#pragma omp critical
+		{
+		  struct place p = places_array[0].places[0];
+		  int thr = omp_get_thread_num ();
+		  printf ("#4,#2,#3 thread 2,3,%d", thr);
+		  if (omp_get_num_threads () == 5
+		      && (test_spread_master_close || test_true))
+		    /* Outer is close, inner spread, innermost close.  */
+		    switch (places_array[test_places].count)
+		      {
+		      case 8:
+		      case 7:
+		      case 5:
+			/* T = 5, P = 2.  */
+			p = places_array[test_places].places[(thr & 2) / 2];
+			break;
+		      /* All the rest are T = 5, P = 1.  */
+		      case 3:
+			p = places_array[test_places].places[2];
+			break;
+		      case 2:
+			p = places_array[test_places].places[0];
+			break;
+		      }
+		  print_affinity (p);
+		  printf ("\n");
+		}
+	      }
+	    }
+	}
+	/* Close, master.  */
+	#pragma omp parallel num_threads (4) proc_bind(master) \
+			     firstprivate (pp)
+	{
+	  verify (omp_proc_bind_master, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#4,#3 thread 2,%d", thr);
+	    if (test_spread_master_close || test_true)
+	      /* Outer is close, inner master.  */
+	      p = places_array[test_places].places[pp];
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+	/* Close, close.  */
+	#pragma omp parallel num_threads (6) proc_bind (close)
+	{
+	  verify (omp_proc_bind_close, omp_proc_bind_close);
+	  #pragma omp critical
+	  {
+	    struct place p = places_array[0].places[0];
+	    int thr = omp_get_thread_num ();
+	    printf ("#4,#4 thread 2,%d", thr);
+	    if (omp_get_num_threads () == 6
+		&& (test_spread_master_close || test_true))
+	      switch (places_array[test_places].count)
+		{
+		case 8:
+		  /* T = 6, P = 8.  */
+		  p = places_array[test_places].places[2 + thr];
+		  break;
+		case 7:
+		  /* T = 6, P = 7.  */
+		  p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr];
+		  break;
+		case 5:
+		  /* T = 6, P = 5.  thr{0,5} go into the third place.  */
+		  p = places_array[test_places].places[thr >= 3 ? thr - 3
+						       : 2 + thr];
+		  break;
+		case 3:
+		  /* T = 6, P = 3, two threads into each place.  */
+		  p = places_array[test_places].places[thr < 2 ? 2
+						       : thr / 2 - 1];
+		  break;
+		case 2:
+		  /* T = 6, P = 2, 3 threads into each place.  */
+		  p = places_array[test_places].places[1 - thr / 3];
+		  break;
+		}
+	    print_affinity (p);
+	    printf ("\n");
+	  }
+	}
+      }
+  }
+
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/atomic-14.c	(.../trunk)	(revision 203241)
+++ libgomp/testsuite/libgomp.c/atomic-14.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -16,7 +16,7 @@ main ()
   #pragma omp atomic update
     x = x + 7;
   #pragma omp atomic
-    x = x + 7 + 6;
+    x = x + (7 + 6);
   #pragma omp atomic update
     x = x + 2 * 3;
   #pragma omp atomic
@@ -65,7 +65,7 @@ main ()
   if (v != -8)
     abort ();
   #pragma omp atomic
-    x = x * -4 / 2;
+    x = x * (-4 / 2);
   #pragma omp atomic read
     v = x;
   if (v != 16)
--- libgomp/testsuite/libgomp.c/atomic-15.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/atomic-15.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,99 @@
+// { dg-do run }
+
+extern void abort (void);
+int x = 6;
+/* Check atomic update, read and capture forms on X; abort on a mismatch.  */
+int
+main ()
+{
+  int v, l = 2, s = 1;
+  #pragma omp atomic
+    x = -3 + x;  /* 6 -> 3 */
+  #pragma omp atomic read
+    v = x;
+  if (v != 3)
+    abort ();
+  #pragma omp atomic update
+    x = 3 * 2 * 1 + x;  /* 3 -> 9 */
+  #pragma omp atomic read
+    v = x;
+  if (v != 9)
+    abort ();
+  #pragma omp atomic capture
+    v = x = x | 16;  /* 9 -> 25 */
+  if (v != 25)
+    abort ();
+  #pragma omp atomic capture
+    v = x = x + 14 * 2 / 4;  /* 25 + 7 -> 32 */
+  if (v != 32)
+    abort ();
+  #pragma omp atomic capture
+    v = x = 5 | x;  /* 32 -> 37 */
+  if (v != 37)
+    abort ();
+  #pragma omp atomic capture
+    v = x = 40 + 12 - 2 - 7 - x;  /* 43 - 37 -> 6 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 3 + x; }  /* v gets the old value 6, x becomes 9 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = -1 * -1 * -1 * -1 - x; }  /* v = 9, x = 1 - 9 = -8 */
+  if (v != 9)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != -8)
+    abort ();
+  #pragma omp atomic capture
+    { x = 2 * 2 - x; v = x; }  /* v gets the new value 4 + 8 = 12 */
+  if (v != 12)
+    abort ();
+  #pragma omp atomic capture
+    { x = 7 & x; v = x; }  /* 7 & 12 -> 4 */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 6; }  /* capture the old value, then plain write */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 7 * 8 + 23; }  /* x becomes 79 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 79)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = 23 + 6 * 4; }  /* x becomes 47 */
+  if (v != 79)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = l ? 17 : 12; }  /* l is 2, so x becomes 17 */
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x = l = s++ + 3; }  /* side effects: l = 4, s = 2, x = 4 */
+  if (v != 17 || l != 4 || s != 2)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 4)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/atomic-16.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/atomic-16.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,58 @@
+// { dg-do run }
+
+extern void abort (void);
+int x = 6, cnt;
+/* Return CNT and then increment it; main uses CNT to count the calls.  */
+int
+foo (void)
+{
+  return cnt++;
+}
+/* Check values and the number of foo calls made by atomic operands.  */
+int
+main ()
+{
+  int v, *p;
+  p = &x;  /* every p[...] below has index 0, i.e. designates x */
+  #pragma omp atomic update
+    p[foo (), 0] = 16 + 6 - p[foo (), 0];  /* 22 - 6 -> 16 */
+  #pragma omp atomic read
+    v = x;
+  if (cnt != 2 || v != 16)
+    abort ();
+  #pragma omp atomic capture
+    v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3;  /* 16 -> 19 */
+  if (cnt != 6 || v != 19)
+    abort ();
+  #pragma omp atomic capture
+    v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0];  /* 6 + 19 */
+  if (cnt != 9 || v != 25)
+    abort ();
+  #pragma omp atomic capture
+    {
+      v = p[foo () & 0]; p[foo () & 0] = (foo (), 1) * 9 - p[foo () & 0];
+    }
+  if (cnt != 13 || v != 25)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != -16)  /* 9 - 25 */
+    abort ();
+  #pragma omp atomic capture
+    {
+      p[0 & foo ()] = 16 - 2 + 3 + p[0 & foo ()]; v = p[0 & foo ()];
+    }
+  if (cnt != 16 || v != 1)  /* 17 - 16 */
+    abort ();
+  #pragma omp atomic capture
+    {
+      v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 13 : foo () + 6;
+    }
+  if (cnt != 19 || v != 1)  /* the untaken ?: arm must not call foo */
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 13)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/atomic-17.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/atomic-17.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,99 @@
+// { dg-do run }
+
+extern void abort (void);
+int x = 6;
+/* Same sequence of checks as atomic-15.c, with seq_cst on each atomic.  */
+int
+main ()
+{
+  int v, l = 2, s = 1;
+  #pragma omp atomic seq_cst
+    x = -3 + x;  /* 6 -> 3 */
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 3)
+    abort ();
+  #pragma omp atomic update seq_cst
+    x = 3 * 2 * 1 + x;  /* 3 -> 9 */
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 9)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = x | 16;  /* 9 -> 25 */
+  if (v != 25)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = x + 14 * 2 / 4;  /* 25 + 7 -> 32 */
+  if (v != 32)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = 5 | x;  /* 32 -> 37 */
+  if (v != 37)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    v = x = 40 + 12 - 2 - 7 - x;  /* 43 - 37 -> 6 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 3 + x; }  /* v gets the old value 6, x becomes 9 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = -1 * -1 * -1 * -1 - x; }  /* v = 9, x = 1 - 9 = -8 */
+  if (v != 9)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != -8)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { x = 2 * 2 - x; v = x; }  /* v gets the new value 4 + 8 = 12 */
+  if (v != 12)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { x = 7 & x; v = x; }  /* 7 & 12 -> 4 */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 6; }  /* capture the old value, then plain write */
+  if (v != 4)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 7 * 8 + 23; }  /* x becomes 79 */
+  if (v != 6)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 79)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = 23 + 6 * 4; }  /* x becomes 47 */
+  if (v != 79)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = l ? 17 : 12; }  /* l is 2, so x becomes 17 */
+  if (v != 47)
+    abort ();
+  #pragma omp atomic capture seq_cst
+    { v = x; x = l = s++ + 3; }  /* side effects: l = 4, s = 2, x = 4 */
+  if (v != 17 || l != 4 || s != 2)
+    abort ();
+  #pragma omp atomic read seq_cst
+    v = x;
+  if (v != 4)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-for-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-for-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+/* Each iteration cancels the loop; code after the cancel must not run.  */
+int
+main ()
+{
+  #pragma omp parallel num_threads (32)
+  {
+    int i;
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for
+	if (omp_get_cancellation ())  /* only fatal if cancellation is on */
+	  abort ();
+      }
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-for-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-for-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,95 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+/* Run loops whose cancel is gated by X[0..4]; check the V and W totals.  */
+__attribute__((noinline, noclone)) int
+foo (int *x)
+{
+  int v = 0, w = 0;
+  #pragma omp parallel num_threads (32) shared (v, w)
+  {
+    int i;
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[0])
+	abort ();  /* main passes x[0] == 1, so this must be skipped */
+      }
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[1])
+	#pragma omp atomic
+	v++;  /* x[1] == 0 in main: all 1000 iterations count */
+      }
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[2])
+	#pragma omp atomic
+	w += 8;  /* x[2] == 1 in main: W is expected to stay 0 */
+      }
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[3])
+	#pragma omp atomic
+	v += 2;  /* x[3] == 0 in main: adds 2000 */
+      }
+  }
+  if (v != 3000 || w != 0)  /* 1000 + 2000 from the two uncancelled loops */
+    abort ();
+  #pragma omp parallel num_threads (32) shared (v, w)
+  {
+    int i;
+    /* None of these cancel directives should actually cancel anything,
+       but the compiler shouldn't know that and thus should use cancellable
+       barriers at the end of all the workshares.  */
+    #pragma omp cancel parallel if (omp_get_thread_num () == 1 && x[4])
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[0])
+	abort ();
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 2 && x[4])
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[1])
+	#pragma omp atomic
+	v++;
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 3 && x[4])
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[2])
+	#pragma omp atomic
+	w += 8;
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 4 && x[4])
+    #pragma omp for
+    for (i = 0; i < 1000; ++i)
+      {
+	#pragma omp cancel for if (x[3])
+	#pragma omp atomic
+	v += 2;
+      }
+    #pragma omp cancel parallel if (omp_get_thread_num () == 5 && x[4])
+  }
+  if (v != 6000 || w != 0)  /* the second region adds another 3000 to V */
+    abort ();
+  return 0;
+}
+/* Call foo with flags { 1, 0, 1, 0, 0 } if cancellation is enabled.  */
+int
+main ()
+{
+  int x[] = { 1, 0, 1, 0, 0 };
+  if (omp_get_cancellation ())  /* foo's checks assume cancellation works */
+    foo (x);
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-parallel-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-parallel-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+/* Every thread cancels the region; code after the cancel must not run.  */
+int
+main ()
+{
+  #pragma omp parallel num_threads (32)
+  {
+    #pragma omp cancel parallel
+    if (omp_get_cancellation ())  /* only fatal if cancellation is on */
+      abort ();
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-parallel-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-parallel-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <omp.h>
+/* Each thread acts on X[thread number]; 4 cancels the parallel region.  */
+static void
+foo (int *x)
+{
+  #pragma omp parallel firstprivate(x) num_threads (32)
+  {
+    int thr = omp_get_thread_num ();
+    switch (x[thr])  /* 0 and other values: go straight to the barrier */
+      {
+      case 4:
+	#pragma omp cancel parallel
+	break;
+      case 3:  /* spawn three short sleeping tasks */
+	#pragma omp task
+	usleep (1000);
+	#pragma omp task
+	usleep (2000);
+	#pragma omp task
+	usleep (4000);
+	break;
+      case 2:  /* like case 1, after a 1 ms delay */
+	usleep (1000);
+	/* FALLTHRU */
+      case 1:
+	#pragma omp cancellation point parallel
+	break;
+      }
+    #pragma omp barrier
+    if (omp_get_cancellation ())  /* must not get past the barrier */
+      abort ();
+  }
+}
+/* Run foo on one fixed pattern, then on 32 random ones holding a 4.  */
+int
+main ()
+{
+  int i, j, x[32] = { 0, 1, 2, 4, 2, 2, 1, 0 };
+  foo (x);
+  for (i = 0; i < 32; i++)
+    {
+      for (j = 0; j < 32; j++)
+	x[j] = rand () & 3;  /* values 0..3 */
+      x[rand () & 31] = 4;  /* one random slot gets the cancelling value */
+      foo (x);
+    }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-parallel-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-parallel-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <omp.h>
+#include <unistd.h>
+/* Stand-in for work: an empty asm statement with a "memory" clobber.  */
+static inline void
+do_some_work (void)
+{
+  asm volatile ("" : : : "memory");
+}
+/* Thread 0 cancels after 1 s; thread 1 hits a cancellation point at 2 s.  */
+int
+main ()
+{
+  omp_set_dynamic (0);
+  omp_set_schedule (omp_sched_static, 1);  /* used by schedule (runtime) */
+  #pragma omp parallel num_threads (16)
+  {
+    int i, j;
+    do_some_work ();
+    #pragma omp barrier
+    if (omp_get_thread_num () == 1)
+      {
+	sleep (2);  /* longer than thread 0's sleep below */
+	#pragma omp cancellation point parallel
+      }
+    for (j = 3; j <= 16; j++)  /* 14 nowait loops of 3..16 iterations */
+      #pragma omp for schedule (runtime) nowait
+      for (i = 0; i < j; i++)
+	do_some_work ();
+    if (omp_get_thread_num () == 0)
+      {
+	sleep (1);
+	#pragma omp cancel parallel
+      }
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-sections-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-sections-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+/* Each of four sections cancels the construct; none may reach abort.  */
+int
+main ()
+{
+  if (!omp_get_cancellation ())  /* nothing to test when disabled */
+    return 0;
+  #pragma omp parallel num_threads (32)
+  {
+    #pragma omp sections
+      {
+	{
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      #pragma omp section
+	{
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      #pragma omp section
+	{
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      #pragma omp section
+	{
+	  #pragma omp cancel sections
+	  abort ();
+	}
+      }
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-taskgroup-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-taskgroup-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+/* Binary tree node: two child pointers (NULL in leaves) and a value.  */
+struct T { struct T *children[2]; int val; };
+/* Return the node of TREE holding VAL, or NULL; LVL is the depth.  */
+struct T *
+search (struct T *tree, int val, int lvl)
+{
+  if (tree == NULL || tree->val == val)
+    return tree;
+  struct T *ret = NULL;
+  int i;
+  for (i = 0; i < 2; i++)  /* one task per child */
+    #pragma omp task shared(ret) if(lvl < 10)
+    {
+      struct T *r = search (tree->children[i], val, lvl + 1);
+      if (r)
+	{
+	  #pragma omp atomic write
+	  ret = r;
+	  #pragma omp cancel taskgroup
+	}
+    }
+  #pragma omp taskwait
+  return ret;  /* still NULL if neither child task found VAL */
+}
+/* Run search for VAL from one thread of a parallel region, in a taskgroup.  */
+struct T *
+searchp (struct T *tree, int val)
+{
+  struct T *ret;  /* assigned by the single construct below */
+  #pragma omp parallel shared(ret) firstprivate (tree, val)
+  #pragma omp single
+  #pragma omp taskgroup
+  ret = search (tree, val, 0);
+  return ret;
+}
+/* Build a complete binary tree with val == index; look up 50 random values.  */
+int
+main ()
+{
+  /* Must be power of two minus 1.  */
+  int size = 0x7ffff;
+  struct T *trees = (struct T *) malloc (size * sizeof (struct T));
+  if (trees == NULL)  /* treat allocation failure as a pass */
+    return 0;
+  int i, l = 1, b = 0;  /* level holding i starts at b; the next at l */
+  for (i = 0; i < size; i++)
+    {
+      if (i == l)
+	{
+	  b = l;
+	  l = l * 2 + 1;
+	}
+      trees[i].val = i;
+      trees[i].children[0] = l == size ? NULL : &trees[l + (i - b) * 2];
+      trees[i].children[1] = l == size ? NULL : &trees[l + (i - b) * 2 + 1];
+    }
+  for (i = 0; i < 50; i++)
+    {
+      int v = random () % size;  /* 0 .. size - 1; "& size" could give size */
+      if (searchp (&trees[0], v) != &trees[v])
+	abort ();
+    }
+  free (trees);
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/cancel-taskgroup-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/cancel-taskgroup-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <omp.h>
+/* Cancel a taskgroup from inside its tasks, in two different setups.  */
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp taskgroup
+  #pragma omp task
+  {
+    #pragma omp cancel taskgroup
+    if (omp_get_cancellation ())  /* must not run past the cancel */
+      abort ();
+  }
+  #pragma omp parallel
+  {
+    #pragma omp barrier
+    #pragma omp single
+    #pragma omp taskgroup
+    {
+      int i;
+      for (i = 0; i < 50; i++)  /* tasks with i > 5 request cancellation */
+	#pragma omp task
+	{
+	  #pragma omp cancellation point taskgroup
+	  usleep (30);
+	  #pragma omp cancel taskgroup if (i > 5)
+	}
+    }
+    usleep (10);
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/depend-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,215 @@
+#include <stdlib.h>
+/* out then in on X, declared outside the parallel: reader must see 2.  */
+void
+dep (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+  }
+}
+/* As dep, but X is local to the single block and a taskwait follows.  */
+void
+dep2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp taskwait
+  }
+}
+/* As dep, but X is declared in the parallel region, outside the single.  */
+void
+dep3 (void)
+{
+  #pragma omp parallel
+  {
+    int x = 1;
+    #pragma omp single
+    {
+      #pragma omp task shared (x) depend(out: x)
+      x = 2;
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+    }
+  }
+}
+/* Tasks without a shared clause on X: the in task must still read 1.  */
+void
+firstpriv (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task depend(out: x)
+    x = 2;
+    #pragma omp task depend(in: x)
+    if (x != 1)
+      abort ();
+  }
+}
+/* in then out on X: the reader must see 1, before the writer stores 2.  */
+void
+antidep (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared(x) depend(in: x)
+    if (x != 1)
+      abort ();
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+  }
+}
+/* As antidep, with X local to the single block and tasks in a taskgroup.  */
+void
+antidep2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared(x) depend(in: x)
+      if (x != 1)
+	abort ();
+      #pragma omp task shared(x) depend(out: x)
+      x = 2;
+    }
+  }
+}
+/* As antidep, with X declared in the parallel region outside the single.  */
+void
+antidep3 (void)
+{
+  #pragma omp parallel
+  {
+    int x = 1;
+    #pragma omp single
+    {
+      #pragma omp task shared(x) depend(in: x)
+      if (x != 1)
+	abort ();
+      #pragma omp task shared(x) depend(out: x)
+      x = 2;
+    }
+  }
+}
+
+/* Two out tasks on X: after the taskwait the second store (2) must win.  */
+void
+outdep (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 0;
+    #pragma omp task shared(x) depend(out: x)
+    x = 1;
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+    #pragma omp taskwait
+    if (x != 2)
+      abort ();
+  }
+}
+/* One out task followed by three in tasks on X; each reader must see 2.  */
+void
+concurrent (void)
+{
+  int x = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+  }
+}
+/* As concurrent, with X local to the single block and a final taskwait.  */
+void
+concurrent2 (void)
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1;
+    #pragma omp task shared (x) depend(out: x)
+    x = 2;
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp task shared (x) depend(in: x)
+    if (x != 2)
+      abort ();
+    #pragma omp taskwait
+  }
+}
+/* As concurrent, with X declared in the parallel region outside single.  */
+void
+concurrent3 (void)
+{
+  #pragma omp parallel
+  {
+    int x = 1;
+    #pragma omp single
+    {
+      #pragma omp task shared (x) depend(out: x)
+      x = 2;
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+      #pragma omp task shared (x) depend(in: x)
+      if (x != 2)
+	abort ();
+    }
+  }
+}
+/* Run every dependence scenario in turn; each aborts on failure.  */
+int
+main ()
+{
+  dep ();
+  dep2 ();
+  dep3 ();
+  firstpriv ();
+  antidep ();
+  antidep2 ();
+  antidep3 ();
+  outdep ();
+  concurrent ();
+  concurrent2 ();
+  concurrent3 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/depend-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,71 @@
+#include <stdlib.h>
+#include <unistd.h>
+/* Order tasks via array-section depends; sleep after them if DO_SLEEP.  */
+void
+foo (int do_sleep)
+{
+  int a[64], i, *p = a + 4, x = 0;  /* p[k] aliases a[k + 4] */
+  asm volatile ("" : "+r" (p));  /* empty asm taking p as "+r" operand */
+  for (i = 0; i < 64; i++)
+    a[i] = i + 8;
+  #pragma omp parallel private (i)
+  {
+    #pragma omp single nowait
+    {
+      for (i = 0; i < 8; i++)  /* writers for a[i * 8], a[i * 8 + 4], x */
+	{
+	  #pragma omp task depend(out: a[i * 8 : 4])
+	    a[i * 8] += (i + 2) * 9;
+	  #pragma omp task depend(out: p[i * 8 : 2])
+	    p[i * 8] += (i + 3) * 10;
+	  #pragma omp task depend(out: x)
+	    x = 1;
+	}
+      for (i = 0; i < 8; i++)
+	#pragma omp task depend(in: a[i * 8 : 4]) \
+			 depend(inout: a[i * 8 + 4 : 2]) \
+			 depend(in: a[0 : 4]) depend(in: x)
+	{
+	  if (a[0] != 8 + 2 * 9 || x != 1)
+	    abort ();
+	  if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+	    abort ();
+	  if (a[4 + i * 8] != 4 + i * 8 + 8 + (i + 3) * 10)
+	    abort ();
+	  p[i * 8] += a[i * 8];  /* fold a[i * 8] into a[i * 8 + 4] */
+	}
+      for (i = 0; i < 8; i++)
+	#pragma omp task depend(inout: a[i * 8 : 4]) \
+			 depend(in: p[i * 8 : 2]) \
+			 depend(in: p[0 : 2], x)
+	{
+	  if (p[0] != 4 + 8 + 3 * 10 + 0 + 8 + 2 * 9 || x != 1)
+	    abort ();
+	  if (a[i * 8] != i * 8 + 8 + (i + 2) * 9)
+	    abort ();
+	  if (a[4 + i * 8] != (4 + i * 8 + 8 + (i + 3) * 10
+			       + i * 8 + 8 + (i + 2) * 9))
+	    abort ();
+	  a[i * 8] += 2;
+	}
+      for (i = 0; i < 4; i++)  /* final readers see the "+ 2" updates */
+	#pragma omp task depend(in: a[i * 16 : 4], a[i * 16 + 8 : 4], x)
+	{
+	  if (a[i * 16] != i * 16 + 8 + (2 * i + 2) * 9 + 2 || x != 1)
+	    abort ();
+	  if (p[i * 16 + 4] != i * 16 + 8 + 8 + (2 * i + 1 + 2) * 9 + 2)
+	    abort ();
+	}
+    }
+    if (do_sleep)  /* run by every thread, since the single is nowait */
+      sleep (1);
+  }
+}
+/* Run foo once with the post-task sleep and once without it.  */
+int
+main ()
+{
+  foo (1);
+  foo (0);
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/depend-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,51 @@
+#include <stdlib.h>
+#include <unistd.h>
+/* Readers of X in outer taskgroups must see 1 despite a nested writer.  */
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1, y = 2;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared (x) depend(in: x)
+      {
+	usleep (10000);  /* delay the read */
+	if (x != 1)
+	  abort ();
+      }
+      #pragma omp taskgroup
+      {
+	#pragma omp task shared (x) depend(in: x)
+	{
+	  usleep (15000);
+	  if (x != 1)
+	    abort ();
+	}
+	#pragma omp task shared (y) depend(inout: y)
+	{
+	  if (y != 2)
+	    abort ();
+	  y = 3;
+	}
+	#pragma omp taskgroup
+	{
+	  #pragma omp task shared (x) depend(in: x)
+	  {
+	    usleep (13000);
+	    if (x != 1)
+	      abort ();
+	  }
+	  #pragma omp taskgroup
+	  {
+	    #pragma omp task shared (x) depend(out: x)
+	    x = 2;  /* the only writer of x, in the innermost taskgroup */
+	  }
+	}
+      }
+    }
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/depend-4.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/depend-4.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,56 @@
+#include <stdlib.h>
+#include <unistd.h>
+/* Tasks with depend clauses whose if clause (x > 10) evaluates to false.  */
+int
+main ()
+{
+  #pragma omp parallel
+  #pragma omp single
+  {
+    int x = 1, y = 2, z = 3;
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared (x, y, z) depend(inout: x, y) \
+		       depend (in: z) if (x > 10)
+      {
+	if (x != 1 || y != 2 || z != 3)
+	  abort ();
+	x = 4;
+	y = 5;
+      }
+      /* The above task has depend clauses, but no dependencies
+	 on earlier tasks, and is if (0), so must be scheduled
+	 immediately.  */
+      if (x != 4 || y != 5)
+	abort ();
+    }
+    #pragma omp taskgroup
+    {
+      #pragma omp task shared (x, y) depend(in: x, y)
+      {
+	usleep (10000);  /* delay the reader */
+	if (x != 4 || y != 5 || z != 3)
+	  abort ();
+      }
+      #pragma omp task shared (x, y) depend(in: x, y)
+      {
+	usleep (10000);
+	if (x != 4 || y != 5 || z != 3)
+	  abort ();
+      }
+      #pragma omp task shared (x, y, z) depend(inout: x, y) \
+		       depend (in: z) if (x > 10)
+      {
+	if (x != 4 || y != 5 || z != 3)
+	  abort ();
+	x = 6;
+	y = 7;
+      }
+      /* The above task has depend clauses and may depend on earlier
+	 tasks, so although its if clause is false it can be deferred.  */
+    }
+    if (x != 6 || y != 7)  /* checked only after the taskgroup ends */
+      abort ();
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/for-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/for-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,35 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort (void);
+/* M expands its arguments first; O then pastes them together as x_y_z.  */
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F parallel for
+#define G pf  /* name infix: test_pf_* */
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F for
+#define G f  /* name infix: test_f_* */
+#include "for-1.h"
+#undef F
+#undef G
+/* Abort if any generated test function returns nonzero.  */
+int
+main ()
+{
+  if (test_pf_static ()
+      || test_pf_static32 ()
+      || test_pf_auto ()
+      || test_pf_guided32 ()
+      || test_pf_runtime ()
+      || test_f_static ()
+      || test_f_static32 ()
+      || test_f_auto ()
+      || test_f_guided32 ()
+      || test_f_runtime ())
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/for-1.h	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/for-1.h	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,25 @@
+#define S  /* first variant: no schedule clause */
+#define N(x) M(x, G, static)  /* names become x_<G>_static */
+#include "for-2.h"
+#undef S
+#undef N
+#define S schedule(static, 32)
+#define N(x) M(x, G, static32)  /* names become x_<G>_static32 */
+#include "for-2.h"
+#undef S
+#undef N
+#define S schedule(auto)
+#define N(x) M(x, G, auto)  /* names become x_<G>_auto */
+#include "for-2.h"
+#undef S
+#undef N
+#define S schedule(guided, 32)
+#define N(x) M(x, G, guided32)  /* names become x_<G>_guided32 */
+#include "for-2.h"
+#undef S
+#undef N
+#define S schedule(runtime)
+#define N(x) M(x, G, runtime)  /* names become x_<G>_runtime */
+#include "for-2.h"
+#undef S
+#undef N
--- libgomp/testsuite/libgomp.c/for-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/for-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,46 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort (void);
+/* M expands its arguments first; O then pastes them together as x_y_z.  */
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F simd
+#define G simd
+#define S  /* no extra clause */
+#define N(x) M(x, G, normal)  /* names become x_simd_normal */
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F parallel for simd
+#define G pf_simd  /* name infix: test_pf_simd_* */
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F for simd
+#define G f_simd  /* name infix: test_f_simd_* */
+#include "for-1.h"
+#undef F
+#undef G
+/* Abort if any generated simd test function returns nonzero.  */
+int
+main ()
+{
+  if (test_simd_normal ()
+      || test_pf_simd_static ()
+      || test_pf_simd_static32 ()
+      || test_pf_simd_auto ()
+      || test_pf_simd_guided32 ()
+      || test_pf_simd_runtime ()
+      || test_f_simd_static ()
+      || test_f_simd_static32 ()
+      || test_f_simd_auto ()
+      || test_f_simd_guided32 ()
+      || test_f_simd_runtime ())
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/for-2.h	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/for-2.h	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,269 @@
+#ifndef VARS  /* define the shared data and helper on first inclusion only */
+#define VARS
+int a[1500];
+float b[10][15][10];
+__attribute__((noreturn)) void
+noreturn (void)
+{
+  for (;;);  /* never returns: loops forever if ever called */
+}
+#endif
+/* Add 2 to each of a[0] .. a[1499] with a plain int loop.  */
+__attribute__((noinline, noclone)) void
+N(f0) (void)
+{
+  int i;
+#pragma omp F S
+  for (i = 0; i < 1500; i++)
+    a[i] += 2;
+}
+/* Subtract 2 from every a[]; unsigned iterator starting at __INT_MAX__.  */
+__attribute__((noinline, noclone)) void
+N(f1) (void)
+{
+#pragma omp F S
+  for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2)
+    a[(i - __INT_MAX__) >> 1] -= 2;  /* index runs 0 .. 1499 */
+}
+/* Subtract 4 from every a[]; descending unsigned long long iterator.  */
+__attribute__((noinline, noclone)) void
+N(f2) (void)
+{
+  unsigned long long i;
+#pragma omp F S
+  for (i = __LONG_LONG_MAX__ + 4500ULL - 27;
+       i > __LONG_LONG_MAX__ - 27ULL; i -= 3)
+    a[(i + 26LL - __LONG_LONG_MAX__) / 3] -= 4;  /* index 1499 down to 0 */
+}
+/* Add 7 to a[i + 48] for i from N1 + 23 down to above N2 - 25, step S3.  */
+__attribute__((noinline, noclone)) void
+N(f3) (long long n1, long long n2, long long s3)
+{
+#pragma omp F S
+  for (long long i = n1 + 23; i > n2 - 25; i -= s3)
+    a[i + 48] += 7;
+}
+/* Zero-iteration loop (30 < 20 is false): a[] must stay unchanged.  */
+__attribute__((noinline, noclone)) void
+N(f4) (void)
+{
+  unsigned int i;
+#pragma omp F S
+  for (i = 30; i < 20; i += 2)
+    a[i] += 10;
+}
+/* Add 2.5 to b[v1][v2][v3] over three collapsed ascending int loops.  */
+__attribute__((noinline, noclone)) void
+N(f5) (int n11, int n12, int n21, int n22, int n31, int n32,
+       int s1, int s2, int s3)
+{
+  int v1, v2, v3;
+#pragma omp F S collapse(3)
+  for (v1 = n11; v1 < n12; v1 += s1)
+    for (v2 = n21; v2 < n22; v2 += s2)
+      for (v3 = n31; v3 < n32; v3 += s3)
+	b[v1][v2][v3] += 2.5;
+}
+/* Subtract 4.5 from b[v1][v2 / 2][v3] over three collapsed ">" loops.  */
+__attribute__((noinline, noclone)) void
+N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32,
+       int s1, int s2, long long int s3)
+{
+  int v1, v2;
+  long long v3;  /* innermost iterator is wider than the outer two */
+#pragma omp F S collapse(3)
+  for (v1 = n11; v1 > n12; v1 += s1)
+    for (v2 = n21; v2 > n22; v2 += s2)
+      for (v3 = n31; v3 > n32; v3 += s3)
+	b[v1][v2 / 2][v3] -= 4.5;
+}
+/* Add 5.5 to every b[][][] using collapsed loops over unsigned iterators.  */
+__attribute__((noinline, noclone)) void
+N(f7) (void)
+{
+  unsigned int v1, v3;
+  unsigned long long v2;
+#pragma omp F S collapse(3)
+  for (v1 = 0; v1 < 20; v1 += 2)  /* v1 >> 1 runs 0 .. 9 */
+    for (v2 = __LONG_LONG_MAX__ + 16ULL;
+	 v2 > __LONG_LONG_MAX__ - 29ULL; v2 -= 3)
+      for (v3 = 10; v3 > 0; v3--)  /* v3 - 1 runs 9 .. 0 */
+	b[v1 >> 1][(v2 - __LONG_LONG_MAX__ + 64) / 3 - 12][v3 - 1] += 5.5;
+}
+/* Collapsed nest whose inner loops have no iterations; b[] is unchanged.  */
+__attribute__((noinline, noclone)) void
+N(f8) (void)
+{
+  long long v1, v2, v3;
+#pragma omp F S collapse(3)
+  for (v1 = 0; v1 < 20; v1 += 2)
+    for (v2 = 30; v2 < 20; v2++)  /* 30 < 20 is false */
+      for (v3 = 10; v3 < 0; v3--)  /* 10 < 0 is false */
+	b[v1][v2][v3] += 5.5;
+}
+/* Zero-iteration loop (20 < 10) whose body calls the noreturn function.  */
+__attribute__((noinline, noclone)) void
+N(f9) (void)
+{
+  int i;
+#pragma omp F S
+  for (i = 20; i < 10; i++)
+    {
+      a[i] += 2;
+      noreturn ();  /* would loop forever if the body were ever entered */
+      a[i] -= 4;
+    }
+}
+/* Collapsed nest with an empty middle loop and a noreturn call in the body.  */
+__attribute__((noinline, noclone)) void
+N(f10) (void)
+{
+  int i;
+#pragma omp F S collapse(3)
+  for (i = 0; i < 10; i++)
+    for (int j = 10; j < 8; j++)  /* 10 < 8 is false: no iterations */
+      for (long k = -10; k < 10; k++)
+	{
+	  b[i][j][k] += 4;
+	  noreturn ();
+	  b[i][j][k] -= 8;
+	}
+}
+/* Like f9, but the bound N is a parameter; test passes 10 (no iterations).  */
+__attribute__((noinline, noclone)) void
+N(f11) (int n)
+{
+  int i;
+#pragma omp F S
+  for (i = 20; i < n; i++)
+    {
+      a[i] += 8;
+      noreturn ();
+      a[i] -= 16;
+    }
+}
+/* Like f10, but the middle loop starts at N; test passes 12 (12 < 8 fails).  */
+__attribute__((noinline, noclone)) void
+N(f12) (int n)
+{
+  int i;
+#pragma omp F S collapse(3)
+  for (i = 0; i < 10; i++)
+    for (int j = n; j < 8; j++)
+      for (long k = -10; k < 10; k++)
+	{
+	  b[i][j][k] += 16;
+	  noreturn ();
+	  b[i][j][k] -= 32;
+	}
+}
+/* Add 2 to every element of a[] using a pointer as the loop iterator.  */
+__attribute__((noinline, noclone)) void
+N(f13) (void)
+{
+  int *i;
+#pragma omp F S
+  for (i = a; i < &a[1500]; i++)
+    i[0] += 2;
+}
+/* Subtract 3.5 from every b[][][] using three collapsed pointer loops.  */
+__attribute__((noinline, noclone)) void
+N(f14) (void)
+{
+  float *i;
+#pragma omp F S collapse(3)
+  for (i = &b[0][0][0]; i < &b[0][0][10]; i++)  /* offsets 0 .. 9 */
+    for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10)
+      for (float *k = &b[0][0][10]; k > &b[0][0][0]; --k)
+	b[i - &b[0][0][0]][(j - &b[0][0][0]) / 10 - 1][(k - &b[0][0][0]) - 1]
+	  -= 3.5;
+}
+/* Run f0 .. f14 and check a[] and b[] after each; return 1 on a mismatch.  */
+__attribute__((noinline, noclone)) int
+N(test) (void)
+{
+  int i, j, k;
+  for (i = 0; i < 1500; i++)
+    a[i] = i - 25;
+  N(f0) ();  /* +2 */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 23)
+      return 1;
+  N(f1) ();  /* -2 */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 25)
+      return 1;
+  N(f2) ();  /* -4 */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 29)
+      return 1;
+  N(f3) (1500LL - 1 - 23 - 48, -1LL + 25 - 48, 1LL);  /* +7 on 0 .. 1499 */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 22)
+      return 1;
+  N(f3) (1500LL - 1 - 23 - 48, 1500LL - 1, 7LL);  /* empty range: no change */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 22)
+      return 1;
+  N(f4) ();  /* zero iterations */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 22)
+      return 1;
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k;
+  N(f5) (0, 10, 0, 15, 0, 10, 1, 1, 1);  /* +2.5 everywhere */
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
+	  return 1;
+  N(f5) (0, 10, 30, 15, 0, 10, 4, 5, 6);  /* middle loop empty: no change */
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
+	  return 1;
+  N(f6) (9, -1, 29, 0, 9, -1, -1, -2, -1);  /* -4.5 everywhere */
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k)
+	  return 1;
+  N(f7) ();  /* +5.5 everywhere */
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
+	  return 1;
+  N(f8) ();	  
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
+	  return 1;
+  N(f9) ();  /* f9 .. f12 must run no iterations and leave data intact */
+  N(f10) ();
+  N(f11) (10);
+  N(f12) (12);
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 22)
+      return 1;
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
+	  return 1;
+  N(f13) ();  /* +2 on a[] */
+  N(f14) ();  /* -3.5 on b[] */
+  for (i = 0; i < 1500; i++)
+    if (a[i] != i - 20)
+      return 1;
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 15; j++)
+      for (k = 0; k < 10; k++)
+	if (b[i][j][k] != i - 2.5 + 1.5 * j - 1.5 * k)
+	  return 1;
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/for-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/for-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,110 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort ();
+/* M expands its arguments before O pastes them together as x_y_z.  */
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#pragma omp declare target
+/* Plain 'distribute'; N(x) names each function x_d_normal.  */
+#define F distribute
+#define G d
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+/* 'distribute' with dist_schedule(static, 128): x_d_ds128_normal.  */
+#define F distribute
+#define G d_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+/* 'distribute simd': x_ds_normal.  */
+#define F distribute simd
+#define G ds
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+/* 'distribute simd' with dist_schedule(static, 128): x_ds_ds128_normal.  */
+#define F distribute simd
+#define G ds_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+/* 'distribute parallel for' through for-1.h, which gets only F and G.  */
+#define F distribute parallel for
+#define G dpf
+#include "for-1.h"
+#undef F
+#undef G
+/* Same, with dist_schedule(static, 128) appended to F.  */
+#define F distribute parallel for dist_schedule(static, 128)
+#define G dpf_ds128
+#include "for-1.h"
+#undef F
+#undef G
+/* 'distribute parallel for simd' through for-1.h.  */
+#define F distribute parallel for simd
+#define G dpfs
+#include "for-1.h"
+#undef F
+#undef G
+/* Same, with dist_schedule(static, 128) appended to F.  */
+#define F distribute parallel for simd dist_schedule(static, 128)
+#define G dpfs_ds128
+#include "for-1.h"
+#undef F
+#undef G
+
+#pragma omp end declare target
+/* OR the results of all test_* functions in one target teams region.  */
+int
+main ()
+{
+  int err = 0;
+  #pragma omp target teams reduction(|:err)
+    {
+      err |= test_d_normal ();
+      err |= test_d_ds128_normal ();
+      err |= test_ds_normal ();
+      err |= test_ds_ds128_normal ();
+      err |= test_dpf_static ();
+      err |= test_dpf_static32 ();
+      err |= test_dpf_auto ();
+      err |= test_dpf_guided32 ();
+      err |= test_dpf_runtime ();
+      err |= test_dpf_ds128_static ();
+      err |= test_dpf_ds128_static32 ();
+      err |= test_dpf_ds128_auto ();
+      err |= test_dpf_ds128_guided32 ();
+      err |= test_dpf_ds128_runtime ();
+      err |= test_dpfs_static ();
+      err |= test_dpfs_static32 ();
+      err |= test_dpfs_auto ();
+      err |= test_dpfs_guided32 ();
+      err |= test_dpfs_runtime ();
+      err |= test_dpfs_ds128_static ();
+      err |= test_dpfs_ds128_static32 ();
+      err |= test_dpfs_ds128_auto ();
+      err |= test_dpfs_ds128_guided32 ();
+      err |= test_dpfs_ds128_runtime ();
+    }
+  if (err)	/* Any nonzero result from a test_* call is a failure.  */
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/pr58392.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/pr58392.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,58 @@
+/* PR tree-optimization/58392 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort (void);
+int d[32 * 32];
+/* Sum d[j + l] for l in [0, b) and j = 0, 32, 64, ... below a.  */
+__attribute__((noinline, noclone)) int
+foo (int a, int b)
+{
+  int j, c = 0;
+  #pragma omp parallel for reduction(+: c)
+    for (j = 0; j < a; j += 32)
+      {
+	int l;
+	#pragma omp simd reduction(+: c)
+	  for (l = 0; l < b; ++l)
+	    c += d[j + l];
+      }
+  return c;
+}
+/* Sum d[0 .. a-1] with a combined parallel for simd reduction.  */
+__attribute__((noinline, noclone)) int
+bar (int a)
+{
+  int j, c = 0;
+  #pragma omp parallel for simd reduction(+: c)
+    for (j = 0; j < a; ++j)
+      c += d[j];
+  return c;
+}
+/* Sum d[0 .. a-1] with a simd-only reduction.  */
+__attribute__((noinline)) static int
+baz (int a)
+{
+  int j, c = 0;
+  #pragma omp simd reduction(+: c)
+    for (j = 0; j < a; ++j)
+      c += d[j];
+  return c;
+}
+/* Fill d with i & 31 and compare all three sums with the closed form.  */
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 32 * 32; i++)
+    d[i] = (i & 31);	/* Each run of 32 elements holds 0 .. 31.  */
+  if (foo (32 * 32, 32) != (31 * 32 / 2) * 32)
+    abort ();
+  if (bar (32 * 32) != (31 * 32 / 2) * 32)
+    abort ();
+  if (baz (32 * 32) != (31 * 32 / 2) * 32)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/simd-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/simd-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+int b[1024] __attribute__((aligned (32))) = { 1 };
+int k, m;
+struct U { int u; };
+struct V { int v; };
+/* a[i] *= p[i] with k stepped by m + 1; returns the sum of p[i] + k.  */
+__attribute__((noinline, noclone)) int
+foo (int *p)
+{
+  int i, s = 0;
+  struct U u;
+  struct V v;
+  #pragma omp simd aligned(a, p : 32) linear(k: m + 1) \
+		   reduction(+:s) lastprivate(u, v)
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] *= p[i];
+      u.u = p[i] + k;	/* Uses k before this iteration's step.  */
+      k += m + 1;
+      v.v = p[i] + k;	/* Uses k after the step.  */
+      s += p[i] + k;
+    }
+  if (u.u != 36 + 4 + 3 * 1023 || v.v != 36 + 4 + 3 * 1024)
+    abort ();		/* 36 is p[1023] when p is main's b.  */
+  return s;
+}
+/* Set up a, b, k and m, call foo (b), then verify a, b, k and the sum.  */
+int
+main ()
+{
+#if __SIZEOF_INT__ >= 4
+  int i;
+  k = 4;
+  m = 2;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = i - 512;
+      b[i] = (i - 51) % 39;
+    }
+  int s = foo (b);
+  for (i = 0; i < 1024; i++)
+    {
+      if (b[i] != (i - 51) % 39	/* b must be unchanged.  */
+	  || a[i] != (i - 512) * b[i])
+	abort ();
+    }
+  if (k != 4 + 3 * 1024 || s != 1596127)
+    abort ();
+#endif
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/simd-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/simd-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort ();
+__UINTPTR_TYPE__ arr[1027];
+/* Store in arr[i] the address of the private v seen by iteration i.  */
+__attribute__((noinline, noclone)) void
+foo ()
+{
+  int i, v;
+  #pragma omp simd private (v) safelen(16)
+  for (i = 0; i < 1027; i++)
+    arr[i] = (__UINTPTR_TYPE__) &v;
+}
+/* Abort if foo recorded more than 16 distinct addresses in arr.  */
+int
+main ()
+{
+  int i, j, cnt = 0;
+  __UINTPTR_TYPE__ arr2[16];	/* Distinct addresses seen so far.  */
+  foo ();
+  for (i = 0; i < 1027; i++)
+    {
+      for (j = 0; j < cnt; j++)
+	if (arr[i] == arr2[j])
+	  break;
+      if (j != cnt)		/* Already recorded.  */
+	continue;
+      if (cnt == 16)		/* A 17th distinct address.  */
+	abort ();
+      arr2[cnt++] = arr[i];
+    }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/simd-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/simd-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,131 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+int b[1024] __attribute__((aligned (32))) = { 1 };
+unsigned char c[1024] __attribute__((aligned (32))) = { 1 };
+int k, m;
+__UINTPTR_TYPE__ u, u2, u3;
+/* Two simd loops over [0, 512) and [512, 1024); returns s + s2 + t + t2.  */
+__attribute__((noinline, noclone)) int
+foo (int *p)
+{
+  int i, s = 0, s2 = 0, t, t2;
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
+		   lastprivate (t2)
+  for (i = 0; i < 512; i++)
+    {
+      a[i] *= p[i];
+      t2 = k + p[i];	/* Uses k before this iteration's step.  */
+      k += m + 1;
+      s += p[i] + k;
+      c[i]++;
+    }
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
+		   lastprivate (t, u, u2, u3)
+  for (i = 512; i < 1024; i++)
+    {
+      a[i] *= p[i];
+      k += m + 1;
+      t = k + p[i];	/* Uses k after the step.  */
+      u = (__UINTPTR_TYPE__) &k;
+      u2 = (__UINTPTR_TYPE__) &s2;
+      u3 = (__UINTPTR_TYPE__) &t;
+      s2 += t;
+      c[i]++;
+    }
+  return s + s2 + t + t2;
+}
+/* Like foo, but with long int and run-time bounds [0, n) and [n, o).  */
+__attribute__((noinline, noclone)) long int
+bar (int *p, long int n, long int o)
+{
+  long int i, s = 0, s2 = 0, t, t2;
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
+		   lastprivate (t2)
+  for (i = 0; i < n; i++)
+    {
+      a[i] *= p[i];
+      t2 = k + p[i];	/* Uses k before this iteration's step.  */
+      k += m + 1;
+      s += p[i] + k;
+      c[i]++;
+    }
+  #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
+		   lastprivate (t, u, u2, u3)
+  for (i = n; i < o; i++)
+    {
+      a[i] *= p[i];
+      k += m + 1;
+      t = k + p[i];	/* Uses k after the step.  */
+      u = (__UINTPTR_TYPE__) &k;
+      u2 = (__UINTPTR_TYPE__) &s2;
+      u3 = (__UINTPTR_TYPE__) &t;
+      s2 += t;
+      c[i]++;
+    }
+  return s + s2 + t + t2;
+}
+/* Check foo, then bar (b, 512, 1024), then bar (b, 511, 1021).  */
+int
+main ()
+{
+#if __SIZEOF_INT__ >= 4
+  int i;
+  k = 4;
+  m = 2;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = i - 512;
+      b[i] = (i - 51) % 39;
+      c[i] = (unsigned char) i;
+    }
+  int s = foo (b);
+  for (i = 0; i < 1024; i++)
+    {
+      if (b[i] != (i - 51) % 39
+	  || a[i] != (i - 512) * b[i]
+	  || c[i] != (unsigned char) (i + 1))
+	abort ();
+      a[i] = i - 512;	/* Reset a for the next call.  */
+    }
+  if (k != 4 + 3 * 1024
+      || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
+    abort ();
+  k = 4;
+  s = bar (b, 512, 1024);
+  for (i = 0; i < 1024; i++)
+    {
+      if (b[i] != (i - 51) % 39
+	  || a[i] != (i - 512) * b[i]
+	  || c[i] != (unsigned char) (i + 2))
+	abort ();
+      a[i] = i - 512;
+    }
+  if (k != 4 + 3 * 1024
+      || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
+    abort ();
+  k = 4;
+  s = bar (b, 511, 1021);
+  for (i = 0; i < 1021; i++)
+    {
+      if (b[i] != (i - 51) % 39
+	  || a[i] != (i - 512) * b[i]
+	  || c[i] != (unsigned char) (i + 3))
+	abort ();
+      a[i] = i - 512;
+    }
+  for (i = 1021; i < 1024; i++)	/* Outside the range of the last call.  */
+    if (b[i] != (i - 51) % 39
+	|| a[i] != i - 512
+	|| c[i] != (unsigned char) (i + 2))
+      abort ();
+  if (k != 4 + 3 * 1021
+      || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020]))
+    abort ();
+#endif
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/simd-4.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/simd-4.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S { int s; };
+#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+/* Sum a[] through the '+' and 'foo' reductions; all results must agree.  */
+__attribute__((noinline, noclone)) int
+foo (void)
+{
+  int i, u = 0;
+  struct S s, t;
+  s.s = 0; t.s = 0;
+  #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)
+    abort ();
+  return s.s;
+}
+/* Fill a[] and check the sum returned by foo.  */
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  int s = foo ();
+  if (s != 19456)	/* 32 * 496 + 128 * 28.  */
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/simd-5.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/simd-5.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S { int s; };
+#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+/* Sum a[] through several reductions under safelen(1); q counts iterations.  */
+__attribute__((noinline, noclone)) int
+foo (void)
+{
+  int i, u = 0, q = 0;
+  struct S s, t;
+  s.s = 0; t.s = 0;
+  #pragma omp simd aligned(a : 32) reduction(+:s, q) reduction(foo:t, u) \
+	      safelen(1)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+      q++;
+    }
+  if (t.s != s.s || u != s.s || q != 1024)
+    abort ();
+  return s.s;
+}
+/* Fill a[] and check the sum returned by foo.  */
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  int s = foo ();
+  if (s != 19456)	/* 32 * 496 + 128 * 28.  */
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/simd-6.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/simd-6.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,44 @@
+/* PR libgomp/58482 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort ();
+int a[1024] __attribute__((aligned (32))) = { 1 };
+struct S { int s; };
+#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+/* Sum a[] through '+' and 'foo' reductions on a parallel for simd loop.  */
+__attribute__((noinline, noclone)) int
+foo (void)
+{
+  int i, u = 0;
+  struct S s, t;
+  s.s = 0; t.s = 0;
+  #pragma omp parallel for simd aligned(a : 32) reduction(+:s) \
+				reduction(foo:t, u)
+  for (i = 0; i < 1024; i++)
+    {
+      int x = a[i];
+      s.s += x;
+      t.s += x;
+      u += x;
+    }
+  if (t.s != s.s || u != s.s)
+    abort ();
+  return s.s;
+}
+/* Fill a[] and check the sum returned by foo.  */
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (i & 31) + (i / 128);
+  int s = foo ();
+  if (s != 19456)	/* 32 * 496 + 128 * 28.  */
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,90 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+/* Set x[i] to i & 31 and y[i] to (i & 63) - 30 for i in [0, z).  */
+void
+fn1 (double *x, double *y, int z)
+{
+  int i;
+  for (i = 0; i < z; i++)
+    {
+      x[i] = i & 31;
+      y[i] = (i & 63) - 30;
+    }
+}
+/* tgtv counts the calls to tgt; both sit inside declare target.  */
+#pragma omp declare target
+int tgtv = 6;
+int
+tgt (void)
+{
+  #pragma omp atomic update
+    tgtv++;
+  return 0;
+}
+#pragma omp end declare target
+/* Sum b[i] * c[i] in chunks of y starting below x; tgt runs per element.  */
+double
+fn2 (int x, int y, int z)
+{
+  double b[1024], c[1024], s = 0;
+  int i, j;
+  fn1 (b, c, x);
+  #pragma omp target data map(to: b)
+  {
+    #pragma omp target map(tofrom: c)
+      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
+	#pragma omp distribute dist_schedule(static, 4) collapse(1)
+	  for (j=0; j < x; j += y)
+	    #pragma omp parallel for reduction(+:s)
+	      for (i = j; i < j + y; i++)
+		tgt (), s += b[i] * c[i];
+    #pragma omp target update from(b, tgtv)
+  }
+  return s;
+}
+/* Sum b[i] * c[i] over [0, x) in a target region; tgt runs per element.  */
+double
+fn3 (int x)
+{
+  double b[1024], c[1024], s = 0;
+  int i;
+  fn1 (b, c, x);
+  #pragma omp target map(to: b, c)
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	tgt (), s += b[i] * c[i];
+  return s;
+}
+/* Sum b[i] * c[i] + d[x + i] + p[x + i], mapping array sections.  */
+double
+fn4 (int x, double *p)
+{
+  double b[1024], c[1024], d[1024], s = 0;
+  int i;
+  fn1 (b, c, x);
+  fn1 (d + x, p + x, x);	/* Fills d[x .. 2x-1] and p[x .. 2x-1].  */
+  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)])
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	s += b[i] * c[i] + d[x + i] + p[x + i];
+  return s;
+}
+/* Run fn2, fn3 and fn4; check their sums and the tgt call counts.  */
+int
+main ()
+{
+  double a = fn2 (128, 4, 6);
+  int b = tgtv;		/* Expected 6 + 128 after fn2.  */
+  double c = fn3 (61);
+  #pragma omp target update from(tgtv)
+  int d = tgtv;		/* Expected 61 more after fn3.  */
+  double e[1024];
+  double f = fn4 (64, e);
+  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
+      || f != 8032.0)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,88 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+/* Set x[i] to i & 31 and y[i] to (i & 63) - 30 for i in [0, z).  */
+void
+fn1 (double *x, double *y, int z)
+{
+  int i;
+  for (i = 0; i < z; i++)
+    {
+      x[i] = i & 31;
+      y[i] = (i & 63) - 30;
+    }
+}
+/* Sum over VLAs mapped whole (b, e) and as sections (c[:x], d[x:x]).  */
+double
+fn2 (int x)
+{
+  double s = 0;
+  double b[3 * x], c[3 * x], d[3 * x], e[3 * x];
+  int i;
+  fn1 (b, c, x);
+  fn1 (e, d + x, x);
+  #pragma omp target map(to: b, c[:x], d[x:x], e)
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c);
+  return s;
+}
+/* Sum b[i] * c[i] + d[i] in a target region that has no map clauses.  */
+double
+fn3 (int x)
+{
+  double s = 0;
+  double b[3 * x], c[3 * x], d[3 * x], e[3 * x];
+  int i;
+  fn1 (b, c, x);
+  fn1 (e, d, x);
+  #pragma omp target
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	s += b[i] * c[i] + d[i];
+  return s;
+}
+/* target data map(from:) plus update to(); checks b, c, d afterwards.  */
+double
+fn4 (int x)
+{
+  double s = 0;
+  double b[3 * x], c[3 * x], d[3 * x], e[3 * x];
+  int i;
+  fn1 (b, c, x);
+  fn1 (e, d + x, x);
+  #pragma omp target data map(from: b, c[:x], d[x:x], e)
+    {
+      #pragma omp target update to(b, c[:x], d[x:x], e)
+      #pragma omp target map(c[:x], d[x:x])
+	#pragma omp parallel for reduction(+:s)
+	  for (i = 0; i < x; i++)
+	    {
+	      s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c);
+	      b[i] = i + 0.5;	/* New values, verified after the region.  */
+	      c[i] = 0.5 - i;
+	      d[x + i] = 0.5 * i;
+	    }
+    }
+  for (i = 0; i < x; i++)
+    if (b[i] != i + 0.5 || c[i] != 0.5 - i || d[x + i] != 0.5 * i)
+      abort ();
+  return s;
+}
+/* fn2 (128) and fn3 (128) must both give 14080; fn4 (256) gives 28160.  */
+int
+main ()
+{
+  double a = fn2 (128);
+  if (a != 14080.0)
+    abort ();
+  double b = fn3 (128);
+  if (a != b)
+    abort ();
+  double c = fn4 (256);
+  if (c != 28160.0)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,17 @@
+#include <omp.h>
+#include <stdlib.h>
+/* omp_get_level must be 0 inside target if (0), with or without teams.  */
+int
+main ()
+{
+  if (omp_get_level ())
+    abort ();
+  #pragma omp target if (0)
+  if (omp_get_level ())
+    abort ();
+  #pragma omp target if (0)
+  #pragma omp teams
+  if (omp_get_level ())
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-4.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-4.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,14 @@
+#include <omp.h>
+#include <stdlib.h>
+/* Inside target if (0) nested in a parallel, the team size must be 1.  */
+int
+main ()
+{
+  omp_set_dynamic (0);
+  #pragma omp parallel num_threads (4)
+  #pragma omp target if (0)
+  #pragma omp single
+  if (omp_get_num_threads () != 1)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-5.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-5.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,83 @@
+#include <omp.h>
+#include <stdlib.h>
+/* ICVs changed on the host must not leak into or out of target regions.  */
+int
+main ()
+{
+  int d_o = omp_get_dynamic ();	/* *_o: values on entry to main.  */
+  int n_o = omp_get_nested ();
+  omp_sched_t s_o;
+  int c_o;
+  omp_get_schedule (&s_o, &c_o);
+  int m_o = omp_get_max_threads ();
+  omp_set_dynamic (1);
+  omp_set_nested (1);
+  omp_set_schedule (omp_sched_static, 2);
+  omp_set_num_threads (4);
+  int d = omp_get_dynamic ();	/* Values after the set calls above.  */
+  int n = omp_get_nested ();
+  omp_sched_t s;
+  int c;
+  omp_get_schedule (&s, &c);
+  int m = omp_get_max_threads ();
+  if (!omp_is_initial_device ())
+    abort ();
+  #pragma omp target if (0)
+  {
+    omp_sched_t s_c;
+    int c_c;
+    omp_get_schedule (&s_c, &c_c);
+    if (d_o != omp_get_dynamic ()	/* Region must see the entry values.  */
+	|| n_o != omp_get_nested ()
+	|| s_o != s_c
+	|| c_o != c_c
+	|| m_o != omp_get_max_threads ())
+      abort ();
+    omp_set_dynamic (0);
+    omp_set_nested (0);
+    omp_set_schedule (omp_sched_dynamic, 4);
+    omp_set_num_threads (2);
+    if (!omp_is_initial_device ())
+      abort ();
+  }
+  if (!omp_is_initial_device ())
+    abort ();
+  omp_sched_t s_c;
+  int c_c;
+  omp_get_schedule (&s_c, &c_c);
+  if (d != omp_get_dynamic ()	/* Host values must survive the region.  */
+      || n != omp_get_nested ()
+      || s != s_c
+      || c != c_c
+      || m != omp_get_max_threads ())
+    abort ();
+  #pragma omp target if (0)
+  #pragma omp teams
+  {
+    omp_sched_t s_c;
+    int c_c;
+    omp_get_schedule (&s_c, &c_c);
+    if (d_o != omp_get_dynamic ()
+	|| n_o != omp_get_nested ()
+	|| s_o != s_c
+	|| c_o != c_c
+	|| m_o != omp_get_max_threads ())
+      abort ();
+    omp_set_dynamic (0);
+    omp_set_nested (0);
+    omp_set_schedule (omp_sched_dynamic, 4);
+    omp_set_num_threads (2);
+    if (!omp_is_initial_device ())
+      abort ();
+  }
+  if (!omp_is_initial_device ())
+    abort ();
+  omp_get_schedule (&s_c, &c_c);
+  if (d != omp_get_dynamic ()
+      || n != omp_get_nested ()
+      || s != s_c
+      || c != c_c
+      || m != omp_get_max_threads ())
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-6.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-6.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,68 @@
+#include <omp.h>
+#include <stdlib.h>
+/* Level and ancestor queries must restart from 0 inside target if (0).  */
+int
+main ()
+{
+  omp_set_dynamic (0);
+  omp_set_nested (1);
+  if (omp_in_parallel ())
+    abort ();
+  #pragma omp parallel num_threads (3)
+    if (omp_get_thread_num () == 2)
+      {
+	if (!omp_in_parallel ())
+	  abort ();
+	#pragma omp parallel num_threads (3)
+	  if (omp_get_thread_num () == 1)
+	    {
+	      if (!omp_in_parallel ()
+		  || omp_get_level () != 2
+		  || omp_get_ancestor_thread_num (0) != 0
+		  || omp_get_ancestor_thread_num (1) != 2
+		  || omp_get_ancestor_thread_num (2) != 1
+		  || omp_get_ancestor_thread_num (3) != -1)
+		abort ();
+	      #pragma omp target if (0)
+		{
+		  if (omp_in_parallel ()
+		      || omp_get_level () != 0
+		      || omp_get_ancestor_thread_num (0) != 0
+		      || omp_get_ancestor_thread_num (1) != -1)
+		    abort ();
+		  #pragma omp parallel num_threads (2)
+		  {
+		    if (!omp_in_parallel ()
+			|| omp_get_level () != 1
+			|| omp_get_ancestor_thread_num (0) != 0
+			|| omp_get_ancestor_thread_num (1)
+			   != omp_get_thread_num ()
+			|| omp_get_ancestor_thread_num (2) != -1)
+		      abort ();
+		  }
+		}
+	      #pragma omp target if (0)
+		{
+		  #pragma omp teams thread_limit (2)
+		    {
+		      if (omp_in_parallel ()
+			  || omp_get_level () != 0
+			  || omp_get_ancestor_thread_num (0) != 0
+			  || omp_get_ancestor_thread_num (1) != -1)
+			abort ();
+		      #pragma omp parallel num_threads (2)
+		      {
+			if (!omp_in_parallel ()
+			    || omp_get_level () != 1
+			    || omp_get_ancestor_thread_num (0) != 0
+			    || omp_get_ancestor_thread_num (1)
+			       != omp_get_thread_num ()
+			    || omp_get_ancestor_thread_num (2) != -1)
+			  abort ();
+		      }
+		    }
+		}
+	    }
+      }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/target-7.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-7.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,111 @@
+#include <omp.h>
+#include <stdlib.h>
+
+volatile int v;
+/* Exercise target, target data and target update with device/if clauses.  */
+void
+foo (int f)
+{
+  int d = f ? omp_get_num_devices () : omp_get_default_device ();
+  int h = 5;	/* Bumped once per target data block below; ends at 14.  */
+  #pragma omp target device (d)
+  if (omp_get_level () != 0)
+    abort ();
+  #pragma omp target if (v > 1)
+  if (omp_get_level () != 0 || !omp_is_initial_device ())
+    abort ();
+  #pragma omp target device (d) if (v > 1)
+  if (omp_get_level () != 0 || !omp_is_initial_device ())
+    abort ();
+  #pragma omp target if (v <= 1)
+  if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
+    abort ();
+  #pragma omp target device (d) if (v <= 1)
+  if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
+    abort ();
+  #pragma omp target if (0)
+  if (omp_get_level () != 0 || !omp_is_initial_device ())
+    abort ();
+  #pragma omp target device (d) if (0)
+  if (omp_get_level () != 0 || !omp_is_initial_device ())
+    abort ();
+  #pragma omp target if (1)
+  if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
+    abort ();
+  #pragma omp target device (d) if (1)
+  if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
+    abort ();
+  #pragma omp target data device (d) map (to: h)
+  {
+    #pragma omp target device (d)
+    if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 5)
+      abort ();
+    #pragma omp target update device (d) from (h)
+  }
+  #pragma omp target data if (v > 1) map (to: h)
+  {
+    #pragma omp target if (v > 1)
+    if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 6)
+      abort ();
+    #pragma omp target update if (v > 1) from (h)
+  }
+  #pragma omp target data device (d) if (v > 1) map (to: h)
+  {
+    #pragma omp target device (d) if (v > 1)
+    if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 7)
+      abort ();
+    #pragma omp target update device (d) if (v > 1) from (h)
+  }
+  #pragma omp target data if (v <= 1) map (to: h)
+  {
+    #pragma omp target if (v <= 1)
+    if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 8)
+      abort ();
+    #pragma omp target update if (v <= 1) from (h)
+  }
+  #pragma omp target data device (d) if (v <= 1) map (to: h)
+  {
+    #pragma omp target device (d) if (v <= 1)
+    if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 9)
+      abort ();
+    #pragma omp target update device (d) if (v <= 1) from (h)
+  }
+  #pragma omp target data if (0) map (to: h)
+  {
+    #pragma omp target if (0)
+    if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 10)
+      abort ();
+    #pragma omp target update if (0) from (h)
+  }
+  #pragma omp target data device (d) if (0) map (to: h)
+  {
+    #pragma omp target device (d) if (0)
+    if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 11)
+      abort ();
+    #pragma omp target update device (d) if (0) from (h)
+  }
+  #pragma omp target data if (1) map (to: h)
+  {
+    #pragma omp target if (1)
+    if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 12)
+      abort ();
+    #pragma omp target update if (1) from (h)
+  }
+  #pragma omp target data device (d) if (1) map (to: h)
+  {
+    #pragma omp target device (d) if (1)
+    if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 13)
+      abort ();
+    #pragma omp target update device (d) if (1) from (h)
+  }
+  if (h != 14)
+    abort ();
+}
+/* Run foo on the default device, then on omp_get_num_devices ().  */
+int
+main ()
+{
+  foo (0);
+  foo (1);
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskgroup-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/taskgroup-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,83 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int v[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+/* Increment v[] from nested tasks in taskgroups; check v after each one.  */
+int
+main ()
+{
+  #pragma omp parallel num_threads (4)
+  #pragma omp single
+    {
+      int i;
+      #pragma omp taskgroup
+      {
+	for (i = 0; i < 16; i += 2)
+	  #pragma omp task
+	    {
+	      #pragma omp task
+		v[i]++;
+	      #pragma omp task
+		v[i + 1]++;
+	    }
+      }
+      for (i = 0; i < 16; i++)
+	if (v[i] != i + 2)	/* Every element incremented once.  */
+	  abort ();
+      #pragma omp taskgroup
+      {
+	for (i = 0; i < 16; i += 2)
+	  #pragma omp task
+	    {
+	      #pragma omp task
+		v[i]++;
+	      #pragma omp task
+		v[i + 1]++;
+	      #pragma omp taskwait
+	    }
+      }
+      for (i = 0; i < 16; i++)
+	if (v[i] != i + 3)
+	  abort ();
+      #pragma omp taskgroup
+      {
+	for (i = 0; i < 16; i += 2)
+	  #pragma omp task
+	    {
+	      #pragma omp task
+		v[i]++;
+	      v[i + 1]++;
+	    }
+	#pragma omp taskwait
+	for (i = 0; i < 16; i += 2)
+	  #pragma omp task
+	    v[i + 1]++;
+      }
+      for (i = 0; i < 16; i++)
+	if (v[i] != i + 4 + (i & 1))	/* Odd elements got two increments.  */
+	  abort ();
+      #pragma omp taskgroup
+      {
+	for (i = 0; i < 16; i += 2)
+	  {
+	    #pragma omp taskgroup
+	      {
+		#pragma omp task
+		  v[i]++;
+		#pragma omp task
+		  v[i + 1]++;
+	      }
+	    if (v[i] != i + 5 || v[i + 1] != i + 7)
+	      abort ();
+	    #pragma omp task
+	    v[i]++;
+	  }
+      }
+      for (i = 0; i < 16; i++)
+	if (v[i] != i + 6)
+	  abort ();
+    }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/thread-limit-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/thread-limit-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_THREAD_LIMIT "6" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <omp.h>
+
+/* Check that OMP_THREAD_LIMIT=6 caps the number of threads in parallel
+   regions.  The test returns 0 early if the limit is not in effect.  */
+int
+main ()
+{
+  if (omp_get_thread_limit () != 6)
+    return 0;
+  omp_set_dynamic (0);
+  omp_set_nested (1);
+  #pragma omp parallel num_threads (3)
+  if (omp_get_num_threads () != 3)
+    abort ();
+  #pragma omp parallel num_threads (3)
+  if (omp_get_num_threads () != 3)
+    abort ();
+  #pragma omp parallel num_threads (8)
+  if (omp_get_num_threads () > 6)
+    abort ();
+  #pragma omp parallel num_threads (6)
+  if (omp_get_num_threads () != 6)
+    abort ();
+  /* cnt tracks how many threads are inside the innermost of three nested
+     parallel regions at once; it must never exceed 6.  */
+  int cnt = 0;
+  #pragma omp parallel num_threads (5)
+  #pragma omp parallel num_threads (5)
+  #pragma omp parallel num_threads (2)
+  {
+    int v;
+    #pragma omp atomic capture
+    v = ++cnt;
+    if (v > 6)
+      abort ();
+    usleep (10000);
+    #pragma omp atomic
+    --cnt;
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/thread-limit-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/thread-limit-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_THREAD_LIMIT "9" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <omp.h>
+
+/* Check OMP_THREAD_LIMIT=9 on the host, then a teams thread_limit (6)
+   inside a target if (0) region.  Returns early if the limit of 9 is not
+   in effect; the teams checks run only when the limit there is exactly 6.  */
+int
+main ()
+{
+  if (omp_get_thread_limit () != 9)
+    return 0;
+  omp_set_dynamic (0);
+  #pragma omp parallel num_threads (8)
+  if (omp_get_num_threads () != 8)
+    abort ();
+  #pragma omp parallel num_threads (16)
+  if (omp_get_num_threads () > 9)
+    abort ();
+  #pragma omp target if (0)
+  #pragma omp teams thread_limit (6)
+  {
+    if (omp_get_thread_limit () > 6)
+      abort ();
+    if (omp_get_thread_limit () == 6)
+      {
+	omp_set_dynamic (0);
+	omp_set_nested (1);
+	#pragma omp parallel num_threads (3)
+	if (omp_get_num_threads () != 3)
+	  abort ();
+	#pragma omp parallel num_threads (3)
+	if (omp_get_num_threads () != 3)
+	  abort ();
+	#pragma omp parallel num_threads (8)
+	if (omp_get_num_threads () > 6)
+	  abort ();
+	#pragma omp parallel num_threads (6)
+	if (omp_get_num_threads () != 6)
+	  abort ();
+	int cnt = 0;
+	#pragma omp parallel num_threads (5)
+	#pragma omp parallel num_threads (5)
+	#pragma omp parallel num_threads (2)
+	{
+	  int v;
+	  #pragma omp atomic capture
+	  v = ++cnt;
+	  if (v > 6)
+	    abort ();
+	  usleep (10000);
+	  #pragma omp atomic
+	  --cnt;
+	}
+      }
+  }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/thread-limit-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/thread-limit-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+#include <omp.h>
+/* omp_get_thread_limit must return 1 under teams thread_limit (1).  */
+int
+main ()
+{
+  #pragma omp target if (0)
+  #pragma omp teams thread_limit (1)
+  if (omp_get_thread_limit () != 1)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/udr-1.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/udr-1.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,81 @@
+/* { dg-do run } */
+
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort ();
+
+struct S { int s; struct S *t; };
+/* Add in->s to out->s.  */
+void
+foo (struct S *out, struct S *in)
+{
+  out->s += in->s;
+}
+/* Abort unless x->s is 6, then set it to 15.  */
+void
+bar (struct S *x)
+{
+  if (x->s != 6) abort ();
+  x->s = 15;
+}
+/* Set x->s to 6 and make x->t point back at x; y is unused.  */
+void
+baz (struct S *x, struct S *y)
+{
+  x->s = 6;
+  x->t = x;
+  (void) y;
+}
+
+#pragma omp declare reduction (foo: struct S: foo (&omp_out, &omp_in)) \
+	initializer (omp_priv = { 8, &omp_priv })
+#pragma omp declare reduction (foo: char, int, short: omp_out += omp_in - 4) \
+	initializer (omp_priv = 4)
+#pragma omp declare reduction (+: struct S: foo (&omp_out, &omp_in)) \
+	initializer (baz (&omp_priv, &omp_orig))
+/* Reduce s with '+' and t with 'foo'; check initial and final values.  */
+void
+test (struct S s, struct S t)
+{
+  int q = 0;
+  #pragma omp parallel num_threads (4) reduction (+: s, q) reduction (foo: t)
+  {
+    if (s.s != 6 || s.t != &s || t.s != 8 || t.t != &t)
+      abort ();
+    s.s = 2;
+    t.s = 3;
+    q = 1;	/* q ends up as the number of threads that ran.  */
+  }
+  if (s.s != 12 + 2 * q || t.s != 14 + 3 * q)
+    abort ();
+}
+/* The innermost block-scope 'foo' UDR for struct S must be the one used.  */
+int
+main ()
+{
+  struct S s, t;
+  s.s = 9; t.s = 10;
+  int h = 30, v = 2, q = 0;
+  #pragma omp declare reduction (foo: struct S: omp_out.s *= omp_in.s) \
+	initializer (omp_priv = omp_orig)
+  {
+    #pragma omp declare reduction (foo: struct S: omp_out.s += omp_in.s) \
+	initializer (omp_priv = omp_orig)
+    #pragma omp parallel num_threads (4) reduction (+: t, q) \
+	reduction (min: h) reduction (foo: s, v)
+    {
+      if (s.s != 9 || t.s != 6 || v != 4 || h != __INT_MAX__) abort ();
+      asm volatile ("" : "+m" (s.s), "+m" (t.s));
+      asm volatile ("" : "+r" (h), "+r" (v));
+      h = t.s; s.s++; t.s++; v++; q++;
+    }
+  }
+  if (h != 6 || s.s != 9 + q * 10 || t.s != 10 + q * 7 || v != 2 + q)
+    abort ();	/* s.s matches the '+=' combiner, not the '*=' one.  */
+  s.s = 12;
+  t.s = 14;
+  test (s, t);
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/udr-2.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/udr-2.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+struct S { int s; };
+
+#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s)
+#pragma omp declare reduction (foo:int:omp_out += omp_in)
+/* Private copies for UDRs without an initializer must start at zero.  */
+int
+main ()
+{
+  int u = 0, q = 0;
+  struct S s, t;
+  s.s = 0; t.s = 0;
+  #pragma omp parallel reduction(+:s, q) reduction(foo:t, u)
+  {
+    if (s.s != 0 || t.s != 0 || u != 0 || q != 0) abort ();
+    s.s = 6;
+    t.s = 8;
+    u = 9;
+    q++;	/* q ends up as the number of threads that ran.  */
+  }
+  if (s.s != 6 * q || t.s != 8 * q || u != 9 * q) abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/udr-3.c	(.../trunk)	(revision 0)
+++ libgomp/testsuite/libgomp.c/udr-3.c	(.../branches/gomp-4_0-branch)	(revision 203287)
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+struct S;
+void foo (struct S *, struct S *);
+#pragma omp declare reduction (+:struct S:foo (&omp_out, &omp_in))
+struct S { int s; };
+/* Add y->s to x->s.  */
+void
+foo (struct S *x, struct S *y)
+{
+  x->s += y->s;
+}
+/* Use the '+' UDR that was declared while struct S was still incomplete.  */
+int
+main ()
+{
+  struct S s;
+  int i = 0;
+  s.s = 0;
+  #pragma omp parallel reduction (+:s, i)
+  {
+    if (s.s != 0)
+      abort ();
+    s.s = 2;
+    i = 1;	/* i ends up as the number of threads that ran.  */
+  }
+  if (s.s != 2 * i)
+    abort ();
+  return 0;
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]