OpenACC atomic directive

Thomas Schwinge thomas@codesourcery.com
Mon Nov 2 13:10:00 GMT 2015


Hi!

The OpenACC atomic directive matches OpenMP's atomic directive (got that
clarified by the OpenACC committee), so they can share the same
implementation.  OK for trunk?

commit 826c7022d0e2b9e225215b168a95487823dce925
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Mon Nov 2 10:35:44 2015 +0100

    OpenACC atomic directive
    
    YYYY-MM-DD  Thomas Schwinge  <thomas@codesourcery.com>
    	    Chung-Lin Tang  <cltang@codesourcery.com>
    
    	gcc/c-family/
    	* c-pragma.c (oacc_pragmas): Add "atomic".
    	* c-pragma.h (pragma_kind): Add PRAGMA_OACC_ATOMIC.
    	gcc/c/
    	* c-parser.c (c_parser_omp_construct): Handle PRAGMA_OACC_ATOMIC.
    	gcc/cp/
    	* parser.c (cp_parser_omp_construct, cp_parser_pragma): Handle
    	PRAGMA_OACC_ATOMIC.
    	gcc/fortran/
    	* gfortran.h (gfc_statement): Add ST_OACC_ATOMIC,
    	ST_OACC_END_ATOMIC.
    	(gfc_exec_op): Add EXEC_OACC_ATOMIC.
    	* match.h (gfc_match_oacc_atomic): New prototype.
    	* openmp.c (gfc_match_omp_atomic, gfc_match_oacc_atomic): New
    	wrapper functions around...
    	(gfc_match_omp_oacc_atomic): ... this new function.
    	(oacc_code_to_statement, gfc_resolve_oacc_directive): Handle
    	EXEC_OACC_ATOMIC.
    	* parse.c (decode_oacc_directive): Handle "atomic", "end atomic".
    	(case_exec_markers): Add ST_OACC_ATOMIC.
    	(gfc_ascii_statement): Handle ST_OACC_ATOMIC, ST_OACC_END_ATOMIC.
    	(parse_omp_atomic): Rename to...
    	(parse_omp_oacc_atomic): ... this new function.  Add omp_p formal
    	parameter.  Adjust all users.
    	(parse_executable): Handle ST_OACC_ATOMIC.
    	(is_oacc): Handle EXEC_OACC_ATOMIC.
    	* resolve.c (gfc_resolve_blocks, gfc_resolve_code): Handle
    	EXEC_OACC_ATOMIC.
    	* st.c (gfc_free_statement): Handle EXEC_OACC_ATOMIC.
    	* trans-openmp.c (gfc_trans_oacc_directive): Handle
    	EXEC_OACC_ATOMIC.
    	* trans.c (trans_code): Handle EXEC_OACC_ATOMIC.
    	gcc/
    	* omp-low.c (check_omp_nesting_restrictions): Allow
    	GIMPLE_OMP_ATOMIC_LOAD, GIMPLE_OMP_ATOMIC_STORE inside OpenACC
    	contexts.
    
    YYYY-MM-DD  Thomas Schwinge  <thomas@codesourcery.com>
    
    	gcc/testsuite/
    	* c-c++-common/goacc-gomp/nesting-fail-1.c: Move "atomic" tests
    	from here to...
    	* c-c++-common/goacc-gomp/nesting-1.c: ... here, and expect them
    	to succeed.
    
    YYYY-MM-DD  James Norris  <jnorris@codesourcery.com>
    
    	libgomp/
    	* testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c: New
    	file.
    	* testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c:
    	Likewise.
    	* testsuite/libgomp.oacc-c-c++-common/atomic_rw-1.c: Likewise.
    	* testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c: Likewise.
    	* testsuite/libgomp.oacc-fortran/atomic_capture-1.f90: Likewise.
    	* testsuite/libgomp.oacc-fortran/atomic_rw-1.f90: New file.
    	* testsuite/libgomp.oacc-fortran/atomic_update-1.f90: Likewise.
    
    YYYY-MM-DD  Julian Brown  <julian@codesourcery.com>
    	    Thomas Schwinge  <thomas@codesourcery.com>
    
    	libgomp/
    	* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: New file.
    	* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
    	* testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c:
    	Likewise.
    	* testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: Likewise.
    	* testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: Likewise.
---
 gcc/c-family/c-pragma.c                            |    1 +
 gcc/c-family/c-pragma.h                            |    1 +
 gcc/c/c-parser.c                                   |    3 +
 gcc/cp/parser.c                                    |    4 +
 gcc/fortran/gfortran.h                             |    3 +-
 gcc/fortran/match.h                                |    1 +
 gcc/fortran/openmp.c                               |   22 +-
 gcc/fortran/parse.c                                |   40 +-
 gcc/fortran/resolve.c                              |    2 +
 gcc/fortran/st.c                                   |    1 +
 gcc/fortran/trans-openmp.c                         |    2 +
 gcc/fortran/trans.c                                |    1 +
 gcc/omp-low.c                                      |    5 +-
 gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c  |   44 +-
 .../c-c++-common/goacc-gomp/nesting-fail-1.c       |   26 -
 .../libgomp.oacc-c-c++-common/atomic_capture-1.c   |  866 +++++++++++
 .../libgomp.oacc-c-c++-common/atomic_capture-2.c   | 1626 ++++++++++++++++++++
 .../libgomp.oacc-c-c++-common/atomic_rw-1.c        |   34 +
 .../libgomp.oacc-c-c++-common/atomic_update-1.c    |  760 +++++++++
 .../libgomp.oacc-c-c++-common/par-reduction-1.c    |   44 +
 .../libgomp.oacc-c-c++-common/par-reduction-2.c    |   48 +
 .../libgomp.oacc-c-c++-common/worker-single-1a.c   |   28 +
 .../libgomp.oacc-c-c++-common/worker-single-4.c    |   28 +
 .../libgomp.oacc-c-c++-common/worker-single-6.c    |   46 +
 .../libgomp.oacc-fortran/atomic_capture-1.f90      |  784 ++++++++++
 .../testsuite/libgomp.oacc-fortran/atomic_rw-1.f90 |   29 +
 .../libgomp.oacc-fortran/atomic_update-1.f90       |  338 ++++
 27 files changed, 4744 insertions(+), 43 deletions(-)

diff --git gcc/c-family/c-pragma.c gcc/c-family/c-pragma.c
index d99c2af..ac11838 100644
--- gcc/c-family/c-pragma.c
+++ gcc/c-family/c-pragma.c
@@ -1204,6 +1204,7 @@ static vec<pragma_ns_name> registered_pp_pragmas;
 
 struct omp_pragma_def { const char *name; unsigned int id; };
 static const struct omp_pragma_def oacc_pragmas[] = {
+  { "atomic", PRAGMA_OACC_ATOMIC },
   { "cache", PRAGMA_OACC_CACHE },
   { "data", PRAGMA_OACC_DATA },
   { "enter", PRAGMA_OACC_ENTER_DATA },
diff --git gcc/c-family/c-pragma.h gcc/c-family/c-pragma.h
index cec920f..69e7392 100644
--- gcc/c-family/c-pragma.h
+++ gcc/c-family/c-pragma.h
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.  If not see
 enum pragma_kind {
   PRAGMA_NONE = 0,
 
+  PRAGMA_OACC_ATOMIC,
   PRAGMA_OACC_CACHE,
   PRAGMA_OACC_DATA,
   PRAGMA_OACC_ENTER_DATA,
diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index 90038d5..ec88c65 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -16243,6 +16243,9 @@ c_parser_omp_construct (c_parser *parser)
 
   switch (p_kind)
     {
+    case PRAGMA_OACC_ATOMIC:
+      c_parser_omp_atomic (loc, parser);
+      return;
     case PRAGMA_OACC_CACHE:
       strcpy (p_name, "#pragma acc");
       stmt = c_parser_oacc_cache (loc, parser);
diff --git gcc/cp/parser.c gcc/cp/parser.c
index 24cb47f..a90bf3b 100644
--- gcc/cp/parser.c
+++ gcc/cp/parser.c
@@ -35464,6 +35464,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok)
 
   switch (pragma_tok->pragma_kind)
     {
+    case PRAGMA_OACC_ATOMIC:
+      cp_parser_omp_atomic (parser, pragma_tok);
+      return;
     case PRAGMA_OACC_CACHE:
       stmt = cp_parser_oacc_cache (parser, pragma_tok);
       break;
@@ -36040,6 +36043,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context)
       cp_parser_omp_declare (parser, pragma_tok, context);
       return false;
 
+    case PRAGMA_OACC_ATOMIC:
     case PRAGMA_OACC_CACHE:
     case PRAGMA_OACC_DATA:
     case PRAGMA_OACC_ENTER_DATA:
diff --git gcc/fortran/gfortran.h gcc/fortran/gfortran.h
index 13e730f..e13b4d4 100644
--- gcc/fortran/gfortran.h
+++ gcc/fortran/gfortran.h
@@ -209,6 +209,7 @@ enum gfc_statement
   ST_OACC_END_LOOP, ST_OACC_DECLARE, ST_OACC_UPDATE, ST_OACC_WAIT,
   ST_OACC_CACHE, ST_OACC_KERNELS_LOOP, ST_OACC_END_KERNELS_LOOP,
   ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, ST_OACC_ROUTINE,
+  ST_OACC_ATOMIC, ST_OACC_END_ATOMIC,
   ST_OMP_ATOMIC, ST_OMP_BARRIER, ST_OMP_CRITICAL, ST_OMP_END_ATOMIC,
   ST_OMP_END_CRITICAL, ST_OMP_END_DO, ST_OMP_END_MASTER, ST_OMP_END_ORDERED,
   ST_OMP_END_PARALLEL, ST_OMP_END_PARALLEL_DO, ST_OMP_END_PARALLEL_SECTIONS,
@@ -2322,7 +2323,7 @@ enum gfc_exec_op
   EXEC_OACC_KERNELS_LOOP, EXEC_OACC_PARALLEL_LOOP,
   EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS, EXEC_OACC_DATA, EXEC_OACC_HOST_DATA,
   EXEC_OACC_LOOP, EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE,
-  EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA,
+  EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA, EXEC_OACC_ATOMIC,
   EXEC_OMP_CRITICAL, EXEC_OMP_DO, EXEC_OMP_FLUSH, EXEC_OMP_MASTER,
   EXEC_OMP_ORDERED, EXEC_OMP_PARALLEL, EXEC_OMP_PARALLEL_DO,
   EXEC_OMP_PARALLEL_SECTIONS, EXEC_OMP_PARALLEL_WORKSHARE,
diff --git gcc/fortran/match.h gcc/fortran/match.h
index 1b51a88..a52c189 100644
--- gcc/fortran/match.h
+++ gcc/fortran/match.h
@@ -124,6 +124,7 @@ gfc_common_head *gfc_get_common (const char *, int);
 /* openmp.c.  */
 
 /* OpenACC directive matchers.  */
+match gfc_match_oacc_atomic (void);
 match gfc_match_oacc_cache (void);
 match gfc_match_oacc_wait (void);
 match gfc_match_oacc_update (void);
diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c
index e59139c..929a739 100644
--- gcc/fortran/openmp.c
+++ gcc/fortran/openmp.c
@@ -2452,8 +2452,8 @@ gfc_match_omp_ordered (void)
 }
 
 
-match
-gfc_match_omp_atomic (void)
+static match
+gfc_match_omp_oacc_atomic (bool omp_p)
 {
   gfc_omp_atomic_op op = GFC_OMP_ATOMIC_UPDATE;
   int seq_cst = 0;
@@ -2491,13 +2491,24 @@ gfc_match_omp_atomic (void)
       gfc_error ("Unexpected junk after $OMP ATOMIC statement at %C");
       return MATCH_ERROR;
     }
-  new_st.op = EXEC_OMP_ATOMIC;
+  new_st.op = (omp_p ? EXEC_OMP_ATOMIC : EXEC_OACC_ATOMIC);
   if (seq_cst)
     op = (gfc_omp_atomic_op) (op | GFC_OMP_ATOMIC_SEQ_CST);
   new_st.ext.omp_atomic = op;
   return MATCH_YES;
 }
 
+match
+gfc_match_oacc_atomic (void)
+{
+  return gfc_match_omp_oacc_atomic (false);
+}
+
+match
+gfc_match_omp_atomic (void)
+{
+  return gfc_match_omp_oacc_atomic (true);
+}
 
 match
 gfc_match_omp_barrier (void)
@@ -4317,6 +4328,8 @@ oacc_code_to_statement (gfc_code *code)
       return ST_OACC_KERNELS_LOOP;
     case EXEC_OACC_LOOP:
       return ST_OACC_LOOP;
+    case EXEC_OACC_ATOMIC:
+      return ST_OACC_ATOMIC;
     default:
       gcc_unreachable ();
     }
@@ -4661,6 +4674,9 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED)
     case EXEC_OACC_LOOP:
       resolve_oacc_loop (code);
       break;
+    case EXEC_OACC_ATOMIC:
+      resolve_omp_atomic (code);
+      break;
     default:
       break;
     }
diff --git gcc/fortran/parse.c gcc/fortran/parse.c
index 650135b..b98dda1 100644
--- gcc/fortran/parse.c
+++ gcc/fortran/parse.c
@@ -637,6 +637,9 @@ decode_oacc_directive (void)
 
   switch (c)
     {
+    case 'a':
+      match ("atomic", gfc_match_oacc_atomic, ST_OACC_ATOMIC);
+      break;
     case 'c':
       match ("cache", gfc_match_oacc_cache, ST_OACC_CACHE);
       break;
@@ -645,6 +648,7 @@ decode_oacc_directive (void)
       match ("declare", gfc_match_oacc_declare, ST_OACC_DECLARE);
       break;
     case 'e':
+      match ("end atomic", gfc_match_omp_eos, ST_OACC_END_ATOMIC);
       match ("end data", gfc_match_omp_eos, ST_OACC_END_DATA);
       match ("end host_data", gfc_match_omp_eos, ST_OACC_END_HOST_DATA);
       match ("end kernels loop", gfc_match_omp_eos, ST_OACC_END_KERNELS_LOOP);
@@ -1373,7 +1377,8 @@ next_statement (void)
   case ST_OMP_DISTRIBUTE_PARALLEL_DO_SIMD: \
   case ST_CRITICAL: \
   case ST_OACC_PARALLEL_LOOP: case ST_OACC_PARALLEL: case ST_OACC_KERNELS: \
-  case ST_OACC_DATA: case ST_OACC_HOST_DATA: case ST_OACC_LOOP: case ST_OACC_KERNELS_LOOP
+  case ST_OACC_DATA: case ST_OACC_HOST_DATA: case ST_OACC_LOOP: \
+  case ST_OACC_KERNELS_LOOP: case ST_OACC_ATOMIC
 
 /* Declaration statements */
 
@@ -1937,6 +1942,12 @@ gfc_ascii_statement (gfc_statement st)
     case ST_OACC_ROUTINE:
       p = "!$ACC ROUTINE";
       break;
+    case ST_OACC_ATOMIC:
+      p = "!ACC ATOMIC";
+      break;
+    case ST_OACC_END_ATOMIC:
+      p = "!ACC END ATOMIC";
+      break;
     case ST_OMP_ATOMIC:
       p = "!$OMP ATOMIC";
       break;
@@ -4316,14 +4327,24 @@ parse_omp_do (gfc_statement omp_st)
 /* Parse the statements of OpenMP atomic directive.  */
 
 static gfc_statement
-parse_omp_atomic (void)
+parse_omp_oacc_atomic (bool omp_p)
 {
-  gfc_statement st;
+  gfc_statement st, st_atomic, st_end_atomic;
   gfc_code *cp, *np;
   gfc_state_data s;
   int count;
 
-  accept_statement (ST_OMP_ATOMIC);
+  if (omp_p)
+    {
+      st_atomic = ST_OMP_ATOMIC;
+      st_end_atomic = ST_OMP_END_ATOMIC;
+    }
+  else
+    {
+      st_atomic = ST_OACC_ATOMIC;
+      st_end_atomic = ST_OACC_END_ATOMIC;
+    }
+  accept_statement (st_atomic);
 
   cp = gfc_state_stack->tail;
   push_state (&s, COMP_OMP_STRUCTURED_BLOCK, NULL);
@@ -4350,7 +4371,7 @@ parse_omp_atomic (void)
   pop_state ();
 
   st = next_statement ();
-  if (st == ST_OMP_END_ATOMIC)
+  if (st == st_end_atomic)
     {
       gfc_clear_new_st ();
       gfc_commit_symbols ();
@@ -4646,7 +4667,7 @@ parse_omp_structured_block (gfc_statement omp_st, bool workshare_stmts_only)
 		  continue;
 
 		case ST_OMP_ATOMIC:
-		  st = parse_omp_atomic ();
+		  st = parse_omp_oacc_atomic (true);
 		  continue;
 
 		default:
@@ -4865,8 +4886,12 @@ parse_executable (gfc_statement st)
 	    return st;
 	  continue;
 
+	case ST_OACC_ATOMIC:
+	  st = parse_omp_oacc_atomic (false);
+	  continue;
+
 	case ST_OMP_ATOMIC:
-	  st = parse_omp_atomic ();
+	  st = parse_omp_oacc_atomic (true);
 	  continue;
 
 	default:
@@ -5782,6 +5807,7 @@ is_oacc (gfc_state_data *sd)
     case EXEC_OACC_CACHE:
     case EXEC_OACC_ENTER_DATA:
     case EXEC_OACC_EXIT_DATA:
+    case EXEC_OACC_ATOMIC:
       return true;
 
     default:
diff --git gcc/fortran/resolve.c gcc/fortran/resolve.c
index 1049c0c..bf2837c 100644
--- gcc/fortran/resolve.c
+++ gcc/fortran/resolve.c
@@ -9372,6 +9372,7 @@ gfc_resolve_blocks (gfc_code *b, gfc_namespace *ns)
 	case EXEC_OACC_CACHE:
 	case EXEC_OACC_ENTER_DATA:
 	case EXEC_OACC_EXIT_DATA:
+	case EXEC_OACC_ATOMIC:
 	case EXEC_OMP_ATOMIC:
 	case EXEC_OMP_CRITICAL:
 	case EXEC_OMP_DISTRIBUTE:
@@ -10644,6 +10645,7 @@ start:
 	case EXEC_OACC_CACHE:
 	case EXEC_OACC_ENTER_DATA:
 	case EXEC_OACC_EXIT_DATA:
+	case EXEC_OACC_ATOMIC:
 	  gfc_resolve_oacc_directive (code, ns);
 	  break;
 
diff --git gcc/fortran/st.c gcc/fortran/st.c
index 116af15..629b51d 100644
--- gcc/fortran/st.c
+++ gcc/fortran/st.c
@@ -240,6 +240,7 @@ gfc_free_statement (gfc_code *p)
       gfc_free_omp_namelist (p->ext.omp_namelist);
       break;
 
+    case EXEC_OACC_ATOMIC:
     case EXEC_OMP_ATOMIC:
     case EXEC_OMP_BARRIER:
     case EXEC_OMP_MASTER:
diff --git gcc/fortran/trans-openmp.c gcc/fortran/trans-openmp.c
index 7e01e72..5f4c382 100644
--- gcc/fortran/trans-openmp.c
+++ gcc/fortran/trans-openmp.c
@@ -4409,6 +4409,8 @@ gfc_trans_oacc_directive (gfc_code *code)
       return gfc_trans_oacc_executable_directive (code);
     case EXEC_OACC_WAIT:
       return gfc_trans_oacc_wait_directive (code);
+    case EXEC_OACC_ATOMIC:
+      return gfc_trans_omp_atomic (code);
     default:
       gcc_unreachable ();
     }
diff --git gcc/fortran/trans.c gcc/fortran/trans.c
index 4337fcb..9495450 100644
--- gcc/fortran/trans.c
+++ gcc/fortran/trans.c
@@ -1903,6 +1903,7 @@ trans_code (gfc_code * code, tree cond)
 	case EXEC_OACC_PARALLEL_LOOP:
 	case EXEC_OACC_ENTER_DATA:
 	case EXEC_OACC_EXIT_DATA:
+	case EXEC_OACC_ATOMIC:
 	  res = gfc_trans_oacc_directive (code);
 	  break;
 
diff --git gcc/omp-low.c gcc/omp-low.c
index d0264e9..ccf0b63 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -3212,7 +3212,10 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
     {
       for (omp_context *ctx_ = ctx; ctx_ != NULL; ctx_ = ctx_->outer)
 	if (is_gimple_omp (ctx_->stmt)
-	    && is_gimple_omp_oacc (ctx_->stmt))
+	    && is_gimple_omp_oacc (ctx_->stmt)
+	    /* Except for atomic codes that we share with OpenMP.  */
+	    && ! (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD
+		  || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE))
 	  {
 	    error_at (gimple_location (stmt),
 		      "non-OpenACC construct inside of OpenACC region");
diff --git gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
index 1c17818..dabba8c 100644
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c
@@ -1,12 +1,46 @@
 void
-f_omp_parallel (void)
+f_acc_data (void)
 {
-#pragma omp parallel
+#pragma acc data
   {
     int i;
+#pragma omp atomic write
+    i = 0;
+  }
+}
+
+void
+f_acc_kernels (void)
+{
+#pragma acc kernels
+  {
+    int i;
+#pragma omp atomic write
+    i = 0;
+  }
+}
 
-#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
-    for (i = 0; i < 2; ++i)
-      ;
+void
+f_acc_loop (void)
+{
+  int i;
+
+#pragma acc parallel
+#pragma acc loop
+  for (i = 0; i < 2; ++i)
+    {
+#pragma omp atomic write
+      i = 0;
+    }
+}
+
+void
+f_acc_parallel (void)
+{
+#pragma acc parallel
+  {
+    int i;
+#pragma omp atomic write
+    i = 0;
   }
 }
diff --git gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c
index 0c8ea54..e98258c 100644
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c
@@ -216,12 +216,6 @@ f_acc_parallel (void)
 
 #pragma acc parallel
   {
-#pragma omp atomic write
-    i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
-  }
-
-#pragma acc parallel
-  {
 #pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
     ;
   }
@@ -286,12 +280,6 @@ f_acc_kernels (void)
 
 #pragma acc kernels
   {
-#pragma omp atomic write
-    i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
-  }
-
-#pragma acc kernels
-  {
 #pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
     ;
   }
@@ -356,12 +344,6 @@ f_acc_data (void)
 
 #pragma acc data
   {
-#pragma omp atomic write
-    i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
-  }
-
-#pragma acc data
-  {
 #pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
     ;
   }
@@ -442,14 +424,6 @@ f_acc_loop (void)
 #pragma acc loop
   for (i = 0; i < 2; ++i)
     {
-#pragma omp atomic write
-      i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
-    }
-
-#pragma acc parallel
-#pragma acc loop
-  for (i = 0; i < 2; ++i)
-    {
 #pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
       ;
     }
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c
new file mode 100644
index 0000000..ad958cd
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-1.c
@@ -0,0 +1,866 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+  int   iexp, igot;
+  long long lexp, lgot;
+  int   N = 32;
+  int   idata[N];
+  long long   ldata[N];
+  float fexp, fgot;
+  float fdata[N];
+  int i;
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        idata[i] = igot++;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        idata[i] = igot--;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        idata[i] = ++igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        idata[i] = --igot;
+      }
+  }
+
+  /* BINOP = + */
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = igot += expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = igot = igot + expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = igot = expr + igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = * */
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        ldata[i] = lgot *= expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        ldata[i] = lgot = lgot * expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        ldata[i] = lgot = expr * lgot;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = - */
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = igot -= expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = igot = igot - expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = igot = expr - igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+
+  /* BINOP = / */
+  lgot = 1LL << 32;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 2LL;
+
+#pragma acc atomic capture
+        ldata[i] = lgot /= expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << 32;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        ldata[i] = lgot = lgot / expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 2LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        ldata[i] = lgot = expr / lgot;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = & */
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot &= expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot = igot & expr;
+    }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot = expr & igot;
+     }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = ^ */
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot ^= expr;
+     }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot = igot ^ expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot = expr ^ igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = | */
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot |= expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot = igot | expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1 << i;
+
+#pragma acc atomic capture
+        idata[i] = igot = expr | igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = << */
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        ldata[i] = lgot <<= expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        idata[i] = lgot = lgot << expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel
+    {
+      long long expr = 1LL;
+
+#pragma acc atomic capture
+      ldata[0] = lgot = expr << lgot;
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = >> */
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        ldata[i] = lgot >>= expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        ldata[i] = lgot = lgot >> expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << 63;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel
+    {
+      long long expr = 1LL << 32;
+
+#pragma acc atomic capture
+      ldata[0] = lgot = expr >> lgot;
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        fdata[i] = fgot++;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        fdata[i] = fgot--;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        fdata[i] = ++fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic capture
+        fdata[i] = --fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = + */
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot += expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = fgot + expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = expr + fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = * */
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot *= expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = fgot * expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = expr * fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = - */
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot -= expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = fgot - expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 32.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = expr - fgot;
+      }
+  }
+
+  for (i = 0; i < N; i++)
+    if (i % 2 == 0)
+      {
+	if (fdata[i] != 31.0)
+	  abort ();
+      }
+    else
+      {
+	if (fdata[i] != 1.0)
+	  abort ();
+      }
+
+
+  /* BINOP = / */
+  fexp = 1.0;
+  fgot = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot /= expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fexp = 1.0;
+  fgot = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        fdata[i] = fgot = fgot / expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fexp = 1.0;
+  fgot = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel
+    {
+      float expr = 8192.0*8192.0*64.0;
+
+#pragma acc atomic capture
+      fdata[0] = fgot = expr / fgot;
+    }
+  }
+
+  if (fexp != fgot)
+    abort ();
+  
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c
new file mode 100644
index 0000000..842f2de
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-2.c
@@ -0,0 +1,1626 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+  int   iexp, igot, imax, imin;
+  long long lexp, lgot;
+  int   N = 32;
+  int	i;
+  int   idata[N];
+  long long ldata[N];
+  float fexp, fgot;
+  float fdata[N];
+
+  igot = 1234;
+  iexp = 31;
+
+  for (i = 0; i < N; i++)
+    idata[i] = i;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { idata[i] = igot; igot = i; }
+  }
+
+  imax = 0;
+  imin = N;
+
+  for (i = 0; i < N; i++)
+    {
+      imax = idata[i] > imax ? idata[i] : imax;
+      imin = idata[i] < imin ? idata[i] : imin;
+    }
+
+  if (imax != 1234 || imin != 0)
+    abort ();
+
+  return 0;
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { idata[i] = igot; igot++; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { idata[i] = igot; ++igot; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { ++igot; idata[i] = igot; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { igot++; idata[i] = igot; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { idata[i] = igot; igot--; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { idata[i] = igot; --igot; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { --igot; idata[i] = igot; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+#pragma acc atomic capture
+      { igot--; idata[i] = igot; }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = + */
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { idata[i] = igot; igot += expr; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { igot += expr; idata[i] = igot; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { idata[i] = igot; igot = igot + expr; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { idata[i] = igot; igot = expr + igot; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { igot = igot + expr; idata[i] = igot; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+
+  igot = 0;
+  iexp = 32;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { igot = expr + igot; idata[i] = igot; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = * */
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 2LL;
+
+#pragma acc atomic capture
+      { ldata[i] = lgot; lgot *= expr; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        { lgot *= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = lgot * expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 2LL;
+
+#pragma acc atomic capture
+      { ldata[i] = lgot; lgot = expr * lgot; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        { lgot = lgot * expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 2;
+
+#pragma acc atomic capture
+      { lgot = expr * lgot; ldata[i] = lgot; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = - */
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      int expr = 1;
+
+#pragma acc atomic capture
+      { idata[i] = igot; igot -= expr; }
+    }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { igot -= expr; idata[i] = igot; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 32;
+  iexp = 0;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { idata[i] = igot; igot = igot - expr; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 1;
+  iexp = 1;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      int expr = 1;
+
+#pragma acc atomic capture
+      { idata[i] = igot; igot = expr - igot; }
+    }
+  }
+
+  for (i = 0; i < N; i++)
+    if (i % 2 == 0)
+      {
+	if (idata[i] != 1)
+	  abort ();
+      }
+    else
+      {
+	if (idata[i] != 0)
+	  abort ();
+      }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 1;
+  iexp = -31;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { igot = igot - expr; idata[i] = igot; }
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 1;
+  iexp = 1;
+
+#pragma acc data copy (igot, idata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = 1;
+
+#pragma acc atomic capture
+        { igot = expr - igot; idata[i] = igot; }
+      }
+  }
+
+  for (i = 0; i < N; i++)
+    if (i % 2 == 0)
+      {
+	if (idata[i] != 0)
+	  abort ();
+      }
+    else
+      {
+	if (idata[i] != 1)
+	  abort ();
+      }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = / */
+  lgot = 1LL << 32;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot /= expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << 32;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2LL;
+
+#pragma acc atomic capture
+        { lgot /= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << 32;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 2LL;
+
+#pragma acc atomic capture
+      { ldata[i] = lgot; lgot = lgot / expr; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 2LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = expr / lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 2LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        { lgot = lgot / expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 2LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        { lgot = expr / lgot; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = & */
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot &= expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  iexp = 0LL; 
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot &= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = lgot & expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = expr & lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  iexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot = lgot & expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+      { lgot = expr & lgot; ldata[i] = lgot; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = ^ */
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 1 << i;
+
+#pragma acc atomic capture
+      { ldata[i] = lgot; lgot ^= expr; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  iexp = 0LL; 
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot ^= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = lgot ^ expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+      { ldata[i] = lgot; lgot = expr ^ lgot; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  iexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot = lgot ^ expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = ~0LL;
+  lexp = 0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot = expr ^ lgot; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = | */
+  lgot = 0LL;
+  lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1 << i;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot |= expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 0LL;
+  iexp = ~0LL; 
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot |= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 0LL;
+  lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = lgot | expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 0LL;
+  lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = expr | lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 0LL;
+  iexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot = lgot | expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 0LL;
+  lexp = ~0LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = ~(1 << i);
+
+#pragma acc atomic capture
+        { lgot = expr | lgot; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = << */
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot <<= expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  iexp = 1LL << N; 
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { lgot <<= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = lgot << expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = expr << lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { lgot = lgot << expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { lgot = expr << lgot; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = >> */
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+  
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot >>= expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << N;
+  iexp = 1LL; 
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { lgot >>= expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = lgot >> expr; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        { ldata[i] = lgot; lgot = expr >> lgot; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic capture
+        { lgot = lgot >> expr; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        { lgot = expr >> lgot; ldata[i] = lgot; }
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  // FLOAT FLOAT FLOAT
+
+  /* BINOP = + */
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      float expr = 1.0;
+
+#pragma acc atomic capture
+      { fdata[i] = fgot; fgot += expr; }
+    }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fgot += expr; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { idata[i] = fgot; fgot = fgot + expr; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      float expr = 1.0;
+
+#pragma acc atomic capture
+      { fdata[i] = fgot; fgot = expr + fgot; }
+    }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fgot = fgot + expr; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 0.0;
+  fexp = 32.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fgot = expr + fgot; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = * */
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      float expr = 2.0;
+
+#pragma acc atomic capture
+      { fdata[i] = fgot; fgot *= expr; }
+    }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fgot *= expr; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot = fgot * expr; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot = expr * fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << 32;
+
+#pragma acc data copy (lgot, ldata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      long long expr = 2LL;
+
+#pragma acc atomic capture
+      { lgot = lgot * expr; ldata[i] = lgot; }
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 8192.0*8192.0*64.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 2;
+
+#pragma acc atomic capture
+        { fgot = expr * fgot; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = - */
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+  
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot -= expr; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+      float expr = 1.0;
+
+#pragma acc atomic capture
+      { fgot -= expr; fdata[i] = fgot; }
+    }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 32.0;
+  fexp = 0.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot = fgot - expr; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot = expr - fgot; }
+      }
+  }
+
+  for (i = 0; i < N; i++)
+    if (i % 2 == 0)
+      {
+	if (fdata[i] != 1.0)
+	  abort ();
+      }
+    else
+      {
+	if (fdata[i] != 0.0)
+	  abort ();
+      }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = -31.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fgot = fgot - expr; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1.0;
+  fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fgot = expr - fgot; fdata[i] = fgot; }
+      }
+  }
+
+  for (i = 0; i < N; i++)
+    if (i % 2 == 0)
+      {
+	if (fdata[i] != 0.0)
+	  abort ();
+      }
+    else
+      {
+	if (fdata[i] != 1.0)
+	  abort ();
+      }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = / */
+  fgot = 8192.0*8192.0*64.0;
+  fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot /= expr; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 8192.0*8192.0*64.0;
+  fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fgot /= expr; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 8192.0*8192.0*64.0;
+  fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot = fgot / expr; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 8192.0*8192.0*64.0;
+  fexp = 1.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+
+#pragma acc atomic capture
+        { fdata[i] = fgot; fgot = expr / fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 4.0;
+  fexp = 4.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic capture
+        { fgot = fgot / expr; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 4.0;
+  fexp = 4.0;
+
+#pragma acc data copy (fgot, fdata[0:N])
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic capture
+        { fgot = expr / fgot; fdata[i] = fgot; }
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_rw-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_rw-1.c
new file mode 100644
index 0000000..ae4f22e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_rw-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+  int v1, v2;
+  int x;
+
+  x = 99;
+
+#pragma acc parallel copy (v1, v2, x)
+  {
+
+#pragma acc atomic read
+    v1 = x;
+
+#pragma acc atomic write
+    x = 32;
+
+#pragma acc atomic read
+    v2 = x;
+
+  }
+
+  if (v1 != 99)
+    abort ();
+
+  if (v2 != 32)
+    abort ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c
new file mode 100644
index 0000000..18ee3aa
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_update-1.c
@@ -0,0 +1,760 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main(int argc, char **argv)
+{
+  float fexp, fgot;
+  int   iexp, igot;
+  long long lexp, lgot;
+  int   N = 32;
+  int	i;
+
+  fgot = 1234.0;
+  fexp = 1235.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+#pragma acc atomic update
+      fgot++;
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = fgot - N;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic update
+        fgot--;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic update
+        ++fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = fgot - N;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+#pragma acc atomic update
+        --fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = + */
+
+  fgot = 1234.0;
+  fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+#pragma acc atomic update
+        fgot += expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+#pragma acc atomic update
+        fgot = fgot + expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = fgot + N;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+#pragma acc atomic update
+        fgot = expr + fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 0.5;
+#pragma acc atomic update
+        fgot = (expr + expr) + fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = * */
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp *= 2.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+#pragma acc atomic update
+        fgot *= expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = fexp * 2.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+#pragma acc atomic update
+        fgot = fgot * expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = 2.0 * fexp;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+#pragma acc atomic update
+        fgot = expr * fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+#pragma acc atomic update
+        fgot = (expr + expr) * fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = - */
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp -= 2.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+#pragma acc atomic update
+        fgot -= expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = fexp - 2.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+#pragma acc atomic update
+        fgot = fgot - expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = 2.0 - fexp;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic update
+        fgot = expr - fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+#pragma acc atomic update
+        fgot = (expr + expr) - fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = / */
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp /= 2.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+#pragma acc atomic update
+        fgot /= expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = fexp / 2.0;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+	
+#pragma acc atomic update
+        fgot = fgot / expr;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = 2.0 / fexp;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 2.0;
+
+#pragma acc atomic update
+        fgot = expr / fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  fgot = 1234.0;
+  fexp = 1234.0;
+
+  for (i = 0; i < N; i++)
+    fexp = 2.0 / fexp;
+
+#pragma acc data copy (fgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        float expr = 1.0;
+#pragma acc atomic update
+        fgot = (expr + expr) / fgot;
+      }
+  }
+
+  if (fexp != fgot)
+    abort ();
+
+  /* BINOP = & */
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = ~(1 << i);
+
+#pragma acc atomic update
+        igot &= expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = ~(1 << i);
+#pragma acc atomic update
+        igot = igot / expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = ~(1 << i);
+#pragma acc atomic update
+        igot = expr & igot;
+     }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = ~(1 << i);
+        int zero = 0;
+
+#pragma acc atomic update
+        igot = (expr + zero) & igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = ^ */
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+
+#pragma acc atomic update
+        igot ^= expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+
+#pragma acc atomic update
+        igot = igot ^ expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+
+#pragma acc atomic update
+        igot = expr ^ igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = ~0;
+  iexp = 0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+        int zero = 0;
+
+#pragma acc atomic update
+        igot = (expr + zero) ^ igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = | */
+
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+
+#pragma acc atomic update
+        igot |= expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+
+#pragma acc atomic update
+        igot = igot | expr;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+
+#pragma acc atomic update
+        igot = expr | igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  igot = 0;
+  iexp = ~0;
+
+#pragma acc data copy (igot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        int expr = (1 << i);
+        int zero = 0;
+
+#pragma acc atomic update
+        igot = (expr + zero) | igot;
+      }
+  }
+
+  if (iexp != igot)
+    abort ();
+
+  /* BINOP = << */
+
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic update
+        lgot <<= expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << N;
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic update
+        lgot = lgot << expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic update
+        lgot = expr << lgot;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 2LL;
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL;
+        long long zero = 0LL;
+
+#pragma acc atomic update
+        lgot = (expr + zero) << lgot;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  /* BINOP = >> */
+
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic update
+        lgot >>= expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL << N;
+  lexp = 1LL;
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < N; i++)
+      {
+        long long expr = 1LL;
+
+#pragma acc atomic update
+        lgot = lgot >> expr;
+      }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL << N;
+
+#pragma acc atomic update
+        lgot = expr >> lgot;
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  lgot = 1LL;
+  lexp = 1LL << (N - 1);
+
+#pragma acc data copy (lgot)
+  {
+#pragma acc parallel loop
+    for (i = 0; i < 1; i++)
+      {
+        long long expr = 1LL << N;
+        long long zero = 0LL;
+
+#pragma acc atomic update
+        lgot = (expr + zero) >> lgot;
+    }
+  }
+
+  if (lexp != lgot)
+    abort ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
new file mode 100644
index 0000000..dbe82fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
@@ -0,0 +1,44 @@
+#include <assert.h>
+
+int
+main (int argc, char *argv[])
+{
+  int res, res2 = 0;
+
+#if defined(ACC_DEVICE_TYPE_host)
+# define GANGS 1
+#else
+# define GANGS 256
+#endif
+  #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \
+		       copy(res2)
+  {
+    #pragma acc atomic
+    res2 += 5;
+  }
+  res = GANGS * 5;
+
+  assert (res == res2);
+#undef GANGS
+
+  res = res2 = 1;
+
+#if defined(ACC_DEVICE_TYPE_host)
+# define GANGS 1
+#else
+# define GANGS 8
+#endif
+  #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \
+		       copy(res2)
+  {
+    #pragma acc atomic
+    res2 *= 5;
+  }
+  for (int i = 0; i < GANGS; ++i)
+    res *= 5;
+
+  assert (res == res2);
+#undef GANGS
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
new file mode 100644
index 0000000..12ab552
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
@@ -0,0 +1,48 @@
+#include <assert.h>
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+  int res, res2 = 0;
+
+#if defined(ACC_DEVICE_TYPE_host)
+# define GANGS 1
+#else
+# define GANGS 256
+#endif
+  #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \
+		       copy(res2) async(1)
+  {
+    #pragma acc atomic
+    res2 += 5;
+  }
+  res = GANGS * 5;
+
+  acc_wait (1);
+
+  assert (res == res2);
+#undef GANGS
+
+  res = res2 = 1;
+
+#if defined(ACC_DEVICE_TYPE_host)
+# define GANGS 1
+#else
+# define GANGS 8
+#endif
+  #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \
+		       copy(res2) async(1)
+  {
+    #pragma acc atomic
+    res2 *= 5;
+  }
+  for (int i = 0; i < GANGS; ++i)
+    res *= 5;
+
+  acc_wait (1);
+
+  assert (res == res2);
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c
new file mode 100644
index 0000000..99c6dfb
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c
@@ -0,0 +1,28 @@
+#include <assert.h>
+
+/* Test worker-single/vector-single mode.  */
+
+int
+main (int argc, char *argv[])
+{
+  int arr[32], i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = 0;
+
+  #pragma acc parallel copy(arr) num_gangs(8) num_workers(8) vector_length(32)
+  {
+    int j;
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc atomic
+	arr[j]++;
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == 1);
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c
new file mode 100644
index 0000000..84080d0
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c
@@ -0,0 +1,28 @@
+#include <assert.h>
+
+/* Test worker-single/vector-partitioned mode.  */
+
+int
+main (int argc, char *argv[])
+{
+  int arr[32], i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(1) num_workers(8) vector_length(32)
+      {
+	int k;
+	#pragma acc loop vector
+	for (k = 0; k < 32; k++)
+	  {
+	    #pragma acc atomic
+	    arr[k]++;
+	  }
+      }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == i + 1);
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c
new file mode 100644
index 0000000..cbc3e37
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c
@@ -0,0 +1,46 @@
+#include <assert.h>
+
+#if defined(ACC_DEVICE_TYPE_host)
+#define ACTUAL_GANGS 1
+#else
+#define ACTUAL_GANGS 8
+#endif
+
+/* Test worker-single, vector-partitioned, gang-redundant mode.  */
+
+int
+main (int argc, char *argv[])
+{
+  int n, arr[32], i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = 0;
+
+  n = 0;
+
+  #pragma acc parallel copy(n, arr) num_gangs(ACTUAL_GANGS) num_workers(8) \
+	  vector_length(32)
+  {
+    int j;
+
+    #pragma acc atomic
+    n++;
+
+    #pragma acc loop vector
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc atomic
+	arr[j] += 1;
+      }
+
+    #pragma acc atomic
+    n++;
+  }
+
+  assert (n == ACTUAL_GANGS * 2);
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == ACTUAL_GANGS);
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
new file mode 100644
index 0000000..27c5c9e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
@@ -0,0 +1,784 @@
+! { dg-do run }
+
+program main
+  integer igot, iexp, itmp
+  real fgot, fexp, ftmp
+  logical lgot, lexp, ltmp
+  integer, parameter :: N = 32
+
+  igot = 0
+  iexp = N * 2
+
+  !$acc parallel copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      itmp = igot
+      igot = i + i
+  !$acc end atomic
+    end do
+  !$acc end parallel
+
+  if (igot /= iexp) call abort
+  if (itmp /= iexp - 2) call abort
+
+  fgot = 1234.0
+  fexp = 1266.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = fgot + 1.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp - 1.0) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 1.0
+  fexp = 2.0**32
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = fgot * 2.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp / 2.0) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 32.0
+  fexp = fgot - N
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = fgot - 1.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp + 1.0) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 2**32.0
+  fexp = 1.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = fgot / 2.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fgot * 2.0) call abort
+  if (fgot /= fexp) call abort
+
+  lgot = .TRUE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = lgot .and. .FALSE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. .not. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = lgot .or. .FALSE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = lgot .eqv. .TRUE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .TRUE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = lgot .neqv. .TRUE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. .not. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  fgot = 1234.0
+  fexp = 1266.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = 1.0 + fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp - 1.0) call abort 
+  if (fgot /= fexp) call abort
+
+  fgot = 1.0
+  fexp = 2.0**32
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = 2.0 * fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp / 2.0) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 32.0
+  fexp = 32.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = 2.0 - fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= 2.0 - fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 2.0**16
+  fexp = 2.0**16
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      ftmp = fgot
+      fgot = 2.0 / fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= 2.0 / fexp) call abort
+  if (fgot /= fexp) call abort
+
+  lgot = .TRUE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = .FALSE. .and. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. .not. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = .FALSE. .or. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = .TRUE. .eqv. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .TRUE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    ltmp = lgot
+    lgot = .TRUE. .neqv. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. .not. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  igot = 1
+  iexp = N
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      itmp = igot
+      igot = max (igot, i)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp - 1) call abort
+  if (igot /= iexp) call abort
+
+  igot = N
+  iexp = 1
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      itmp = igot
+      igot = min (igot, i)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = ibclr (-2, i)
+  !$acc atomic capture
+      itmp = igot
+      igot = iand (igot, iexpr)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= ibset (iexp, N - 1)) call abort
+  if (igot /= iexp) call abort
+
+  igot = 0
+  iexp = -1 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      itmp = igot
+      igot = ior (igot, iexpr)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= ieor (iexp, lshift (1, N - 1))) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      itmp = igot
+      igot = ieor (igot, iexpr)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= ior (iexp, lshift (1, N - 1))) call abort
+  if (igot /= iexp) call abort
+
+  igot = 1
+  iexp = N
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      itmp = igot
+      igot = max (i, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp - 1) call abort
+  if (igot /= iexp) call abort
+
+  igot = N
+  iexp = 1
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      itmp = igot
+      igot = min (i, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = ibclr (-2, i)
+  !$acc atomic capture
+      itmp = igot
+      igot = iand (iexpr, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= ibset (iexp, N - 1)) call abort
+  if (igot /= iexp) call abort
+
+  igot = 0
+  iexp = -1 
+	!!
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      itmp = igot
+      igot = ior (iexpr, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= ieor (iexp, lshift (1, N - 1))) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      itmp = igot
+      igot = ieor (iexpr, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= ior (iexp, lshift (1, N - 1))) call abort
+  if (igot /= iexp) call abort
+
+  fgot = 1234.0
+  fexp = 1266.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = fgot + 1.0
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 1.0
+  fexp = 2.0**32
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = fgot * 2.0
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 32.0
+  fexp = fgot - N
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = fgot - 1.0
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 2**32.0
+  fexp = 1.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = fgot / 2.0
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  lgot = .TRUE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = lgot .and. .FALSE.
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = lgot .or. .FALSE.
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = lgot .eqv. .TRUE.
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .TRUE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = lgot .neqv. .TRUE.
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  fgot = 1234.0
+  fexp = 1266.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = 1.0 + fgot
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 1.0
+  fexp = 2.0**32
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = 2.0 * fgot
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 32.0
+  fexp = 32.0
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = 2.0 - fgot
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  fgot = 2.0**16
+  fexp = 2.0**16
+
+  !$acc parallel loop copy (fgot, ftmp)
+    do i = 1, N
+  !$acc atomic capture
+      fgot = 2.0 / fgot
+      ftmp = fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (ftmp /= fexp) call abort
+  if (fgot /= fexp) call abort
+
+  lgot = .TRUE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = .FALSE. .and. lgot
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = .FALSE. .or. lgot
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = .TRUE. .eqv. lgot
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .TRUE.
+
+  !$acc parallel copy (lgot, ltmp)
+  !$acc atomic capture
+    lgot = .TRUE. .neqv. lgot
+    ltmp = lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (ltmp .neqv. lexp) call abort
+  if (lgot .neqv. lexp) call abort
+
+  igot = 1
+  iexp = N
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      igot = max (igot, i)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = N
+  iexp = 1
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      igot = min (igot, i)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = ibclr (-2, i)
+  !$acc atomic capture
+      igot = iand (igot, iexpr)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = 0
+  iexp = -1 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      igot = ior (igot, iexpr)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      igot = ieor (igot, iexpr)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = 1
+  iexp = N
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      igot = max (i, igot)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = N
+  iexp = 1
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 1, N
+  !$acc atomic capture
+      igot = min (i, igot)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = ibclr (-2, i)
+  !$acc atomic capture
+      igot = iand (iexpr, igot)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = 0
+  iexp = -1 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      igot = ior (iexpr, igot)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0 
+
+  !$acc parallel loop copy (igot, itmp)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic capture
+      igot = ieor (iexpr, igot)
+      itmp = igot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (itmp /= iexp) call abort
+  if (igot /= iexp) call abort
+
+end program
diff --git libgomp/testsuite/libgomp.oacc-fortran/atomic_rw-1.f90 libgomp/testsuite/libgomp.oacc-fortran/atomic_rw-1.f90
new file mode 100644
index 0000000..51ec9aa
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/atomic_rw-1.f90
@@ -0,0 +1,29 @@
+! { dg-do run }
+
+program main
+  integer v1, v2
+  integer x
+
+  x = 99
+
+  !$acc parallel copy (v1, v2, x)
+
+  !$acc atomic read
+    v1 = x;
+  !$acc end atomic
+
+  !$acc atomic write
+    x = 32;
+  !$acc end atomic
+
+  !$acc atomic read
+    v2 = x;
+  !$acc end atomic
+
+  !$acc end parallel
+
+  if (v1 .ne. 99) call abort
+
+  if (v2 .ne. 32) call abort
+
+end program main
diff --git libgomp/testsuite/libgomp.oacc-fortran/atomic_update-1.f90 libgomp/testsuite/libgomp.oacc-fortran/atomic_update-1.f90
new file mode 100644
index 0000000..6607c77
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/atomic_update-1.f90
@@ -0,0 +1,338 @@
+! { dg-do run }
+
+program main
+  integer igot, iexp, iexpr
+  real fgot, fexp
+  integer i
+  integer, parameter :: N = 32
+  logical lgot, lexp
+
+  fgot = 1234.0
+  fexp = 1266.0
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = fgot + 1.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  fgot = 1.0
+  fexp = 2.0**32
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = fgot * 2.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  fgot = 32.0
+  fexp = fgot - N
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = fgot - 1.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  fgot = 2**32.0
+  fexp = 1.0
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = fgot / 2.0
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  lgot = .TRUE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = lgot .and. .FALSE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = lgot .or. .FALSE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = lgot .eqv. .TRUE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .TRUE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = lgot .neqv. .TRUE.
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  fgot = 1234.0
+  fexp = 1266.0
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = 1.0 + fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  fgot = 1.0
+  fexp = 2.0**32
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = 2.0 * fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  fgot = 32.0
+  fexp = 32.0
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = 2.0 - fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  fgot = 2.0**16
+  fexp = 2.0**16
+
+  !$acc parallel loop copy (fgot)
+    do i = 1, N
+  !$acc atomic update
+      fgot = 2.0 / fgot
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (fgot /= fexp) call abort
+
+  lgot = .TRUE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = .FALSE. .and. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = .FALSE. .or. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .FALSE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = .TRUE. .eqv. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  lgot = .FALSE.
+  lexp = .TRUE.
+
+  !$acc parallel copy (lgot)
+  !$acc atomic update
+    lgot = .TRUE. .neqv. lgot
+  !$acc end atomic
+  !$acc end parallel
+
+  if (lgot .neqv. lexp) call abort
+
+  igot = 1
+  iexp = N
+
+  !$acc parallel loop copy (igot)
+    do i = 1, N
+  !$acc atomic update
+      igot = max (igot, i)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = N
+  iexp = 1
+
+  !$acc parallel loop copy (igot)
+    do i = 1, N
+  !$acc atomic update
+      igot = min (igot, i)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0
+
+  !$acc parallel loop copy (igot)
+    do i = 0, N - 1
+      iexpr = ibclr (-2, i)
+  !$acc atomic update
+      igot = iand (igot, iexpr)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = 0
+  iexp = -1 
+
+  !$acc parallel loop copy (igot)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic update
+      igot = ior (igot, iexpr)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0 
+
+  !$acc parallel loop copy (igot)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic update
+      igot = ieor (igot, iexpr)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = 1
+  iexp = N
+
+  !$acc parallel loop copy (igot)
+    do i = 1, N
+  !$acc atomic update
+      igot = max (i, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = N
+  iexp = 1
+
+  !$acc parallel loop copy (igot)
+    do i = 1, N
+  !$acc atomic update
+      igot = min (i, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0
+
+  !$acc parallel loop copy (igot)
+    do i = 0, N - 1
+      iexpr = ibclr (-2, i)
+  !$acc atomic update
+      igot = iand (iexpr, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = 0
+  iexp = -1 
+
+  !$acc parallel loop copy (igot)
+    do i = 0, N - 1
+        iexpr = lshift (1, i)
+  !$acc atomic update
+      igot = ior (iexpr, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+  igot = -1
+  iexp = 0 
+
+  !$acc parallel loop copy (igot)
+    do i = 0, N - 1
+      iexpr = lshift (1, i)
+  !$acc atomic update
+      igot = ieor (iexpr, igot)
+  !$acc end atomic
+    end do
+  !$acc end parallel loop
+
+  if (igot /= iexp) call abort
+
+end program


Grüße
 Thomas
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20151102/d19e0584/attachment.sig>


More information about the Gcc-patches mailing list