This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, PR82428] Add __builtin_goacc_{gang,worker,vector}_{id,size}


On 01/17/2018 06:51 PM, Jakub Jelinek wrote:
On Wed, Jan 17, 2018 at 06:42:33PM +0100, Tom de Vries wrote:
+static rtx
+expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore)
+{
+  tree fndecl = get_callee_fndecl (exp);
+
+  const char *name;
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    case BUILT_IN_GOACC_PARLEVEL_ID:
+      name = "__builtin_goacc_parlevel_id";
+      break;
+    case BUILT_IN_GOACC_PARLEVEL_SIZE:
+      name = "__builtin_goacc_parlevel_size";
+      break;

Can you avoid that many switches on DECL_FUNCTION_CODE?
Like initialize in this one not just name, but also the
fallback_retval and gen_fn variables and just use them later?

+    default:
+      gcc_unreachable ();
+    }
+
+  if (oacc_get_fn_attrib (current_function_decl) == NULL_TREE)
+    {
+      error ("%s only supported in openacc code", name);

OpenACC ?

+      return const0_rtx;
+    }
+
+  tree arg = CALL_EXPR_ARG (exp, 0);
+  if (TREE_CODE (arg) != INTEGER_CST)
+    {
+      error ("non-constant argument 0 to %s", name);

%qs instead of %s, 2 times.

+      return const0_rtx;
+    }
+
+  int dim = TREE_INT_CST_LOW (arg);
+  switch (dim)
+    {
+    case GOMP_DIM_GANG:
+    case GOMP_DIM_WORKER:
+    case GOMP_DIM_VECTOR:
+      break;
+    default:
+      error ("illegal argument 0 to %s", name);
+      return const0_rtx;
+    }
+
+  if (ignore)
+    return target;
+
+  if (!targetm.have_oacc_dim_size ())
+    {
+      rtx retval;
+      switch (DECL_FUNCTION_CODE (fndecl))
+	{
+	case BUILT_IN_GOACC_PARLEVEL_ID:
+	  retval = const0_rtx;
+	  break;
+	case BUILT_IN_GOACC_PARLEVEL_SIZE:
+	  retval = GEN_INT (1);

In addition to moving these assignments to a single switch,
this one can be fallback_retval = const1_rtx;

Otherwise LGTM for stage1.

Followed up on the comments above.

Retested as before.

Will commit as attached in stage 1.

Thanks,
- Tom
Add __builtin_goacc_parlevel_{id,size}

2018-01-06  Tom de Vries  <tom@codesourcery.com>

	PR libgomp/82428
	* builtins.def (DEF_GOACC_BUILTIN_ONLY): Define.
	* omp-builtins.def (BUILT_IN_GOACC_PARLEVEL_ID)
	(BUILT_IN_GOACC_PARLEVEL_SIZE): New builtin.
	* builtins.c (expand_builtin_goacc_parlevel_id_size): New function.
	(expand_builtin): Call expand_builtin_goacc_parlevel_id_size.
	* doc/extend.texi (Other Builtins): Add __builtin_goacc_parlevel_id and
	__builtin_goacc_parlevel_size.

	* f95-lang.c (DEF_GOACC_BUILTIN_ONLY): Define.

	* c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c: New test.
	* c-c++-common/goacc/builtin-goacc-parlevel-id-size.c: New test.

	* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use
	__builtin_goacc_parlevel_{id,size}.
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/tile-1.c: Same.

---
 gcc/builtins.c                                     | 71 +++++++++++++++++++
 gcc/builtins.def                                   |  4 ++
 gcc/doc/extend.texi                                | 10 +++
 gcc/fortran/f95-lang.c                             |  4 ++
 gcc/omp-builtins.def                               |  5 ++
 .../goacc/builtin-goacc-parlevel-id-size-2.c       | 37 ++++++++++
 .../goacc/builtin-goacc-parlevel-id-size.c         | 79 ++++++++++++++++++++++
 .../libgomp.oacc-c-c++-common/gang-static-2.c      | 21 +++---
 .../libgomp.oacc-c-c++-common/loop-auto-1.c        | 18 ++---
 .../libgomp.oacc-c-c++-common/loop-dim-default.c   | 14 ++--
 .../testsuite/libgomp.oacc-c-c++-common/loop-g-1.c | 17 ++---
 .../testsuite/libgomp.oacc-c-c++-common/loop-g-2.c | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-gwv-1.c         | 17 +++--
 .../libgomp.oacc-c-c++-common/loop-red-g-1.c       | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-red-gwv-1.c     | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-red-v-1.c       | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-red-v-2.c       | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-red-w-1.c       | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-red-w-2.c       | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-red-wv-1.c      | 12 ++--
 .../testsuite/libgomp.oacc-c-c++-common/loop-v-1.c | 16 ++---
 .../testsuite/libgomp.oacc-c-c++-common/loop-w-1.c | 16 ++---
 .../libgomp.oacc-c-c++-common/loop-wv-1.c          | 16 ++---
 .../libgomp.oacc-c-c++-common/parallel-dims.c      | 19 ++----
 .../libgomp.oacc-c-c++-common/routine-g-1.c        | 18 +++--
 .../libgomp.oacc-c-c++-common/routine-gwv-1.c      | 18 +++--
 .../libgomp.oacc-c-c++-common/routine-v-1.c        | 18 +++--
 .../libgomp.oacc-c-c++-common/routine-w-1.c        | 18 +++--
 .../libgomp.oacc-c-c++-common/routine-wv-1.c       | 18 +++--
 .../libgomp.oacc-c-c++-common/routine-wv-2.c       | 19 ++----
 .../testsuite/libgomp.oacc-c-c++-common/tile-1.c   | 15 ++--
 31 files changed, 382 insertions(+), 230 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 98eb804..fad2f77 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -70,6 +70,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "case-cfn-macros.h"
 #include "gimple-fold.h"
 #include "intl.h"
+#include "gomp-constants.h"
+#include "omp-general.h"
 
 struct target_builtins default_target_builtins;
 #if SWITCHABLE_TARGET
@@ -6602,6 +6604,71 @@ expand_stack_save (void)
   return ret;
 }
 
+/* Emit code to get the openacc gang, worker or vector id or size.  */
+
+static rtx
+expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore)
+{
+  const char *name;
+  rtx fallback_retval;
+  rtx_insn *(*gen_fn) (rtx, rtx);
+  switch (DECL_FUNCTION_CODE (get_callee_fndecl (exp)))
+    {
+    case BUILT_IN_GOACC_PARLEVEL_ID:
+      name = "__builtin_goacc_parlevel_id";
+      fallback_retval = const0_rtx;
+      gen_fn = targetm.gen_oacc_dim_pos;
+      break;
+    case BUILT_IN_GOACC_PARLEVEL_SIZE:
+      name = "__builtin_goacc_parlevel_size";
+      fallback_retval = const1_rtx;
+      gen_fn = targetm.gen_oacc_dim_size;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (oacc_get_fn_attrib (current_function_decl) == NULL_TREE)
+    {
+      error ("%qs only supported in OpenACC code", name);
+      return const0_rtx;
+    }
+
+  tree arg = CALL_EXPR_ARG (exp, 0);
+  if (TREE_CODE (arg) != INTEGER_CST)
+    {
+      error ("non-constant argument 0 to %qs", name);
+      return const0_rtx;
+    }
+
+  int dim = TREE_INT_CST_LOW (arg);
+  switch (dim)
+    {
+    case GOMP_DIM_GANG:
+    case GOMP_DIM_WORKER:
+    case GOMP_DIM_VECTOR:
+      break;
+    default:
+      error ("illegal argument 0 to %qs", name);
+      return const0_rtx;
+    }
+
+  if (ignore)
+    return target;
+
+  if (!targetm.have_oacc_dim_size ())
+    {
+      emit_move_insn (target, fallback_retval);
+      return target;
+    }
+
+  rtx reg = MEM_P (target) ? gen_reg_rtx (GET_MODE (target)) : target;
+  emit_insn (gen_fn (reg, GEN_INT (dim)));
+  if (reg != target)
+    emit_move_insn (target, reg);
+
+  return target;
+}
 
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
@@ -7732,6 +7799,10 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
 	 folding.  */
       break;
 
+    case BUILT_IN_GOACC_PARLEVEL_ID:
+    case BUILT_IN_GOACC_PARLEVEL_SIZE:
+      return expand_builtin_goacc_parlevel_id_size (exp, target, ignore);
+
     default:	/* just do library call, if unknown builtin */
       break;
     }
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 671097e..263dfc7ea 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -214,6 +214,10 @@ along with GCC; see the file COPYING3.  If not see
 #define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,    \
 	       flag_openacc, true, true, ATTRS, false, true)
+#undef DEF_GOACC_BUILTIN_ONLY
+#define DEF_GOACC_BUILTIN_ONLY(ENUM, NAME, TYPE, ATTRS) \
+  DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, BT_LAST,    \
+	       false, false, true, ATTRS, false, flag_openacc)
 #undef DEF_GOMP_BUILTIN
 #define DEF_GOMP_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,    \
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 2a553ad..15093c6 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12335,6 +12335,16 @@ Similar to @code{__builtin_bswap32}, except the argument and return types
 are 64 bit.
 @end deftypefn
 
+@deftypefn {Built-in Function} int __builtin_goacc_parlevel_id (int x)
+Returns the openacc gang, worker or vector id depending on whether @var{x} is
+0, 1 or 2.
+@end deftypefn
+
+@deftypefn {Built-in Function} int __builtin_goacc_parlevel_size (int x)
+Returns the openacc gang, worker or vector size depending on whether @var{x} is
+0, 1 or 2.
+@end deftypefn
+
 @node Target Builtins
 @section Built-in Functions Specific to Particular Target Machines
 
diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index dc9a1ae..60a28b8 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -1202,6 +1202,10 @@ gfc_init_builtin_functions (void)
 #undef DEF_GOACC_BUILTIN_COMPILER
 #define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) \
       gfc_define_builtin (name, builtin_types[type], code, name, attr);
+#undef DEF_GOACC_BUILTIN_ONLY
+#define DEF_GOACC_BUILTIN_ONLY(code, name, type, attr) \
+      gfc_define_builtin ("__builtin_" name, builtin_types[type], code, NULL, \
+			  attr);
 #undef DEF_GOMP_BUILTIN
 #define DEF_GOMP_BUILTIN(code, name, type, attr) /* ignore */
 #include "../omp-builtins.def"
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 69b73f4..7bcf971 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -51,6 +51,11 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
 DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
 			    BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
 
+DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_ID, "goacc_parlevel_id",
+			BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_SIZE, "goacc_parlevel_size",
+			BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST)
+
 DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_THREAD_NUM, "omp_get_thread_num",
 		  BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_THREADS, "omp_get_num_threads",
diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c
new file mode 100644
index 0000000..16c7b34
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile }  */
+/* { dg-additional-options "-O2" }  */
+
+#include "../../../../include/gomp-constants.h"
+
+void
+foo (void)
+{
+  __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+  /* { dg-error "'__builtin_goacc_parlevel_id' only supported in OpenACC code" "" { target *-*-* } .-1 } */
+  
+  __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+  /* { dg-error "'__builtin_goacc_parlevel_size' only supported in OpenACC code" "" { target *-*-* } .-1 } */
+}
+
+#pragma acc routine
+void
+foo2 (int arg)
+{
+  __builtin_goacc_parlevel_id (arg);
+  /* { dg-error "non-constant argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */
+
+  __builtin_goacc_parlevel_size (arg);
+  /* { dg-error "non-constant argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */
+
+  __builtin_goacc_parlevel_id (-1);
+  /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */
+
+  __builtin_goacc_parlevel_id (-1);
+  /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */
+
+  __builtin_goacc_parlevel_size (-1);
+  /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */
+
+  __builtin_goacc_parlevel_size (3);
+  /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c
new file mode 100644
index 0000000..5cda818
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c
@@ -0,0 +1,79 @@
+/* { dg-do compile }  */
+/* { dg-additional-options "-O2" }  */
+
+#include "../../../../include/gomp-constants.h"
+
+#pragma acc routine
+int
+foo (void)
+{
+  int res;
+  
+  __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+  __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+  __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+  __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+  __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+  __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+
+  res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+  res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+  res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+  res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+  res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+  res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+
+  return res;
+}
+
+void
+foo2 (void)
+{
+  int res;
+
+#pragma acc parallel
+  {
+    __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+    __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+    __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+    __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+
+    res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+    res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+    res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+    res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+  }
+}
+
+void
+foo3 (void)
+{
+  int res;
+
+#pragma acc kernels
+  {
+    __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+    __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+    __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+    __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+
+    res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+    res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+    res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+    res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+  }
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
index 6de739a..e273a79 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
@@ -1,25 +1,23 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <assert.h>
 #include <openacc.h>
+#include <gomp-constants.h>
 
 #define N 100
 
 #define GANG_ID(I)						\
-  (acc_on_device (acc_device_nvidia)				\
-   ? ({unsigned __r;						\
-       __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r));	\
-       __r; }) : (I))
+  (acc_on_device (acc_device_not_host)				\
+   ? __builtin_goacc_parlevel_id (GOMP_DIM_GANG)					\
+   : (I))
 
 void
 test_static(int *a, int num_gangs, int sarg)
 {
   int i, j;
 
-  if (sarg == 0)
+  if (acc_on_device (acc_device_host))
+    return;
+
+   if (sarg == 0)
     sarg = 1;
 
   for (i = 0; i < N / sarg; i++)
@@ -32,6 +30,9 @@ test_nonstatic(int *a, int gangs)
 {
   int i, j;
 
+  if (acc_on_device (acc_device_host))
+    return;
+
   for (i = 0; i < N; i+=gangs)
     for (j = 0; j < gangs; j++)
       assert (a[i+j] == i/gangs);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 863b6b3..34bc57e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -1,11 +1,8 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 /* { dg-additional-options "-fopenacc-dim=32" } */
 
 #include <stdio.h>
 #include <openacc.h>
+#include <gomp-constants.h>
 
 int check (const int *ary, int size, int gp, int wp, int vp)
 {
@@ -79,15 +76,12 @@ static int __attribute__((noinline)) place ()
 {
   int r = 0;
 
-  if (acc_on_device (acc_device_nvidia))
-    {
-      int g = 0, w = 0, v = 0;
+  int g = 0, w = 0, v = 0;
+  g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+  w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+  v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+  r = (g << 16) | (w << 8) | v;
 
-      __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-      __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-      __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
-      r = (g << 16) | (w << 8) | v;
-    }
   return r;
 }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
index e2b08c3..dd8107c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -1,25 +1,23 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.  */
-/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
 /* { dg-additional-options "-fopenacc-dim=16:16" } */
 
 #include <openacc.h>
 #include <alloca.h>
 #include <string.h>
 #include <stdio.h>
+#include <gomp-constants.h>
 
 #pragma acc routine
 static int __attribute__ ((noinline)) coord ()
 {
   int res = 0;
 
-  if (acc_on_device (acc_device_nvidia))
+  if (acc_on_device (acc_device_not_host))
     {
-      int g = 0, w = 0, v = 0;
+      int g, w, v;
 
-      __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-      __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-      __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+      g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+      w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+      v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
       res = (1 << 24) | (g << 16) | (w << 8) | v;
     }
   return res;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
index ae1d588..98f02e9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,12 @@ int main ()
 #pragma acc loop gang
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
-
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    int g, w, v;
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
index c06d861..4152a4e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,13 @@ int main ()
 #pragma acc loop gang (static:1)
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index 42b612a..766e578 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,14 @@ int main ()
 #pragma acc loop gang worker vector
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
+
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
index 929e01c..7107502 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -18,13 +16,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
index 4ae4b7c..0bec6e1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -18,13 +16,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
index 0556455..da4921d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -19,13 +17,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
index 16d8f9f..15e2bc2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -21,13 +19,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
index 19021d9..6bbd04f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -18,13 +16,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
index f0c9d81..c63a5d4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
index 0fec2dc..6743afa 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -2,6 +2,8 @@
 /* { dg-additional-options "-O2" } */
 
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -17,13 +19,13 @@ int main ()
       {
 	int val = ix;
 	
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
index 2974807..6010cd2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,13 @@ int main ()
 #pragma acc loop vector
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
index 33b6eae..fa6fb91 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,13 @@ int main ()
 #pragma acc loop worker
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
index 578cfad..cd4cc99 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 int main ()
@@ -20,13 +18,13 @@ int main ()
 #pragma acc loop worker vector
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 8308f7c..4a98546 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -3,6 +3,7 @@
 
 #include <limits.h>
 #include <openacc.h>
+#include <gomp-constants.h>
 
 /* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
    not behaving as expected for -O0.  */
@@ -12,11 +13,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
   if (acc_on_device ((int) acc_device_host))
     return 0;
   else if (acc_on_device ((int) acc_device_nvidia))
-    {
-      unsigned int r;
-      asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
-      return r;
-    }
+    return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
   else
     __builtin_abort ();
 }
@@ -27,11 +24,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
   if (acc_on_device ((int) acc_device_host))
     return 0;
   else if (acc_on_device ((int) acc_device_nvidia))
-    {
-      unsigned int r;
-      asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
-      return r;
-    }
+    return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
   else
     __builtin_abort ();
 }
@@ -42,11 +35,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
   if (acc_on_device ((int) acc_device_host))
     return 0;
   else if (acc_on_device ((int) acc_device_nvidia))
-    {
-      unsigned int r;
-      asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
-      return r;
-    }
+    return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
   else
     __builtin_abort ();
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
index b6ab713..a164f57 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N])
 #pragma acc loop gang
     for (unsigned ix = 0; ix < N; ix++)
       {
-	if (__builtin_acc_on_device (5))
+	if (acc_on_device (acc_device_not_host))
 	  {
-	    int g = 0, w = 0, v = 0;
+	    int g, w, v;
 
-	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	    ary[ix] = (g << 16) | (w << 8) | v;
 	  }
 	else
@@ -38,7 +36,7 @@ int main ()
   
 #pragma acc parallel num_gangs(32) copy(ary) copy(ondev)
   {
-    ondev = __builtin_acc_on_device (5);
+    ondev = acc_on_device (acc_device_not_host);
     gang (ary);
   }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
index ace2f49..a97e046 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N])
 #pragma acc loop gang worker vector
   for (unsigned ix = 0; ix < N; ix++)
     {
-      if (__builtin_acc_on_device (5))
+      if (acc_on_device (acc_device_not_host))
 	{
-	  int g = 0, w = 0, v = 0;
+	  int g, w, v;
 
-	  __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	  __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	  __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	  g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	  w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	  v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	  ary[ix] = (g << 16) | (w << 8) | v;
 	}
       else
@@ -38,7 +36,7 @@ int main ()
   
 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
   {
-    ondev = __builtin_acc_on_device (5);
+    ondev = acc_on_device (acc_device_not_host);
     gang (ary);
   }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
index 2503e8d..b1e3e3a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) vector (int ary[N])
 #pragma acc loop vector
   for (unsigned ix = 0; ix < N; ix++)
     {
-      if (__builtin_acc_on_device (5))
+      if (acc_on_device (acc_device_not_host))
 	{
-	  int g = 0, w = 0, v = 0;
+	  int g, w, v;
 
-	  __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	  __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	  __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	  g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	  w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	  v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	  ary[ix] = (g << 16) | (w << 8) | v;
 	}
       else
@@ -38,7 +36,7 @@ int main ()
   
 #pragma acc parallel vector_length(32) copy(ary) copy(ondev)
   {
-    ondev = __builtin_acc_on_device (5);
+    ondev = acc_on_device (acc_device_not_host);
     vector (ary);
   }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
index 80cd462..81f1e03 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N])
 #pragma acc loop worker
   for (unsigned ix = 0; ix < N; ix++)
     {
-      if (__builtin_acc_on_device (5))
+      if (acc_on_device (acc_device_not_host))
 	{
-	  int g = 0, w = 0, v = 0;
+	  int g, w, v;
 
-	  __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	  __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	  __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	  g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	  w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	  v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	  ary[ix] = (g << 16) | (w << 8) | v;
 	}
       else
@@ -38,7 +36,7 @@ int main ()
   
 #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
   {
-    ondev = __builtin_acc_on_device (5);
+    ondev = acc_on_device (acc_device_not_host);
     worker (ary);
   }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
index 5e45fad..23dbc1a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
 
 #define N (32*32*32+17)
 
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N])
 #pragma acc loop worker vector
   for (unsigned ix = 0; ix < N; ix++)
     {
-      if (__builtin_acc_on_device (5))
+      if (acc_on_device (acc_device_not_host))
 	{
-	  int g = 0, w = 0, v = 0;
+	  int g, w, v;
 
-	  __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-	  __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-	  __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	  g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	  w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	  v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
 	  ary[ix] = (g << 16) | (w << 8) | v;
 	}
       else
@@ -38,7 +36,7 @@ int main ()
   
 #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
   {
-    ondev = __builtin_acc_on_device (5);
+    ondev = acc_on_device (acc_device_not_host);
     worker (ary);
   }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
index b5cbc90..8862148 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -1,9 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 #include <stdio.h>
 #include <openacc.h>
+#include <gomp-constants.h>
 
 #define NUM_WORKERS 16
 #define NUM_VECTORS 32
@@ -11,15 +8,13 @@
 #define HEIGHT 32
 
 #define WORK_ID(I,N)						\
-  (acc_on_device (acc_device_nvidia)				\
-   ? ({unsigned __r;						\
-       __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r));	\
-       __r; }) : (I % N))
+  (acc_on_device (acc_device_not_host)				\
+   ? __builtin_goacc_parlevel_id (GOMP_DIM_WORKER)				\
+   : (I % N))
 #define VEC_ID(I,N)						\
-  (acc_on_device (acc_device_nvidia)				\
-   ? ({unsigned __r;						\
-       __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r));	\
-       __r; }) : (I % N))
+  (acc_on_device (acc_device_not_host)				\
+   ? __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR)				\
+   : (I % N))
 
 #pragma acc routine worker
 void __attribute__ ((noinline))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
index 8dcb956..5130591 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
@@ -1,11 +1,8 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
-   not optimized away at -O0, and then confuses the target assembler.
-   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
 /* { dg-additional-options "-fopenacc-dim=32" } */
 
 #include <stdio.h>
 #include <openacc.h>
+#include <gomp-constants.h>
 
 static int check (const int *ary, int size, int gp, int wp, int vp)
 {
@@ -79,13 +76,13 @@ static int __attribute__((noinline)) place ()
 {
   int r = 0;
 
-  if (acc_on_device (acc_device_nvidia))
+  if (acc_on_device (acc_device_not_host))
     {
-      int g = 0, w = 0, v = 0;
+      int g, w, v;
 
-      __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
-      __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
-      __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+      g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+      w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+      v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
       r = (g << 16) | (w << 8) | v;
     }
   return r;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]