This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] internal fns for id & nid


I've committed this to gomp4 branch. It replaces the regular builtins __builtin_GOACC_nid/__builtin_GOACC_id with internal functions IFN_OACC_DIM_SIZE and IFN_OACC_DIM_POS -- moving further away from the PTX-specific naming of id & nid. These functions should never turn into library calls or be accessible by the user.

A later patch will optimize the OACC_DIM_SIZE function in the oacc-xform pass.

nathan
2015-08-03  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* internal-fn.def (GOACC_DIM_SIZE, GOACC_DFIM_POS): New.
	* internal-fn.c (expand_GOACC_DIM_SIZE, expand_GOACC_DIM_POS): New.
	* config/nvptx.md (UNSPEC_NID, UNSPEC_ID): Rename to ...
	(UNSPEC_DIM_SIZE, UNSPEC_DIM_POS): ... here.
	(oacc_nid, oacc_id): Rename to ...
	(oacc_dim_size, oacc_dim_pos): ... here. Adjust.
	* config/nvptx.c (nvptx_single): Adjust.
	* omp-low.c (expand_oacc_get_num_threads,
	expand_oacc_get_thread_num, oacc_init_count_vars): Use new
	internal builtins.
	* omp-builtins.def (BUILT_IN_GOACC_ID, BUILT_IN_GOACC_NID): Delete.
	* builtins.c (expand_oacc_id): Delete.
	(expand_builtin, is_simpe_biltin): Adjust.

	libgomp/
	* testuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use asm insert.

Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def	(revision 226515)
+++ gcc/internal-fn.def	(working copy)
@@ -66,3 +66,5 @@ DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | E
 DEF_INTERNAL_FN (GOACC_DATA_END_WITH_ARG, ECF_NOTHROW, ".r")
 DEF_INTERNAL_FN (GOACC_FORK, ECF_NOTHROW | ECF_LEAF, ".")
 DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW | ECF_LEAF, ".")
+DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
+DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_NOTHROW | ECF_LEAF, ".")
Index: gcc/omp-builtins.def
===================================================================
--- gcc/omp-builtins.def	(revision 226515)
+++ gcc/omp-builtins.def	(working copy)
@@ -58,10 +58,6 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
 		   BT_FN_VOID_INT_INT_VAR,
 		   ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ID, "GOACC_id",
-		   BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NID, "GOACC_nid",
-		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, "GOACC_get_ganglocal_ptr",
 		   BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr",
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 226515)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -49,7 +49,7 @@
 
    UNSPEC_ALLOCA
 
-   UNSPEC_NID
+   UNSPEC_DIM_SIZE
 
    UNSPEC_SHARED_DATA
 
@@ -65,7 +65,7 @@
    UNSPECV_CAS
    UNSPECV_XCHG
    UNSPECV_BARSYNC
-   UNSPECV_ID
+   UNSPECV_DIM_POS
 
    UNSPECV_FORK
    UNSPECV_FORKED
@@ -1335,9 +1335,10 @@
   DONE;
 })
 
-(define_insn "oacc_nid"
+(define_insn "oacc_dim_size"
   [(set (match_operand:SI 0 "nvptx_register_operand" "")
-	(unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NID))]
+	(unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
+		   UNSPEC_DIM_SIZE))]
   ""
 {
   static const char *const asms[] =
@@ -1349,10 +1350,10 @@
   return asms[INTVAL (operands[1])];
 })
 
-(define_insn "oacc_id"
+(define_insn "oacc_dim_pos"
   [(set (match_operand:SI 0 "nvptx_register_operand" "")
 	(unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
-			UNSPECV_ID))]
+			    UNSPECV_DIM_POS))]
   ""
 {
   static const char *const asms[] =
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 226515)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -2771,7 +2771,7 @@ nvptx_single (unsigned mask, basic_block
 	rtx pred = gen_reg_rtx (BImode);
 	rtx_code_label *label = gen_label_rtx ();
 
-	emit_insn_before (gen_oacc_id (id, GEN_INT (mode)), head);
+	emit_insn_before (gen_oacc_dim_pos (id, GEN_INT (mode)), head);
 	rtx cond = gen_rtx_SET (pred, gen_rtx_NE (BImode, id, const0_rtx));
 	emit_insn_before (cond, head);
 	rtx br;
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c	(revision 226515)
+++ gcc/internal-fn.c	(working copy)
@@ -1984,6 +1984,42 @@ expand_GOACC_JOIN (gcall *stmt ATTRIBUTE
 #endif
 }
 
+static void
+expand_GOACC_DIM_SIZE (gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+
+  if (!lhs)
+    return;
+  
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx val = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
+			 VOIDmode, EXPAND_NORMAL);
+#ifdef HAVE_oacc_dim_size
+  emit_insn (gen_oacc_dim_size (target, val));
+#else
+  emit_move_insn (target, const1_rtx);
+#endif
+}
+
+static void
+expand_GOACC_DIM_POS (gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+
+  if (!lhs)
+    return;
+  
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx val = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
+			 VOIDmode, EXPAND_NORMAL);
+#ifdef HAVE_oacc_dim_pos
+  emit_insn (gen_oacc_dim_pos (target, val));
+#else
+  emit_move_insn (target, const0_rtx);
+#endif
+}
+
 /* Routines to expand each internal function, indexed by function number.
    Each routine has the prototype:
 
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 226515)
+++ gcc/builtins.c	(working copy)
@@ -5921,59 +5921,6 @@ expand_builtin_acc_on_device (tree exp,
   return target;
 }
 
-/* Expand a thread-id/thread-count builtin for OpenACC.  */
-
-static rtx
-expand_oacc_id (enum built_in_function fcode, tree exp, rtx target)
-{
-  tree arg0 = CALL_EXPR_ARG (exp, 0);
-  rtx result = const0_rtx;
-  rtx arg;
-
-  arg = expand_normal (arg0);
-
-  if (GET_CODE (arg) != CONST_INT || UINTVAL (arg) >= GOMP_DIM_MAX)
-    {
-      error ("argument to %D must be constant in range 0 to %d",
-	     get_callee_fndecl (exp), GOMP_DIM_MAX - 1);
-      return result;
-    }
-
-  enum insn_code icode = CODE_FOR_nothing;
-  switch (fcode)
-    {
-    case BUILT_IN_GOACC_NID:
-#ifdef HAVE_oacc_nid
-      icode = CODE_FOR_oacc_nid;
-#endif
-      result = const1_rtx;
-      break;
-    case BUILT_IN_GOACC_ID:
-#ifdef HAVE_oacc_id
-      icode = CODE_FOR_oacc_id;
-#endif
-      break;
-    default:
-      gcc_unreachable ();
-      break;
-    }
-
-  if (icode != CODE_FOR_nothing)
-    {
-      machine_mode mode = insn_data[icode].operand[0].mode;
-      rtx tmp = target;
-      if (!REG_P (tmp) || GET_MODE (tmp) != mode)
-	tmp = gen_reg_rtx (mode);
-      rtx insn = GEN_FCN (icode) (tmp, arg);
-      if (insn != NULL_RTX)
-	{
-	  emit_insn (insn);
-	  return tmp;
-	}
-    }
-  return result;
-}
-
 static rtx
 expand_oacc_ganglocal_ptr (rtx target ATTRIBUTE_UNUSED)
 {
@@ -7135,10 +7082,6 @@ expand_builtin (tree exp, rtx target, rt
 	return target;
       break;
 
-    case BUILT_IN_GOACC_ID:
-    case BUILT_IN_GOACC_NID:
-      return expand_oacc_id (fcode, exp, target);
-
     case BUILT_IN_GOACC_GET_GANGLOCAL_PTR:
       target = expand_oacc_ganglocal_ptr (target);
       if (target)
@@ -12497,8 +12440,6 @@ is_simple_builtin (tree decl)
       case BUILT_IN_EH_FILTER:
       case BUILT_IN_EH_POINTER:
       case BUILT_IN_EH_COPY_VALUES:
-	/* Just a special register read.  */
-      case BUILT_IN_GOACC_NID:
 	return true;
 
       default:
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 226515)
+++ gcc/omp-low.c	(working copy)
@@ -4676,7 +4676,6 @@ static tree
 expand_oacc_get_num_threads (gimple_seq *seq, int gwv_bits)
 {
   tree res = build_int_cst (unsigned_type_node, 1);
-  tree  decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
   unsigned ix;
 
   for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
@@ -4684,7 +4683,7 @@ expand_oacc_get_num_threads (gimple_seq
       {
 	tree arg = build_int_cst (unsigned_type_node, ix);
 	tree count = create_tmp_var (unsigned_type_node);
-	gimple call = gimple_build_call (decl, 1, arg);
+	gimple call = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
 	
 	gimple_call_set_lhs (call, count);
 	gimple_seq_add_stmt (seq, call);
@@ -4702,8 +4701,6 @@ static tree
 expand_oacc_get_thread_num (gimple_seq *seq, int gwv_bits)
 {
   tree res = NULL_TREE;
-  tree id_decl = builtin_decl_explicit (BUILT_IN_GOACC_ID);
-  tree nid_decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
   unsigned ix;
 
   /* Start at gang level, and examine relevant dimension indices.  */
@@ -4717,7 +4714,8 @@ expand_oacc_get_thread_num (gimple_seq *
 	    /* We had an outer index, so scale that by the size of
 	       this dimension.  */
 	    tree n = create_tmp_var (unsigned_type_node);
-	    gimple call = gimple_build_call (nid_decl, 1, arg);
+	    gimple call
+	      = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
 	    
 	    gimple_call_set_lhs (call, n);
 	    gimple_seq_add_stmt (seq, call);
@@ -4726,7 +4724,7 @@ expand_oacc_get_thread_num (gimple_seq *
 
 	/* Determine index in this dimension.  */
 	tree id = create_tmp_var (unsigned_type_node);
-	gimple call = gimple_build_call (id_decl, 1, arg);
+	gimple call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
 	
 	gimple_call_set_lhs (call, id);
 	gimple_seq_add_stmt (seq, call);
@@ -11671,8 +11669,6 @@ lower_omp_taskreg (gimple_stmt_iterator
 static void
 oacc_init_count_vars (omp_context *ctx, tree clauses ATTRIBUTE_UNUSED)
 {
-  tree getid = builtin_decl_explicit (BUILT_IN_GOACC_ID);
-  tree getnid = builtin_decl_explicit (BUILT_IN_GOACC_NID);
   tree worker_var, worker_count;
   
   if (ctx->gwv_this & GOMP_DIM_MASK (GOMP_DIM_WORKER))
@@ -11682,11 +11678,11 @@ oacc_init_count_vars (omp_context *ctx,
       worker_var = create_tmp_var (unsigned_type_node, ".worker");
       worker_count = create_tmp_var (unsigned_type_node, ".workercount");
       
-      gimple call1 = gimple_build_call (getid, 1, arg);
+      gimple call1 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
       gimple_call_set_lhs (call1, worker_var);
       gimple_seq_add_stmt (&ctx->ganglocal_init, call1);
 
-      gimple call2 = gimple_build_call (getnid, 1, arg);
+      gimple call2 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
       gimple_call_set_lhs (call2, worker_count);
       gimple_seq_add_stmt (&ctx->ganglocal_init, call2);
     }
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c	(revision 226515)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c	(working copy)
@@ -1,9 +1,17 @@
 /* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-O2" } */
 
 #include <assert.h>
+#include <openacc.h>
 
 #define N 100
 
+#define GANG_ID(I)						\
+  (acc_on_device (acc_device_nvidia)				\
+   ? ({unsigned __r;						\
+       __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r));	\
+       __r; }) : (I))
+
 int
 test_static(int *a, int num_gangs, int sarg)
 {
@@ -35,38 +43,38 @@ main ()
 
 #pragma acc parallel loop gang (static:*) num_gangs (10)
   for (i = 0; i < 100; i++)
-    a[i] = __builtin_GOACC_id (0);
+    a[i] = GANG_ID (i);
 
   test_nonstatic (a, 10);
 
 #pragma acc parallel loop gang (static:1) num_gangs (10)
   for (i = 0; i < 100; i++)
-    a[i] = __builtin_GOACC_id (0);
+    a[i] = GANG_ID (i);
 
   test_static (a, 10, 1);
 
 #pragma acc parallel loop gang (static:2) num_gangs (10)
   for (i = 0; i < 100; i++)
-    a[i] = __builtin_GOACC_id (0);
+    a[i] = GANG_ID (i);
 
   test_static (a, 10, 2);
 
 #pragma acc parallel loop gang (static:5) num_gangs (10)
   for (i = 0; i < 100; i++)
-    a[i] = __builtin_GOACC_id (0);
+    a[i] = GANG_ID (i);
 
   test_static (a, 10, 5);
 
 #pragma acc parallel loop gang (static:20) num_gangs (10)
   for (i = 0; i < 100; i++)
-    a[i] = __builtin_GOACC_id (0);
+    a[i] = GANG_ID (i);
 
   test_static (a, 10, 20);
 
   /* Non-static gang.  */
 #pragma acc parallel loop gang num_gangs (10)
   for (i = 0; i < 100; i++)
-    a[i] = __builtin_GOACC_id (0);
+    a[i] = GANG_ID (i);
 
   test_nonstatic (a, 10);
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]