This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Bulk] [OpenACC 0/7] host_data construct


Hi,

This a re-posting of the original note incorporating the suggestions
from Joseph and Nathan (thank you).

    This patch adds the processing of OpenACC host_data construct in C
    and C++. (Note: Support in Fortran is already in trunk.) The patch
    also adds the required support in the middle-end and libgomp.

    Background
        The host data construct is used to make an address of device
        data available on the host.

        The following illustrates use of the host data construct in
        conjunction with arrays which are already device-resident
        and an accelerator-only function.

                int main(int argc, char **argv)
                {
                  float *x, *y;
                  const int n = 1024;
                  int i;

                  x = (float*) malloc (n * sizeof(float));
                  y = (float*) malloc (n * sizeof(float));

                  /* Copy the arrays out to the device. */
                  #pragma acc data create(x[0:n]) copyout(y[0:n])
                  {
                    #pragma acc parallel
                    {
                      for (i = 0; i < n; i++)
                        {
                          x[i] = 1.0f;
                          y[i] = 0.0f;
                        }
                    }

                    /*
                     * The arrays are already on the device, so
                     * pass the device addresses to saxpy. NOTE:
                     * saxpy has been previously defined as an
                     * accelerator function.
                     */
                    #pragma acc host_data use_device(x, y)
                    {
                      saxpy(n, 2.0, x, 1, y, 1);
                    }
                  }

                  fprintf(stdout, "y[0] = %f\n", y[0]);
                  return 0;
                }


    C and C++ front-ends

        Definitions for use by C and C++ were added to identify the
        host_data construct pragma and its' only valid clause: use_device.

        New functionality was added to do the parsing of the host_data
        pragma and validate the sole clause valid clause: use_device.
        As the host_data construct has associated with it a structured
        block, new functionality was added to build the compound
        statement to represent the block.

    Middle-end

        A gimple definition: GOVD_USE_DEVICE, has been added to indicate
        the use of the use_device clause. This flag is asserted as part
        of installing mappings into a omp context. The flag is subsequently
        reacted to during the gimplying of the host_data region's body.
        When this flag is encountered, an GOACC_deviceptr builtin call
        is inserted at the appropriate place.

    libgomp

        A new function has been added to handle pointer lookup for host
        data regions. As the comment in the code describes, this function
        will return the appropriate address based on whether it is called
        for the host or the target. This function is used in response to
        usage of the use_device clause.

    Tests

        New compile and runtime tests have been added.

    All of the code is in the gomp-4_0-branch.

    Regtested on x86_64-linux.

    Thanks!
    Jim

Attachment: ChangeLog
Description: Text document

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 834a916..b748e2f 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1214,6 +1214,7 @@ static const struct omp_pragma_def oacc_pragmas[] = {
   { "data", PRAGMA_OACC_DATA },
   { "enter", PRAGMA_OACC_ENTER_DATA },
   { "exit", PRAGMA_OACC_EXIT_DATA },
+  { "host_data", PRAGMA_OACC_HOST_DATA },
   { "kernels", PRAGMA_OACC_KERNELS },
   { "loop", PRAGMA_OACC_LOOP },
   { "parallel", PRAGMA_OACC_PARALLEL },
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index cec920f..23a72a3 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -31,6 +31,7 @@ enum pragma_kind {
   PRAGMA_OACC_DATA,
   PRAGMA_OACC_ENTER_DATA,
   PRAGMA_OACC_EXIT_DATA,
+  PRAGMA_OACC_HOST_DATA,
   PRAGMA_OACC_KERNELS,
   PRAGMA_OACC_LOOP,
   PRAGMA_OACC_PARALLEL,
@@ -161,6 +162,7 @@ enum pragma_omp_clause {
   PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE,
   PRAGMA_OACC_CLAUSE_SELF,
   PRAGMA_OACC_CLAUSE_SEQ,
+  PRAGMA_OACC_CLAUSE_USE_DEVICE,
   PRAGMA_OACC_CLAUSE_VECTOR,
   PRAGMA_OACC_CLAUSE_VECTOR_LENGTH,
   PRAGMA_OACC_CLAUSE_WAIT,
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 704ebc6..ead98b9 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -10116,6 +10116,8 @@ c_parser_omp_clause_name (c_parser *parser)
 	    result = PRAGMA_OMP_CLAUSE_UNTIED;
 	  else if (!strcmp ("use_device_ptr", p))
 	    result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR;
+	  else if (!strcmp ("use_device", p))
+	    result = PRAGMA_OACC_CLAUSE_USE_DEVICE;
 	  break;
 	case 'v':
 	  if (!strcmp ("vector", p))
@@ -11219,6 +11221,15 @@ c_parser_oacc_clause_async (c_parser *parser, tree list)
   return list;
 }
 
+/* OpenACC 2.0:
+   use_device ( variable-list ) */
+
+static tree
+c_parser_oacc_clause_use_device (c_parser *parser, tree list)
+{
+  return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list);
+}
+
 /* OpenACC:
    wait ( int-expr-list ) */
 
@@ -12474,6 +12485,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask,
 	  clauses = c_parser_oacc_data_clause (parser, c_kind, clauses);
 	  c_name = "self";
 	  break;
+	case PRAGMA_OACC_CLAUSE_USE_DEVICE:
+	  clauses = c_parser_oacc_clause_use_device (parser, clauses);
+	  c_name = "use_device";
+	  break;
 	case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH:
 	  clauses = c_parser_omp_clause_vector_length (parser, clauses);
 	  c_name = "vector_length";
@@ -13003,6 +13018,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter)
 
 
 /* OpenACC 2.0:
+   # pragma acc host_data oacc-data-clause[optseq] new-line
+     structured-block
+*/
+
+#define OACC_HOST_DATA_CLAUSE_MASK					\
+	( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) )
+
+static tree
+c_parser_oacc_host_data (location_t loc, c_parser *parser)
+{
+  tree stmt, clauses, block;
+
+  clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK,
+				       "#pragma acc host_data");
+
+  block = c_begin_omp_parallel ();
+  add_stmt (c_parser_omp_structured_block (parser));
+  stmt = c_finish_oacc_host_data (loc, clauses, block);
+  return stmt;
+}
+
+
+/* OpenACC 2.0:
 
    # pragma acc loop oacc-loop-clause[optseq] new-line
      structured-block
@@ -16075,6 +16113,9 @@ c_parser_omp_construct (c_parser *parser)
     case PRAGMA_OACC_DATA:
       stmt = c_parser_oacc_data (loc, parser);
       break;
+    case PRAGMA_OACC_HOST_DATA:
+      stmt = c_parser_oacc_host_data (loc, parser);
+      break;
     case PRAGMA_OACC_KERNELS:
       strcpy (p_name, "#pragma acc");
       stmt = c_parser_oacc_kernels (loc, parser, p_name);
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index bee03d3..a9c5975 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -643,6 +643,7 @@ extern tree c_expr_to_decl (tree, bool *, bool *);
 extern tree c_finish_oacc_parallel (location_t, tree, tree);
 extern tree c_finish_oacc_kernels (location_t, tree, tree);
 extern tree c_finish_oacc_data (location_t, tree, tree);
+extern tree c_finish_oacc_host_data (location_t, tree, tree);
 extern tree c_begin_omp_parallel (void);
 extern tree c_finish_omp_parallel (location_t, tree, tree);
 extern tree c_begin_omp_task (void);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index bc43602..a5e2a4a 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -11510,6 +11510,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block)
   return add_stmt (stmt);
 }
 
+/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound
+   statement.  LOC is the location of the OACC_HOST_DATA.  */
+
+tree
+c_finish_oacc_host_data (location_t loc, tree clauses, tree block)
+{
+  tree stmt;
+
+  block = c_end_compound_stmt (loc, block, true);
+
+  stmt = make_node (OACC_HOST_DATA);
+  TREE_TYPE (stmt) = void_type_node;
+  OACC_HOST_DATA_CLAUSES (stmt) = clauses;
+  OACC_HOST_DATA_BODY (stmt) = block;
+  SET_EXPR_LOCATION (stmt, loc);
+
+  return add_stmt (stmt);
+}
+
 /* Like c_begin_compound_stmt, except force the retention of the BLOCK.  */
 
 tree
@@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_USE_DEVICE:
 	  pc = &OMP_CLAUSE_CHAIN (c);
 	  continue;
 
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 16db41f..76ece42 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6318,6 +6318,7 @@ extern void finish_omp_threadprivate		(tree);
 extern tree begin_omp_structured_block		(void);
 extern tree finish_omp_structured_block		(tree);
 extern tree finish_oacc_data			(tree, tree);
+extern tree finish_oacc_host_data		(tree, tree);
 extern tree finish_oacc_kernels			(tree, tree);
 extern tree finish_oacc_parallel		(tree, tree);
 extern tree begin_omp_parallel			(void);
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index f07a5e4..714e69c 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -29235,6 +29235,8 @@ cp_parser_omp_clause_name (cp_parser *parser)
 	    result = PRAGMA_OMP_CLAUSE_UNTIED;
 	  else if (!strcmp ("use_device_ptr", p))
 	    result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR;
+	  else if (!strcmp ("use_device", p))
+	    result = PRAGMA_OACC_CLAUSE_USE_DEVICE;
 	  break;
 	case 'v':
 	  if (!strcmp ("vector_length", p))
@@ -31381,6 +31383,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask,
 	  clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses);
 	  c_name = "self";
 	  break;
+	case PRAGMA_OACC_CLAUSE_USE_DEVICE:
+	  clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE,
+					    clauses);
+	  c_name = "use_device";
+	  break;
 	case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH:
 	  clauses = cp_parser_oacc_clause_vector_length (parser, clauses);
 	  c_name = "vector_length";
@@ -34221,6 +34228,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok)
   return stmt;
 }
 
+#define OACC_HOST_DATA_CLAUSE_MASK					\
+  ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) )
+
+/* OpenACC 2.0:
+  # pragma acc host_data <clauses> new-line
+  structured-block  */
+
+static tree
+cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok)
+{
+  tree stmt, clauses, block;
+  unsigned int save;
+
+  clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK,
+					"#pragma acc host_data", pragma_tok);
+
+  block = begin_omp_parallel ();
+  save = cp_parser_begin_omp_structured_block (parser);
+  cp_parser_statement (parser, NULL_TREE, false, NULL);
+  cp_parser_end_omp_structured_block (parser, save);
+  stmt = finish_oacc_host_data (clauses, block);
+  return stmt;
+}
+
 /* OpenACC 2.0:
    # pragma acc enter data oacc-enter-data-clause[optseq] new-line
 
@@ -35288,6 +35319,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok)
     case PRAGMA_OACC_EXIT_DATA:
       stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false);
       break;
+    case PRAGMA_OACC_HOST_DATA:
+      stmt = cp_parser_oacc_host_data (parser, pragma_tok);
+      break;
     case PRAGMA_OACC_KERNELS:
       stmt = cp_parser_oacc_kernels (parser, pragma_tok);
       break;
@@ -35856,6 +35890,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context)
     case PRAGMA_OACC_DATA:
     case PRAGMA_OACC_ENTER_DATA:
     case PRAGMA_OACC_EXIT_DATA:
+    case PRAGMA_OACC_HOST_DATA:
     case PRAGMA_OACC_KERNELS:
     case PRAGMA_OACC_PARALLEL:
     case PRAGMA_OACC_LOOP:
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index c0a8b32..25482e7 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6689,6 +6689,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE__CILK_FOR_COUNT_:
+	case OMP_CLAUSE_USE_DEVICE:
 	  break;
 
 	case OMP_CLAUSE_INBRANCH:
@@ -7119,6 +7120,24 @@ finish_oacc_data (tree clauses, tree block)
   return add_stmt (stmt);
 }
 
+/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound
+   statement.  */
+
+tree
+finish_oacc_host_data (tree clauses, tree block)
+{
+  tree stmt;
+
+  block = finish_omp_structured_block (block);
+
+  stmt = make_node (OACC_HOST_DATA);
+  TREE_TYPE (stmt) = void_type_node;
+  OACC_HOST_DATA_CLAUSES (stmt) = clauses;
+  OACC_HOST_DATA_BODY (stmt) = block;
+
+  return add_stmt (stmt);
+}
+
 /* Generate OACC_KERNELS, with CLAUSES and BLOCK as its compound
    statement.  LOC is the location of the OACC_KERNELS.  */
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index ab9e540..0c32219 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -93,6 +93,8 @@ enum gimplify_omp_var_data
 
   GOVD_MAP_0LEN_ARRAY = 32768,
 
+  GOVD_USE_DEVICE = 65536,
+
   GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
 			   | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
 			   | GOVD_LOCAL)
@@ -116,7 +118,9 @@ enum omp_region_type
   ORT_COMBINED_TARGET = 33,
   /* Dummy OpenMP region, used to disable expansion of
      DECL_VALUE_EXPRs in taskloop pre body.  */
-  ORT_NONE = 64
+  ORT_NONE = 64,
+  /* An OpenACC host-data region.  */
+  ORT_HOST_DATA = 128
 };
 
 /* Gimplify hashtable helper.  */
@@ -6338,6 +6342,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		decl = TREE_OPERAND (decl, 0);
 	    }
 	  goto do_add_decl;
+	case OMP_CLAUSE_USE_DEVICE:
+	  flags = GOVD_USE_DEVICE | GOVD_EXPLICIT;
+	  check_non_private = "use_device";
+	  goto do_add;
 	case OMP_CLAUSE_LINEAR:
 	  if (gimplify_expr (&OMP_CLAUSE_LINEAR_STEP (c), pre_p, NULL,
 			     is_gimple_val, fb_rvalue) == GS_ERROR)
@@ -7005,7 +7013,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
-	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_INDEPENDENT:
 	  remove = true;
 	  break;
@@ -7529,6 +7536,127 @@ gimplify_oacc_cache (tree *expr_p, gimple_seq *pre_p)
   *expr_p = NULL_TREE;
 }
 
+static tree
+gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees,
+			   void *data ATTRIBUTE_UNUSED)
+{
+  splay_tree_node n = NULL;
+  location_t loc = EXPR_LOCATION (*tp);
+
+  switch (TREE_CODE (*tp))
+    {
+    case ADDR_EXPR:
+      {
+	tree decl = TREE_OPERAND (*tp, 0);
+
+	switch (TREE_CODE (decl))
+	  {
+	  case ARRAY_REF:
+	  case ARRAY_RANGE_REF:
+	  case COMPONENT_REF:
+	  case VIEW_CONVERT_EXPR:
+	  case REALPART_EXPR:
+	  case IMAGPART_EXPR:
+	    if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL)
+	      n = splay_tree_lookup (gimplify_omp_ctxp->variables,
+				     (splay_tree_key) TREE_OPERAND (decl, 0));
+	    break;
+
+	  case VAR_DECL:
+	    n = splay_tree_lookup (gimplify_omp_ctxp->variables,
+				   (splay_tree_key) decl);
+	    break;
+
+	  default:
+	    ;
+	  }
+
+	if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0)
+	  {
+	    tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR);
+	    *tp = build_call_expr_loc (loc, t, 1, *tp);
+	  }
+
+	*walk_subtrees = 0;
+      }
+      break;
+
+    case VAR_DECL:
+      {
+	tree decl = *tp;
+
+	n = splay_tree_lookup (gimplify_omp_ctxp->variables,
+			       (splay_tree_key) decl);
+
+	if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0)
+	  {
+	    if (!POINTER_TYPE_P (TREE_TYPE (decl)))
+	      return decl;
+
+	    tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR);
+	    *tp = build_call_expr_loc (loc, t, 1, *tp);
+	    *walk_subtrees = 0;
+	  }
+      }
+      break;
+
+    case OACC_PARALLEL:
+    case OACC_KERNELS:
+    case OACC_LOOP:
+      *walk_subtrees = 0;
+      break;
+
+    default:
+      ;
+    }
+
+  return NULL_TREE;
+}
+
+static enum gimplify_status
+gimplify_oacc_host_data (tree *expr_p, gimple_seq *pre_p)
+{
+  tree expr = *expr_p, orig_body;
+  gimple_seq body = NULL;
+
+  gimplify_scan_omp_clauses (&OACC_HOST_DATA_CLAUSES (expr), pre_p,
+			     ORT_HOST_DATA, OACC_HOST_DATA);
+
+  orig_body = OACC_HOST_DATA_BODY (expr);
+
+  /* Perform a pre-pass over the host_data region's body, inserting calls to
+     GOACC_deviceptr where appropriate.  */
+
+  tree ret = walk_tree_without_duplicates (&orig_body,
+					   &gimplify_oacc_host_data_1, 0);
+
+  if (ret)
+    {
+      error_at (EXPR_LOCATION (expr),
+		"undefined use of variable %qE in host_data region",
+		DECL_NAME (ret));
+      gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr),
+				   OACC_HOST_DATA);
+      return GS_ERROR;
+    }
+
+  push_gimplify_context ();
+
+  gimple *g = gimplify_and_return_first (orig_body, &body);
+
+  if (gimple_code (g) == GIMPLE_BIND)
+    pop_gimplify_context (g);
+  else
+    pop_gimplify_context (NULL);
+
+  gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr),
+			       OACC_HOST_DATA);
+
+  gimplify_seq_add_stmt (pre_p, g);
+
+  return GS_ALL_DONE;
+}
+
 /* Gimplify the contents of an OMP_PARALLEL statement.  This involves
    gimplification of the body, as well as scanning the body for used
    variables.  We need to do this scan now, because variable-sized
@@ -9595,6 +9723,9 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  break;
 
 	case OACC_HOST_DATA:
+	  ret = gimplify_oacc_host_data (expr_p, pre_p);
+	  break;
+
 	case OACC_DECLARE:
 	  sorry ("directive not yet implemented");
 	  ret = GS_ALL_DONE;
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index ea9cf0d..9ed075f 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
 		   BT_FN_VOID_INT_INT_VAR,
 		   ATTR_NOTHROW_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr",
+		   BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, "GOACC_get_thread_num",
 		   BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, "GOACC_get_num_threads",
diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-1.c b/gcc/testsuite/c-c++-common/goacc/host_data-1.c
new file mode 100644
index 0000000..521c854
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/host_data-1.c
@@ -0,0 +1,13 @@
+/* Test valid use of host_data directive.  */
+/* { dg-do compile } */
+
+int v0;
+int v1[3][3];
+
+void
+f (void)
+{
+  int v2 = 3;
+#pragma acc host_data use_device(v2, v0, v1)
+  ;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-2.c b/gcc/testsuite/c-c++-common/goacc/host_data-2.c
new file mode 100644
index 0000000..e5213a0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/host_data-2.c
@@ -0,0 +1,13 @@
+/* Test invalid use of host_data directive.  */
+/* { dg-do compile } */
+
+int v0;
+#pragma acc host_data use_device(v0) /* { dg-error "expected" } */
+
+void
+f (void)
+{
+  int v2 = 3;
+#pragma acc host_data copy(v2) /* { dg-error "not valid for" } */
+  ;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-3.c b/gcc/testsuite/c-c++-common/goacc/host_data-3.c
new file mode 100644
index 0000000..f9621c9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/host_data-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+int main (int argc, char* argv[])
+{
+  int x = 5, y;
+
+  #pragma acc enter data copyin (x)
+  /* It's not clear what attempts to use non-pointer variables "directly"
+     (rather than merely taking their address) should do in host_data regions. 
+     We choose to make it an error.  */
+  #pragma acc host_data use_device (x) /* TODO { dg-error "" } */
+  {
+    y = x;
+  }
+  #pragma acc exit data delete (x)
+
+  return y - 5;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-4.c b/gcc/testsuite/c-c++-common/goacc/host_data-4.c
new file mode 100644
index 0000000..3dac5f3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/host_data-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+
+int main (int argc, char* argv[])
+{
+  int x[100];
+
+  #pragma acc enter data copyin (x)
+  /* Specifying an array index is not valid for host_data/use_device.  */
+  #pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */
+    ;
+  #pragma acc exit data delete (x)
+
+  return 0;
+}
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2153661..2a43a8c 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -378,6 +378,7 @@ GOACC_2.0 {
 	GOACC_wait;
 	GOACC_get_thread_num;
 	GOACC_get_num_threads;
+	GOACC_deviceptr;
 };
 
 GOACC_2.0.1 {
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index af067d6..497ab92 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -204,6 +204,38 @@ acc_deviceptr (void *h)
   return d;
 }
 
+/* This function is used as a helper in generated code to implement pointer
+   lookup in host_data regions.  Unlike acc_deviceptr, it returns its argument
+   unchanged on a shared-memory system (e.g. the host).  */
+
+void *
+GOACC_deviceptr (void *h)
+{
+  splay_tree_key n;
+  void *d;
+  void *offset;
+
+  goacc_lazy_initialize ();
+
+  struct goacc_thread *thr = goacc_thread ();
+
+  if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0)
+    {
+      n = lookup_host (thr->dev, h, 1);
+
+      if (!n)
+	return NULL;
+
+      offset = h - n->host_start;
+
+      d = n->tgt->tgt_start + n->tgt_offset + offset;
+
+      return d;
+    }
+  else
+    return h;
+}
+
 /* Return the host pointer that corresponds to device data D.  Or NULL
    if no mapping.  */
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
new file mode 100644
index 0000000..15ccb27
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
@@ -0,0 +1,125 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+
+void
+saxpy_host (int n, float a, float *x, float *y)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    y[i] = y[i] + a * x[i];
+}
+
+#pragma acc routine
+void
+saxpy_target (int n, float a, float *x, float *y)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    y[i] = y[i] + a * x[i];
+}
+
+int
+main(int argc, char **argv)
+{
+  const int N = 8;
+  int i;
+  float *x_ref, *y_ref;
+  float *x, *y;
+  cublasHandle_t h;
+  float a = 2.0;
+
+  x_ref = (float*) malloc (N * sizeof(float));
+  y_ref = (float*) malloc (N * sizeof(float));
+
+  x = (float*) malloc (N * sizeof(float));
+  y = (float*) malloc (N * sizeof(float));
+
+#pragma acc data copyin (x[0:N]) copy (y[0:N])
+  {
+    float *xp, *yp;
+#pragma acc host_data use_device (x, y)
+    {
+#pragma acc parallel pcopy (xp, yp) present (x, y)
+      {
+        xp = x;
+	yp = y;
+      }
+    }
+
+    if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y))
+	abort ();
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      x[i] = x_ref[i] = 4.0 + i;
+      y[i] = y_ref[i] = 3.0;
+    }
+
+  saxpy_host (N, a, x_ref, y_ref);
+
+  cublasCreate (&h);
+
+#pragma acc data copyin (x[0:N]) copy (y[0:N])
+  {
+#pragma acc host_data use_device (x, y)
+    {
+      cublasSaxpy (h, N, &a, x, 1, y, 1);
+    }
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (y[i] != y_ref[i])
+        abort ();
+    }
+
+#pragma acc data create (x[0:N]) copyout (y[0:N])
+  {
+#pragma acc kernels
+    for (i = 0; i < N; i++)
+      y[i] = 3.0;
+
+#pragma acc host_data use_device (x, y)
+    {
+      cublasSaxpy (h, N, &a, x, 1, y, 1);
+    }
+  }
+
+  cublasDestroy (h);
+
+  for (i = 0; i < N; i++)
+    {
+      if (y[i] != y_ref[i])
+        abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    y[i] = 3.0;
+
+#pragma acc data copyin (x[0:N]) copyin (a, N) copy (y[0:N])
+  {
+#pragma acc host_data use_device (x, y)
+    {
+#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a, N)
+      saxpy_target (N, a, x, y);
+    }
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (y[i] != y_ref[i])
+        abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c
new file mode 100644
index 0000000..511ec64
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+struct by_lightning {
+  int a;
+  int b;
+  int c;
+};
+
+int main (int argc, char* argv[])
+{
+  int x;
+  void *q = NULL, *r = NULL, *p = NULL, *s = NULL, *t = NULL;
+  long u;
+  struct by_lightning on_the_head = {1, 2, 3};
+  int arr[10], *f = NULL;
+  _Complex float cf;
+  #pragma acc enter data copyin (x, arr, on_the_head, cf)
+  #pragma acc host_data use_device (x, arr, on_the_head, cf)
+  {
+    q = &x;
+    {
+      f = &arr[5];
+      r = f;
+      s = &__real__ cf;
+      t = &on_the_head.c;
+      u = (long) &__imag__ cf;
+      #pragma acc parallel copyout(p) present (x, arr, on_the_head, cf)
+      {
+        /* This will not (and must not) call GOACC_deviceptr, but '&x' will be
+	   the address on the device (if appropriate) regardless.  */
+	p = &x;
+      }
+    }
+  }
+  #pragma acc exit data delete (x)
+
+#if ACC_MEM_SHARED
+  if (q != &x || f != &arr[5] || r != f || s != &(__real__ cf)
+      || t != &on_the_head.c || u != (long) &(__imag__ cf) || p != &x)
+    abort ();
+#else
+  if (q == &x || f == &arr[5] || r != f || s == &(__real__ cf)
+      || t == &on_the_head.c || u == (long) &(__imag__ cf) || p == &x)
+    abort ();
+#endif
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]