This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Next set of OpenACC changes: middle end, libgomp


Hi!

On Tue, 05 May 2015 10:54:02 +0200, I wrote:
> In follow-up messages, I'll be posting the separated parts (for easier
> review) of a next set of OpenACC changes that we'd like to commit.
> ChangeLog updates not yet written; will do that before commit, obviously.

 gcc/gimplify.c                                     |   16 +-
 gcc/omp-low.c                                      |   11 +-
 gcc/tree-core.h                                    |   14 +-
 gcc/tree-pretty-print.c                            |    6 +
 gcc/tree.c                                         |   13 +-
 gcc/tree.h                                         |   21 +-
 include/gomp-constants.h                           |    4 +
 libgomp/oacc-mem.c                                 |    3 +
 libgomp/oacc-ptx.h                                 |   28 +
 libgomp/plugin/plugin-nvptx.c                      |   10 +

diff --git gcc/gimplify.c gcc/gimplify.c
index bda62ce..12efdc8 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -6385,6 +6385,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	case OMP_CLAUSE_MERGEABLE:
 	case OMP_CLAUSE_PROC_BIND:
 	case OMP_CLAUSE_SAFELEN:
+	case OMP_CLAUSE_TILE:
 	  break;
 
 	case OMP_CLAUSE_ALIGNED:
@@ -6770,6 +6771,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p)
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_TILE:
 	  break;
 
 	default:
@@ -8410,21 +8412,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  break;
 
 	case OACC_KERNELS:
-	  if (OACC_KERNELS_COMBINED (*expr_p))
-	    sorry ("directive not yet implemented");
-	  else
-	    gimplify_omp_workshare (expr_p, pre_p);
-	  ret = GS_ALL_DONE;
-	  break;
-
 	case OACC_PARALLEL:
-	  if (OACC_PARALLEL_COMBINED (*expr_p))
-	    sorry ("directive not yet implemented");
-	  else
-	    gimplify_omp_workshare (expr_p, pre_p);
-	  ret = GS_ALL_DONE;
-	  break;
-
 	case OACC_DATA:
 	case OMP_SECTIONS:
 	case OMP_SINGLE:
diff --git gcc/omp-low.c gcc/omp-low.c
index 34e2e5c..6ec5145 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -1928,6 +1928,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_BIND:
+	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	  sorry ("Clause not supported yet");
 	  break;
 
@@ -2055,6 +2058,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_BIND:
+	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	  sorry ("Clause not supported yet");
 	  break;
 
@@ -2742,7 +2748,10 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
     {
       for (omp_context *ctx_ = ctx; ctx_ != NULL; ctx_ = ctx_->outer)
 	if (is_gimple_omp (ctx_->stmt)
-	    && is_gimple_omp_oacc (ctx_->stmt))
+	    && is_gimple_omp_oacc (ctx_->stmt)
+	    /* Except for atomic codes that we share with OpenMP.  */
+	    && ! (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD
+		  || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE))
 	  {
 	    error_at (gimple_location (stmt),
 		      "non-OpenACC construct inside of OpenACC region");
diff --git gcc/tree-core.h gcc/tree-core.h
index ad1bb23..ffbccda 100644
--- gcc/tree-core.h
+++ gcc/tree-core.h
@@ -390,7 +390,19 @@ enum omp_clause_code {
   OMP_CLAUSE_NUM_WORKERS,
 
   /* OpenACC clause: vector_length (integer-expression).  */
-  OMP_CLAUSE_VECTOR_LENGTH
+  OMP_CLAUSE_VECTOR_LENGTH,
+
+  /* OpenACC clause: bind ( identifier | string ).  */
+  OMP_CLAUSE_BIND,
+
+  /* OpenACC clause: nohost.  */
+  OMP_CLAUSE_NOHOST,
+
+  /* OpenACC clause: tile ( size-expr-list ).  */
+  OMP_CLAUSE_TILE,
+
+  /* OpenACC clause: device_type ( device-type-list ).  */
+  OMP_CLAUSE_DEVICE_TYPE
 };
 
 #undef DEFTREESTRUCT
diff --git gcc/tree-pretty-print.c gcc/tree-pretty-print.c
index d7c049f..5eb4daf 100644
--- gcc/tree-pretty-print.c
+++ gcc/tree-pretty-print.c
@@ -799,6 +799,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags)
     case OMP_CLAUSE_INDEPENDENT:
       pp_string (pp, "independent");
       break;
+    case OMP_CLAUSE_TILE:
+      pp_string (pp, "tile(");
+      dump_generic_node (pp, OMP_CLAUSE_TILE_LIST (clause),
+			 spc, flags, false);
+      pp_right_paren (pp);
+      break;
 
     default:
       /* Should never happen.  */
diff --git gcc/tree.c gcc/tree.c
index daf0292..43f80b7 100644
--- gcc/tree.c
+++ gcc/tree.c
@@ -369,6 +369,10 @@ unsigned const char omp_clause_num_ops[] =
   1, /* OMP_CLAUSE_NUM_GANGS  */
   1, /* OMP_CLAUSE_NUM_WORKERS  */
   1, /* OMP_CLAUSE_VECTOR_LENGTH  */
+  1, /* OMP_CLAUSE_BIND  */
+  0, /* OMP_CLAUSE_NOHOST  */
+  1, /* OMP_CLAUSE_TILE */
+  2  /* OMP_CLAUSE_DEVICE_TYPE */
 };
 
 const char * const omp_clause_code_name[] =
@@ -427,7 +431,11 @@ const char * const omp_clause_code_name[] =
   "vector",
   "num_gangs",
   "num_workers",
-  "vector_length"
+  "vector_length",
+  "bind",
+  "nohost",
+  "tile",
+  "device_type"
 };
 
 
@@ -11237,6 +11245,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
 	case OMP_CLAUSE__LOOPTEMP_:
 	case OMP_CLAUSE__SIMDUID_:
 	case OMP_CLAUSE__CILK_FOR_COUNT_:
+	case OMP_CLAUSE_BIND:
 	  WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
 	  /* FALLTHRU */
 
@@ -11255,6 +11264,8 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
 	case OMP_CLAUSE_TASKGROUP:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	  WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
 
 	case OMP_CLAUSE_LASTPRIVATE:
diff --git gcc/tree.h gcc/tree.h
index e17bd9b..55c5a6d 100644
--- gcc/tree.h
+++ gcc/tree.h
@@ -1312,15 +1312,6 @@ extern void protected_set_expr_location (tree, location_t);
 #define OMP_SECTION_LAST(NODE) \
   (OMP_SECTION_CHECK (NODE)->base.private_flag)
 
-/* True on an OACC_KERNELS statement if is represents combined kernels loop
-   directive.  */
-#define OACC_KERNELS_COMBINED(NODE) \
-  (OACC_KERNELS_CHECK (NODE)->base.private_flag)
-
-/* Like OACC_KERNELS_COMBINED, but for parallel loop directive.  */
-#define OACC_PARALLEL_COMBINED(NODE) \
-  (OACC_PARALLEL_CHECK (NODE)->base.private_flag)
-
 /* True on an OMP_PARALLEL statement if it represents an explicit
    combined parallel work-sharing constructs.  */
 #define OMP_PARALLEL_COMBINED(NODE) \
@@ -1391,6 +1382,9 @@ extern void protected_set_expr_location (tree, location_t);
 #define OMP_CLAUSE_VECTOR_LENGTH_EXPR(NODE) \
   OMP_CLAUSE_OPERAND ( \
     OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_VECTOR_LENGTH), 0)
+#define OMP_CLAUSE_BIND_NAME(NODE) \
+  OMP_CLAUSE_OPERAND ( \
+    OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_BIND), 0)
 
 #define OMP_CLAUSE_DEPEND_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEPEND)->omp_clause.subcode.depend_kind)
@@ -1495,6 +1489,15 @@ extern void protected_set_expr_location (tree, location_t);
 #define OMP_CLAUSE_DEFAULT_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEFAULT)->omp_clause.subcode.default_kind)
 
+#define OMP_CLAUSE_TILE_LIST(NODE) \
+  OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0)
+
+#define OMP_CLAUSE_DEVICE_TYPE_DEVICES(NODE) \
+  OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE), 0)
+
+#define OMP_CLAUSE_DEVICE_TYPE_CLAUSES(NODE) \
+  OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE), 1)
+
 /* SSA_NAME accessors.  */
 
 /* Returns the IDENTIFIER_NODE giving the SSA name a name or NULL_TREE
diff --git include/gomp-constants.h include/gomp-constants.h
index e3d2820..45370b8 100644
--- include/gomp-constants.h
+++ include/gomp-constants.h
@@ -70,6 +70,10 @@ enum gomp_map_kind
     /* Is a device pointer.  OMP_CLAUSE_SIZE for these is unused; is implicitly
        POINTER_SIZE_UNITS.  */
     GOMP_MAP_FORCE_DEVICEPTR =		(GOMP_MAP_FLAG_SPECIAL_1 | 0),
+    /* OpenACC device_resident.  */
+    GOMP_MAP_DEVICE_RESIDENT =		(GOMP_MAP_FLAG_SPECIAL_1 | 1),
+    /* OpenACC link.  */
+    GOMP_MAP_LINK =			(GOMP_MAP_FLAG_SPECIAL_1 | 2),
     /* Allocate.  */
     GOMP_MAP_FORCE_ALLOC =		(GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC),
     /* ..., and copy to device.  */
diff --git libgomp/oacc-mem.c libgomp/oacc-mem.c
index 89ef5fc..0164b3d 100644
--- libgomp/oacc-mem.c
+++ libgomp/oacc-mem.c
@@ -479,6 +479,9 @@ update_dev_host (int is_dev, void *h, size_t s)
 {
   splay_tree_key n;
   void *d;
+
+  goacc_lazy_initialize ();
+
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
diff --git libgomp/oacc-ptx.h libgomp/oacc-ptx.h
index 2419a46..104f297 100644
--- libgomp/oacc-ptx.h
+++ libgomp/oacc-ptx.h
@@ -424,3 +424,31 @@
   "st.param.u32 [%out_retval],%retval;\n"				\
   "ret;\n"								\
   "}\n"
+
+ #define GOMP_ATOMIC_PTX \
+  ".version 3.1\n" \
+  ".target sm_30\n" \
+  ".address_size 64\n" \
+  ".global .align 4 .u32 libgomp_ptx_lock;\n" \
+  ".visible .func GOMP_atomic_start;\n" \
+  ".visible .func GOMP_atomic_start\n" \
+  "{\n" \
+  "  .reg .pred    %p<2>;\n" \
+  "  .reg .s32     %r<2>;\n" \
+  "  .reg .s64     %rd<2>;\n" \
+  "BB5_1:\n" \
+  "  mov.u64       %rd1, libgomp_ptx_lock;\n" \
+  "  atom.global.cas.b32   %r1, [%rd1], 0, 1;\n" \
+  "  setp.ne.s32   %p1, %r1, 0;\n" \
+  "  @%p1 bra      BB5_1;\n" \
+  "  ret;\n" \
+  "}\n" \
+  ".visible .func GOMP_atomic_end;\n" \
+  ".visible .func GOMP_atomic_end\n" \
+  "{\n" \
+  "  .reg .s32     %r<2>;\n" \
+  "  .reg .s64     %rd<2>;\n" \
+  "  mov.u64       %rd1, libgomp_ptx_lock;\n" \
+  "  atom.global.exch.b32  %r1, [%rd1], 0;\n" \
+  "  ret;\n" \
+  "}\n"
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index 583ec87..ad1163d 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -863,6 +863,16 @@ link_ptx (CUmodule *module, char *ptx_code)
 			 cuda_error (r));
     }
 
+  char *gomp_atomic_ptx = GOMP_ATOMIC_PTX;
+  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx,
+		     strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+      GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s",
+			 cuda_error (r));
+    }
+
   r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code,
               strlen (ptx_code) + 1, 0, 0, 0, 0);
   if (r != CUDA_SUCCESS)


Grüße,
 Thomas

Attachment: pgp6DQsIquQal.pgp
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]