This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, OpenACC] Add support for gang local storage allocation in shared memory


This patch adds support for placing gang-private variables in NVPTX
per-CU shared memory. This is done by marking up addressable variables
declared at the appropriate parallelism level with an attribute ("oacc
gangprivate") in omp-low.c.

Target-dependent code in the NVPTX backend then modifies the symbol
associated with the variable at expand time via a new target hook
(TARGET_GOACC_EXPAND_ACCEL_VAR) in order to place it in shared memory,
which is faster to access than the ".local" memory that would otherwise
be used for such variables. This has (theoretical, at least)
consequences on program semantics, in that the shared memory is also
statically-allocated rather than obeying stack discipline -- but you
can't have recursive routine calls in OpenACC anyway, so that's no big
deal.

Other targets can use the same attribute in different ways, as
appropriate.

OK for trunk?

Thanks,

Julian

2018-08-10  Julian Brown  <julian@codesourcery.com>
            Chung-Lin Tang  <cltang@codesourcery.com>

        gcc/
        * config/nvptx/nvptx.c (tree-hash-traits.h): Include.
        (gangprivate_shared_size): New global variable.
        (gangprivate_shared_align): Likewise.
        (gangprivate_shared_sym): Likewise.
        (gangprivate_shared_hmap): Likewise.
        (nvptx_option_override): Initialize gangprivate_shared_sym,
        gangprivate_shared_align.
        (nvptx_file_end): Output gangprivate_shared_sym.
        (nvptx_goacc_expand_accel_var): New function.
        (nvptx_set_current_function): New function.
        (TARGET_SET_CURRENT_FUNCTION): Define hook.
        (TARGET_GOACC_EXPAND_ACCEL): Likewise.
        * doc/tm.texi (TARGET_GOACC_EXPAND_ACCEL_VAR): Document new hook.
        * doc/tm.texi.in (TARGET_GOACC_EXPAND_ACCEL_VAR): Likewise.
        * expr.c (expand_expr_real_1): Remap decls marked with the
        "oacc gangprivate" atttribute.
        * omp-low.c (omp_context): Add oacc_partitioning_level and oacc_decls
        fields.
        (new_omp_context): Initialize oacc_decls in new omp_context.
        (delete_omp_context): Delete oacc_decls in old omp_context.
        (lower_oacc_head_tail): Record partitioning-level count in omp context.
        (oacc_record_private_var_clauses, oacc_record_vars_in_bind)
        (mark_oacc_gangprivate): New functions.
        (lower_omp_for): Call oacc_record_private_var_clauses with "for"
        clauses.  Call mark_oacc_gangprivate for gang-partitioned loops.
        (lower_omp_target): Call oacc_record_private_var_clauses with "target"
        clauses.
        Call mark_oacc_gangprivate for offloaded target regions.
        (lower_omp_1): Call vars_in_bind for GIMPLE_BIND within OMP regions.
        * target.def (expand_accel_var): New hook.

        libgomp/
        * testsuite/libgomp.oacc-c-c++-common/gang-private-1.c: New test.
        * testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: New test.
        * testsuite/libgomp.oacc-c/pr85465.c: New test.
commit 9637e7ea887e100f35d99b8d12101f9f8a9b94e3
Author: Julian Brown <julian@codesourcery.com>
Date:   Thu Aug 9 20:27:04 2018 -0700

    [OpenACC] Add support for gang local storage allocation in shared memory
    
    2018-08-10  Julian Brown  <julian@codesourcery.com>
    	    Chung-Lin Tang  <cltang@codesourcery.com>
    
    	gcc/
    	* config/nvptx/nvptx.c (tree-hash-traits.h): Include.
    	(gangprivate_shared_size): New global variable.
    	(gangprivate_shared_align): Likewise.
    	(gangprivate_shared_sym): Likewise.
    	(gangprivate_shared_hmap): Likewise.
    	(nvptx_option_override): Initialize gangprivate_shared_sym,
    	gangprivate_shared_align.
    	(nvptx_file_end): Output gangprivate_shared_sym.
    	(nvptx_goacc_expand_accel_var): New function.
    	(nvptx_set_current_function): New function.
    	(TARGET_SET_CURRENT_FUNCTION): Define hook.
    	(TARGET_GOACC_EXPAND_ACCEL): Likewise.
    	* doc/tm.texi (TARGET_GOACC_EXPAND_ACCEL_VAR): Document new hook.
    	* doc/tm.texi.in (TARGET_GOACC_EXPAND_ACCEL_VAR): Likewise.
    	* expr.c (expand_expr_real_1): Remap decls marked with the
    	"oacc gangprivate" atttribute.
    	* omp-low.c (omp_context): Add oacc_partitioning_level and oacc_decls
    	fields.
    	(new_omp_context): Initialize oacc_decls in new omp_context.
    	(delete_omp_context): Delete oacc_decls in old omp_context.
    	(lower_oacc_head_tail): Record partitioning-level count in omp context.
    	(oacc_record_private_var_clauses, oacc_record_vars_in_bind)
    	(mark_oacc_gangprivate): New functions.
    	(lower_omp_for): Call oacc_record_private_var_clauses with "for"
    	clauses.  Call mark_oacc_gangprivate for gang-partitioned loops.
    	(lower_omp_target): Call oacc_record_private_var_clauses with "target"
    	clauses.
    	Call mark_oacc_gangprivate for offloaded target regions.
    	(lower_omp_1): Call vars_in_bind for GIMPLE_BIND within OMP regions.
    	* target.def (expand_accel_var): New hook.
    
    	libgomp/
    	* testsuite/libgomp.oacc-c-c++-common/gang-private-1.c: New test.
    	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: New test.
    	* testsuite/libgomp.oacc-c/pr85465.c: New test.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index c0b0a2e..14eb842 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -73,6 +73,7 @@
 #include "cfgloop.h"
 #include "fold-const.h"
 #include "intl.h"
+#include "tree-hash-traits.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -137,6 +138,12 @@ static unsigned worker_red_size;
 static unsigned worker_red_align;
 static GTY(()) rtx worker_red_sym;
 
+/* Shared memory block for gang-private variables.  */
+static unsigned gangprivate_shared_size;
+static unsigned gangprivate_shared_align;
+static GTY(()) rtx gangprivate_shared_sym;
+static hash_map<tree_decl_hash, unsigned int> gangprivate_shared_hmap;
+
 /* Global lock variable, needed for 128bit worker & gang reductions.  */
 static GTY(()) tree global_lock_var;
 
@@ -210,6 +217,10 @@ nvptx_option_override (void)
   SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
   worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
 
+  gangprivate_shared_sym = gen_rtx_SYMBOL_REF (Pmode, "__gangprivate_shared");
+  SET_SYMBOL_DATA_AREA (gangprivate_shared_sym, DATA_AREA_SHARED);
+  gangprivate_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
   diagnose_openacc_conflict (TARGET_GOMP, "-mgomp");
   diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack");
   diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt");
@@ -4968,6 +4979,10 @@ nvptx_file_end (void)
     write_worker_buffer (asm_out_file, worker_red_sym,
 			 worker_red_align, worker_red_size);
 
+  if (gangprivate_shared_size)
+    write_worker_buffer (asm_out_file, gangprivate_shared_sym,
+			 gangprivate_shared_align, gangprivate_shared_size);
+
   if (need_softstack_decl)
     {
       write_var_marker (asm_out_file, false, true, "__nvptx_stacks");
@@ -5915,6 +5930,47 @@ nvptx_can_change_mode_class (machine_mode, machine_mode, reg_class_t)
   return false;
 }
 
+static rtx
+nvptx_goacc_expand_accel_var (tree var)
+{
+  if (TREE_CODE (var) == VAR_DECL
+      && lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (var)))
+    {
+      unsigned int offset, *poffset;
+      poffset = gangprivate_shared_hmap.get (var);
+      if (poffset)
+	offset = *poffset;
+      else
+	{
+	  unsigned HOST_WIDE_INT align = DECL_ALIGN (var);
+	  gangprivate_shared_size =
+	    (gangprivate_shared_size + align - 1) & ~(align - 1);
+	  if (gangprivate_shared_align < align)
+	    gangprivate_shared_align = align;
+
+	  offset = gangprivate_shared_size;
+	  bool existed = gangprivate_shared_hmap.put (var, offset);
+	  gcc_assert (!existed);
+	  gangprivate_shared_size += tree_to_uhwi (DECL_SIZE_UNIT (var));
+	}
+      rtx addr = plus_constant (Pmode, gangprivate_shared_sym, offset);
+      return gen_rtx_MEM (TYPE_MODE (TREE_TYPE (var)), addr);
+    }
+  return NULL_RTX;
+}
+
+static GTY(()) tree nvptx_previous_fndecl;
+
+static void
+nvptx_set_current_function (tree fndecl)
+{
+  if (!fndecl || fndecl == nvptx_previous_fndecl)
+    return;
+
+  gangprivate_shared_hmap.empty ();
+  nvptx_previous_fndecl = fndecl;
+}
+
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
 
@@ -6051,6 +6107,12 @@ nvptx_can_change_mode_class (machine_mode, machine_mode, reg_class_t)
 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
 
+#undef TARGET_GOACC_EXPAND_ACCEL_VAR
+#define TARGET_GOACC_EXPAND_ACCEL_VAR nvptx_goacc_expand_accel_var
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a40f45a..fb87f67 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6064,6 +6064,14 @@ like @code{cond_add@var{m}}.  The default implementation returns a zero
 constant of type @var{type}.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_ACCEL_VAR (tree @var{var})
+This hook, if defined, is used by accelerator target back-ends to expand
+specially handled kinds of VAR_DECL expressions.  A particular use is to
+place variables with specific attributes inside special accelarator
+memories.  A return value of NULL indicates that the target does not
+handle this VAR_DECL, and normal RTL expanding is resumed.
+@end deftypefn
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 39a214e..beace61 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4151,6 +4151,8 @@ address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_PREFERRED_ELSE_VALUE
 
+@hook TARGET_GOACC_EXPAND_ACCEL_VAR
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
diff --git a/gcc/expr.c b/gcc/expr.c
index de6709d..2c62bf9 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9854,8 +9854,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
       exp = SSA_NAME_VAR (ssa_name);
       goto expand_decl_rtl;
 
-    case PARM_DECL:
     case VAR_DECL:
+      /* Allow accel compiler to handle specific cases of variables,
+	 specifically those tagged with the "oacc gangprivate" attribute,
+	 which may intended to be placed in special memory in GPUs.  */
+      if (flag_openacc && targetm.goacc.expand_accel_var)
+	{
+	  temp = targetm.goacc.expand_accel_var (exp);
+	  if (temp)
+	    return temp;
+	}
+      /* ... fall through ...  */
+
+    case PARM_DECL:
       /* If a static var's type was incomplete when the decl was written,
 	 but the type is complete now, lay out the decl now.  */
       if (DECL_SIZE (exp) == 0
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 843c66f..354e182 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -124,6 +124,12 @@ struct omp_context
 
   /* True if this construct can be cancelled.  */
   bool cancellable;
+
+  /* The number of levels of OpenACC partitioning invoked in this context.  */
+  int oacc_partitioning_levels;
+
+  /* Decls in this context.  */
+  vec<tree> *oacc_decls;
 };
 
 static splay_tree all_contexts;
@@ -850,6 +856,7 @@ new_omp_context (gimple *stmt, omp_context *outer_ctx)
     }
 
   ctx->cb.decl_map = new hash_map<tree, tree>;
+  ctx->oacc_decls = new vec<tree> ();
 
   return ctx;
 }
@@ -925,6 +932,8 @@ delete_omp_context (splay_tree_value value)
   if (is_task_ctx (ctx))
     finalize_task_copyfn (as_a <gomp_task *> (ctx->stmt));
 
+  delete ctx->oacc_decls;
+
   XDELETE (ctx);
 }
 
@@ -5716,6 +5725,9 @@ lower_oacc_head_tail (location_t loc, tree clauses,
   tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
 
   gcc_assert (count);
+
+  ctx->oacc_partitioning_levels = count;
+
   for (unsigned done = 1; count; count--, done++)
     {
       gimple_seq fork_seq = NULL;
@@ -6732,6 +6744,66 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
     }
 }
 
+/* Record vars listed in private clauses in CLAUSES in CTX.  This information
+   is used to mark up variables that should be made private per-gang.  */
+
+static void
+oacc_record_private_var_clauses (omp_context *ctx, tree clauses)
+{
+  tree c;
+
+  if (!ctx)
+    return;
+
+  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    switch (OMP_CLAUSE_CODE (c))
+      {
+      case OMP_CLAUSE_PRIVATE:
+	{
+	  tree decl = OMP_CLAUSE_DECL (c);
+	  ctx->oacc_decls->safe_push (decl);
+	}
+	break;
+
+      default:
+	/* Empty.  */;
+      }
+}
+
+/* Record vars declared in BINDVARS in CTX.  This information is used to mark
+   up variables that should be made private per-gang.  */
+
+static void
+oacc_record_vars_in_bind (omp_context *ctx, tree bindvars)
+{
+  if (!ctx)
+    return;
+
+  for (tree v = bindvars; v; v = DECL_CHAIN (v))
+    ctx->oacc_decls->safe_push (v);
+}
+
+/* Mark variables which are declared implicitly or explicitly as gang private
+   with a special attribute.  These may need to have their declarations altered
+   later on in compilation (e.g. in execute_oacc_device_lower or the backend,
+   depending on how the OpenACC execution model is implemented on a given
+   target) to ensure that sharing semantics are correct.
+   Only variables which have their address taken need to be considered.  */
+
+static void
+mark_oacc_gangprivate (vec<tree> *decls)
+{
+  int i;
+  tree decl;
+
+  FOR_EACH_VEC_ELT (*decls, i, decl)
+    {
+      if (TREE_CODE (decl) == VAR_DECL && TREE_ADDRESSABLE (decl))
+	DECL_ATTRIBUTES (decl)
+	  = tree_cons (get_identifier ("oacc gangprivate"),
+		       NULL, DECL_ATTRIBUTES (decl));
+    }
+}
 
 /* Lower code for an OMP loop directive.  */
 
@@ -6748,6 +6820,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   push_gimplify_context ();
 
+  oacc_record_private_var_clauses (ctx, gimple_omp_for_clauses (stmt));
+
   lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
 
   block = make_node (BLOCK);
@@ -6878,7 +6952,20 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   /* Add OpenACC partitioning and reduction markers just before the loop.  */
   if (oacc_head)
-    gimple_seq_add_seq (&body, oacc_head);
+    {
+      gimple_seq_add_seq (&body, oacc_head);
+
+      int level_total = 0;
+      omp_context *thisctx;
+
+      for (thisctx = ctx; thisctx; thisctx = thisctx->outer)
+        level_total += thisctx->oacc_partitioning_levels;
+
+      /* If the current context and parent contexts are distributed over a
+	 total of one parallelism level, we have gang partitioning.  */
+      if (level_total == 1)
+        mark_oacc_gangprivate (ctx->oacc_decls);
+    }
 
   lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
 
@@ -7511,6 +7598,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   clauses = gimple_omp_target_clauses (stmt);
 
+  oacc_record_private_var_clauses (ctx, clauses);
+
   gimple_seq dep_ilist = NULL;
   gimple_seq dep_olist = NULL;
   if (omp_find_clause (clauses, OMP_CLAUSE_DEPEND))
@@ -7761,6 +7850,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   if (offloaded)
     {
+      mark_oacc_gangprivate (ctx->oacc_decls);
+
       /* Declare all the variables created by mapping and the variables
 	 declared in the scope of the target body.  */
       record_vars_into (ctx->block_vars, child_fn);
@@ -8755,6 +8846,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		 ctx);
       break;
     case GIMPLE_BIND:
+      oacc_record_vars_in_bind (ctx, gimple_bind_vars (as_a <gbind *> (stmt)));
       lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
       maybe_remove_omp_member_access_dummy_vars (as_a <gbind *> (stmt));
       break;
diff --git a/gcc/target.def b/gcc/target.def
index c570f38..b3b24b8 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1701,6 +1701,16 @@ for allocating any storage for reductions when necessary.",
 void, (gcall *call),
 default_goacc_reduction)
 
+DEFHOOK
+(expand_accel_var,
+"This hook, if defined, is used by accelerator target back-ends to expand\n\
+specially handled kinds of VAR_DECL expressions.  A particular use is to\n\
+place variables with specific attributes inside special accelarator\n\
+memories.  A return value of NULL indicates that the target does not\n\
+handle this VAR_DECL, and normal RTL expanding is resumed.",
+rtx, (tree var),
+NULL)
+
 HOOK_VECTOR_END (goacc)
 
 /* Functions relating to vectorization.  */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-private-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-private-1.c
new file mode 100644
index 0000000..f378346
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-private-1.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+int main (void)
+{
+  int ret;
+
+  #pragma acc parallel num_gangs(1) num_workers(32) copyout(ret)
+  {
+    int w = 0;
+
+    #pragma acc loop worker
+    for (int i = 0; i < 32; i++)
+      {
+        #pragma acc atomic update
+	w++;
+      }
+
+    ret = (w == 32);
+  }
+  assert (ret);
+
+  #pragma acc parallel num_gangs(1) vector_length(32) copyout(ret)
+  {
+    int v = 0;
+
+    #pragma acc loop vector
+    for (int i = 0; i < 32; i++)
+      {
+        #pragma acc atomic update
+	v++;
+      }
+
+    ret = (v == 32);
+  }
+  assert (ret);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
new file mode 100644
index 0000000..2fa708a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
@@ -0,0 +1,106 @@
+/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */
+
+#include <stdio.h>
+#include <openacc.h>
+#include <alloca.h>
+#include <string.h>
+#include <gomp-constants.h>
+#include <stdlib.h>
+
+#if 0
+#define DEBUG(DIM, IDX, VAL) \
+  fprintf (stderr, "%sdist[%d] = %d\n", (DIM), (IDX), (VAL))
+#else
+#define DEBUG(DIM, IDX, VAL)
+#endif
+
+#define N (32*32*32)
+
+int
+check (const char *dim, int *dist, int dimsize)
+{
+  int ix;
+  int exit = 0;
+
+  for (ix = 0; ix < dimsize; ix++)
+    {
+      DEBUG(dim, ix, dist[ix]);
+      if (dist[ix] < (N) / (dimsize + 0.5)
+	  || dist[ix] > (N) / (dimsize - 0.5))
+	{
+	  fprintf (stderr, "did not distribute to %ss (%d not between %d "
+		   "and %d)\n", dim, dist[ix], (int) ((N) / (dimsize + 0.5)),
+		   (int) ((N) / (dimsize - 0.5)));
+	  exit |= 1;
+	}
+    }
+
+  return exit;
+}
+
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+  int gangsize = 0, workersize = 0, vectorsize = 0;
+  int *gangdist, *workerdist, *vectordist;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
+  {
+#pragma acc loop gang worker vector
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (acc_on_device (acc_device_not_host))
+	  {
+	    int g, w, v;
+
+	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+  }
+
+  gangdist = (int *) alloca (gangsize * sizeof (int));
+  workerdist = (int *) alloca (workersize * sizeof (int));
+  vectordist = (int *) alloca (vectorsize * sizeof (int));
+  memset (gangdist, 0, gangsize * sizeof (int));
+  memset (workerdist, 0, workersize * sizeof (int));
+  memset (vectordist, 0, vectorsize * sizeof (int));
+
+  /* Test that work is shared approximately equally amongst each active
+     gang/worker/vector.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      if (ondev)
+	{
+	  int g = (ary[ix] >> 16) & 255;
+	  int w = (ary[ix] >> 8) & 255;
+	  int v = ary[ix] & 255;
+
+	  gangdist[g]++;
+	  workerdist[w]++;
+	  vectordist[v]++;
+	}
+    }
+
+  exit = check ("gang", gangdist, gangsize);
+  exit |= check ("worker", workerdist, workersize);
+  exit |= check ("vector", vectordist, vectorsize);
+
+  return exit;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/pr85465.c b/libgomp/testsuite/libgomp.oacc-c/pr85465.c
new file mode 100644
index 0000000..329e8a0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/pr85465.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w" } */
+
+int
+main (void)
+{
+#pragma acc parallel
+  foo ();
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]