This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] nvptx reductions


This patch implements the goacc.reduction hook introduced here
<https://gcc.gnu.org/ml/gcc-patches/2015-08/msg01130.html> for the nvptx
target. Each new nvptx_goacc_reduction_* function is commented with a
description of how each internal function gets implemented for each
parallel dimension. Nathan goes into that in more detail in his design.

For the most part, all of the reductions use atomic operations with
spinlocks. Nvidia targets have different spinlock requirements than
pthreads on the host, so we couldn't recycle GOMP_atomic_start/end.
Well, we probably could have, but we decided it would be better to
introduce two new GOACC_LOCK and GOACC_UNLOCK functions, so that we
could inline those locks in the compiler. This has several advantages;
different loops can use different locks, and the locks for different
parallel dimensions can be optimized (e.g. worker locks are stored in
.shared memory). Placement of these locking functions happens in the
lowering code, so the nvptx backend (and really goacc.reduction in
general) expects the appropriate threads to evaluate the expanded
instructions.

The exception to the atomic reductions on nvptx targets is vectors.
Since spinlocks don't work for vectors on nvptx targets, we're forced to implement a
parallel-tree reduction using shfl.down instructions. You'll note that
nvptx_generate_vector_shuffle expands GOACC_REDUCTION_FINI into an
unrolled sequence of shfl.down instead of using a loop. A loop would be
nice here to handle the case where OACC_DIM_SIZE (GOMP_DIM_VECTOR) = 1.
However, we don't yet have the necessary infrastructure to ensure that
the branch in that loop will be unified. If the branch isn't unified,
bad things happen on the gpu when you try to synchronize all of the threads.

Updating the ssa is kind of messy because nvptx_goacc_reduction_init
creates a couple of new basic blocks for the vectors. It also inserts a
call to GOACC_DIM_POS, so that's why oacc_transform may need to rescan
all of the basic blocks for internal functions.

Is this patch ok for gomp-4_0-branch after the infrastructure patch goes in?

Cesar
2015-08-20  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* config/nvptx/nvptx.c (enum nvptx_builtins): New enum.
	(NVPTX_BUILTIN_MAX): Delete.
	(nvptx_get_worker_red_addr_fn): New function.
	(nvptx_generate_vector_shuffle): New function.
	(nvptx_shuffle_reduction): New function.
	(nvptx_goacc_reduction_setup): New function.
	(nvptx_goacc_reduction_init): New function.
	(nvptx_goacc_reduction_fini): New function.
	(nvptx_goacc_reduction_teardown): New function.
	(nvptx_goacc_reduction): New function.


diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index a05c767..26e28c1 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -53,13 +53,27 @@
 #include "target.h"
 #include "diagnostic.h"
 #include "cfgrtl.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
 #include "stor-layout.h"
 #include "df.h"
 #include "dumpfile.h"
 #include "builtins.h"
 #include "dominance.h"
 #include "cfg.h"
+#include "tree-cfg.h"
 #include "omp-low.h"
+#include "fold-const.h"
+#include "stringpool.h"
+#include "internal-fn.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimple-ssa.h"
+#include "gimplify.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "tree-ssanames.h"
+#include "tree-into-ssa.h"
 #include "gomp-constants.h"
 #include "gimple.h"
 
@@ -3443,6 +3457,20 @@ enum nvptx_types
     NT_MAX
   };
 
+/* Codes for all the NVPTX builtins.  NVPTX_BUILTIN_MAX is not a builtin
+   code itself: it is the number of codes listed before it and is used as
+   the length of the nvptx_builtin_decls array.
+   NOTE(review): the enumerators presumably have to be kept in the same
+   order as the entries of the builtins[] table -- confirm before adding
+   or reordering codes.  */
+enum nvptx_builtins
+{
+  NVPTX_BUILTIN_SHUFFLE_DOWN,
+  NVPTX_BUILTIN_SHUFFLE_DOWNLL,
+  NVPTX_BUILTIN_SHUFFLE_DOWNF,
+  NVPTX_BUILTIN_SHUFFLE_DOWND,
+  NVPTX_BUILTIN_WORK_RED_ADDR,
+  NVPTX_BUILTIN_WORK_RED_ADDRLL,
+  NVPTX_BUILTIN_WORK_RED_ADDRF,
+  NVPTX_BUILTIN_WORK_RED_ADDRD,
+  NVPTX_BUILTIN_MAX
+};
+
 static const struct builtin_description builtins[] =
 {
   {"__builtin_nvptx_shuffle_down", NT_UINT_UINT_INT,
@@ -3463,8 +3491,6 @@ static const struct builtin_description builtins[] =
    nvptx_expand_work_red_addr},
 };
 
-#define NVPTX_BUILTIN_MAX (sizeof (builtins) / sizeof (builtins[0]))
-
 static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
 
 /* Return the NVPTX builtin for CODE.  */
@@ -3642,6 +3668,549 @@ nvptx_xform_lock_unlock (gimple stmt, const int *ARG_UNUSED (dims),
   
   return TREE_INT_CST_LOW (arg) > GOMP_DIM_WORKER;
 }
+
+/* Build (but do not emit) a call to the worker reduction address builtin
+   selected by the machine mode of VAR's type.  LID and RID are passed to
+   the builtin in that order.  A mode without a matching builtin is not
+   expected and aborts.  */
+
+static tree
+nvptx_get_worker_red_addr_fn (tree var, tree rid, tree lid)
+{
+  enum nvptx_builtins code;
+
+  switch (TYPE_MODE (TREE_TYPE (var)))
+    {
+    case DImode:
+      code = NVPTX_BUILTIN_WORK_RED_ADDRLL;
+      break;
+    case SFmode:
+      code = NVPTX_BUILTIN_WORK_RED_ADDRF;
+      break;
+    case DFmode:
+      code = NVPTX_BUILTIN_WORK_RED_ADDRD;
+      break;
+    case QImode:
+    case HImode:
+    case SImode:
+      /* These integer modes all share the plain builtin.  */
+      code = NVPTX_BUILTIN_WORK_RED_ADDR;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return build_call_expr (nvptx_builtin_decl (code, true), 2, lid, rid);
+}
+
+/* Emit a SHFL.DOWN using index SHFL of VAR into DEST_VAR, appending the
+   generated statements to SEQ.  The shuffle builtin is selected by the
+   machine mode of VAR's type; a mode without a matching builtin is not
+   expected and aborts.  If VAR's type is not compatible with the
+   builtin's argument type, VAR is converted before the call and the
+   result of the call is converted back into DEST_VAR afterwards.  */
+
+static void
+nvptx_generate_vector_shuffle (tree dest_var, tree var, int shfl,
+			       gimple_seq *seq)
+{
+  tree vartype = TREE_TYPE (var);
+  enum nvptx_builtins fn;
+  tree call_arg_type;
+
+  switch (TYPE_MODE (vartype))
+    {
+    case QImode:
+    case HImode:
+    case SImode:
+      fn = NVPTX_BUILTIN_SHUFFLE_DOWN;
+      call_arg_type = unsigned_type_node;
+      break;
+    case DImode:
+      fn = NVPTX_BUILTIN_SHUFFLE_DOWNLL;
+      call_arg_type = long_long_unsigned_type_node;
+      break;
+    case DFmode:
+      fn = NVPTX_BUILTIN_SHUFFLE_DOWND;
+      call_arg_type = double_type_node;
+      break;
+    case SFmode:
+      fn = NVPTX_BUILTIN_SHUFFLE_DOWNF;
+      call_arg_type = float_type_node;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* All of the integral types need to be unsigned.  Furthermore, small
+     integral types may need to be extended to 32-bits.  */
+  bool need_conversion = !types_compatible_p (vartype, call_arg_type);
+  tree casted_var = var;
+  tree casted_dest = dest_var;
+
+  if (need_conversion)
+    {
+      casted_var = make_ssa_name (call_arg_type);
+      tree t1 = fold_build1 (NOP_EXPR, call_arg_type, var);
+      gimple_seq_add_stmt (seq, gimple_build_assign (casted_var, t1));
+
+      /* The call then needs a result of CALL_ARG_TYPE of its own, which
+	 is converted back to VARTYPE after the call.  */
+      casted_dest = make_ssa_name (call_arg_type);
+    }
+
+  tree fndecl = nvptx_builtin_decl (fn, true);
+  tree shift = build_int_cst (unsigned_type_node, shfl);
+  gimple call = gimple_build_call (fndecl, 2, casted_var, shift);
+
+  /* Give the call its result before it is queued and before its operands
+     are scanned, so that the scan sees the complete statement.  */
+  gimple_call_set_lhs (call, casted_dest);
+  gimple_seq_add_stmt (seq, call);
+  update_stmt (call);
+
+  if (need_conversion)
+    {
+      tree t2 = fold_build1 (NOP_EXPR, vartype, casted_dest);
+      gimple_seq_add_stmt (seq, gimple_build_assign (dest_var, t2));
+    }
+}
+
+/* Fold an OpenACC vector reduction using shuffle down instructions.
+   VAR is combined with CODE across the vector and the result is stored
+   in LHS; every generated statement is appended to SEQ.  */
+
+static void
+nvptx_shuffle_reduction (tree lhs, tree var, enum tree_code code,
+			 gimple_seq *seq)
+{
+  tree type = TREE_TYPE (var);
+
+  /* The short-circuit logical codes are folded as their bitwise
+     counterparts.  */
+  switch (code)
+    {
+    case TRUTH_ANDIF_EXPR:
+      code = BIT_AND_EXPR;
+      break;
+    case TRUTH_ORIF_EXPR:
+      code = BIT_IOR_EXPR;
+      break;
+    default:
+      break;
+    }
+
+  /* Perform a tree reduction: emit one shuffle plus one combining
+     assignment per step, halving the shuffle distance each time.  The
+     steps are emitted back to back rather than as a runtime loop.  */
+  int distance = PTX_VECTOR_LENGTH / 2;
+
+  while (distance > 0)
+    {
+      tree shuffled = make_ssa_name (type);
+      nvptx_generate_vector_shuffle (shuffled, var, distance, seq);
+
+      tree combined = make_ssa_name (create_tmp_var (type));
+      tree expr = fold_build2 (code, type, var, shuffled);
+      gimple_seq_add_stmt (seq, gimple_build_assign (combined, expr));
+
+      var = combined;
+      distance >>= 1;
+    }
+
+  /* Restore the type of the comparison operand.  */
+  if (code == EQ_EXPR || code == NE_EXPR)
+    {
+      tree lhs_type = TREE_TYPE (lhs);
+      tree converted = make_ssa_name (lhs_type);
+      tree cast = fold_build1 (NOP_EXPR, lhs_type, var);
+
+      gimple_seq_add_stmt (seq, gimple_build_assign (converted, cast));
+      var = converted;
+    }
+
+  gimple_seq_add_stmt (seq, gimple_build_assign (lhs, var));
+}
+
+/* NVPTX implementation of GOACC_REDUCTION_SETUP.  Reserve shared
+   memory for worker reductions.
+
+   Given:
+
+     V = IFN_RED_SETUP (RES_PTR, LOCAL, LEVEL, OP, LID, RID)
+
+   Expand to:
+
+   Vector:
+
+     V = LOCAL;
+
+   Worker:
+
+     *ptx_work_red_addr<T> (LID, RID) = LOCAL;
+     ptx_mem_bar (WORKER)  // Should be inserted automatically by the
+                           // predication framework.
+
+   Gang:
+
+     if (RES_PTR != NULL)
+       V = LOCAL
+
+   This function does not read RES_PTR and emits no barrier itself.  In
+   every dimension the assignment to V is only emitted when CALL has an
+   LHS.  CALL is replaced by the generated statements.  Always returns
+   false.  */
+
+static bool
+nvptx_goacc_reduction_setup (gimple call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree v = gimple_call_lhs (call);
+  tree local_var = gimple_call_arg (call, 1);
+  int loop_dim = tree_to_shwi (gimple_call_arg (call, 2));
+  tree lid = gimple_call_arg (call, 4);
+  tree rid = gimple_call_arg (call, 5);
+  gimple_seq seq = NULL;
+
+  push_gimplify_context (true);
+
+  switch (loop_dim)
+    {
+    case GOMP_DIM_GANG:
+    case GOMP_DIM_VECTOR:
+      break;
+    case GOMP_DIM_WORKER:
+      {
+	/* Store LOCAL into the slot returned by the worker reduction
+	   address builtin.  The store is marked volatile.  */
+	tree ptr = make_ssa_name (build_pointer_type (TREE_TYPE (local_var)));
+	tree addr_call = nvptx_get_worker_red_addr_fn (local_var, rid, lid);
+	tree ref;
+
+	gimplify_assign (ptr, addr_call, &seq);
+	ref = build_simple_mem_ref (ptr);
+	TREE_THIS_VOLATILE (ref) = 1;
+	gimplify_assign (ref, local_var, &seq);
+      }
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Every dimension forwards LOCAL to V, but there is nothing to assign
+     when the call has no LHS.  */
+  if (v)
+    gimplify_assign (v, local_var, &seq);
+
+  pop_gimplify_context (NULL);
+
+  gsi_replace_with_seq (&gsi, seq, true);
+
+  return false;
+}
+
+/* NVPTX implementation of GOACC_REDUCTION_INIT. Initialize the private
+   reduction variables.  Vectors are special in that tid.x = 0 contains
+   the original value of LOCAL prior to the reduction.
+
+   Given:
+
+     V = IFN_RED_INIT (RES_PTR, LOCAL, LEVEL, OP, LID, RID)
+
+   Expand to:
+
+   Vector:
+
+     V = IFN_OACC_DIM_POS (VECTOR) ? {init_val<T> (OP)} : LOCAL
+
+   Worker:
+
+     V = {init_val<T> (OPERATOR)}
+
+   Gang:
+
+     V = {init_val<T> (OPERATOR)}
+
+   For gangs and workers CALL is replaced in place by the assignment.
+   For vectors the block holding CALL is split, two new basic blocks are
+   created for the two arms of the conditional, CALL is removed and V
+   becomes the result of a PHI in the block following the split.
+
+   Return true in the vector case and false otherwise.  */
+
+static bool
+nvptx_goacc_reduction_init (gimple call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree v = gimple_call_lhs (call);
+  tree local_var = gimple_call_arg (call, 1);
+  tree clause = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_REDUCTION);
+  int loop_dim = tree_to_shwi (gimple_call_arg (call, 2));
+  tree local_vartype = TREE_TYPE (local_var);
+  enum tree_code op;
+  gimple_seq seq = NULL;
+  bool retval = false;
+
+  op = (enum tree_code) tree_to_shwi (gimple_call_arg (call, 3));
+  OMP_CLAUSE_REDUCTION_CODE (clause) = op;
+
+  switch (loop_dim)
+    {
+    case GOMP_DIM_GANG:
+    case GOMP_DIM_WORKER:
+      push_gimplify_context (true);
+      gimplify_assign(v, omp_reduction_init (clause, local_vartype), &seq);
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (&gsi, seq, true);
+      break;
+    case GOMP_DIM_VECTOR:
+      {
+	tree tid = make_ssa_name (unsigned_type_node);
+	tree dim_vector = build_int_cst (unsigned_type_node, GOMP_DIM_VECTOR);
+	gimple tid_call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1,
+						      dim_vector);
+
+	gimple_call_set_lhs (tid_call, tid);
+	gsi_insert_before (&gsi, tid_call, GSI_SAME_STMT);
+
+	tree zero = build_int_cst (unsigned_type_node, 0);
+	gimple cond_stmt = gimple_build_cond (NE_EXPR, tid, zero,
+					      NULL_TREE, NULL_TREE);
+
+	gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
+
+	basic_block cond_bb = gsi_bb (gsi);
+	edge e = split_block (cond_bb, cond_stmt);
+	basic_block fallthru_bb = e->dest;
+	basic_block true_bb = create_empty_bb (cond_bb);
+	basic_block false_bb = create_empty_bb (cond_bb);
+	gimple_stmt_iterator true_gsi = gsi_start_bb (true_bb);
+	gimple_stmt_iterator false_gsi = gsi_start_bb (false_bb);
+
+	/* True case: v = gomp_init_reduction ().  TRUE_BB is entered when
+	   the vector position is nonzero; it computes the value returned
+	   by omp_reduction_init into TRUE_V and falls through to
+	   FALLTHRU_BB.  The edge created by split_block is removed and
+	   replaced by the true and false edges out of COND_BB.  */
+
+	make_edge (cond_bb, true_bb, EDGE_TRUE_VALUE);
+	edge etrue = make_edge (true_bb, fallthru_bb, EDGE_FALLTHRU);
+	remove_edge (e);
+
+	add_bb_to_loop (true_bb, cond_bb->loop_father);
+	set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
+
+	tree true_v = make_ssa_name (local_vartype);
+	seq = NULL;
+
+	push_gimplify_context (true);
+	gimplify_assign (true_v, omp_reduction_init (clause, local_vartype),
+			 &seq);
+	pop_gimplify_context (NULL);
+	gsi_insert_seq_after (&true_gsi, seq, GSI_CONTINUE_LINKING);
+	gsi = gsi_start_bb (fallthru_bb);
+
+	/* False case: v = local_var.  FALSE_BB copies LOCAL into FALSE_V
+	   and falls through to FALLTHRU_BB.
+	   NOTE(review): the assignment to GSI just above this comment is
+	   overwritten by gsi_for_stmt below before GSI is read again, so
+	   it looks like a dead store -- confirm and remove.  */
+
+	make_edge (cond_bb, false_bb, EDGE_FALSE_VALUE);
+	edge efalse = make_edge (false_bb, fallthru_bb, EDGE_FALLTHRU);
+
+	tree false_v = make_ssa_name (local_vartype);
+	seq = NULL;
+
+	push_gimplify_context (true);
+	gimplify_assign (false_v, local_var, &seq);
+	pop_gimplify_context (NULL);
+	gsi_insert_seq_after (&false_gsi, seq, GSI_CONTINUE_LINKING);
+
+	gsi = gsi_for_stmt (call);
+
+	add_bb_to_loop (false_bb, cond_bb->loop_father);
+	set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
+
+	gsi_remove (&gsi, true);
+
+	/* Update phi.  The original call has just been removed; V is now
+	   defined by a PHI in FALLTHRU_BB that merges TRUE_V and FALSE_V
+	   coming in over the edges from the two new blocks.  */
+
+	gphi *phi = create_phi_node (v, fallthru_bb);
+	add_phi_arg (phi, true_v, etrue, UNKNOWN_LOCATION);
+	add_phi_arg (phi, false_v, efalse, UNKNOWN_LOCATION);
+
+	retval = true;
+      }
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return retval;
+}
+
+/* NVPTX implementation of GOACC_REDUCTION_FINI.  For vectors, perform
+   a tree reduction on LOCAL, otherwise, perform the reduction operation
+   atomically.
+
+   Given:
+
+     V = IFN_RED_FINI (RES_PTR, LOCAL, LEVEL, OP, LID, RID)
+
+   Expand to:
+
+   Vector:
+
+     for (ix = IFN_OACC_DIM_SIZE (VECTOR); ix >>= 1;)
+       {
+          T tmp = ptx_shuffle_down<T> (LOCAL, ix);
+          LOCAL = OP (LOCAL, tmp);
+       }
+     V = LOCAL
+
+   Worker:
+
+     T tmp = *ptx_work_red_addr<T> (LID, RID);
+     tmp = OP (tmp, LOCAL);
+     *ptx_work_red_addr<T> (LID, RID) = tmp;
+
+   Gang:
+
+     V = OPERATOR (*RES_PTR, LOCAL);
+     *RES_PTR = V;
+
+   No locking is emitted here.  For gangs and workers the assignment to
+   V is only emitted when CALL has an LHS.  CALL is replaced by the
+   generated statements.  Always returns false.  */
+
+static bool
+nvptx_goacc_reduction_fini (gimple call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree v = gimple_call_lhs (call);
+  tree ref_to_res = gimple_call_arg (call, 0);
+  tree local_var = gimple_call_arg (call, 1);
+  int loop_dim = tree_to_shwi (gimple_call_arg (call, 2));
+  enum tree_code op = (enum tree_code)tree_to_shwi (gimple_call_arg (call, 3));
+  tree lid = gimple_call_arg (call, 4);
+  tree rid = gimple_call_arg (call, 5);
+  tree local_vartype = TREE_TYPE (local_var);
+  gimple_seq seq = NULL;
+
+  /* The short-circuit logical operators are reduced bitwise.  */
+  if (op == TRUTH_ANDIF_EXPR)
+    op = BIT_AND_EXPR;
+  else if (op == TRUTH_ORIF_EXPR)
+    op = BIT_IOR_EXPR;
+
+  /* Exactly one gimplify context is pushed here and popped again once
+     the switch below has built SEQ.  */
+  push_gimplify_context (true);
+
+  switch (loop_dim)
+    {
+    case GOMP_DIM_GANG:
+      {
+	tree ref = build_simple_mem_ref (ref_to_res);
+	tree t = make_ssa_name (local_vartype);
+
+	gimplify_assign (t, fold_build2 (op, local_vartype, ref, local_var),
+			 &seq);
+	gimplify_assign (ref, t, &seq);
+
+	if (v != NULL)
+	  gimplify_assign (v, t, &seq);
+      }
+      break;
+    case GOMP_DIM_WORKER:
+      {
+	tree ptr = make_ssa_name (build_pointer_type (local_vartype));
+	tree addr_call = nvptx_get_worker_red_addr_fn (local_var, rid, lid);
+	tree t1 = make_ssa_name (local_vartype);
+	tree t2 = make_ssa_name (local_vartype);
+	tree ref;
+
+	/* Load the value in the worker's slot, combine it with LOCAL and
+	   store the result back.  Both accesses are marked volatile
+	   before the statement using them is built.  */
+	gimplify_assign (ptr, addr_call, &seq);
+	ref = build_simple_mem_ref (ptr);
+	TREE_THIS_VOLATILE (ref) = 1;
+	gimplify_assign (t1, ref, &seq);
+	gimplify_assign (t2, fold_build2 (op, local_vartype, t1, local_var),
+			 &seq);
+	ref = build_simple_mem_ref (ptr);
+	TREE_THIS_VOLATILE (ref) = 1;
+	gimplify_assign (ref, t2, &seq);
+
+	if (v != NULL)
+	  gimplify_assign (v, t2, &seq);
+      }
+      break;
+    case GOMP_DIM_VECTOR:
+      nvptx_shuffle_reduction (v, local_var, op, &seq);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  pop_gimplify_context (NULL);
+
+  gsi_replace_with_seq (&gsi, seq, true);
+
+  return false;
+}
+
+/* NVPTX implementation of GOACC_REDUCTION_TEARDOWN.  For workers
+   and vectors, ensure that V has the final reduction value.  Likewise,
+   for gangs, writeback V to RES_PTR if necessary.
+
+   Given:
+
+     V = IFN_RED_TEARDOWN (RES_PTR, LOCAL, LEVEL, OP, LID, RID)
+
+   Expand to:
+
+   Vector:
+
+     V = LOCAL;
+
+   Worker:
+
+     ptx_mem_bar (WORKER)
+     V = *ptx_work_red_addr<T> (LID, RID);
+
+   Gang:
+
+     if (RES_PTR != NULL)
+       V = LOCAL
+
+   This function does not read RES_PTR and emits no barrier itself.  In
+   every dimension nothing is generated when CALL has no LHS.  CALL is
+   replaced by the generated statements.  Always returns false.  */
+
+static bool
+nvptx_goacc_reduction_teardown (gimple call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree v = gimple_call_lhs (call);
+  tree local_var = gimple_call_arg (call, 1);
+  int loop_dim = tree_to_shwi (gimple_call_arg (call, 2));
+  tree lid = gimple_call_arg (call, 4);
+  tree rid = gimple_call_arg (call, 5);
+  gimple_seq seq = NULL;
+
+  push_gimplify_context (true);
+
+  switch (loop_dim)
+    {
+    case GOMP_DIM_GANG:
+    case GOMP_DIM_VECTOR:
+      if (v)
+	gimplify_assign (v, local_var, &seq);
+      break;
+    case GOMP_DIM_WORKER:
+      /* The only effect of this expansion is to define V, so skip it
+	 when the call has no LHS, as the other dimensions do.  */
+      if (v)
+	{
+	  tree type = TREE_TYPE (local_var);
+	  tree ptr = make_ssa_name (build_pointer_type (type));
+	  tree addr_call = nvptx_get_worker_red_addr_fn (local_var, rid, lid);
+	  tree ref;
+
+	  /* Read the worker's slot back with a load marked volatile.  */
+	  gimplify_assign (ptr, addr_call, &seq);
+	  ref = build_simple_mem_ref (ptr);
+	  TREE_THIS_VOLATILE (ref) = 1;
+	  gimplify_assign (v, ref, &seq);
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  pop_gimplify_context (NULL);
+
+  gsi_replace_with_seq (&gsi, seq, true);
+
+  return false;
+}
+
+/* NVPTX goacc.reduction early expander.  Mark the virtual operands for
+   renaming, then hand CALL to the expander that matches its internal
+   function and return that expander's result.  Any other internal
+   function is not expected here and aborts.  */
+
+bool
+nvptx_goacc_reduction (gimple call)
+{
+  /* Reductions modify the SSA names in complicated ways.  Let update_ssa
+     correct it.  */
+  mark_virtual_operands_for_renaming (cfun);
+
+  const enum internal_fn ifn = gimple_call_internal_fn (call);
+
+  if (ifn == IFN_GOACC_REDUCTION_SETUP)
+    return nvptx_goacc_reduction_setup (call);
+
+  if (ifn == IFN_GOACC_REDUCTION_INIT)
+    return nvptx_goacc_reduction_init (call);
+
+  if (ifn == IFN_GOACC_REDUCTION_FINI)
+    return nvptx_goacc_reduction_fini (call);
+
+  if (ifn == IFN_GOACC_REDUCTION_TEARDOWN)
+    return nvptx_goacc_reduction_teardown (call);
+
+  gcc_unreachable ();
+}
 
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -3749,6 +4318,9 @@ nvptx_xform_lock_unlock (gimple stmt, const int *ARG_UNUSED (dims),
 #undef TARGET_GOACC_LOCK_UNLOCK
 #define TARGET_GOACC_LOCK_UNLOCK nvptx_xform_lock_unlock
 
+#undef TARGET_GOACC_REDUCTION
+#define TARGET_GOACC_REDUCTION nvptx_goacc_reduction
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]