This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] Adjust UNIQUE ifn


I've applied this patch to the gomp4 branch. It's the reworking of IFN_UNIQUE suggested by Richard & Jakub.

1) IFN_UNIQUE is a ctrl-altering call, and thus ends up at the end of a BB.
2) tracer only needs to check that stmt (and it's already looking at it for other reasons)
3) IFN_UNIQUE is no longer ECF_LEAF
4) Inserted a data dependency chain to the head & tail call sequence. The 2nd param is the result of the previous call in the chain.

Preparing updated trunk patches now ...

nathan
2015-10-25  Nathan Sidwell  <nathan@codesourcery.com>

	* internal-fn.def (IFN_UNIQUE): Not a leaf.
	(IFN_UNIQUE, IFN_GOACC_LOOP): Move sub codes to ...
	* internal-fn.h (enum ifn_unique_kind, enum ifn_goacc_loop_kind):
	... here.  New enums.
	* internal-fn.c (expand_UNIQUE): Deal with data dependency var.
	* tree-cfg.c (gimple_call_initialize_ctrl_altering): Check for
	unique internal fn call.
	* config/nvptx/nvptx.md (oacc_fork, oacc_join): Deal with data
	dependency src & dest.
	* config/nvptx/nvptx.c (nvptx_xform_fork_join): Rename to ...
	(nvptx_goacc_fork_join): ... here.  Skip data dependency arg.
	* tracer.c (ignore_bb_p): Just look at last stmt for UNIQUE.
	* omp-low.c (lower_oacc_head_mark): Take data dependency arg.
	Use quick_push.
	(lower_oacc_loop_marker): Take data dependency arg.
	(lower_oacc_head_tail): Insert data dependency var.
	(new_oacc_loop): Adjust arg numbering.
	(dump_oacc_loop_part): Cope with block-straddling sequences.
	(oacc_loop_discover_walk): Likewise.
	(oacc_loop_xform_head_tail): Likewise.
	(execute_oacc_device_lower): Use two bools for scanning &
	deletion.

Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 229276)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -1400,20 +1400,28 @@
 )
 
 (define_expand "oacc_fork"
-  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
-		       UNSPECV_FORKED)]
+  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+        (match_operand:SI 1 "nvptx_general_operand" ""))
+   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+		        UNSPECV_FORKED)]
   ""
 {
-  nvptx_expand_oacc_fork (INTVAL (operands[0]));
+  if (operands[0] != const0_rtx)
+    emit_move_insn (operands[0], operands[1]);
+  nvptx_expand_oacc_fork (INTVAL (operands[2]));
   DONE;
 })
 
 (define_expand "oacc_join"
-  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
-		       UNSPECV_JOIN)]
+  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+        (match_operand:SI 1 "nvptx_general_operand" ""))
+   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+		        UNSPECV_JOIN)]
   ""
 {
-  nvptx_expand_oacc_join (INTVAL (operands[0]));
+  if (operands[0] != const0_rtx)
+    emit_move_insn (operands[0], operands[1]);
+  nvptx_expand_oacc_join (INTVAL (operands[2]));
   DONE;
 })
 
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 229276)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -4296,10 +4296,10 @@ nvptx_dim_limit (unsigned axis)
 /* Determine whether fork & joins are needed.  */
 
 static bool
-nvptx_xform_fork_join (gcall *call, const int dims[],
+nvptx_goacc_fork_join (gcall *call, const int dims[],
 		       bool ARG_UNUSED (is_fork))
 {
-  tree arg = gimple_call_arg (call, 1);
+  tree arg = gimple_call_arg (call, 2);
   unsigned axis = TREE_INT_CST_LOW (arg);
 
   /* We only care about worker and vector partitioning.  */
@@ -4844,7 +4844,7 @@ nvptx_use_anchors_for_symbol (const_rtx
 #define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
 
 #undef TARGET_GOACC_FORK_JOIN
-#define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join
+#define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
 
 #undef TARGET_GOACC_REDUCTION
 #define TARGET_GOACC_REDUCTION nvptx_goacc_reduction
Index: gcc/tracer.c
===================================================================
--- gcc/tracer.c	(revision 229276)
+++ gcc/tracer.c	(working copy)
@@ -93,25 +93,20 @@ bb_seen_p (basic_block bb)
 static bool
 ignore_bb_p (basic_block bb)
 {
-  gimple_stmt_iterator gsi;
-  gimple *g;
-
   if (bb->index < NUM_FIXED_BLOCKS)
     return true;
   if (optimize_bb_for_size_p (bb))
     return true;
 
-  /* A transaction is a single entry multiple exit region.  It must be
-     duplicated in its entirety or not at all.  */
-  g = last_stmt (CONST_CAST_BB (bb));
-  if (g && gimple_code (g) == GIMPLE_TRANSACTION)
-    return true;
-
-  /* Ignore blocks containing non-clonable function calls.  */
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+  if (gimple *g = last_stmt (CONST_CAST_BB (bb)))
     {
-      g = gsi_stmt (gsi);
+      /* A transaction is a single entry multiple exit region.  It
+	 must be duplicated in its entirety or not at all.  */
+      if (gimple_code (g) == GIMPLE_TRANSACTION)
+	return true;
 
+      /* An IFN_UNIQUE call must be duplicated as part of its group,
+	 or not at all.  */
       if (is_gimple_call (g) && gimple_call_internal_p (g)
 	  && gimple_call_internal_unique_p (g))
 	return true;
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def	(revision 229276)
+++ gcc/internal-fn.def	(working copy)
@@ -70,20 +70,8 @@ DEF_INTERNAL_FN (GOACC_DATA_END_WITH_ARG
 /* An unduplicable, uncombinable function.  Generally used to preserve
    a CFG property in the face of jump threading, tail merging or
    other such optimizations.  The first argument distinguishes
-   between uses.  Other arguments are as needed for use.  The return
-   type depends on use too.  */
-DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | ECF_LEAF, NULL)
-#define IFN_UNIQUE_UNSPEC 0  /* Undifferentiated UNIQUE.  */
-
-/* FORK and JOIN mark the points at which OpenACC partitioned
-   execution is entered or exited.  They take an INTEGER_CST argument,
-   indicating the axis of forking or joining and return nothing.  */
-#define IFN_UNIQUE_OACC_FORK 1
-#define IFN_UNIQUE_OACC_JOIN 2
-/* HEAD_MARK and TAIL_MARK are used to demark the sequence entering or
-   leaving partitioned execution.  */
-#define IFN_UNIQUE_OACC_HEAD_MARK 3
-#define IFN_UNIQUE_OACC_TAIL_MARK 4
+   between uses. See internal-fn.h for usage.  */
+DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW, NULL)
 
 /* DIM_SIZE and DIM_POS return the size of a particular compute
    dimension and the executing thread's position within that
@@ -112,28 +100,5 @@ DEF_INTERNAL_FN (GOACC_REDUCTION_INIT, E
 DEF_INTERNAL_FN (GOACC_REDUCTION_FINI, ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOACC_REDUCTION_TEARDOWN, ECF_NOTHROW, NULL)
 
-/* OpenACC looping abstraction.  Allows the precise stepping of
-   the compute geometry over the loop iterations to be deferred until
-   it is known which compiler is generating the code.  The action is
-   encoded in a constant first argument.
-
-     CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
-     STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
-     OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, CHUNK_NO)
-     BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET)
-
-     DIR - +1 for up loop, -1 for down loop
-     RANGE - Range of loop (END - BASE)
-     STEP - iteration step size
-     CHUNKING - size of chunking, (constant zero for no chunking)
-     CHUNK_NO - chunk number
-     MASK - partitioning mask.
-
-   TODO: The partitioning mask and chunk size are a transition stage,
-   they will be removed once the required infrastructure is in place.  */
-
+/* OpenACC looping abstraction.  See internal-fn.h for usage.  */
 DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)
-#define IFN_GOACC_LOOP_CHUNKS 0  /* Number  of chunks.  */
-#define IFN_GOACC_LOOP_STEP 1    /* Size of each thread's step.  */
-#define IFN_GOACC_LOOP_OFFSET 2  /* Initial iteration value.  */
-#define IFN_GOACC_LOOP_BOUND 3   /* Limit of iteration value.  */
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 229276)
+++ gcc/omp-low.c	(working copy)
@@ -5517,16 +5517,17 @@ lower_oacc_reductions (location_t loc, t
    be partitioned over.  */
 
 static unsigned
-lower_oacc_head_mark (location_t loc, tree clauses,
+lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
 		      gimple_seq *seq, omp_context *ctx)
 {
   unsigned levels = 0;
   unsigned tag = 0;
   tree gang_static = NULL_TREE;
-  auto_vec<tree, 1> args;
+  auto_vec<tree, 5> args;
 
   args.quick_push (build_int_cst
 		   (integer_type_node, IFN_UNIQUE_OACC_HEAD_MARK));
+  args.quick_push (ddvar);
   for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
     {
       switch (OMP_CLAUSE_CODE (c))
@@ -5594,13 +5595,14 @@ lower_oacc_head_mark (location_t loc, tr
   if (!levels)
     levels++;
 
-  args.safe_push (build_int_cst (integer_type_node, levels));
-  args.safe_push (build_int_cst (integer_type_node, tag));
+  args.quick_push (build_int_cst (integer_type_node, levels));
+  args.quick_push (build_int_cst (integer_type_node, tag));
   if (gang_static)
-    args.safe_push (gang_static);
+    args.quick_push (gang_static);
 
   gcall *call = gimple_build_call_internal_vec (IFN_UNIQUE, args);
   gimple_set_location (call, loc);
+  gimple_set_lhs (call, ddvar);
   gimple_seq_add_stmt (seq, call);
 
   return levels;
@@ -5610,15 +5612,17 @@ lower_oacc_head_mark (location_t loc, tr
    partitioning level of the enclosed region.  */ 
 
 static void
-lower_oacc_loop_marker (location_t loc, bool head, tree tofollow,
-			gimple_seq *seq)
+lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
+			tree tofollow, gimple_seq *seq)
 {
-  tree marker = build_int_cst
-    (integer_type_node, (head ? IFN_UNIQUE_OACC_HEAD_MARK
-			 : IFN_UNIQUE_OACC_TAIL_MARK));
-  gcall *call = gimple_build_call_internal
-    (IFN_UNIQUE, 1 + (tofollow != NULL_TREE), marker, tofollow);
+  int marker_kind = (head ? IFN_UNIQUE_OACC_HEAD_MARK
+		     : IFN_UNIQUE_OACC_TAIL_MARK);
+  tree marker = build_int_cst (integer_type_node, marker_kind);
+  int nargs = 2 + (tofollow != NULL_TREE);
+  gcall *call = gimple_build_call_internal (IFN_UNIQUE, nargs,
+					    marker, ddvar, tofollow);
   gimple_set_location (call, loc);
+  gimple_set_lhs (call, ddvar);
   gimple_seq_add_stmt (seq, call);
 }
 
@@ -5631,32 +5635,38 @@ lower_oacc_head_tail (location_t loc, tr
 		      gimple_seq *head, gimple_seq *tail, omp_context *ctx)
 {
   bool inner = false;
-  unsigned count = lower_oacc_head_mark (loc, clauses, head, ctx);
-  
+  tree ddvar = create_tmp_var (integer_type_node, ".data_dep");
+  gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
+
+  unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
   if (!count)
-    lower_oacc_loop_marker (loc, false, integer_zero_node, tail);
+    lower_oacc_loop_marker (loc, ddvar, false, integer_zero_node, tail);
+  
+  tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
+  tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
 
   for (unsigned done = 1; count; count--, done++)
     {
-      tree place = build_int_cst (integer_type_node, -1);
-      gcall *fork = gimple_build_call_internal
-	(IFN_UNIQUE, 2,
-	 build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK), place);
-      gcall *join = gimple_build_call_internal
-	(IFN_UNIQUE, 2,
-	 build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN), place);
       gimple_seq fork_seq = NULL;
       gimple_seq join_seq = NULL;
 
+      tree place = build_int_cst (integer_type_node, -1);
+      gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 3,
+						fork_kind, ddvar, place);
       gimple_set_location (fork, loc);
+      gimple_set_lhs (fork, ddvar);
+
+      gcall *join = gimple_build_call_internal (IFN_UNIQUE, 3,
+						join_kind, ddvar, place);
       gimple_set_location (join, loc);
+      gimple_set_lhs (join, ddvar);
 
       /* Mark the beginning of this level sequence.  */
       if (inner)
-	lower_oacc_loop_marker (loc, true,
+	lower_oacc_loop_marker (loc, ddvar, true,
 				build_int_cst (integer_type_node, count),
 				&fork_seq);
-      lower_oacc_loop_marker (loc, false,
+      lower_oacc_loop_marker (loc, ddvar, false,
 			      build_int_cst (integer_type_node, done),
 			      &join_seq);
 
@@ -5673,8 +5683,8 @@ lower_oacc_head_tail (location_t loc, tr
     }
 
   /* Mark the end of the sequence.  */
-  lower_oacc_loop_marker (loc, true, NULL_TREE, head);
-  lower_oacc_loop_marker (loc, false, NULL_TREE, tail);
+  lower_oacc_loop_marker (loc, ddvar, true, NULL_TREE, head);
+  lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail);
 }
 
 /* Generate code to implement the REDUCTION clauses.  */
@@ -19167,11 +19177,11 @@ new_oacc_loop (oacc_loop *parent, gcall
   /* TODO: This is where device_type flattening would occur for the loop
      flags.   */
 
-  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 2));
+  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
 
   tree chunk_size = integer_zero_node;
   if (loop->flags & OLF_GANG_STATIC)
-    chunk_size = gimple_call_arg (marker, 3);
+    chunk_size = gimple_call_arg (marker, 4);
   loop->chunk_size = chunk_size;
 
   return loop;
@@ -19223,25 +19233,27 @@ static void
 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
 		     const char *title, int level)
 {
-  gimple_stmt_iterator gsi = gsi_for_stmt (from);
   unsigned code = TREE_INT_CST_LOW (gimple_call_arg (from, 0));
 
   fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
-  for (gimple *stmt = from; ;)
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
     {
-      print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
-      gsi_next (&gsi);
-      stmt = gsi_stmt (gsi);
+      gimple *stmt = gsi_stmt (gsi);
 
-      if (!is_gimple_call (stmt))
-	continue;
+      if (is_gimple_call (stmt)
+	  && gimple_call_internal_p (stmt)
+	  && gimple_call_internal_fn (stmt) == IFN_UNIQUE)
+	{
+	  unsigned c = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
 
-      gcall *call = as_a <gcall *> (stmt);
-      
-      if (gimple_call_internal_p (call)
-	  && gimple_call_internal_fn (call) == IFN_UNIQUE
-	  && code == TREE_INT_CST_LOW (gimple_call_arg (call, 0)))
-	break;
+	  if (c == code && stmt != from)
+	    break;
+	}
+      print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
+
+      gsi_next (&gsi);
+      while (gsi_end_p (gsi))
+	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
     }
 }
 
@@ -19295,12 +19307,14 @@ debug_oacc_loop (oacc_loop *loop)
 static void
 oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
 {
+  int marker = 0;
+  int remaining = 0;
+
   if (bb->flags & BB_VISITED)
     return;
-  bb->flags |= BB_VISITED;
 
-  int marker = 0;
-  int remaining = 0;
+ follow:
+  bb->flags |= BB_VISITED;
 
   /* Scan for loop markers.  */
   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
@@ -19331,7 +19345,7 @@ oacc_loop_discover_walk (oacc_loop *loop
       if (code == IFN_UNIQUE_OACC_HEAD_MARK
 	  || code == IFN_UNIQUE_OACC_TAIL_MARK)
 	{
-	  if (gimple_call_num_args (call) == 1)
+	  if (gimple_call_num_args (call) == 2)
 	    {
 	      gcc_assert (marker && !remaining);
 	      marker = 0;
@@ -19342,7 +19356,7 @@ oacc_loop_discover_walk (oacc_loop *loop
 	    }
 	  else
 	    {
-	      int count = TREE_INT_CST_LOW (gimple_call_arg (call, 1));
+	      int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
 
 	      if (!marker)
 		{
@@ -19363,7 +19377,12 @@ oacc_loop_discover_walk (oacc_loop *loop
 	    }
 	}
     }
-  gcc_assert (!remaining && !marker);
+  if (remaining || marker)
+    {
+      bb = single_succ (bb);
+      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
+      goto follow;
+    }
 
   /* Walk successor blocks.  */
   edge e;
@@ -19424,50 +19443,35 @@ oacc_loop_discovery ()
 static void
 oacc_loop_xform_head_tail (gcall *from, int level)
 {
-  gimple_stmt_iterator gsi = gsi_for_stmt (from);
   unsigned code = TREE_INT_CST_LOW (gimple_call_arg (from, 0));
   tree replacement  = build_int_cst (unsigned_type_node, level);
 
-  for (gimple *stmt = from; ;)
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
     {
-      gsi_next (&gsi);
-      stmt = gsi_stmt (gsi);
-
-      if (!is_gimple_call (stmt))
-	continue;
-
-      gcall *call = as_a <gcall *> (stmt);
+      gimple *stmt = gsi_stmt (gsi);
       
-      if (!gimple_call_internal_p (call))
-	continue;
-
-      switch (gimple_call_internal_fn (call))
-	{
-	case IFN_UNIQUE:
-	  {
-	    unsigned c = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
-
-	    if (c == code)
-	      goto break2;
-
-	    if (c == IFN_UNIQUE_OACC_FORK || c == IFN_UNIQUE_OACC_JOIN)
-	      *gimple_call_arg_ptr (call, 1) = replacement;
-	  }
-	  break;
-
-	case IFN_GOACC_REDUCTION_SETUP:
-	case IFN_GOACC_REDUCTION_INIT:
-	case IFN_GOACC_REDUCTION_FINI:
-	case IFN_GOACC_REDUCTION_TEARDOWN:
-	  *gimple_call_arg_ptr (call, 2) = replacement;
-	  break;
-
-	default:
-	  break;
+      if (!is_gimple_call (stmt)
+	  || !gimple_call_internal_p (stmt))
+	;
+      else if (gimple_call_internal_fn (stmt) == IFN_UNIQUE)
+	{
+	  unsigned c = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
+
+	  if (c == IFN_UNIQUE_OACC_FORK || c == IFN_UNIQUE_OACC_JOIN)
+	    *gimple_call_arg_ptr (stmt, 2) = replacement;
+	  else if (c == code && stmt != from)
+	    break;
 	}
-    }
+      else if (gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_SETUP
+	       || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_INIT
+	       || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_FINI
+	       || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_TEARDOWN)
+	*gimple_call_arg_ptr (stmt, 2) = replacement;
 
- break2:;
+      gsi_next (&gsi);
+      while (gsi_end_p (gsi))
+	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
+    }
 }
 
 /* Transform the IFN_GOACC_LOOP internal functions by providing the
@@ -19875,7 +19879,7 @@ execute_oacc_device_lower ()
 
 	/* Rewind to allow rescan.  */
 	gsi_prev (&gsi);
-	int rescan = 0;
+	bool rescan = false, remove = false;
 	unsigned ifn_code = gimple_call_internal_fn (call);
 
 	switch (ifn_code)
@@ -19885,16 +19889,17 @@ execute_oacc_device_lower ()
 	  case IFN_GOACC_DIM_POS:
 	  case IFN_GOACC_DIM_SIZE:
 	    if (gimple_call_lhs (call) == NULL_TREE)
-	      rescan = -1;
-	    else if (oacc_xform_dim (call, dims, ifn_code == IFN_GOACC_DIM_POS))
-	      rescan = 1;
+	      remove = true;
+	    else if (oacc_xform_dim (call, dims,
+				     ifn_code == IFN_GOACC_DIM_POS))
+	      rescan = true;
 	    break;
 
 	  case IFN_GOACC_LOOP:
 	    oacc_xform_loop (call);
-	    rescan = 1;
+	    rescan = true;
 	    break;
-	    
+
 	  case IFN_GOACC_REDUCTION_SETUP:
 	  case IFN_GOACC_REDUCTION_INIT:
 	  case IFN_GOACC_REDUCTION_FINI:
@@ -19908,7 +19913,7 @@ execute_oacc_device_lower ()
 	      default_goacc_reduction (call);
 	    else
 	      targetm.goacc.reduction (call);
-	    rescan = 1;
+	    rescan = true;
 	    break;
 
 	  case IFN_UNIQUE:
@@ -19919,16 +19924,16 @@ execute_oacc_device_lower ()
 		{
 		case IFN_UNIQUE_OACC_FORK:
 		case IFN_UNIQUE_OACC_JOIN:
-		  if (integer_minus_onep (gimple_call_arg (call, 1)))
-		    rescan = -1;
+		  if (integer_minus_onep (gimple_call_arg (call, 2)))
+		    remove = true;
 		  else if (targetm.goacc.fork_join
 			   (call, dims, code == IFN_UNIQUE_OACC_FORK))
-		    rescan = -1;
+		    remove = true;
 		  break;
 
 		case IFN_UNIQUE_OACC_HEAD_MARK:
 		case IFN_UNIQUE_OACC_TAIL_MARK:
-		  rescan = -1;
+		  remove = true;
 		  break;
 		}
 	      break;
@@ -19942,16 +19947,24 @@ execute_oacc_device_lower ()
 	  /* Undo the rewind.  */
 	  gsi_next (&gsi);
 
-	if (!rescan)
-	  /* If not rescanning, advance over the call.  */
-	  gsi_next (&gsi);
-	else if (rescan < 0)
+	if (remove)
 	  {
 	    if (gimple_vdef (call))
 	      replace_uses_by (gimple_vdef (call),
 			       gimple_vuse (call));
-	    gsi_remove (&gsi, true);
+	    if (gimple_call_lhs (call))
+	      {
+		/* Propagate the data dependency var.  */
+		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
+						   gimple_call_arg (call, 1));
+		gsi_replace (&gsi, ass,  false);
+	      }
+	    else
+	      gsi_remove (&gsi, true);
 	  }
+	else if (!rescan)
+	  /* If not rescanning, advance over the call.  */
+	  gsi_next (&gsi);
       }
 
   free_oacc_loop (loops);
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 229276)
+++ gcc/tree-cfg.c	(working copy)
@@ -487,7 +487,11 @@ gimple_call_initialize_ctrl_altering (gi
       || ((flags & ECF_TM_BUILTIN)
 	  && is_tm_ending_fndecl (gimple_call_fndecl (stmt)))
       /* BUILT_IN_RETURN call is same as return statement.  */
-      || gimple_call_builtin_p (stmt, BUILT_IN_RETURN))
+      || gimple_call_builtin_p (stmt, BUILT_IN_RETURN)
+      /* IFN_UNIQUE should be the last insn, to make checking for it
+	 as cheap as possible.  */
+      || (gimple_call_internal_p (stmt)
+	  && gimple_call_internal_unique_p (stmt)))
     gimple_call_set_ctrl_altering (stmt, true);
   else
     gimple_call_set_ctrl_altering (stmt, false);
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c	(revision 229276)
+++ gcc/internal-fn.c	(working copy)
@@ -1962,8 +1962,9 @@ static void
 expand_UNIQUE (gcall *stmt)
 {
   rtx pattern = NULL_RTX;
+  int code = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
 
-  switch (TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)))
+  switch (code)
     {
     default:
       gcc_unreachable ();
@@ -1975,21 +1976,34 @@ expand_UNIQUE (gcall *stmt)
       break;
 
     case IFN_UNIQUE_OACC_FORK:
+    case IFN_UNIQUE_OACC_JOIN:
+      {
+	tree lhs = gimple_call_lhs (stmt);
+	rtx target = const0_rtx;
+
+	if (lhs)
+	  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+	rtx data_dep = expand_normal (gimple_call_arg (stmt, 1));
+	rtx axis = expand_normal (gimple_call_arg (stmt, 2));
+
+	if (code == IFN_UNIQUE_OACC_FORK)
+	  {
 #ifdef HAVE_oacc_fork
-      pattern = expand_normal (gimple_call_arg (stmt, 1));
-      pattern = gen_oacc_fork (pattern);
+	    pattern = gen_oacc_fork (target, data_dep, axis);
 #else
-      gcc_unreachable ();
+	    gcc_unreachable ();
 #endif
-      break;
-
-    case IFN_UNIQUE_OACC_JOIN:
+	  }
+	else
+	  {
 #ifdef HAVE_oacc_join
-      pattern = expand_normal (gimple_call_arg (stmt, 1));
-      pattern = gen_oacc_join (pattern);
+	    pattern = gen_oacc_join (target, data_dep, axis);
 #else
-      gcc_unreachable ();
+	    gcc_unreachable ();
 #endif
+	  }
+      }
       break;
     }
 
Index: gcc/internal-fn.h
===================================================================
--- gcc/internal-fn.h	(revision 229276)
+++ gcc/internal-fn.h	(working copy)
@@ -20,6 +20,52 @@ along with GCC; see the file COPYING3.
 #ifndef GCC_INTERNAL_FN_H
 #define GCC_INTERNAL_FN_H
 
+/* INTEGER_CST values for IFN_UNIQUE function arg-0.  */
+enum ifn_unique_kind {
+  IFN_UNIQUE_UNSPEC,  /* Undifferentiated UNIQUE.  */
+
+  /* FORK and JOIN mark the points at which OpenACC partitioned
+     execution is entered or exited.
+     return: data dependency value
+     arg-1: data dependency var
+     arg-2: INTEGER_CST argument, indicating the axis.  */
+  IFN_UNIQUE_OACC_FORK,
+  IFN_UNIQUE_OACC_JOIN,
+
+  /* HEAD_MARK and TAIL_MARK are used to demark the sequence entering
+     or leaving partitioned execution.
+     return: data dependency value
+     arg-1: data dependency var
+     arg-2: INTEGER_CST argument, remaining markers in this sequence
+     arg-3...: varargs on primary header  */
+  IFN_UNIQUE_OACC_HEAD_MARK,
+  IFN_UNIQUE_OACC_TAIL_MARK
+};
+
+/* INTEGER_CST values for IFN_GOACC_LOOP arg-0.  Allows the precise
+   stepping of the compute geometry over the loop iterations to be
+   deferred until it is known which compiler is generating the code.
+   The action is encoded in a constant first argument.
+
+     CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
+     STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
+     OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, CHUNK_NO)
+     BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET)
+
+     DIR - +1 for up loop, -1 for down loop
+     RANGE - Range of loop (END - BASE)
+     STEP - iteration step size
+     CHUNKING - size of chunking, (constant zero for no chunking)
+     CHUNK_NO - chunk number
+     MASK - partitioning mask.  */
+
+enum ifn_goacc_loop_kind {
+  IFN_GOACC_LOOP_CHUNKS,  /* Number of chunks.  */
+  IFN_GOACC_LOOP_STEP,    /* Size of each thread's step.  */
+  IFN_GOACC_LOOP_OFFSET,  /* Initial iteration value.  */
+  IFN_GOACC_LOOP_BOUND    /* Limit of iteration value.  */
+};
+
 /* Initialize internal function tables.  */
 
 extern void init_internal_fns ();

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]