This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


[gomp4] backport changes


I've committed this to the gomp4 branch. It reduces the delta from trunk by removing unneeded code and porting changes that were made en route to my recent merge of code to trunk.

nathan
2015-10-28  Nathan Sidwell  <nathan@codesourcery.com>

	* target.def (goacc.fork_join): Invert sense of result.
	* doc/tm.texi: Rebuilt.
	* config/nvptx/nvptx.md (nvptx_membar): Delete.
	* config/nvptx/nvptx.c: Update comments & formatting from trunk.
	(BARRIER_SHARED, BARRIER_GLOBAL, BARRIER_SYS): Delete.
	(worker_bcast_hwm): Rename to ...
	(worker_bcast_size): ... here.  Update users.
	(nvptx_emit_forking): Update SESE comment.
	(struct wcast_data_t): Document fields.
	(nvptx_print_operand): Remove 'B' handling.
	(propagator_fn): New typedef.
	(nvptx_propagate): Use it.
	(nvptx_goacc_fork_join): Invert result sense.
	* omp-low.c: Update comments and formatting from trunk.
	(scan_omp_target): Use flag_checking, not ENABLE_CHECKING.
	(struct oacc_collapse, expand_oacc_collapse_init,
	expand_oacc_collapse_vars): Move earlier.
	(default_goacc_fork_join): Use targetm, invert sense of result.
	(execute_oacc_device_lower): Adjust goacc.fork_join handling.

Index: target.def
===================================================================
--- target.def	(revision 229493)
+++ target.def	(working copy)
@@ -1667,10 +1667,13 @@ default_goacc_dim_limit)
 
 DEFHOOK
 (fork_join,
-"This hook should convert IFN_GOACC_FORK and IFN_GOACC_JOIN function\n\
-calls to target-specific gimple.  It is executed during the oacc_xform\n\
-pass.  It should return true, if the functions should be deleted.  The\n\
-default hook returns true, if there are no RTL expanders for them.",
+"This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
+function calls to target-specific gimple, or indicate whether they\n\
+should be retained.  It is executed during the oacc_device_lower pass.\n\
+It should return true, if the call should be retained.  It should\n\
+return false, if it is to be deleted (either because target-specific\n\
+gimple has been inserted before it, or there is no need for it).\n\
+The default hook returns false, if there are no RTL expanders for them.",
 bool, (gcall *call, const int dims[], bool is_fork),
 default_goacc_fork_join)
 
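To illustrate the new contract: a target that needs no device-specific
lowering of these markers could define the hook as in the following
sketch (hypothetical target code, not part of this patch).  Returning
false asks for the call to be deleted; returning true keeps it for
later RTL expansion.

static bool
example_goacc_fork_join (gcall *call ATTRIBUTE_UNUSED,
			 const int dims[] ATTRIBUTE_UNUSED,
			 bool is_fork ATTRIBUTE_UNUSED)
{
  /* Nothing target-specific was emitted and the marker is not needed
     at RTL expansion time, so ask for the call to be deleted.  */
  return false;
}
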
Index: config/nvptx/nvptx.md
===================================================================
--- config/nvptx/nvptx.md	(revision 229493)
+++ config/nvptx/nvptx.md	(working copy)
@@ -1560,9 +1560,3 @@
 		    UNSPECV_BARSYNC)]
   ""
   "\\tbar.sync\\t%0;")
-
-(define_insn "nvptx_membar"
-  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
-		    UNSPECV_MEMBAR)]
-  ""
-  "%.\\tmembar%B0;")
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c	(revision 229493)
+++ config/nvptx/nvptx.c	(working copy)
@@ -81,11 +81,6 @@
 #define SHUFFLE_BFLY 2
 #define SHUFFLE_IDX 3
 
-/* Memory barrier levels.  */
-#define BARRIER_SHARED 0
-#define BARRIER_GLOBAL 1
-#define BARRIER_SYS 2
-
 /* Record the function decls we've written, and the libfuncs and function
    decls corresponding to them.  */
 static std::stringstream func_decls;
@@ -113,7 +108,7 @@ static GTY((cache)) hash_table<tree_hash
    by all functions emitted.  The buffer is placed in shared memory.
    It'd be nice if PTX supported common blocks, because then this
    could be shared across TUs (taking the largest size).  */
-static unsigned worker_bcast_hwm;
+static unsigned worker_bcast_size;
 static unsigned worker_bcast_align;
 #define worker_bcast_name "__worker_bcast"
 static GTY(()) rtx worker_bcast_sym;
@@ -259,7 +254,10 @@ nvptx_emit_forking (unsigned mask, bool
     {
       rtx op = GEN_INT (mask | (is_call << GOMP_DIM_MAX));
       
-      /* Emit fork at all levels, this helps form SESE regions..  */
+      /* Emit fork at all levels.  This helps form SESE regions, as
+	 it creates a block with a single successor before entering a
+	 partitioned region.  That is a good candidate for the end of
+	 an SESE region.  */
       if (!is_call)
 	emit_insn (gen_nvptx_fork (op));
       emit_insn (gen_nvptx_forked (op));
@@ -990,6 +988,7 @@ nvptx_expand_call (rtx retval, rtx addre
 	  write_func_decl_from_insn (func_decls, retval, pat, callee);
 	}
     }
+
   nvptx_emit_forking (parallel, true);
   emit_call_insn (pat);
   nvptx_emit_joining (parallel, true);
@@ -1321,9 +1320,9 @@ nvptx_gen_vcast (rtx reg)
 
 struct wcast_data_t
 {
-  rtx base;
-  rtx ptr;
-  unsigned offset;
+  rtx base;  /* Register holding base addr of buffer.  */
+  rtx ptr;  /* Iteration var,  if needed.  */
+  unsigned offset; /* Offset into worker buffer.  */
 };
 
 /* Direction of the spill/fill and looping setup/teardown indicator.  */
@@ -2000,7 +1999,6 @@ nvptx_print_operand_address (FILE *file,
    A -- print an address space identifier for a MEM
    c -- print an opcode suffix for a comparison operator, including a type code
    f -- print a full reg even for something that must always be split
-   B -- print a memory barrier level specified by CONST_INT
    R -- print an address space specified by CONST_INT
    S -- print a shuffle kind specified by CONST_INT
    t -- print a type opcode suffix, promoting QImode to 32 bits
@@ -2044,15 +2042,6 @@ nvptx_print_operand (FILE *file, rtx x,
       }
       break;
 
-    case 'B':
-      {
-	unsigned kind = UINTVAL (x);
-	static const char *const kinds[] = 
-	  {"cta", "gl", "sys"};
-	fprintf (file, ".%s", kinds[kind]);
-      }
-      break;
-
     case 't':
       op_mode = nvptx_underlying_object_mode (x);
       fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true));
@@ -2078,7 +2067,7 @@ nvptx_print_operand (FILE *file, rtx x,
 	fprintf (file, ".%s", kinds[kind]);
       }
       break;
-      
+
     case 'T':
       fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x)));
       break;
@@ -2329,9 +2318,8 @@ nvptx_reorg_subreg (void)
     }
 }
 
-/* Loop structure of the function.The entire function is described as
-   a NULL loop.  We should be able to extend this to represent
-   superblocks.  */
+/* Loop structure of the function.  The entire function is described
+   as a NULL loop.  */
 
 struct parallel
 {
@@ -2422,9 +2410,9 @@ nvptx_split_blocks (bb_insn_map_t *map)
     {
       bool seen_insn = false;
 
-      // Clear visited flag, for use by parallel locator  */
+      /* Clear visited flag, for use by parallel locator  */
       block->flags &= ~BB_VISITED;
-      
+
       FOR_BB_INSNS (block, insn)
 	{
 	  if (!INSN_P (insn))
@@ -3253,10 +3241,11 @@ nvptx_find_sese (auto_vec<basic_block> &
    the partitioned regions and (b) only propagating stack entries that
    are used.  The latter might be quite hard to determine.  */
 
+typedef rtx (*propagator_fn) (rtx, propagate_mask, unsigned, void *);
+
 static void
 nvptx_propagate (basic_block block, rtx_insn *insn, propagate_mask rw,
-		 rtx (*fn) (rtx, propagate_mask,
-			    unsigned, void *), void *data)
+		 propagator_fn fn, void *data)
 {
   bitmap live = DF_LIVE_IN (block);
   bitmap_iterator iterator;
@@ -3287,7 +3276,7 @@ nvptx_propagate (basic_block block, rtx_
 	  label = gen_label_rtx ();
 	  
 	  emit_insn (gen_rtx_SET (idx, GEN_INT (fs)));
-	  /* Allow worker function to initialize anything needed */
+	  /* Allow worker function to initialize anything needed.  */
 	  rtx init = fn (tmp, PM_loop_begin, fs, data);
 	  if (init)
 	    emit_insn (init);
@@ -3403,8 +3392,8 @@ nvptx_wpropagate (bool pre_p, basic_bloc
       rtx init = gen_rtx_SET (data.base, worker_bcast_sym);
       emit_insn_after (init, insn);
       
-      if (worker_bcast_hwm < data.offset)
-	worker_bcast_hwm = data.offset;
+      if (worker_bcast_size < data.offset)
+	worker_bcast_size = data.offset;
     }
 }
 
@@ -3473,7 +3462,8 @@ nvptx_single (unsigned mask, basic_block
       /* If this is a dummy insn, do nothing.  */
       switch (recog_memoized (head))
 	{
-	default:break;
+	default:
+	  break;
 	case CODE_FOR_nvptx_fork:
 	case CODE_FOR_nvptx_forked:
 	case CODE_FOR_nvptx_joining:
@@ -3541,8 +3531,8 @@ nvptx_single (unsigned mask, basic_block
 	  data.base = worker_bcast_sym;
 	  data.ptr = 0;
 
-	  if (worker_bcast_hwm < GET_MODE_SIZE (SImode))
-	    worker_bcast_hwm = GET_MODE_SIZE (SImode);
+	  if (worker_bcast_size < GET_MODE_SIZE (SImode))
+	    worker_bcast_size = GET_MODE_SIZE (SImode);
 
 	  data.offset = 0;
 	  emit_insn_before (nvptx_gen_wcast (pvar, PM_read, 0, &data),
@@ -3664,7 +3654,7 @@ nvptx_process_pars (parallel *par)
     }
 
   if (par->mask & GOMP_DIM_MASK (GOMP_DIM_MAX))
-    { /* No propagation needed for a call.  */ }
+    /* No propagation needed for a call.  */;
   else if (par->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
     {
       nvptx_wpropagate (false, par->forked_block, par->forked_insn);
@@ -3689,8 +3679,9 @@ nvptx_process_pars (parallel *par)
 static void
 nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer)
 {
-  unsigned me = par->mask
-    & (GOMP_DIM_MASK (GOMP_DIM_WORKER) | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
+  unsigned me = (par->mask
+		 & (GOMP_DIM_MASK (GOMP_DIM_WORKER)
+		    | GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
   unsigned  skip_mask = 0, neuter_mask = 0;
   
   if (par->inner)
@@ -3699,9 +3690,9 @@ nvptx_neuter_pars (parallel *par, unsign
   for (unsigned mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
     {
       if ((outer | me) & GOMP_DIM_MASK (mode))
-	{ /* Mode is partitioned: no neutering.  */ }
+	{} /* Mode is partitioned: no neutering.  */
       else if (!(modes & GOMP_DIM_MASK (mode)))
-	{ /* Mode  is not used: nothing to do.  */ }
+	{} /* Mode  is not used: nothing to do.  */
       else if (par->inner_mask & GOMP_DIM_MASK (mode)
 	       || !par->forked_insn)
 	/* Partitioned in inner parallels, or we're not a partitioned
@@ -3713,7 +3704,7 @@ nvptx_neuter_pars (parallel *par, unsign
 	   parallel at this level.  */
 	skip_mask |= GOMP_DIM_MASK (mode);
       else
-	{ /* Parent will skip this parallel itself.  */ }
+	{} /* Parent will skip this parallel itself.  */
     }
 
   if (neuter_mask)
@@ -3831,8 +3822,9 @@ nvptx_reorg (void)
       delete pars;
     }
 
+  /* Replace subregs.  */
   nvptx_reorg_subreg ();
-  
+
   regstat_free_n_sets_and_refs ();
 
   df_finish_pass (true);
@@ -3933,7 +3925,7 @@ nvptx_record_offload_symbol (tree decl)
 	fprintf (asm_out_file, "\n");
       }
       break;
-  
+
     default:
       gcc_unreachable ();
     }
@@ -3967,17 +3959,17 @@ nvptx_file_end (void)
     nvptx_record_fndecl (decl, true);
   fputs (func_decls.str().c_str(), asm_out_file);
 
-  if (worker_bcast_hwm)
+  if (worker_bcast_size)
     {
       /* Define the broadcast buffer.  */
 
-      worker_bcast_hwm = (worker_bcast_hwm + worker_bcast_align - 1)
+      worker_bcast_size = (worker_bcast_size + worker_bcast_align - 1)
 	& ~(worker_bcast_align - 1);
       
       fprintf (asm_out_file, "// BEGIN VAR DEF: %s\n", worker_bcast_name);
       fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
 	       worker_bcast_align,
-	       worker_bcast_name, worker_bcast_hwm);
+	       worker_bcast_name, worker_bcast_size);
     }
 
   if (worker_red_size)
@@ -4237,13 +4229,13 @@ nvptx_goacc_fork_join (gcall *call, cons
 
   /* We only care about worker and vector partitioning.  */
   if (axis < GOMP_DIM_WORKER)
-    return true;
+    return false;
 
   /* If the size is 1, there's no partitioning.  */
   if (dims[axis] == 1)
-    return true;
+    return false;
 
-  return false;
+  return true;
 }
 
 static tree
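
A note on the buffer sizing above: nvptx_file_end rounds
worker_bcast_size up to a multiple of worker_bcast_align using the
standard power-of-two round-up idiom.  Written as a standalone helper
(a sketch only; the patch keeps the expression inline, and the
broadcast buffer's alignment is assumed to be a power of two):

/* Round SIZE up to the next multiple of ALIGN, where ALIGN is a power
   of two.  For example, size 10 with align 8 gives (10 + 7) & ~7 = 16.  */

static unsigned
round_up_align (unsigned size, unsigned align)
{
  return (size + align - 1) & ~(align - 1);
}
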
Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 229493)
+++ doc/tm.texi	(working copy)
@@ -5772,10 +5772,13 @@ or zero if unbounded.
 @end deftypefn
 
 @deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int @var{dims[]}, bool @var{is_fork})
-This hook should convert IFN_GOACC_FORK and IFN_GOACC_JOIN function
-calls to target-specific gimple.  It is executed during the oacc_xform
-pass.  It should return true, if the functions should be deleted.  The
-default hook returns true, if there are no RTL expanders for them.
+This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
+function calls to target-specific gimple, or indicate whether they
+should be retained.  It is executed during the oacc_device_lower pass.
+It should return true, if the call should be retained.  It should
+return false, if it is to be deleted (either because target-specific
+gimple has been inserted before it, or there is no need for it).
+The default hook returns false, if there are no RTL expanders for them.
 @end deftypefn
 
 @deftypefn {Target Hook} void TARGET_GOACC_REDUCTION (gcall *@var{call})
Index: omp-low.c
===================================================================
--- omp-low.c	(revision 229493)
+++ omp-low.c	(working copy)
@@ -258,21 +258,21 @@ struct oacc_loop
 
 /*  Flags for an OpenACC loop.  */
 
-enum oacc_loop_flags
-  {
-    OLF_SEQ	= 1u << 0,  /* Explicitly sequential  */
-    OLF_AUTO	= 1u << 1,	/* Compiler chooses axes.  */
-    OLF_INDEPENDENT = 1u << 2,	/* Iterations are known independent.  */
-    OLF_GANG_STATIC = 1u << 3,	/* Gang partitioning is static (has op). */
-
-    /* Explicitly specified loop axes.  */
-    OLF_DIM_BASE = 4,
-    OLF_DIM_GANG   = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
-    OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
-    OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),
+enum oacc_loop_flags {
+  OLF_SEQ	= 1u << 0,  /* Explicitly sequential  */
+  OLF_AUTO	= 1u << 1,	/* Compiler chooses axes.  */
+  OLF_INDEPENDENT = 1u << 2,	/* Iterations are known independent.  */
+  OLF_GANG_STATIC = 1u << 3,	/* Gang partitioning is static (has op). */
+
+  /* Explicitly specified loop axes.  */
+  OLF_DIM_BASE = 4,
+  OLF_DIM_GANG   = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
+  OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
+  OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),
+
+  OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
+};
 
-    OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
-  };
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
@@ -295,6 +295,8 @@ static gphi *find_phi_with_arg_on_edge (
       *handled_ops_p = false; \
       break;
 
+/* Return true if CTX corresponds to an oacc parallel region.  */
+
 static bool
 is_oacc_parallel (omp_context *ctx)
 {
@@ -507,6 +509,7 @@ is_combined_parallel (struct omp_region
   return region->is_combined_parallel;
 }
 
+
 /* Extract the header elements of parallel loop FOR_STMT and store
    them into *FD.  */
 
@@ -1609,6 +1612,7 @@ omp_copy_decl (tree var, copy_body_data
   return error_mark_node;
 }
 
+
 /* Debugging dumps for parallel regions.  */
 void dump_omp_region (FILE *, struct omp_region *, int);
 void debug_omp_region (struct omp_region *);
@@ -1746,6 +1750,7 @@ new_omp_context (gimple *stmt, omp_conte
       ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
       ctx->depth = 1;
     }
+
   ctx->cb.decl_map = new hash_map<tree, tree>;
 
   return ctx;
@@ -2924,12 +2929,15 @@ finish_taskreg_scan (omp_context *ctx)
     }
 }
 
+/* Find the enclosing offload context.  */
+
 static omp_context *
 enclosing_target_ctx (omp_context *ctx)
 {
-  while (ctx != NULL
-	 && gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET)
-    ctx = ctx->outer;
+  for (; ctx; ctx = ctx->outer)
+    if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET)
+      break;
+
   return ctx;
 }
 
@@ -2998,12 +3006,15 @@ check_oacc_kernel_gwv (gomp_for *stmt, o
   if (checking)
     {
       if (has_seq && (this_mask || has_auto))
-	error_at (gimple_location (stmt), "%<seq%> overrides other OpenACC loop specifiers");
+	error_at (gimple_location (stmt), "%<seq%> overrides other"
+		  " OpenACC loop specifiers");
       else if (has_auto && this_mask)
-	error_at (gimple_location (stmt), "%<auto%> conflicts with other OpenACC loop specifiers");
+	error_at (gimple_location (stmt), "%<auto%> conflicts with other"
+		  " OpenACC loop specifiers");
 
       if (this_mask & outer_mask)
-	error_at (gimple_location (stmt), "inner loop uses same  OpenACC parallelism as containing loop");
+	error_at (gimple_location (stmt), "inner loop uses same  OpenACC"
+		  " parallelism as containing loop");
     }
 
   return outer_mask | this_mask;
@@ -3049,7 +3060,7 @@ scan_omp_for (gomp_for *stmt, omp_contex
 
 	    if (check && OMP_CLAUSE_OPERAND (c, 0))
 	      error_at (gimple_location (stmt),
-			"argument not permitted on %<%s%> clause in"
+			"argument not permitted on %qs clause in"
 			" OpenACC %<parallel%>", check);
 	  }
 
@@ -3159,14 +3170,15 @@ scan_omp_target (gomp_target *stmt, omp_
     {
       TYPE_FIELDS (ctx->record_type)
 	= nreverse (TYPE_FIELDS (ctx->record_type));
-#ifdef ENABLE_CHECKING
-      tree field;
-      unsigned int align = DECL_ALIGN (TYPE_FIELDS (ctx->record_type));
-      for (field = TYPE_FIELDS (ctx->record_type);
-	   field;
-	   field = DECL_CHAIN (field))
-	gcc_assert (DECL_ALIGN (field) == align);
-#endif
+      if (flag_checking)
+	{
+	  tree field;
+	  unsigned int align = DECL_ALIGN (TYPE_FIELDS (ctx->record_type));
+	  for (field = TYPE_FIELDS (ctx->record_type);
+	       field;
+	       field = DECL_CHAIN (field))
+	    gcc_assert (DECL_ALIGN (field) == align);
+	}
       layout_type (ctx->record_type);
       if (offloaded)
 	fixup_child_record_type (ctx);
@@ -7144,6 +7156,149 @@ expand_omp_taskreg (struct omp_region *r
     update_ssa (TODO_update_ssa_only_virtuals);
 }
 
+/* Information about members of an OpenACC collapsed loop nest.  */
+
+struct oacc_collapse
+{
+  tree base;  /* Base value. */
+  tree iters; /* Number of steps.  */
+  tree step;  /* step size.  */
+};
+
+/* Helper for expand_oacc_for.  Determine collapsed loop information.
+   Fill in COUNTS array.  Emit any initialization code before GSI.
+   Return the calculated outer loop bound of BOUND_TYPE.  */
+
+static tree
+expand_oacc_collapse_init (const struct omp_for_data *fd,
+			   gimple_stmt_iterator *gsi,
+			   oacc_collapse *counts, tree bound_type)
+{
+  tree total = build_int_cst (bound_type, 1);
+  int ix;
+  
+  gcc_assert (integer_onep (fd->loop.step));
+  gcc_assert (integer_zerop (fd->loop.n1));
+
+  for (ix = 0; ix != fd->collapse; ix++)
+    {
+      const omp_for_data_loop *loop = &fd->loops[ix];
+
+      tree iter_type = TREE_TYPE (loop->v);
+      tree diff_type = iter_type;
+      tree plus_type = iter_type;
+
+      gcc_assert (loop->cond_code == fd->loop.cond_code);
+      
+      if (POINTER_TYPE_P (iter_type))
+	plus_type = sizetype;
+      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
+	diff_type = signed_type_for (diff_type);
+
+      tree b = loop->n1;
+      tree e = loop->n2;
+      tree s = loop->step;
+      bool up = loop->cond_code == LT_EXPR;
+      tree dir = build_int_cst (diff_type, up ? +1 : -1);
+      bool negating;
+      tree expr;
+
+      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
+				    true, GSI_SAME_STMT);
+      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
+				    true, GSI_SAME_STMT);
+
+      /* Convert the step, avoiding possible unsigned->signed overflow. */
+      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
+      if (negating)
+	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
+      s = fold_convert (diff_type, s);
+      if (negating)
+	s = fold_build1 (NEGATE_EXPR, diff_type, s);
+      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
+				    true, GSI_SAME_STMT);
+
+      /* Determine the range, avoiding possible unsigned->signed overflow. */
+      negating = !up && TYPE_UNSIGNED (iter_type);
+      expr = fold_build2 (MINUS_EXPR, plus_type,
+			  fold_convert (plus_type, negating ? b : e),
+			  fold_convert (plus_type, negating ? e : b));
+      expr = fold_convert (diff_type, expr);
+      if (negating)
+	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
+      tree range = force_gimple_operand_gsi
+	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
+
+      /* Determine number of iterations.  */
+      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
+      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
+      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
+
+      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
+					     true, GSI_SAME_STMT);
+
+      counts[ix].base = b;
+      counts[ix].iters = iters;
+      counts[ix].step = s;
+
+      total = fold_build2 (MULT_EXPR, bound_type, total,
+			   fold_convert (bound_type, iters));
+    }
+
+  return total;
+}
+
+/* Emit initializers for collapsed loop members.  IVAR is the outer
+   loop iteration variable, from which collapsed loop iteration values
+   are  calculated.  COUNTS array has been initialized by
+   expand_oacc_collapse_inits.  */
+
+static void
+expand_oacc_collapse_vars (const struct omp_for_data *fd,
+			   gimple_stmt_iterator *gsi,
+			   const oacc_collapse *counts, tree ivar)
+{
+  tree ivar_type = TREE_TYPE (ivar);
+
+  /*  The most rapidly changing iteration variable is the innermost
+      one.  */
+  for (int ix = fd->collapse; ix--;)
+    {
+      const omp_for_data_loop *loop = &fd->loops[ix];
+      const oacc_collapse *collapse = &counts[ix];
+      tree iter_type = TREE_TYPE (loop->v);
+      tree diff_type = TREE_TYPE (collapse->step);
+      tree plus_type = iter_type;
+      enum tree_code plus_code = PLUS_EXPR;
+      tree expr;
+
+      if (POINTER_TYPE_P (iter_type))
+	{
+	  plus_code = POINTER_PLUS_EXPR;
+	  plus_type = sizetype;
+	}
+
+      expr = build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
+		     fold_convert (ivar_type, collapse->iters));
+      expr = build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
+		     collapse->step);
+      expr = build2 (plus_code, iter_type, collapse->base,
+		     fold_convert (plus_type, expr));
+      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
+				       true, GSI_SAME_STMT);
+      gassign *ass = gimple_build_assign (loop->v, expr);
+      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
+
+      if (ix)
+	{
+	  expr = build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
+			 fold_convert (ivar_type, collapse->iters));
+	  ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
+					   true, GSI_SAME_STMT);
+	}
+    }
+}
+
 
 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
    of the combined collapse > 1 loop constructs, generate code like:
@@ -10703,149 +10858,6 @@ expand_omp_taskloop_for_inner (struct om
     }
 }
 
-/* Information about members of an OpenACC collapsed loop nest.  */
-
-struct oacc_collapse
-{
-  tree base;  /* Base value. */
-  tree iters; /* Number of steps.  */
-  tree step;  /* step size.  */
-};
-
-/* Helper for expand_oacc_for.  Determine collapsed loop information.
-   Fill in COUNTS array.  Emit any initialization code before GSI.
-   Return the calculated outer loop bound of BOUND_TYPE.  */
-
-static tree
-expand_oacc_collapse_init (const struct omp_for_data *fd,
-			   gimple_stmt_iterator *gsi,
-			   oacc_collapse *counts, tree bound_type)
-{
-  tree total = build_int_cst (bound_type, 1);
-  int ix;
-  
-  gcc_assert (integer_onep (fd->loop.step));
-  gcc_assert (integer_zerop (fd->loop.n1));
-
-  for (ix = 0; ix != fd->collapse; ix++)
-    {
-      const omp_for_data_loop *loop = &fd->loops[ix];
-
-      tree iter_type = TREE_TYPE (loop->v);
-      tree diff_type = iter_type;
-      tree plus_type = iter_type;
-
-      gcc_assert (loop->cond_code == fd->loop.cond_code);
-      
-      if (POINTER_TYPE_P (iter_type))
-	plus_type = sizetype;
-      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
-	diff_type = signed_type_for (diff_type);
-
-      tree b = loop->n1;
-      tree e = loop->n2;
-      tree s = loop->step;
-      bool up = loop->cond_code == LT_EXPR;
-      tree dir = build_int_cst (diff_type, up ? +1 : -1);
-      bool negating;
-      tree expr;
-
-      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
-				    true, GSI_SAME_STMT);
-      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
-				    true, GSI_SAME_STMT);
-
-      /* Convert the step, avoiding possible unsigned->signed overflow. */
-      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
-      if (negating)
-	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
-      s = fold_convert (diff_type, s);
-      if (negating)
-	s = fold_build1 (NEGATE_EXPR, diff_type, s);
-      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
-				    true, GSI_SAME_STMT);
-
-      /* Determine the range, avoiding possible unsigned->signed overflow. */
-      negating = !up && TYPE_UNSIGNED (iter_type);
-      expr = fold_build2 (MINUS_EXPR, plus_type,
-			  fold_convert (plus_type, negating ? b : e),
-			  fold_convert (plus_type, negating ? e : b));
-      expr = fold_convert (diff_type, expr);
-      if (negating)
-	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
-      tree range = force_gimple_operand_gsi
-	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
-
-      /* Determine number of iterations.  */
-      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
-      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
-      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
-
-      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
-					     true, GSI_SAME_STMT);
-
-      counts[ix].base = b;
-      counts[ix].iters = iters;
-      counts[ix].step = s;
-
-      total = fold_build2 (MULT_EXPR, bound_type, total,
-			   fold_convert (bound_type, iters));
-    }
-
-  return total;
-}
-
-/* Emit initializers for collapsed loop members.  IVAR is the outer
-   loop iteration variable, from which collapsed loop iteration values
-   are  calculated.  COUNTS array has been initialized by
-   expand_oacc_collapse_inits.  */
-
-static void
-expand_oacc_collapse_vars (const struct omp_for_data *fd,
-			   gimple_stmt_iterator *gsi,
-			   const oacc_collapse *counts, tree ivar)
-{
-  tree ivar_type = TREE_TYPE (ivar);
-
-  /*  The most rapidly changing iteration variable is the innermost
-      one.  */
-  for (int ix = fd->collapse; ix--;)
-    {
-      const omp_for_data_loop *loop = &fd->loops[ix];
-      const oacc_collapse *collapse = &counts[ix];
-      tree iter_type = TREE_TYPE (loop->v);
-      tree diff_type = TREE_TYPE (collapse->step);
-      tree plus_type = iter_type;
-      enum tree_code plus_code = PLUS_EXPR;
-      tree expr;
-
-      if (POINTER_TYPE_P (iter_type))
-	{
-	  plus_code = POINTER_PLUS_EXPR;
-	  plus_type = sizetype;
-	}
-
-      expr = build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
-		     fold_convert (ivar_type, collapse->iters));
-      expr = build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
-		     collapse->step);
-      expr = build2 (plus_code, iter_type, collapse->base,
-		     fold_convert (plus_type, expr));
-      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
-				       true, GSI_SAME_STMT);
-      gassign *ass = gimple_build_assign (loop->v, expr);
-      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
-
-      if (ix)
-	{
-	  expr = build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
-			 fold_convert (ivar_type, collapse->iters));
-	  ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
-					   true, GSI_SAME_STMT);
-	}
-    }
-}
-
 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
    partitioned loop.  The lowering here is abstracted, in that the
    loop parameters are passed through internal functions, which are
@@ -19789,19 +19801,9 @@ default_goacc_fork_join (gcall *ARG_UNUS
 			 const int *ARG_UNUSED (dims), bool is_fork)
 {
   if (is_fork)
-    {
-#ifndef HAVE_oacc_fork
-      return true;
-#endif
-    }
+    return targetm.have_oacc_fork ();
   else
-    {
-#ifndef HAVE_oacc_join
-      return true;
-#endif
-    }
-
-  return false;
+    return targetm.have_oacc_join ();
 }
 
 /* Default goacc.reduction early expander.
@@ -19948,7 +19950,7 @@ execute_oacc_device_lower ()
 		case IFN_UNIQUE_OACC_JOIN:
 		  if (integer_minus_onep (gimple_call_arg (call, 2)))
 		    remove = true;
-		  else if (targetm.goacc.fork_join
+		  else if (!targetm.goacc.fork_join
 			   (call, dims, code == IFN_UNIQUE_OACC_FORK))
 		    remove = true;
 		  break;

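For reference, the expand_oacc_collapse_init / expand_oacc_collapse_vars
pair moved above linearizes a collapsed loop nest into a single
induction variable and then recovers each original induction variable
by mod/div, innermost (most rapidly changing) loop first.  In plain C
the lowering amounts to something like the following simplified model
(not the gimple actually emitted; it ignores pointer-typed iterators
and the signed/unsigned conversions the real code performs):

/* Model of a lowered collapse(2) nest: IVAR runs over the product of
   the per-loop iteration counts, and each original induction variable
   is recomputed from IVAR, innermost loop first.  */

static void
collapsed_nest_model (long base0, long step0, unsigned long iters0,
		      long base1, long step1, unsigned long iters1)
{
  for (unsigned long ivar = 0; ivar < iters0 * iters1; ivar++)
    {
      long v1 = base1 + (long) (ivar % iters1) * step1;
      long v0 = base0 + (long) (ivar / iters1 % iters0) * step0;
      /* ... original loop body, using v0 and v1 ...  */
      (void) v0;
      (void) v1;
    }
}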