
[committed, gomp4, 2/3] Handle sequential code in kernels region


On 12/10/15 19:12, Tom de Vries wrote:
Hi,

I've committed the following patch series.

      1    Add get_bbs_in_oacc_kernels_region
      2    Handle sequential code in kernels region
      3    Handle sequential code in kernels region - Testcases

The patch series adds detection of whether sequential code (that is,
code in the oacc kernels region before and after the loop that is to be
parallelized) is safe to execute in parallel.

Bootstrapped and reg-tested on x86_64.

I'll post the patches individually, in reply to this email.

This patch makes parloops check, for each non-loop stmt in the oacc kernels region, that it is not a load aliasing with a store anywhere in the region, and vice versa.
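
To make the failure mode concrete, here is a hypothetical example (not one of the committed testcases; function and variable names are made up): the load of a[0] before the loop aliases the stores to a[i] inside the loop, so the analysis rejects the region:

  void
  foo (unsigned int *a, unsigned int n)
  {
  #pragma acc kernels copy(a[0:n])
    {
      /* Sequential code before the loop: this load aliases the stores
	 to a[i] in the loop below, so oacc_entry_exit_ok fails.  */
      unsigned int first = a[0];

      for (unsigned int i = 0; i < n; i++)
	a[i] = first + i;
    }
  }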

An exception is made for the loads and stores of reductions, which are later transformed into atomic updates.
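
A hypothetical sketch of that reduction pattern (again not a committed testcase): the load of sum before the loop feeds the reduction, and the store back to sum after the loop is the statement skipped as skip_stmt in ref_conflicts_with_region, since it is later rewritten into an atomic update:

  unsigned int sum;

  void
  bar (unsigned int *a, unsigned int n)
  {
  #pragma acc kernels copy(a[0:n], sum)
    {
      unsigned int s = sum;	/* Reduction load before the loop.  */

      for (unsigned int i = 0; i < n; i++)
	s += a[i];

      sum = s;	/* Reduction store after the loop; later transformed
		   into an atomic update.  */
    }
  }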

Thanks,
- Tom
Handle sequential code in kernels region

2015-10-12  Tom de Vries  <tom@codesourcery.com>

	* omp-low.c (lower_omp_for): Don't call lower_oacc_head_tail for oacc
	kernels regions.
	* tree-parloops.c (try_create_reduction_list): Initialize keep_res
	field.
	(dead_load_p, ref_conflicts_with_region, oacc_entry_exit_ok_1)
	(oacc_entry_exit_ok): New functions.
	(parallelize_loops): Call oacc_entry_exit_ok.
---
 gcc/omp-low.c       |   3 +-
 gcc/tree-parloops.c | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 247 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f6e0247..e700dd1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11949,7 +11949,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   /* Once lowered, extract the bounds and clauses.  */
   extract_omp_for_data (stmt, &fd, NULL);
 
-  if (is_gimple_omp_oacc (ctx->stmt))
+  if (is_gimple_omp_oacc (ctx->stmt)
+      && !ctx_in_oacc_kernels_region (ctx))
     lower_oacc_head_tail (gimple_location (stmt),
 			  gimple_omp_for_clauses (stmt),
 			  &oacc_head, &oacc_tail, ctx);
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 4b67793..d4eb32a 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -58,6 +58,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "cgraph.h"
 #include "tree-ssa.h"
 #include "params.h"
+#include "tree-ssa-alias.h"
+#include "tree-eh.h"
 
 /* This pass tries to distribute iterations of loops into several threads.
    The implementation is straightforward -- for each loop we test whether its
@@ -2672,6 +2674,7 @@ try_create_reduction_list (loop_p loop,
 			 "  FAILED: it is not a part of reduction.\n");
 	      return false;
 	    }
+	  red->keep_res = phi;
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file, "reduction phi is  ");
@@ -2764,6 +2767,240 @@ try_create_reduction_list (loop_p loop,
   return true;
 }
 
+/* Return true if STMT is a load whose result is unused, and which can
+   safely be deleted.  */
+
+static bool
+dead_load_p (gimple *stmt)
+{
+  if (!gimple_assign_load_p (stmt))
+    return false;
+
+  tree lhs = gimple_assign_lhs (stmt);
+  return (TREE_CODE (lhs) == SSA_NAME
+	  && has_zero_uses (lhs)
+	  && !gimple_has_side_effects (stmt)
+	  && !stmt_could_throw_p (stmt));
+}
+
+/* Return true if the memory reference REF (a load if REF_IS_STORE is false,
+   a store otherwise) conflicts with a memory access in the remainder of the
+   oacc kernels region REGION_BBS, scanning from just after GSI in
+   REGION_BBS[I].  Skip SKIP_STMT during the scan.  */
+
+static bool
+ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
+			   bool ref_is_store, vec<basic_block> region_bbs,
+			   unsigned int i, gimple *skip_stmt)
+{
+  basic_block bb = region_bbs[i];
+  gsi_next (&gsi);
+
+  while (true)
+    {
+      for (; !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (stmt == skip_stmt)
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "skipping reduction store: ");
+		  print_gimple_stmt (dump_file, stmt, 0, 0);
+		}
+	      continue;
+	    }
+
+	  if (!gimple_vdef (stmt)
+	      && !gimple_vuse (stmt))
+	    continue;
+
+	  if (ref_is_store)
+	    {
+	      if (dead_load_p (stmt))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "skipping dead load: ");
+		      print_gimple_stmt (dump_file, stmt, 0, 0);
+		    }
+		  continue;
+		}
+
+	      if (ref_maybe_used_by_stmt_p (stmt, ref))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "Stmt ");
+		      print_gimple_stmt (dump_file, stmt, 0, 0);
+		    }
+		  return true;
+		}
+	    }
+	  else
+	    {
+	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "Stmt ");
+		      print_gimple_stmt (dump_file, stmt, 0, 0);
+		    }
+		  return true;
+		}
+	    }
+	}
+      i++;
+      if (i == region_bbs.length ())
+	break;
+      bb = region_bbs[i];
+      gsi = gsi_start_bb (bb);
+    }
+
+  return false;
+}
+
+/* Return true if the statements in the blocks of the oacc kernels region
+   REGION_BBS that are not in the loop blocks IN_LOOP_BBS are safe to
+   execute in parallel.  Loads from the receiver object OMP_DATA_I are
+   ignored, as are the loads and stores of the reductions in
+   REDUCTION_LIST.  */
+
+static bool
+oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
+		      tree omp_data_i,
+		      reduction_info_table_type *reduction_list)
+{
+  unsigned i;
+  basic_block bb;
+  FOR_EACH_VEC_ELT (region_bbs, i, bb)
+    {
+      if (bitmap_bit_p (in_loop_bbs, bb->index))
+	continue;
+
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  gimple *skip_stmt = NULL;
+
+	  if (is_gimple_debug (stmt)
+	      || gimple_code (stmt) == GIMPLE_COND)
+	    continue;
+
+	  ao_ref ref;
+	  bool ref_is_store = false;
+	  if (gimple_assign_load_p (stmt))
+	    {
+	      tree rhs = gimple_assign_rhs1 (stmt);
+	      tree base = get_base_address (rhs);
+	      if (TREE_CODE (base) == MEM_REF
+		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
+		continue;
+
+	      /* By testing for dead loads (here and in
+		 ref_conflicts_with_region), we avoid having to run pass_dce
+		 before pass_parallelize_loops_oacc_kernels.  */
+	      if (dead_load_p (stmt))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "skipping dead load: ");
+		      print_gimple_stmt (dump_file, stmt, 0, 0);
+		    }
+		  continue;
+		}
+
+	      tree lhs = gimple_assign_lhs (stmt);
+	      if (TREE_CODE (lhs) == SSA_NAME
+		  && has_single_use (lhs))
+		{
+		  use_operand_p use_p;
+		  gimple *use_stmt;
+		  single_imm_use (lhs, &use_p, &use_stmt);
+		  if (gimple_code (use_stmt) == GIMPLE_PHI)
+		    {
+		      struct reduction_info *red;
+		      red = reduction_phi (reduction_list, use_stmt);
+		      tree val = PHI_RESULT (red->keep_res);
+		      if (has_single_use (val))
+			{
+			  single_imm_use (val, &use_p, &use_stmt);
+			  if (gimple_store_p (use_stmt))
+			    {
+			      skip_stmt = use_stmt;
+			      if (dump_file)
+				{
+				  fprintf (dump_file, "found reduction load: ");
+				  print_gimple_stmt (dump_file, stmt, 0, 0);
+				}
+			    }
+			}
+		    }
+		}
+
+	      ao_ref_init (&ref, rhs);
+	    }
+	  else if (gimple_store_p (stmt))
+	    {
+	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
+	      ref_is_store = true;
+	    }
+	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
+	    continue;
+	  else if (gimple_stmt_omp_data_i_init_p (stmt))
+	    continue;
+	  else if (!gimple_has_side_effects (stmt)
+		   && !gimple_could_trap_p (stmt)
+		   && !stmt_could_throw_p (stmt)
+		   && !gimple_vdef (stmt)
+		   && !gimple_vuse (stmt))
+	    continue;
+	  else
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
+		  print_gimple_stmt (dump_file, stmt, 0, 0);
+		}
+	      return false;
+	    }
+
+	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
+					 i, skip_stmt))
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
+		  print_gimple_stmt (dump_file, stmt, 0, 0);
+		}
+	      return false;
+	    }
+	}
+    }
+
+  return true;
+}
+
+/* Return true, for the oacc kernels region with entry block REGION_ENTRY
+   containing LOOP, if the statements before and after LOOP are safe to
+   execute in parallel, given the reductions in REDUCTION_LIST.  */
+
+static bool
+oacc_entry_exit_ok (struct loop *loop, basic_block region_entry,
+		    reduction_info_table_type *reduction_list)
+{
+  basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
+  basic_block region_exit
+    = get_oacc_kernels_region_exit (single_succ (region_entry));
+  vec<basic_block> region_bbs
+    = get_bbs_in_oacc_kernels_region (region_entry, region_exit);
+  tree omp_data_i = get_omp_data_i (region_entry);
+  gcc_assert (omp_data_i != NULL_TREE);
+
+  bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
+  bitmap_clear (in_loop_bbs);
+  for (unsigned int i = 0; i < loop->num_nodes; i++)
+    bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
+
+  bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, omp_data_i,
+				   reduction_list);
+
+  free (loop_bbs);
+
+  BITMAP_FREE (in_loop_bbs);
+
+  return res;
+}
+
 /* Detect parallel loops and generate parallel code using libgomp
    primitives.  Returns true if some loop was parallelized, false
    otherwise.  */
@@ -2901,6 +3138,14 @@ parallelize_loops (bool oacc_kernels_p)
 	    continue;
 	}
 
+      if (oacc_kernels_p
+	  && !oacc_entry_exit_ok (loop, region_entry, &reduction_list))
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
+	  continue;
+	}
+
       changed = true;
       /* Skip inner loop(s) of parallelized loop.  */
       skip_loop = loop->inner;
-- 
1.9.1

