This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch][gomp4] openacc loops


This patch, which is derived from Ilmir Usmanov's work posted here
<https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00027.html>, implements
the loop directive in openacc. The original patch is mostly intact,
however, I did disable support for do concurrent loops since openacc
2.0a supports fortran up to fortran 2003. Furthermore, in order to make
the patch yield more interesting results, I've also enabled the private
clause. Is this patch ok for the gomp-4_0-branch?

One item on my to do list is adding support for subarrays in openacc in
fortran. So far I've got Ilmir's patch
<https://gcc.gnu.org/ml/gcc-patches/2014-05/msg01832.html> to work with
some local arrays, but not with allocatable arrays. I saw some chatter
on IRC this morning regarding array pointers and allocatable arrays. I'm
curious about how aliasing is going to be detected. Is that going to be
handled inside libgomp or by the compiler? Eg, consider a subroutine
which takes in to allocatable arrays as parameters, a and b. What
happens when a == b? We don't want to have two different copies of
whatever a and b point to on the target.

Thanks,
Cesar
2014-06-03  Ilmir Usmanov  <i.usmanov@samsung.com>
	    Cesar Philippidis  <cesar@codesourcery.com>	

	gcc/
	* c/c-parser.c (c_parser_oacc_all_clauses): Update handling for 
	OMP_CLAUSE_COLLAPSE and OMP_CLAUSE_PRIVATE.
	(c_parser_oacc_kernels): Likewise.
	(c_parser_omp_for_loop): Likewise.
	* gimple.h (is_gimple_omp_oacc_specifically): Likewise.
	* omp-low.c (scan_sharing_clauses): Likewise.
	* fortran/trans-openmp.c (gfc_trans_oacc_loop): New function.
        (gfc_trans_oacc_combined_directive): Call it.
        (gfc_trans_oacc_directive): Likewise.
        (gfc_trans_oacc_loop_generate_for): New helper function.
        (gfc_trans_oacc_loop_generate_mask_conds): Likewise.

	gcc/testsuite/
	* c-c++-common/goacc/collapse-1.c: New test.
	* gfortran.dg/goacc/loop-4.f95: Likewise.
	* gfortran.dg/goacc/loop-tree.f95: Likewise.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index e20348e..7b3f52c 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -11228,6 +11228,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask,
 
       switch (c_kind)
 	{
+	case PRAGMA_OMP_CLAUSE_COLLAPSE:
+	  clauses = c_parser_omp_clause_collapse (parser, clauses);
+	  c_name = "collapse";
+	  break;
 	case PRAGMA_OMP_CLAUSE_COPY:
 	  clauses = c_parser_oacc_data_clause (parser, c_kind, clauses);
 	  c_name = "copy";
@@ -11648,7 +11652,7 @@ c_parser_oacc_kernels (location_t loc, c_parser *parser, char *p_name)
 */
 
 #define OACC_LOOP_CLAUSE_MASK						\
-	(OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NONE)
+	(OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COLLAPSE)
 
 static tree
 c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name)
@@ -12217,8 +12221,8 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
       {
-	gcc_assert (code != OACC_LOOP);
-      collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
+	//gcc_assert (code != OACC_LOOP);
+	collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
       }
 
   gcc_assert (collapse >= 1);
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 08f6faa..2f0d498 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1856,11 +1856,237 @@ typedef struct dovar_init_d {
   tree init;
 } dovar_init;
 
+/* Helper function to generate a single for loop.  */
+static void
+gfc_trans_oacc_loop_generate_for (stmtblock_t *pblock, gfc_se *se, 
+				  gfc_expr *var_expr, gfc_expr *start_expr,
+				  gfc_expr *end_expr, gfc_expr *step_expr,
+				  int i, tree *init, tree *cond, tree *incr,
+				  vec<dovar_init>* inits)
+{
+  int simple = 0;
+  tree dovar, from, to, step, type, tmp, count = NULL_TREE;
+
+  /* Evaluate all the expressions.  */
+  gfc_init_se (se, NULL);
+  gfc_conv_expr_lhs (se, var_expr);
+  gfc_add_block_to_block (pblock, &se->pre);
+  dovar = se->expr;
+  type = TREE_TYPE (dovar);
+  gcc_assert (TREE_CODE (type) == INTEGER_TYPE);
+
+  gfc_init_se (se, NULL);
+  gfc_conv_expr_val (se, start_expr);
+  gfc_add_block_to_block (pblock, &se->pre);
+  from = gfc_evaluate_now (se->expr, pblock);
+
+  gfc_init_se (se, NULL);
+  gfc_conv_expr_val (se, end_expr);
+  gfc_add_block_to_block (pblock, &se->pre);
+  to = gfc_evaluate_now (se->expr, pblock);
+
+  gfc_init_se (se, NULL);
+  gfc_conv_expr_val (se, step_expr);
+  gfc_add_block_to_block (pblock, &se->pre);
+  step = gfc_evaluate_now (se->expr, pblock);
+
+  /* Special case simple loops.  */
+  if (TREE_CODE (dovar) == VAR_DECL)
+    {
+      if (integer_onep (step))
+	simple = 1;
+      else if (tree_int_cst_equal (step, integer_minus_one_node))
+	simple = -1;
+    }
+
+  /* Loop body.  */
+  if (simple)
+    {
+      TREE_VEC_ELT (*init, i) = build2_v (MODIFY_EXPR, dovar, from);
+      /* The condition should not be folded.  */
+      TREE_VEC_ELT (*cond, i) = build2_loc (input_location, simple > 0
+					    ? LE_EXPR : GE_EXPR,
+					    boolean_type_node, dovar, to);
+      TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, PLUS_EXPR,
+						 type, dovar, step);
+      TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location,
+						 MODIFY_EXPR,
+						 type, dovar,
+						 TREE_VEC_ELT (*incr, i));
+    }
+  else
+    {
+      /* STEP is not 1 or -1.  Use:
+	 for (count = 0; count < (to + step - from) / step; count++)
+	   {
+	     dovar = from + count * step;
+	     body;
+	   cycle_label:;
+	   }  */
+      tmp = fold_build2_loc (input_location, MINUS_EXPR, type, step, from);
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, type, to, tmp);
+      tmp = fold_build2_loc (input_location, TRUNC_DIV_EXPR, type, tmp,
+			     step);
+      tmp = gfc_evaluate_now (tmp, pblock);
+      count = gfc_create_var (type, "count");
+      TREE_VEC_ELT (*init, i) = build2_v (MODIFY_EXPR, count,
+					 build_int_cst (type, 0));
+      /* The condition should not be folded.  */
+      TREE_VEC_ELT (*cond, i) = build2_loc (input_location, LT_EXPR,
+					    boolean_type_node,
+					    count, tmp);
+      TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, PLUS_EXPR,
+						 type, count,
+						 build_int_cst (type, 1));
+      TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location,
+						 MODIFY_EXPR, type, count,
+						 TREE_VEC_ELT (*incr, i));
+
+      /* Initialize DOVAR.  */
+      tmp = fold_build2_loc (input_location, MULT_EXPR, type, count, step);
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, type, from, tmp);
+      dovar_init e = {dovar, tmp};
+      inits->safe_push (e);
+    }
+}
+
+/* Recursively generate conditional expressions.  */
+static tree
+gfc_trans_oacc_loop_generate_mask_conds (gfc_code *code, int collapse)
+{
+  if (collapse > 1)
+    return gfc_trans_oacc_loop_generate_mask_conds (code->block->next, 
+						    collapse - 1);
+  else
+    return gfc_trans_omp_code (code->block->next, true);
+}
+
+static tree
+gfc_trans_oacc_loop (gfc_code *code, stmtblock_t *pblock,
+		     gfc_omp_clauses *loop_clauses)
+{
+  gfc_se se;
+  tree init, cond, incr, stmt, cycle_label, tmp, omp_clauses;
+  stmtblock_t block;
+  stmtblock_t body;
+  gfc_omp_clauses *clauses = code->ext.omp_clauses;
+  int i, collapse = clauses->collapse;
+  vec<dovar_init> inits = vNULL;
+  dovar_init *di;
+  unsigned ix;
+  gfc_code *old_code;
+
+  /* DO CONCURRENT specific vars.  */
+  int nforloops = 0;
+  int current_for = 0;
+
+  if (collapse <= 0)
+    collapse = 1;
+
+  code = code->block->next;
+
+  if (code->op == EXEC_DO_CONCURRENT)
+    gfc_error ("!$ACC LOOP directive is unsupported on DO CONCURRENT %L",
+	       &code->loc);
+  
+  gcc_assert (code->op == EXEC_DO);
+
+  if (pblock == NULL)
+    {
+      gfc_start_block (&block);
+      pblock = &block;
+    }
+
+  /* Calculate number of required for loops.  */
+  old_code = code;
+  for (i = 0; i < collapse; i++)
+    {
+      if (code->op == EXEC_DO)
+	nforloops++;
+      else 
+	gcc_unreachable ();
+      code = code->block->next;
+    }
+  code = old_code;
+
+  /* Set the number of required for loops for collapse.  */
+  /* FIXME: this is probably correct, but OMP_CLAUSE_COLLAPSE isn't supported
+     yet.  */
+  loop_clauses->collapse = nforloops;
+
+  omp_clauses = gfc_trans_omp_clauses (pblock, loop_clauses, code->loc);
+
+  init = make_tree_vec (nforloops);
+  cond = make_tree_vec (nforloops);
+  incr = make_tree_vec (nforloops);
+
+  for (i = 0; i < collapse; i++)
+    {
+      if (code->op == EXEC_DO)
+	gfc_trans_oacc_loop_generate_for (pblock, &se, code->ext.iterator->var,
+					  code->ext.iterator->start, 
+					  code->ext.iterator->end,
+					  code->ext.iterator->step,
+					  current_for++, &init, &cond, &incr,
+					  &inits);
+      else
+	gcc_unreachable ();
+      if (i + 1 < collapse)
+	code = code->block->next;
+    }
+
+  if (pblock != &block)
+    {
+      pushlevel ();
+      gfc_start_block (&block);
+    }
+
+  gfc_start_block (&body);
+
+  /* Generate complicated dovars.  */
+  FOR_EACH_VEC_ELT (inits, ix, di)
+    gfc_add_modify (&body, di->var, di->init);
+  inits.release ();
+
+  /* Cycle statement is implemented with a goto.  Exit statement must not be
+     present for this loop.  */
+  cycle_label = gfc_build_label_decl (NULL_TREE);
+
+  /* Put these labels where they can be found later.  */
+
+  code->cycle_label = cycle_label;
+  code->exit_label = NULL_TREE;
+
+  /* Main loop body.  */
+  tmp = gfc_trans_oacc_loop_generate_mask_conds (old_code, collapse);
+  gfc_add_expr_to_block (&body, tmp);
+
+  /* Label for cycle statements (if needed).  */
+  if (TREE_USED (cycle_label))
+    {
+      tmp = build1_v (LABEL_EXPR, cycle_label);
+      gfc_add_expr_to_block (&body, tmp);
+    }
+
+  /* End of loop body.  */
+  stmt = make_node (OACC_LOOP);
+
+  TREE_TYPE (stmt) = void_type_node;
+  OMP_FOR_BODY (stmt) = gfc_finish_block (&body);
+  OMP_FOR_CLAUSES (stmt) = omp_clauses;
+  OMP_FOR_INIT (stmt) = init;
+  OMP_FOR_COND (stmt) = cond;
+  OMP_FOR_INCR (stmt) = incr;
+  gfc_add_expr_to_block (&block, stmt);
+
+  return gfc_finish_block (&block);
+}
+
 /* parallel loop and kernels loop. */
 static tree
 gfc_trans_oacc_combined_directive (gfc_code *code)
 {
-  stmtblock_t block;
+  stmtblock_t block, *pblock = NULL;
   gfc_omp_clauses construct_clauses, loop_clauses;
   tree stmt, oacc_clauses = NULL_TREE;
   enum tree_code construct_code;
@@ -1899,11 +2125,21 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
       oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
 					    code->loc);
     }
-    
-  gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
-  stmt = gfc_trans_omp_code (code->block->next, true);
+  if (!loop_clauses.seq)
+    pblock = &block;
+  else
+    pushlevel ();
+  stmt = gfc_trans_oacc_loop (code, pblock, &loop_clauses);
+  if (TREE_CODE (stmt) != BIND_EXPR)
+    stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+  else
+    poplevel (0, 0);
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
+  if (code->op == EXEC_OACC_KERNELS_LOOP)
+    OACC_KERNELS_COMBINED (stmt) = 1;
+  else
+    OACC_PARALLEL_COMBINED (stmt) = 1;
   gfc_add_expr_to_block (&block, stmt);
   return gfc_finish_block (&block);
 }
@@ -2763,8 +2999,7 @@ gfc_trans_oacc_directive (gfc_code *code)
     case EXEC_OACC_HOST_DATA:
       return gfc_trans_oacc_construct (code);
     case EXEC_OACC_LOOP:
-      gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
-      return NULL_TREE;
+      return gfc_trans_oacc_loop (code, NULL, code->ext.omp_clauses);
     case EXEC_OACC_UPDATE:
     case EXEC_OACC_WAIT:
     case EXEC_OACC_CACHE:
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 60b4896..13486ca 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -5809,15 +5809,25 @@ is_gimple_omp (const_gimple stmt)
    need any special handling for OpenACC.  */
 
 static inline bool
-is_gimple_omp_oacc_specifically (const_gimple stmt)
+is_gimple_omp_oacc_specifically (const_gimple stmt, 
+				 enum omp_clause_code code = OMP_CLAUSE_ERROR)
 {
   gcc_assert (is_gimple_omp (stmt));
   switch (gimple_code (stmt))
     {
     case GIMPLE_OACC_KERNELS:
     case GIMPLE_OACC_PARALLEL:
-      return true;
+      switch (code)
+	{
+	case OMP_CLAUSE_COLLAPSE:
+	case OMP_CLAUSE_PRIVATE:
+	  return false;
+	default:
+	  return true;
+	}
     case GIMPLE_OMP_FOR:
+      if (code == OMP_CLAUSE_COLLAPSE || code == OMP_CLAUSE_PRIVATE)
+	return false;
       switch (gimple_omp_for_kind (stmt))
 	{
 	case GF_OMP_FOR_KIND_OACC_LOOP:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 3e282c0..2d53db2 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1534,7 +1534,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
       switch (OMP_CLAUSE_CODE (c))
 	{
 	case OMP_CLAUSE_PRIVATE:
-	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt));
+	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt,
+							OMP_CLAUSE_CODE (c)));
 	  decl = OMP_CLAUSE_DECL (c);
 	  if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
 	    goto do_private;
@@ -1762,7 +1763,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 		}
 	    }
 	  break;
-
 	case OMP_CLAUSE_NOWAIT:
 	case OMP_CLAUSE_ORDERED:
 	case OMP_CLAUSE_COLLAPSE:
@@ -1770,7 +1770,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	case OMP_CLAUSE_MERGEABLE:
 	case OMP_CLAUSE_PROC_BIND:
 	case OMP_CLAUSE_SAFELEN:
-	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt));
+	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt,
+							OMP_CLAUSE_CODE (c)));
 	  break;
 
 	case OMP_CLAUSE_ALIGNED:
@@ -1817,13 +1818,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	case OMP_CLAUSE_PRIVATE:
 	case OMP_CLAUSE_FIRSTPRIVATE:
 	case OMP_CLAUSE_REDUCTION:
-	  if (is_gimple_omp_oacc_specifically (ctx->stmt))
-	    {
-	      sorry ("clause not supported yet");
-	      break;
-	    }
 	case OMP_CLAUSE_LINEAR:
-	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt));
+	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt,
+							OMP_CLAUSE_CODE (c)));
 	  decl = OMP_CLAUSE_DECL (c);
 	  if (is_variable_sized (decl))
 	    install_var_local (decl, ctx);
@@ -1896,6 +1893,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	      sorry ("clause not supported yet");
 	      break;
 	    }
+	  break;
 	case OMP_CLAUSE_COPYPRIVATE:
 	case OMP_CLAUSE_COPYIN:
 	case OMP_CLAUSE_DEFAULT:
@@ -1918,7 +1916,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	case OMP_CLAUSE__LOOPTEMP_:
 	case OMP_CLAUSE_TO:
 	case OMP_CLAUSE_FROM:
-	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt));
+	  gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt,
+							OMP_CLAUSE_CODE (c)));
 	case OMP_CLAUSE_NUM_GANGS:
 	case OMP_CLAUSE_NUM_WORKERS:
 	case OMP_CLAUSE_VECTOR_LENGTH:
diff --git a/gcc/testsuite/c-c++-common/goacc/collapse-1.c b/gcc/testsuite/c-c++-common/goacc/collapse-1.c
new file mode 100644
index 0000000..1321301
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/collapse-1.c
@@ -0,0 +1,16 @@
+void
+foo (void)
+{
+  int i, j;
+#pragma acc parallel
+#pragma acc loop collapse(1)
+  for (i = 0; i < 10; i++)
+    ;
+
+#pragma acc parallel
+#pragma acc loop collapse(2)
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 10; j++)
+      ;
+
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
new file mode 100644
index 0000000..eba20af
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original -std=f2008" } 
+
+PROGRAM test
+  IMPLICIT NONE
+  INTEGER :: a(64), b(64), c(64), i, j, k
+  ! Must be replaced by three loops.
+  !$acc loop
+  DO CONCURRENT (i=1:64, j=1:64, k=1:64, i==j .and. j==k)
+    a(i) = b(j)
+    c(k) = b(j)
+  END DO
+END PROGRAM test
+! { dg-prune-output "sorry, unimplemented: Clause not supported yet" }
+! { dg-final { scan-tree-dump-times "collapse\\(3\\)" 1 "original" } } 
+! { dg-final { cleanup-tree-dump "original" } } 
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95
new file mode 100644
index 0000000..ec1fb1f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95
@@ -0,0 +1,50 @@
+! { dg-do compile } 
+! { dg-additional-options "-fdump-tree-original -std=f2008" } 
+
+! test for tree-dump-original and spaces-commas
+
+program test
+  implicit none
+  integer :: i, j, k, m, sum
+  REAL :: a(64), b(64), c(64)
+
+  !$acc kernels 
+  !$acc loop seq collapse(2)
+  DO i = 1,10
+    DO j = 1,10
+    ENDDO
+  ENDDO
+
+  !$acc loop independent gang (3)
+  DO i = 1,10
+    !$acc loop worker(3) ! { dg-error "work-sharing region may not be closely nested inside of work-sharing, critical, ordered, master or explicit task region" }
+    DO j = 1,10
+      !$acc loop vector(5)
+      DO k = 1,10
+      ENDDO
+    ENDDO
+  ENDDO
+  !$acc end kernels
+
+  sum = 0
+  !$acc parallel
+  !$acc loop private(m) reduction(+:sum)
+  DO i = 1,10
+    sum = sum + 1
+  ENDDO
+  !$acc end parallel
+
+end program test
+! { dg-prune-output "sorry, unimplemented: Clause not supported yet" }
+! { dg-final { scan-tree-dump-times "pragma acc loop" 5 "original" } } 
+
+! { dg-final { scan-tree-dump-times "ordered" 1 "original" } }
+! { dg-final { scan-tree-dump-times "collapse\\(2\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "independent" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "gang\\(3\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "worker\\(3\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "vector\\(5\\)" 1 "original" } } 
+
+! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } } 
+! { dg-final { cleanup-tree-dump "original" } } 
\ No newline at end of file

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]