Status of OpenMP workshare patch

Jakub Jelinek jakub@redhat.com
Thu Apr 16 15:59:00 GMT 2009


On Mon, Apr 06, 2009 at 01:53:11AM -0500, Vasilis Liaskovitis wrote:
> sorry for the delay, I haven't worked on this for a long while. 
> My copyright assignment is finally in place.
> 
> I believe the second (unreviewed) patch 
> http://gcc.gnu.org/ml/fortran/2008-08/msg00032.html
> applies cleanly to trunk (145581). This provides parallelization for WORKSHARE array assignments and where statements.

Thanks for the patch.

Here is updated version of the patch, I've added another testcase (though,
we really need more than the current 3 or 4 !$omp workshare runtime
testcases), fixed a bug that caused miscompilation of omp_parse4.f90
(if no dependency checking is performed, OMP_SINGLE created from singleblock
because it is followed by OMP_FOR should not be OMP_CLAUSE_NOWAIT just
because the OMP_FOR is last and !$omp workshare is nowait), fixed ICE
where build2_v was used to create OMP_SINGLE (build2_v folds, which is
invalid if there are no clauses), changed ws_data into an int bitfield
and fixed up formatting a little bit.

I'll bootstrap/regtest it (so far just regtested with -C gcc check
RUNTESTFLAGS=gomp.exp and check-target-libgomp) and apply to trunk tomorrow,
unless I hear complains from Vasilis or others.

Vasilis, are you going to work on the dependency analysis or worksharing of
other stuff (say forall)?

2009-04-16  Vasilis Liaskovitis  <vliaskov@gmail.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR fortran/35423
	* trans.h (OMPWS_WORKSHARE_FLAG, OMPWS_CURR_SINGLEUNIT,
	OMPWS_SCALARIZER_WS, OMPWS_NOWAIT): Define.
	(ompws_flags): New extern decl.
	* trans-array.c (gfc_trans_scalarized_loop_end): Build OMP_FOR
	for the outer dimension if ompws_flags allow it.
	* trans.c (gfc_generate_code): Clear ompws_flags.
	* trans-expr.c (gfc_trans_assignment_1): Allow worksharing
	array assignments inside of !$omp workshare.
	* trans-stmt.c (gfc_trans_where_3): Similarly for where statements
	and constructs.
	* trans-openmp.c (ompws_flags): New variable.
	(gfc_trans_omp_workshare): Rewritten.

	* testsuite/libgomp.fortran/workshare2.f90: New test.

--- gcc/fortran/trans.h.jj	2008-12-19 10:19:10.000000000 +0100
+++ gcc/fortran/trans.h	2009-04-16 15:54:15.000000000 +0200
@@ -1,6 +1,6 @@
 /* Header for code translation functions
-   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software
-   Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   Free Software Foundation, Inc.
    Contributed by Paul Brook
 
 This file is part of GCC.
@@ -766,5 +766,12 @@ extern const char gfc_msg_bounds[];
 extern const char gfc_msg_fault[];
 extern const char gfc_msg_wrong_return[];
 
+#define OMPWS_WORKSHARE_FLAG	1	/* Set if in a workshare construct.  */
+#define OMPWS_CURR_SINGLEUNIT	2	/* Set if current gfc_code in workshare
+					   construct is not workshared.  */
+#define OMPWS_SCALARIZER_WS	4	/* Set if scalarizer should attempt
+					   to create parallel loops.  */
+#define OMPWS_NOWAIT		8	/* Use NOWAIT on OMP_FOR.  */
+extern int ompws_flags;
 
 #endif /* GFC_TRANS_H */
--- gcc/fortran/trans-array.c.jj	2009-04-14 16:33:46.000000000 +0200
+++ gcc/fortran/trans-array.c	2009-04-16 15:54:15.000000000 +0200
@@ -2697,41 +2697,96 @@ gfc_trans_scalarized_loop_end (gfc_loopi
   tree tmp;
   tree loopbody;
   tree exit_label;
+  tree stmt;
+  tree init;
+  tree incr;
 
-  loopbody = gfc_finish_block (pbody);
+  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS))
+      == (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS)
+      && n == loop->dimen - 1)
+    {
+      /* We create an OMP_FOR construct for the outermost scalarized loop.  */
+      init = make_tree_vec (1);
+      cond = make_tree_vec (1);
+      incr = make_tree_vec (1);
+
+      /* Cycle statement is implemented with a goto.  Exit statement must not
+	 be present for this loop.  */
+      exit_label = gfc_build_label_decl (NULL_TREE);
+      TREE_USED (exit_label) = 1;
+
+      /* Label for cycle statements (if needed).  */
+      tmp = build1_v (LABEL_EXPR, exit_label);
+      gfc_add_expr_to_block (pbody, tmp);
+
+      stmt = make_node (OMP_FOR);
+
+      TREE_TYPE (stmt) = void_type_node;
+      OMP_FOR_BODY (stmt) = loopbody = gfc_finish_block (pbody);
+
+      OMP_FOR_CLAUSES (stmt) = build_omp_clause (OMP_CLAUSE_SCHEDULE);
+      OMP_CLAUSE_SCHEDULE_KIND (OMP_FOR_CLAUSES (stmt))
+	= OMP_CLAUSE_SCHEDULE_STATIC;
+      if (ompws_flags & OMPWS_NOWAIT)
+	OMP_CLAUSE_CHAIN (OMP_FOR_CLAUSES (stmt))
+	  = build_omp_clause (OMP_CLAUSE_NOWAIT);
+
+      /* Initialize the loopvar.  */
+      TREE_VEC_ELT (init, 0) = build2_v (MODIFY_EXPR, loop->loopvar[n],
+					 loop->from[n]);
+      OMP_FOR_INIT (stmt) = init;
+      /* The exit condition.  */
+      TREE_VEC_ELT (cond, 0) = build2 (LE_EXPR, boolean_type_node,
+				       loop->loopvar[n], loop->to[n]);
+      OMP_FOR_COND (stmt) = cond;
+      /* Increment the loopvar.  */
+      tmp = build2 (PLUS_EXPR, gfc_array_index_type,
+	  loop->loopvar[n], gfc_index_one_node);
+      TREE_VEC_ELT (incr, 0) = fold_build2 (MODIFY_EXPR,
+	  void_type_node, loop->loopvar[n], tmp);
+      OMP_FOR_INCR (stmt) = incr;
 
-  /* Initialize the loopvar.  */
-  gfc_add_modify (&loop->code[n], loop->loopvar[n], loop->from[n]);
+      ompws_flags &= ~OMPWS_CURR_SINGLEUNIT;
+      gfc_add_expr_to_block (&loop->code[n], stmt);
+    }
+  else
+    {
+      loopbody = gfc_finish_block (pbody);
 
-  exit_label = gfc_build_label_decl (NULL_TREE);
+      /* Initialize the loopvar.  */
+      gfc_add_modify (&loop->code[n], loop->loopvar[n], loop->from[n]);
 
-  /* Generate the loop body.  */
-  gfc_init_block (&block);
+      exit_label = gfc_build_label_decl (NULL_TREE);
 
-  /* The exit condition.  */
-  cond = fold_build2 (GT_EXPR, boolean_type_node,
-		      loop->loopvar[n], loop->to[n]);
-  tmp = build1_v (GOTO_EXPR, exit_label);
-  TREE_USED (exit_label) = 1;
-  tmp = build3_v (COND_EXPR, cond, tmp, build_empty_stmt ());
-  gfc_add_expr_to_block (&block, tmp);
+      /* Generate the loop body.  */
+      gfc_init_block (&block);
 
-  /* The main body.  */
-  gfc_add_expr_to_block (&block, loopbody);
+      /* The exit condition.  */
+      cond = fold_build2 (GT_EXPR, boolean_type_node,
+			 loop->loopvar[n], loop->to[n]);
+      tmp = build1_v (GOTO_EXPR, exit_label);
+      TREE_USED (exit_label) = 1;
+      tmp = build3_v (COND_EXPR, cond, tmp, build_empty_stmt ());
+      gfc_add_expr_to_block (&block, tmp);
 
-  /* Increment the loopvar.  */
-  tmp = fold_build2 (PLUS_EXPR, gfc_array_index_type,
-		     loop->loopvar[n], gfc_index_one_node);
-  gfc_add_modify (&block, loop->loopvar[n], tmp);
+      /* The main body.  */
+      gfc_add_expr_to_block (&block, loopbody);
 
-  /* Build the loop.  */
-  tmp = gfc_finish_block (&block);
-  tmp = build1_v (LOOP_EXPR, tmp);
-  gfc_add_expr_to_block (&loop->code[n], tmp);
+      /* Increment the loopvar.  */
+      tmp = fold_build2 (PLUS_EXPR, gfc_array_index_type,
+			 loop->loopvar[n], gfc_index_one_node);
+      gfc_add_modify (&block, loop->loopvar[n], tmp);
+
+      /* Build the loop.  */
+      tmp = gfc_finish_block (&block);
+      tmp = build1_v (LOOP_EXPR, tmp);
+      gfc_add_expr_to_block (&loop->code[n], tmp);
+
+      /* Add the exit label.  */
+      tmp = build1_v (LABEL_EXPR, exit_label);
+      gfc_add_expr_to_block (&loop->code[n], tmp);
+    }
 
-  /* Add the exit label.  */
-  tmp = build1_v (LABEL_EXPR, exit_label);
-  gfc_add_expr_to_block (&loop->code[n], tmp);
 }
 
 
--- gcc/fortran/trans.c.jj	2009-04-14 16:33:46.000000000 +0200
+++ gcc/fortran/trans.c	2009-04-16 15:54:15.000000000 +0200
@@ -1259,6 +1259,7 @@ gfc_trans_code (gfc_code * code)
 void
 gfc_generate_code (gfc_namespace * ns)
 {
+  ompws_flags = 0;
   if (ns->is_block_data)
     {
       gfc_generate_block_data (ns);
--- gcc/fortran/trans-expr.c.jj	2009-04-01 18:23:02.000000000 +0200
+++ gcc/fortran/trans-expr.c	2009-04-16 15:54:15.000000000 +0200
@@ -4598,6 +4598,10 @@ gfc_trans_assignment_1 (gfc_expr * expr1
   rss = NULL;
   if (lss != gfc_ss_terminator)
     {
+      /* Allow the scalarizer to workshare array assignments.  */
+      if (ompws_flags & OMPWS_WORKSHARE_FLAG)
+	ompws_flags |= OMPWS_SCALARIZER_WS;
+
       /* The assignment needs scalarization.  */
       lss_section = lss;
 
--- gcc/fortran/trans-stmt.c.jj	2009-04-06 11:48:30.000000000 +0200
+++ gcc/fortran/trans-stmt.c	2009-04-16 15:54:15.000000000 +0200
@@ -3696,6 +3696,10 @@ gfc_trans_where_3 (gfc_code * cblock, gf
   gfc_ss *edss = 0;
   gfc_ss *esss = 0;
 
+  /* Allow the scalarizer to workshare simple where loops.  */
+  if (ompws_flags & OMPWS_WORKSHARE_FLAG)
+    ompws_flags |= OMPWS_SCALARIZER_WS;
+
   cond = cblock->expr;
   tdst = cblock->next->expr;
   tsrc = cblock->next->expr2;
--- gcc/fortran/trans-openmp.c.jj	2008-10-23 13:21:25.000000000 +0200
+++ gcc/fortran/trans-openmp.c	2009-04-16 17:35:22.000000000 +0200
@@ -1,5 +1,5 @@
 /* OpenMP directive translation -- generate GCC trees from gfc_code.
-   Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
    Contributed by Jakub Jelinek <jakub@redhat.com>
 
 This file is part of GCC.
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  
 #include "trans-const.h"
 #include "arith.h"
 
+int ompws_flags;
 
 /* True if OpenMP should privatize what this DECL points to rather
    than the DECL itself.  */
@@ -1544,8 +1545,162 @@ gfc_trans_omp_taskwait (void)
 static tree
 gfc_trans_omp_workshare (gfc_code *code, gfc_omp_clauses *clauses)
 {
-  /* XXX */
-  return gfc_trans_omp_single (code, clauses);
+  tree res, tmp, stmt;
+  stmtblock_t block, *pblock = NULL;
+  stmtblock_t singleblock;
+  int saved_ompws_flags;
+  bool singleblock_in_progress = false;
+  /* True if previous gfc_code in workshare construct is not workshared.  */
+  bool prev_singleunit;
+
+  code = code->block->next;
+
+  pushlevel (0);
+
+  if (!code)
+    return build_empty_stmt ();
+
+  gfc_start_block (&block);
+  pblock = █
+
+  ompws_flags = OMPWS_WORKSHARE_FLAG;
+  prev_singleunit = false;
+
+  /* Translate statements one by one to trees until we reach
+     the end of the workshare construct.  Adjacent gfc_codes that
+     are a single unit of work are clustered and encapsulated in a
+     single OMP_SINGLE construct.  */
+  for (; code; code = code->next)
+    {
+      if (code->here != 0)
+	{
+	  res = gfc_trans_label_here (code);
+	  gfc_add_expr_to_block (pblock, res);
+	}
+
+      /* No dependence analysis, use for clauses with wait.
+	 If this is the last gfc_code, use default omp_clauses.  */
+      if (code->next == NULL && clauses->nowait)
+	ompws_flags |= OMPWS_NOWAIT;
+
+      /* By default, every gfc_code is a single unit of work.  */
+      ompws_flags |= OMPWS_CURR_SINGLEUNIT;
+      ompws_flags &= ~OMPWS_SCALARIZER_WS;
+
+      switch (code->op)
+	{
+	case EXEC_NOP:
+	  res = NULL_TREE;
+	  break;
+
+	case EXEC_ASSIGN:
+	  res = gfc_trans_assign (code);
+	  break;
+
+	case EXEC_POINTER_ASSIGN:
+	  res = gfc_trans_pointer_assign (code);
+	  break;
+
+	case EXEC_INIT_ASSIGN:
+	  res = gfc_trans_init_assign (code);
+	  break;
+
+	case EXEC_FORALL:
+	  res = gfc_trans_forall (code);
+	  break;
+
+	case EXEC_WHERE:
+	  res = gfc_trans_where (code);
+	  break;
+
+	case EXEC_OMP_ATOMIC:
+	  res = gfc_trans_omp_directive (code);
+	  break;
+
+	case EXEC_OMP_PARALLEL:
+	case EXEC_OMP_PARALLEL_DO:
+	case EXEC_OMP_PARALLEL_SECTIONS:
+	case EXEC_OMP_PARALLEL_WORKSHARE:
+	case EXEC_OMP_CRITICAL:
+	  saved_ompws_flags = ompws_flags;
+	  ompws_flags = 0;
+	  res = gfc_trans_omp_directive (code);
+	  ompws_flags = saved_ompws_flags;
+	  break;
+	
+	default:
+	  internal_error ("gfc_trans_omp_workshare(): Bad statement code");
+	}
+
+      gfc_set_backend_locus (&code->loc);
+
+      if (res != NULL_TREE && ! IS_EMPTY_STMT (res))
+	{
+	  if (TREE_CODE (res) == STATEMENT_LIST)
+	    tree_annotate_all_with_location (&res, input_location);
+	  else
+	    SET_EXPR_LOCATION (res, input_location);
+
+	  if (prev_singleunit)
+	    {
+	      if (ompws_flags & OMPWS_CURR_SINGLEUNIT)
+		/* Add current gfc_code to single block.  */
+		gfc_add_expr_to_block (&singleblock, res);
+	      else
+		{
+		  /* Finish single block and add it to pblock.  */
+		  tmp = gfc_finish_block (&singleblock);
+		  tmp = build2 (OMP_SINGLE, void_type_node, tmp, NULL_TREE);
+		  gfc_add_expr_to_block (pblock, tmp);
+		  /* Add current gfc_code to pblock.  */
+		  gfc_add_expr_to_block (pblock, res);
+		  singleblock_in_progress = false;
+		}
+	    }
+	  else
+	    {
+	      if (ompws_flags & OMPWS_CURR_SINGLEUNIT)
+		{
+		  /* Start single block.  */
+		  gfc_init_block (&singleblock);
+		  gfc_add_expr_to_block (&singleblock, res);
+		  singleblock_in_progress = true;
+		}
+	      else
+		/* Add the new statement to the block.  */
+		gfc_add_expr_to_block (pblock, res);
+	    }
+	  prev_singleunit = (ompws_flags & OMPWS_CURR_SINGLEUNIT) != 0;
+	}
+    }
+
+  /* Finish remaining SINGLE block, if we were in the middle of one.  */
+  if (singleblock_in_progress)
+    {
+      /* Finish single block and add it to pblock.  */
+      tmp = gfc_finish_block (&singleblock);
+      tmp = build2 (OMP_SINGLE, void_type_node, tmp,
+		    clauses->nowait
+		    ? build_omp_clause (OMP_CLAUSE_NOWAIT) : NULL_TREE);
+      gfc_add_expr_to_block (pblock, tmp);
+    }
+
+  stmt = gfc_finish_block (pblock);
+  if (TREE_CODE (stmt) != BIND_EXPR)
+    {
+      if (!IS_EMPTY_STMT (stmt))
+	{
+	  tree bindblock = poplevel (1, 0, 0);
+	  stmt = build3_v (BIND_EXPR, NULL, stmt, bindblock);
+	}
+      else
+	poplevel (0, 0, 0);
+    }
+  else
+    poplevel (0, 0, 0);
+
+  ompws_flags = 0;
+  return stmt;
 }
 
 tree
--- libgomp/testsuite/libgomp.fortran/workshare2.f90.jj	2009-04-16 16:37:46.000000000 +0200
+++ libgomp/testsuite/libgomp.fortran/workshare2.f90	2009-04-16 16:28:30.000000000 +0200
@@ -0,0 +1,37 @@
+subroutine f1
+  integer a(20:50,70:90)
+!$omp parallel workshare
+  a(:,:) = 17
+!$omp end parallel workshare
+  if (any (a.ne.17)) call abort
+end subroutine f1
+subroutine f2
+  integer a(20:50,70:90),d(15),e(15),f(15)
+  integer b, c, i
+!$omp parallel workshare
+  c = 5
+  a(:,:) = 17
+  b = 4
+  d = (/ 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 0, 0, 13, 14 /)
+  forall (i=1:15, d(i) /= 0)
+     d(i) = 0
+  end forall
+  e = (/ 4, 5, 2, 6, 4, 5, 2, 6, 4, 5, 2, 6, 4, 5, 2 /)
+  f = 7
+  where (e.ge.5) f = f + 1
+!$omp end parallel workshare
+  if (any (a.ne.17)) call abort
+  if (c.ne.5.or.b.ne.4) call abort
+  if (any(d.ne.0)) call abort
+  do i = 1, 15
+    if (e(i).ge.5) then
+      if (f(i).ne.8) call abort
+    else
+      if (f(i).ne.7) call abort
+    end if
+  end do
+end subroutine f2
+
+  call f1
+  call f2
+end


	Jakub



More information about the Gcc-patches mailing list