This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4.1] Taskloop support


Hi!

This patch finishes the C #pragma omp taskloop support on the gomp 4.1
branch, including library support.

2015-05-21  Jakub Jelinek  <jakub@redhat.com>

	* tree.h (OMP_STANDALONE_CLAUSES): Adjust to cover
	OMP_TARGET_{ENTER,EXIT}_DATA.
	(OMP_CLAUSE_SHARED_FIRSTPRIVATE): Define.
	* gimplify.c (gimplify_scan_omp_clauses): Add lastprivate
	clause to outer taskloop if needed.
	(gimplify_omp_for): Fix a typo.  Fixup OMP_TASKLOOP
	gimplification.
	* omp-low.c (omp_copy_decl_2): If var is TREE_ADDRESSABLE and
	listed in task_shared_vars, clear TREE_ADDRESSABLE on the
	copy.
	(build_outer_var_ref): Add lastprivate argument, pass it through
	recursively.  Handle lastprivate on taskloop construct.
	(install_var_field): Allow multiple fields for a single
	decl - one for firstprivate, another for shared clauses
	on task.
	(scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
	(add_taskreg_looptemp_clauses): Add one more _looptemp_ clause
	for taskloop GIMPLE_OMP_TASK, if it is collapse > 1 with
	non-constant iteration count and there is lastprivate clause
	on the inner GIMPLE_OMP_FOR.
	(finish_taskreg_scan): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
	(lower_rec_input_clauses): Likewise.  Ignore all
	OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE clauses on taskloop construct.
	(lower_lastprivate_clauses): For OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE
	on taskloop lookup decl in outer context.  Pass true
	to build_outer_var_ref lastprivate argument.
	(lower_send_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
	(lower_send_shared_vars): Ignore fields with NULL or
	FIELD_DECL abstract origin.
	(expand_task_call): Use GOMP_TASK_* defines instead of
	hardcoded integers.
	(expand_omp_simd): Handle addressable fd->loop.v.
	(expand_omp_taskloop_for_outer): Initialize the last
	_looptemp_ with total iteration count if needed.
	(expand_omp_taskloop_for_inner): Handle bias and broken_loop.
	(lower_omp_for_lastprivate): Use last _looptemp_ clause
	on taskloop for comparison.
	(create_task_copyfn): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
gcc/c-family/
	* c-omp.c (c_finish_omp_for): Clear DECL_INITIAL.
gcc/testsuite/
	* gcc.dg/gomp/taskloop-1.c: New test.
include/
	* gomp-constants.h (GOMP_TASK_FLAG_UNTIED, GOMP_TASK_FLAG_FINAL,
	GOMP_TASK_FLAG_MERGEABLE, GOMP_TASK_FLAG_DEPEND, GOMP_TASK_FLAG_UP,
	GOMP_TASK_FLAG_GRAINSIZE, GOMP_TASK_FLAG_IF, GOMP_TASK_FLAG_NOGROUP):
	Define.
libgomp/
	* libgomp.map (GOMP_4.1): Export GOMP_taskloop and GOMP_taskloop_ull.
	* task.c: Include gomp-constants.h.  Include taskloop.c twice
	with appropriate macros.
	(GOMP_task): Use GOMP_TASK_FLAG_* defines instead of hardcoded
	constants.
	* taskloop.c: New file.
	* testsuite/libgomp.c/for-4.c: New test.
	* testsuite/libgomp.c/taskloop-1.c: New test.
	* testsuite/libgomp.c/taskloop-2.c: New test.
	* testsuite/libgomp.c/taskloop-3.c: New test.

--- gcc/tree.h.jj	2015-05-19 18:56:50.982256719 +0200
+++ gcc/tree.h	2015-05-19 19:04:52.496759752 +0200
@@ -1206,7 +1206,7 @@ extern void protected_set_expr_location
 
 /* Generic accessors for OMP nodes that keep clauses as operand 0.  */
 #define OMP_STANDALONE_CLAUSES(NODE) \
-  TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_UPDATE), 0)
+  TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_EXIT_DATA), 0)
 
 #define OACC_PARALLEL_BODY(NODE) \
   TREE_OPERAND (OACC_PARALLEL_CHECK (NODE), 0)
@@ -1366,6 +1366,12 @@ extern void protected_set_expr_location
 #define OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ(NODE) \
   (OMP_CLAUSE_CHECK (NODE))->omp_clause.gimple_reduction_init
 
+/* True on a SHARED clause if a FIRSTPRIVATE clause for the same
+   decl is present in the chain (this can happen only for taskloop
+   with FIRSTPRIVATE/LASTPRIVATE on it originally).  */
+#define OMP_CLAUSE_SHARED_FIRSTPRIVATE(NODE) \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SHARED)->base.public_flag)
+
 #define OMP_CLAUSE_FINAL_EXPR(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_FINAL), 0)
 #define OMP_CLAUSE_IF_EXPR(NODE) \
--- gcc/gimplify.c.jj	2015-05-19 19:02:52.230632257 +0200
+++ gcc/gimplify.c	2015-05-20 19:07:01.317440243 +0200
@@ -6167,6 +6167,12 @@ gimplify_scan_omp_clauses (tree *list_p,
 					 (splay_tree_key) decl) == NULL)
 	    omp_add_variable (outer_ctx, decl, GOVD_SHARED | GOVD_SEEN);
 	  else if (outer_ctx
+		   && (outer_ctx->region_type & ORT_TASK) != 0
+		   && outer_ctx->combined_loop
+		   && splay_tree_lookup (outer_ctx->variables,
+					 (splay_tree_key) decl) == NULL)
+	    omp_add_variable (outer_ctx, decl, GOVD_LASTPRIVATE | GOVD_SEEN);
+	  else if (outer_ctx
 		   && outer_ctx->region_type == ORT_WORKSHARE
 		   && outer_ctx->combined_loop
 		   && splay_tree_lookup (outer_ctx->variables,
@@ -6227,6 +6233,10 @@ gimplify_scan_omp_clauses (tree *list_p,
 		      else if (omp_check_private (octx, decl, false))
 			break;
 		    }
+		  else if (octx
+			   && (octx->region_type & ORT_TASK) != 0
+			   && octx->combined_loop)
+		    ;
 		  else
 		    break;
 		  gcc_checking_assert (splay_tree_lookup (octx->variables,
@@ -7061,7 +7071,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
 
   /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
      clause for the IV.  */
-  if (org == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
+  if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
     {
       t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), 0);
       gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
@@ -7075,7 +7085,8 @@ gimplify_omp_for (tree *expr_p, gimple_s
 	  }
     }
 
-  gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort);
+  if (TREE_CODE (for_stmt) != OMP_TASKLOOP)
+    gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort);
   if (TREE_CODE (for_stmt) == OMP_DISTRIBUTE)
     gimplify_omp_ctxp->distribute = true;
 
@@ -7113,9 +7124,69 @@ gimplify_omp_for (tree *expr_p, gimple_s
       for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt), find_combined_omp_for,
 			    NULL, NULL);
       gcc_assert (for_stmt != NULL_TREE);
-      gimplify_omp_ctxp->combined_loop = true;
     }
 
+  /* For taskloop, need to gimplify the start, end and step before the
+     taskloop, outside of the taskloop omp context.  */
+  if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+    {
+      for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
+	{
+	  t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
+	  if (!is_gimple_constant (TREE_OPERAND (t, 1)))
+	    {
+	      TREE_OPERAND (t, 1)
+		= get_initialized_tmp_var (TREE_OPERAND (t, 1),
+					   pre_p, NULL);
+	      tree c = build_omp_clause (input_location,
+					 OMP_CLAUSE_FIRSTPRIVATE);
+	      OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1);
+	      OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+	      OMP_FOR_CLAUSES (orig_for_stmt) = c;
+	    }
+
+	  /* Handle OMP_FOR_COND.  */
+	  t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i);
+	  if (!is_gimple_constant (TREE_OPERAND (t, 1)))
+	    {
+	      TREE_OPERAND (t, 1)
+		= get_initialized_tmp_var (TREE_OPERAND (t, 1),
+					   pre_p, NULL);
+	      tree c = build_omp_clause (input_location,
+					 OMP_CLAUSE_FIRSTPRIVATE);
+	      OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1);
+	      OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+	      OMP_FOR_CLAUSES (orig_for_stmt) = c;
+	    }
+
+	  /* Handle OMP_FOR_INCR.  */
+	  t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
+	  if (TREE_CODE (t) == MODIFY_EXPR)
+	    {
+	      decl = TREE_OPERAND (t, 0);
+	      t = TREE_OPERAND (t, 1);
+	      tree *tp = &TREE_OPERAND (t, 1);
+	      if (TREE_CODE (t) == PLUS_EXPR && *tp == decl)
+		tp = &TREE_OPERAND (t, 0);
+
+	      if (!is_gimple_constant (*tp))
+		{
+		  *tp = get_initialized_tmp_var (*tp, pre_p, NULL);
+		  tree c = build_omp_clause (input_location,
+					     OMP_CLAUSE_FIRSTPRIVATE);
+		  OMP_CLAUSE_DECL (c) = *tp;
+		  OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+		  OMP_FOR_CLAUSES (orig_for_stmt) = c;
+		}
+	    }
+	}
+
+      gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (orig_for_stmt), pre_p, ort);
+    }
+
+  if (orig_for_stmt != for_stmt)
+    gimplify_omp_ctxp->combined_loop = true;
+
   for_body = NULL;
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
 	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
@@ -7175,6 +7246,11 @@ gimplify_omp_for (tree *expr_p, gimple_s
 		      else if (omp_check_private (outer, decl, false))
 			outer = NULL;
 		    }
+		  else if (((outer->region_type & ORT_TASK) != 0)
+			   && outer->combined_loop
+			   && !omp_check_private (gimplify_omp_ctxp,
+						  decl, false))
+		    ;
 		  else if (outer->region_type != ORT_COMBINED_PARALLEL)
 		    outer = NULL;
 		  if (outer)
@@ -7206,6 +7282,11 @@ gimplify_omp_for (tree *expr_p, gimple_s
 		      else if (omp_check_private (outer, decl, false))
 			outer = NULL;
 		    }
+		  else if (((outer->region_type & ORT_TASK) != 0)
+			   && outer->combined_loop
+			   && !omp_check_private (gimplify_omp_ctxp,
+						  decl, false))
+		    ;
 		  else if (outer->region_type != ORT_COMBINED_PARALLEL)
 		    outer = NULL;
 		  if (outer)
@@ -7418,14 +7499,39 @@ gimplify_omp_for (tree *expr_p, gimple_s
 
   BITMAP_FREE (has_decl_expr);
 
-  gimplify_and_add (OMP_FOR_BODY (orig_for_stmt), &for_body);
+  if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+    {
+      push_gimplify_context ();
+      if (TREE_CODE (OMP_FOR_BODY (orig_for_stmt)) != BIND_EXPR)
+	{
+	  OMP_FOR_BODY (orig_for_stmt)
+	    = build3 (BIND_EXPR, void_type_node, NULL,
+		      OMP_FOR_BODY (orig_for_stmt), NULL);
+	  TREE_SIDE_EFFECTS (OMP_FOR_BODY (orig_for_stmt)) = 1;
+	}
+    }
+
+  gimple g = gimplify_and_return_first (OMP_FOR_BODY (orig_for_stmt),
+					&for_body);
+
+  if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+    {
+      if (gimple_code (g) == GIMPLE_BIND)
+	pop_gimplify_context (g);
+      else
+	pop_gimplify_context (NULL);
+    }
 
   if (orig_for_stmt != for_stmt)
     for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
       {
 	t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
 	decl = TREE_OPERAND (t, 0);
+	struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp;
+	if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+	  gimplify_omp_ctxp = ctx->outer_context;
 	var = create_tmp_var (TREE_TYPE (decl), get_name (decl));
+	gimplify_omp_ctxp = ctx;
 	omp_add_variable (gimplify_omp_ctxp, var, GOVD_PRIVATE | GOVD_SEEN);
 	TREE_OPERAND (t, 0) = var;
 	t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
@@ -7524,14 +7630,18 @@ gimplify_omp_for (tree *expr_p, gimple_s
 	    gforo_clauses_ptr = &OMP_CLAUSE_CHAIN (*gforo_clauses_ptr);
 	    break;
 	  /* For lastprivate, keep the clause on inner taskloop, and add
-	     a shared clause on task.  */
+	     a shared clause on task.  If the same decl is also firstprivate,
+	     add also firstprivate clause on the inner taskloop.  */
 	  case OMP_CLAUSE_LASTPRIVATE:
 	    *gfor_clauses_ptr = c;
 	    gfor_clauses_ptr = &OMP_CLAUSE_CHAIN (c);
-	    *gtask_clauses_ptr = build_omp_clause (OMP_CLAUSE_LOCATION (c),
-						   OMP_CLAUSE_SHARED);
+	    *gtask_clauses_ptr
+	      = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_SHARED);
 	    OMP_CLAUSE_DECL (*gtask_clauses_ptr) = OMP_CLAUSE_DECL (c);
-	    gtask_clauses_ptr = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr);
+	    if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
+	      OMP_CLAUSE_SHARED_FIRSTPRIVATE (*gtask_clauses_ptr) = 1;
+	    gtask_clauses_ptr
+	      = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr);
 	    break;
 	  default:
 	    gcc_unreachable ();
@@ -7539,8 +7649,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
       *gfor_clauses_ptr = NULL_TREE;
       *gtask_clauses_ptr = NULL_TREE;
       *gforo_clauses_ptr = NULL_TREE;
-      gimple g
-	= gimple_build_bind (NULL_TREE, gfor, NULL_TREE);
+      g = gimple_build_bind (NULL_TREE, gfor, NULL_TREE);
       g = gimple_build_omp_task (g, task_clauses, NULL_TREE, NULL_TREE,
 				 NULL_TREE, NULL_TREE, NULL_TREE);
       gimple_omp_task_set_taskloop_p (g, true);
--- gcc/omp-low.c.jj	2015-05-19 18:56:55.730182802 +0200
+++ gcc/omp-low.c	2015-05-20 19:20:25.828928071 +0200
@@ -1129,6 +1129,14 @@ omp_copy_decl_2 (tree var, tree name, tr
 
   DECL_CONTEXT (copy) = current_function_decl;
   DECL_CHAIN (copy) = ctx->block_vars;
+  /* If VAR is listed in task_shared_vars, it means it wasn't
+     originally addressable and is addressable only because the task
+     needs to take its address.  But we don't need to take the address
+     of privatizations of that var.  */
+  if (TREE_ADDRESSABLE (var)
+      && task_shared_vars
+      && bitmap_bit_p (task_shared_vars, DECL_UID (var)))
+    TREE_ADDRESSABLE (copy) = 0;
   ctx->block_vars = copy;
 
   return copy;
@@ -1179,7 +1187,7 @@ build_receiver_ref (tree var, bool by_re
    this is some variable.  */
 
 static tree
-build_outer_var_ref (tree var, omp_context *ctx)
+build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false)
 {
   tree x;
 
@@ -1188,7 +1196,7 @@ build_outer_var_ref (tree var, omp_conte
   else if (is_variable_sized (var))
     {
       x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
-      x = build_outer_var_ref (x, ctx);
+      x = build_outer_var_ref (x, ctx, lastprivate);
       x = build_simple_mem_ref (x);
     }
   else if (is_taskreg_ctx (ctx))
@@ -1209,6 +1217,33 @@ build_outer_var_ref (tree var, omp_conte
       if (x == NULL_TREE)
 	x = var;
     }
+  else if (lastprivate && is_taskloop_ctx (ctx))
+    {
+      gcc_assert (ctx->outer);
+      splay_tree_node n
+	= splay_tree_lookup (ctx->outer->field_map,
+			     (splay_tree_key) &DECL_UID (var));
+      if (n == NULL)
+	{
+	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
+	    x = var;
+	  else
+	    x = lookup_decl (var, ctx->outer);
+	}
+      else
+	{
+	  tree field = (tree) n->value;
+	  /* If the receiver record type was remapped in the child function,
+	     remap the field into the new record type.  */
+	  x = maybe_lookup_field (field, ctx->outer);
+	  if (x != NULL)
+	    field = x;
+
+	  x = build_simple_mem_ref (ctx->outer->receiver_decl);
+	  x = omp_build_component_ref (x, field);
+	  x = build_simple_mem_ref (x);
+	}
+    }
   else if (ctx->outer)
     x = lookup_decl (var, ctx->outer);
   else if (is_reference (var))
@@ -1239,11 +1274,17 @@ static void
 install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
 {
   tree field, type, sfield = NULL_TREE;
+  splay_tree_key key = (splay_tree_key) var;
 
+  if ((mask & 8) != 0)
+    {
+      key = (splay_tree_key) &DECL_UID (var);
+      gcc_checking_assert (key != (splay_tree_key) var);
+    }
   gcc_assert ((mask & 1) == 0
-	      || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
+	      || !splay_tree_lookup (ctx->field_map, key));
   gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
-	      || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var));
+	      || !splay_tree_lookup (ctx->sfield_map, key));
   gcc_assert ((mask & 3) == 3
 	      || !is_gimple_omp_oacc (ctx->stmt));
 
@@ -1298,7 +1339,7 @@ install_var_field (tree var, bool by_ref
 	  ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
 	  for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
 	    {
-	      sfield = build_decl (DECL_SOURCE_LOCATION (var),
+	      sfield = build_decl (DECL_SOURCE_LOCATION (t),
 				   FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
 	      DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
 	      insert_field_into_struct (ctx->srecord_type, sfield);
@@ -1313,11 +1354,9 @@ install_var_field (tree var, bool by_ref
     }
 
   if (mask & 1)
-    splay_tree_insert (ctx->field_map, (splay_tree_key) var,
-		       (splay_tree_value) field);
+    splay_tree_insert (ctx->field_map, key, (splay_tree_value) field);
   if ((mask & 2) && ctx->sfield_map)
-    splay_tree_insert (ctx->sfield_map, (splay_tree_key) var,
-		       (splay_tree_value) sfield);
+    splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
 }
 
 static tree
@@ -1718,6 +1757,11 @@ scan_sharing_clauses (tree clauses, omp_
 	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
 	    break;
 	  by_ref = use_pointer_for_field (decl, ctx);
+	  if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+	    {
+	      gcc_assert (by_ref);
+	      break;
+	    }
 	  if (! TREE_READONLY (decl)
 	      || TREE_ADDRESSABLE (decl)
 	      || by_ref
@@ -1998,8 +2042,14 @@ scan_sharing_clauses (tree clauses, omp_
 	  if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
 	    break;
 	  decl = OMP_CLAUSE_DECL (c);
-	  if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
-	    fixup_remapped_decl (decl, ctx, false);
+	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
+	    break;
+	  if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+	    {
+	      install_var_field (decl, true, 11, ctx);
+	      break;
+	    }
+	  fixup_remapped_decl (decl, ctx, false);
 	  break;
 
 	case OMP_CLAUSE_MAP:
@@ -2336,7 +2386,16 @@ add_taskreg_looptemp_clauses (enum gf_ma
       tree type = fd.iter_type;
       if (fd.collapse > 1
 	  && TREE_CODE (fd.loop.n2) != INTEGER_CST)
-	count += fd.collapse - 1;
+	{
+	  count += fd.collapse - 1;
+	  /* For taskloop, if there are lastprivate clauses on the inner
+	     GIMPLE_OMP_FOR, add one more temporary for the total number
+	     of iterations (product of count1 ... countN-1).  */
+	  if (msk == GF_OMP_FOR_KIND_TASKLOOP
+	      && find_omp_clause (gimple_omp_for_clauses (for_stmt),
+				  OMP_CLAUSE_LASTPRIVATE))
+	    count++;
+	}
       for (i = 0; i < count; i++)
 	{
 	  tree temp = create_tmp_var (type);
@@ -2480,7 +2539,8 @@ finish_taskreg_scan (omp_context *ctx)
 
       for (c = gimple_omp_taskreg_clauses (ctx->stmt);
 	   c; c = OMP_CLAUSE_CHAIN (c))
-	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
+	    && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
 	  {
 	    tree decl = OMP_CLAUSE_DECL (c);
 
@@ -3755,7 +3815,8 @@ lower_rec_input_clauses (tree clauses, g
 		continue;
 	      if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
 		{
-		  gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
+		  gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)
+			      || is_global_var (OMP_CLAUSE_DECL (c)));
 		  continue;
 		}
 	    case OMP_CLAUSE_FIRSTPRIVATE:
@@ -3775,7 +3836,7 @@ lower_rec_input_clauses (tree clauses, g
 	      if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
 		{
 		  lastprivate_firstprivate = true;
-		  if (pass != 0)
+		  if (pass != 0 || is_taskloop_ctx (ctx))
 		    continue;
 		}
 	      /* Even without corresponding firstprivate, if
@@ -3936,6 +3997,11 @@ lower_rec_input_clauses (tree clauses, g
 	      /* Shared global vars are just accessed directly.  */
 	      if (is_global_var (new_var))
 		break;
+	      /* For taskloop firstprivate/lastprivate, represented
+		 as firstprivate and shared clause on the task, new_var
+		 is the firstprivate var.  */
+	      if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+		break;
 	      /* Set up the DECL_VALUE_EXPR for shared variables now.  This
 		 needs to be delayed until after fixup_child_record_type so
 		 that we get the correct type during the dereference.  */
@@ -4467,7 +4533,15 @@ lower_lastprivate_clauses (tree clauses,
 	      && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
 	{
 	  var = OMP_CLAUSE_DECL (c);
-	  new_var = lookup_decl (var, ctx);
+	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+	      && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)
+	      && is_taskloop_ctx (ctx))
+	    {
+	      gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer));
+	      new_var = lookup_decl (var, ctx->outer);
+	    }
+	  else
+	    new_var = lookup_decl (var, ctx);
 
 	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
 	    {
@@ -4511,7 +4585,7 @@ lower_lastprivate_clauses (tree clauses,
 	      OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL;
 	    }
 
-	  x = build_outer_var_ref (var, ctx);
+	  x = build_outer_var_ref (var, ctx, true);
 	  if (is_reference (var))
 	    new_var = build_simple_mem_ref_loc (clause_loc, new_var);
 	  x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
@@ -4792,6 +4866,10 @@ lower_send_clauses (tree clauses, gimple
 	case OMP_CLAUSE_LASTPRIVATE:
 	case OMP_CLAUSE_REDUCTION:
 	  break;
+	case OMP_CLAUSE_SHARED:
+	  if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+	    break;
+	  continue;
 	case OMP_CLAUSE__LOOPTEMP_:
 	  if (ignored_looptemp)
 	    {
@@ -4809,6 +4887,25 @@ lower_send_clauses (tree clauses, gimple
       if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
 	  && is_global_var (var))
 	continue;
+
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+	{
+	  /* Handle taskloop firstprivate/lastprivate, where the
+	     lastprivate on GIMPLE_OMP_TASK is represented as
+	     OMP_CLAUSE_SHARED_FIRSTPRIVATE.  */
+	  tree f
+	    = (tree)
+	      splay_tree_lookup (ctx->sfield_map
+				 ? ctx->sfield_map : ctx->field_map,
+				 (splay_tree_key) &DECL_UID (val))->value;
+	  gcc_assert (use_pointer_for_field (val, ctx));
+	  x = omp_build_component_ref (ctx->sender_decl, f);
+	  var = build_fold_addr_expr (var);
+	  gimplify_assign (x, var, ilist);
+	  DECL_ABSTRACT_ORIGIN (f) = NULL;
+	  continue;
+	}
+
       if (is_variable_sized (val))
 	continue;
       by_ref = use_pointer_for_field (val, NULL);
@@ -4879,6 +4976,9 @@ lower_send_shared_vars (gimple_seq *ilis
   for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
     {
       ovar = DECL_ABSTRACT_ORIGIN (f);
+      if (!ovar || TREE_CODE (ovar) == FIELD_DECL)
+	continue;
+
       nvar = maybe_lookup_decl (ovar, ctx);
       if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
 	continue;
@@ -5158,7 +5258,9 @@ expand_task_call (struct omp_region *reg
   tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
 
   unsigned int iflags
-    = (untied ? 1 : 0) | (mergeable ? 4 : 0) | (depend ? 8 : 0);
+    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
+      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
+      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
 
   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
@@ -5178,7 +5280,7 @@ expand_task_call (struct omp_region *reg
       endvar = OMP_CLAUSE_DECL (endvar);
       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
       if (fd.loop.cond_code == LT_EXPR)
-	iflags |= 256;
+	iflags |= GOMP_TASK_FLAG_UP;
       tree tclauses = gimple_omp_for_clauses (g);
       num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
       if (num_tasks)
@@ -5188,7 +5290,7 @@ expand_task_call (struct omp_region *reg
 	  num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
 	  if (num_tasks)
 	    {
-	      iflags |= 512;
+	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
 	    }
 	  else
@@ -5196,9 +5298,9 @@ expand_task_call (struct omp_region *reg
 	}
       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
       if (ifc == NULL_TREE)
-	iflags |= 1024;
+	iflags |= GOMP_TASK_FLAG_IF;
       if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP))
-	iflags |= 2048;
+	iflags |= GOMP_TASK_FLAG_NOGROUP;
       ull = fd.iter_type == long_long_unsigned_type_node;
     }
 
@@ -5211,7 +5313,8 @@ expand_task_call (struct omp_region *reg
 	{
 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
-			       build_int_cst (unsigned_type_node, 1024),
+			       build_int_cst (unsigned_type_node,
+					      GOMP_TASK_FLAG_IF),
 			       build_int_cst (unsigned_type_node, 0));
 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
 				   flags, t);
@@ -5224,7 +5327,8 @@ expand_task_call (struct omp_region *reg
     {
       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
-			   build_int_cst (unsigned_type_node, 2),
+			   build_int_cst (unsigned_type_node,
+					  GOMP_TASK_FLAG_FINAL),
 			   build_int_cst (unsigned_type_node, 0));
       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
     }
@@ -7925,7 +8029,11 @@ expand_omp_simd (struct omp_region *regi
   t = fold_convert (type, n2);
   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
 				false, GSI_CONTINUE_LINKING);
-  t = build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t);
+  tree v = fd->loop.v;
+  if (DECL_P (v) && TREE_ADDRESSABLE (v))
+    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+				  false, GSI_CONTINUE_LINKING);
+  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
   cond_stmt = gimple_build_cond_empty (t);
   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
@@ -8124,6 +8232,28 @@ expand_omp_taskloop_for_outer (struct om
   innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
   gcc_assert (innerc);
   tree endvar = OMP_CLAUSE_DECL (innerc);
+  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
+    {
+      gcc_assert (innerc);
+      for (i = 1; i < fd->collapse; i++)
+	{
+	  innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+				    OMP_CLAUSE__LOOPTEMP_);
+	  gcc_assert (innerc);
+	}
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+				OMP_CLAUSE__LOOPTEMP_);
+      if (innerc)
+	{
+	  /* If needed (inner taskloop has lastprivate clause), propagate
+	     down the total number of iterations.  */
+	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
+					     NULL_TREE, false,
+					     GSI_CONTINUE_LINKING);
+	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
+	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+	}
+    }
 
   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
 				 GSI_CONTINUE_LINKING);
@@ -8167,7 +8297,7 @@ expand_omp_taskloop_for_inner (struct om
 			       struct omp_for_data *fd,
 			       gimple inner_stmt)
 {
-  tree e, t, type, itype, vmain, vback;
+  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
   basic_block fin_bb;
   gimple_stmt_iterator gsi;
@@ -8180,6 +8310,29 @@ expand_omp_taskloop_for_inner (struct om
   if (POINTER_TYPE_P (type))
     itype = signed_type_for (type);
 
+  /* See if we need to bias by LLONG_MIN.  */
+  if (fd->iter_type == long_long_unsigned_type_node
+      && TREE_CODE (type) == INTEGER_TYPE
+      && !TYPE_UNSIGNED (type))
+    {
+      tree n1, n2;
+
+      if (fd->loop.cond_code == LT_EXPR)
+	{
+	  n1 = fd->loop.n1;
+	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
+	}
+      else
+	{
+	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
+	  n2 = fd->loop.n1;
+	}
+      if (TREE_CODE (n1) != INTEGER_CST
+	  || TREE_CODE (n2) != INTEGER_CST
+	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
+	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
+    }
+
   entry_bb = region->entry;
   cont_bb = region->cont;
   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
@@ -8220,6 +8373,11 @@ expand_omp_taskloop_for_inner (struct om
   innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
   gcc_assert (innerc);
   n2 = OMP_CLAUSE_DECL (innerc);
+  if (bias)
+    {
+      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
+      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
+    }
   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
 				 true, NULL_TREE, true, GSI_SAME_STMT);
   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
@@ -8310,7 +8468,13 @@ expand_omp_taskloop_for_inner (struct om
   gsi_remove (&gsi, true);
 
   FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
-  remove_edge (BRANCH_EDGE (entry_bb));
+  if (!broken_loop)
+    remove_edge (BRANCH_EDGE (entry_bb));
+  else
+    {
+      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
+      region->outer->cont = NULL;
+    }
 
   /* Connect all the blocks.  */
   if (!broken_loop)
@@ -8334,8 +8498,9 @@ expand_omp_taskloop_for_inner (struct om
 
   set_immediate_dominator (CDI_DOMINATORS, body_bb,
 			   recompute_dominator (CDI_DOMINATORS, body_bb));
-  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
-			   recompute_dominator (CDI_DOMINATORS, fin_bb));
+  if (!broken_loop)
+    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
+			     recompute_dominator (CDI_DOMINATORS, fin_bb));
 
   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
     {
@@ -11054,15 +11219,42 @@ lower_omp_for_lastprivate (struct omp_fo
   tree n2 = fd->loop.n2;
   if (fd->collapse > 1
       && TREE_CODE (n2) != INTEGER_CST
-      && gimple_omp_for_combined_into_p (fd->for_stmt)
-      && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
+      && gimple_omp_for_combined_into_p (fd->for_stmt))
     {
-      gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
-      if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR)
+      struct omp_context *task_ctx = NULL;
+      if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
 	{
-	  struct omp_for_data outer_fd;
-	  extract_omp_for_data (gfor, &outer_fd, NULL);
-	  n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
+	  gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
+	  if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR)
+	    {
+	      struct omp_for_data outer_fd;
+	      extract_omp_for_data (gfor, &outer_fd, NULL);
+	      n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
+	    }
+	  else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
+	    task_ctx = ctx->outer->outer;
+	}
+      else if (is_task_ctx (ctx->outer))
+	task_ctx = ctx->outer;
+      if (task_ctx)
+	{
+	  int i;
+	  tree innerc
+	    = find_omp_clause (gimple_omp_task_clauses (task_ctx->stmt),
+			       OMP_CLAUSE__LOOPTEMP_);
+	  gcc_assert (innerc);
+	  for (i = 0; i < fd->collapse; i++)
+	    {
+	      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+					OMP_CLAUSE__LOOPTEMP_);
+	      gcc_assert (innerc);
+	    }
+	  innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+				    OMP_CLAUSE__LOOPTEMP_);
+	  if (innerc)
+	    n2 = fold_convert (TREE_TYPE (n2),
+			       lookup_decl (OMP_CLAUSE_DECL (innerc),
+					    task_ctx));
 	}
     }
   cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
@@ -11426,6 +11618,13 @@ create_task_copyfn (gomp_task *task_stmt
 	n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
 	if (n == NULL)
 	  break;
+	if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+	  {
+	    decl = (tree) n->value;
+	    n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+	    if (n == NULL)
+	      break;
+	  }
 	f = (tree) n->value;
 	if (tcctx.cb.decl_map)
 	  f = *tcctx.cb.decl_map->get (f);
--- gcc/c-family/c-omp.c.jj	2015-05-19 18:54:16.202666384 +0200
+++ gcc/c-family/c-omp.c	2015-05-19 19:04:52.500759690 +0200
@@ -491,6 +491,7 @@ c_finish_omp_for (location_t locus, enum
 	      init = integer_zero_node;
 	      fail = true;
 	    }
+	  DECL_INITIAL (decl) = NULL_TREE;
 
 	  init = build_modify_expr (elocus, decl, NULL_TREE, NOP_EXPR,
 	      			    /* FIXME diagnostics: This should
--- gcc/testsuite/gcc.dg/gomp/taskloop-1.c.jj	2015-05-19 19:04:52.495759768 +0200
+++ gcc/testsuite/gcc.dg/gomp/taskloop-1.c	2015-05-19 19:04:52.495759768 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+int e;
+int bar (int, int);
+void baz (int, int, int, int *, int *, int *);
+
+void
+foo (int a, int b, int c, int d, int f, int g, int h, int j, int k, int l)
+{
+  int i;
+  #pragma omp taskloop if (a) final (b) untied default(none) mergeable \
+    private(c) firstprivate (e) shared (d) num_tasks(f) collapse(1)
+  for (i = bar (g, h) + j; i < k; i += l)
+    baz (i, d, e++, &c, &d, &e);
+}
--- include/gomp-constants.h.jj	2015-05-19 18:54:15.724673826 +0200
+++ include/gomp-constants.h	2015-05-19 19:04:52.494759783 +0200
@@ -113,4 +113,14 @@ enum gomp_map_kind
 #define GOMP_DEVICE_ICV			-1
 #define GOMP_DEVICE_HOST_FALLBACK	-2
 
+/* GOMP_task/GOMP_taskloop* flags argument.  */
+#define GOMP_TASK_FLAG_UNTIED		(1 << 0)
+#define GOMP_TASK_FLAG_FINAL		(1 << 1)
+#define GOMP_TASK_FLAG_MERGEABLE	(1 << 2)
+#define GOMP_TASK_FLAG_DEPEND		(1 << 3)
+#define GOMP_TASK_FLAG_UP		(1 << 8)
+#define GOMP_TASK_FLAG_GRAINSIZE	(1 << 9)
+#define GOMP_TASK_FLAG_IF		(1 << 10)
+#define GOMP_TASK_FLAG_NOGROUP		(1 << 11)
+
 #endif
--- libgomp/libgomp.map.jj	2015-05-19 18:54:16.332664361 +0200
+++ libgomp/libgomp.map	2015-05-19 19:04:52.493759799 +0200
@@ -240,6 +240,12 @@ GOMP_4.0.1 {
 	GOMP_offload_unregister;
 } GOMP_4.0;
 
+GOMP_4.1 {
+  global:
+	GOMP_taskloop;
+	GOMP_taskloop_ull;
+} GOMP_4.0.1;
+
 OACC_2.0 {
   global:
 	acc_get_num_devices;
--- libgomp/task.c.jj	2015-05-19 18:54:16.322664516 +0200
+++ libgomp/task.c	2015-05-19 19:04:52.494759783 +0200
@@ -29,6 +29,7 @@
 #include "libgomp.h"
 #include <stdlib.h>
 #include <string.h>
+#include "gomp-constants.h"
 
 typedef struct gomp_task_depend_entry *hash_entry_type;
 
@@ -126,8 +127,7 @@ GOMP_task (void (*fn) (void *), void *da
      might be running on different thread than FN.  */
   if (cpyfn)
     if_clause = false;
-  if (flags & 1)
-    flags &= ~1;
+  flags &= ~GOMP_TASK_FLAG_UNTIED;
 #endif
 
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
@@ -148,12 +148,14 @@ GOMP_task (void (*fn) (void *), void *da
 	 depend clauses for non-deferred tasks other than this, because
 	 the parent task is suspended until the child task finishes and thus
 	 it can't start further child tasks.  */
-      if ((flags & 8) && thr->task && thr->task->depend_hash)
+      if ((flags & GOMP_TASK_FLAG_DEPEND)
+	  && thr->task && thr->task->depend_hash)
 	gomp_task_maybe_wait_for_dependencies (depend);
 
       gomp_init_task (&task, thr->task, gomp_icv (false));
       task.kind = GOMP_TASK_IFFALSE;
-      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
+      task.final_task = (thr->task && thr->task->final_task)
+			|| (flags & GOMP_TASK_FLAG_FINAL);
       if (thr->task)
 	{
 	  task.in_tied_task = thr->task->in_tied_task;
@@ -196,7 +198,7 @@ GOMP_task (void (*fn) (void *), void *da
       bool do_wake;
       size_t depend_size = 0;
 
-      if (flags & 8)
+      if (flags & GOMP_TASK_FLAG_DEPEND)
 	depend_size = ((uintptr_t) depend[0]
 		       * sizeof (struct gomp_task_depend_entry));
       task = gomp_malloc (sizeof (*task) + depend_size
@@ -219,7 +221,7 @@ GOMP_task (void (*fn) (void *), void *da
       task->kind = GOMP_TASK_WAITING;
       task->fn = fn;
       task->fn_data = arg;
-      task->final_task = (flags & 2) >> 1;
+      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
 	 tasks.  */
@@ -412,6 +414,25 @@ GOMP_task (void (*fn) (void *), void *da
     }
 }
 
+ialias (GOMP_taskgroup_start)
+ialias (GOMP_taskgroup_end)
+
+#define TYPE long
+#define UTYPE unsigned long
+#define TYPE_is_long 1
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef TYPE_is_long
+
+#define TYPE unsigned long long
+#define UTYPE TYPE
+#define GOMP_taskloop GOMP_taskloop_ull
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef GOMP_taskloop
+
 static inline bool
 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
 		   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
--- libgomp/taskloop.c.jj	2015-05-19 19:04:52.493759799 +0200
+++ libgomp/taskloop.c	2015-05-20 18:37:38.584454280 +0200
@@ -0,0 +1,360 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file handles the taskloop construct.  It is included twice, once
+   for the long and once for unsigned long long variant.  */
+
+/* Called when encountering a taskloop directive.  If the GOMP_TASK_FLAG_IF
+   bit of FLAGS is clear, the tasks must be run undeferred.  NUM_TASKS holds
+   the grainsize instead when GOMP_TASK_FLAG_GRAINSIZE is set in FLAGS.  */
+
+void
+GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+	       long arg_size, long arg_align, unsigned flags,
+	       unsigned long num_tasks,
+	       TYPE start, TYPE end, TYPE step)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
+  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
+     tied to one thread all the time.  This means UNTIED tasks must be
+     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
+     might be running on different thread than FN.  */
+  if (cpyfn)
+    flags &= ~GOMP_TASK_FLAG_IF;
+  flags &= ~GOMP_TASK_FLAG_UNTIED;
+#endif
+
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team && gomp_team_barrier_cancelled (&team->barrier))
+    return;
+
+#ifdef TYPE_is_long
+  TYPE s = step;
+  if (step > 0)
+    {
+      if (start >= end)
+	return;
+      s--;
+    }
+  else
+    {
+      if (start <= end)
+	return;
+      s++;
+    }
+  UTYPE n = (end - start + s) / step;
+#else
+  UTYPE n;
+  if (flags & GOMP_TASK_FLAG_UP)
+    {
+      if (start >= end)
+	return;
+      n = (end - start + step - 1) / step;
+    }
+  else
+    {
+      if (start <= end)
+	return;
+      n = (start - end - step - 1) / -step;
+    }
+#endif
+
+  TYPE task_step = step;
+  unsigned long nfirst = n;
+  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
+    {
+      unsigned long grainsize = num_tasks;
+#ifdef TYPE_is_long
+      num_tasks = n / grainsize;
+#else
+      UTYPE ndiv = n / grainsize;
+      num_tasks = ndiv;
+      if (num_tasks != ndiv)
+	num_tasks = ~0UL;
+#endif
+      if (num_tasks <= 1)
+	{
+	  num_tasks = 1;
+	  task_step = end - start;
+	}
+      else if (num_tasks >= grainsize
+#ifndef TYPE_is_long
+	       && num_tasks != ~0UL
+#endif
+	      )
+	{
+	  UTYPE mul = num_tasks * grainsize;
+	  task_step = (TYPE) grainsize * step;
+	  if (mul != n)
+	    {
+	      task_step += step;
+	      nfirst = n - mul - 1;
+	    }
+	}
+      else
+	{
+	  UTYPE div = n / num_tasks;
+	  UTYPE mod = n % num_tasks;
+	  task_step = (TYPE) div * step;
+	  if (mod)
+	    {
+	      task_step += step;
+	      nfirst = mod - 1;
+	    }
+	}
+    }
+  else
+    {
+      if (num_tasks == 0)
+	num_tasks = team ? team->nthreads : 1;
+      if (num_tasks >= n)
+	num_tasks = n;
+      else
+	{
+	  UTYPE div = n / num_tasks;
+	  UTYPE mod = n % num_tasks;
+	  task_step = (TYPE) div * step;
+	  if (mod)
+	    {
+	      task_step += step;
+	      nfirst = mod - 1;
+	    }
+	}
+    }
+
+  if (flags & GOMP_TASK_FLAG_NOGROUP)
+    {
+      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+	return;
+    }
+  else
+    ialias_call (GOMP_taskgroup_start) ();
+
+  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
+      || (thr->task && thr->task->final_task)
+      || team->task_count + num_tasks > 64 * team->nthreads)
+    {
+      unsigned long i;
+      if (__builtin_expect (cpyfn != NULL, 0))
+	{
+	  struct gomp_task task[num_tasks];
+	  struct gomp_task *parent = thr->task;
+	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
+	  char buf[num_tasks * arg_size + arg_align - 1];
+	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
+				& ~(uintptr_t) (arg_align - 1));
+	  char *orig_arg = arg;
+	  for (i = 0; i < num_tasks; i++)
+	    {
+	      gomp_init_task (&task[i], parent, gomp_icv (false));
+	      task[i].kind = GOMP_TASK_IFFALSE;
+	      task[i].final_task = (thr->task && thr->task->final_task)
+				   || (flags & GOMP_TASK_FLAG_FINAL);
+	      if (thr->task)
+		{
+		  task[i].in_tied_task = thr->task->in_tied_task;
+		  task[i].taskgroup = thr->task->taskgroup;
+		}
+	      thr->task = &task[i];
+	      cpyfn (arg, data);
+	      arg += arg_size;
+	    }
+	  arg = orig_arg;
+	  for (i = 0; i < num_tasks; i++)
+	    {
+	      thr->task = &task[i];
+	      ((TYPE *)arg)[0] = start;
+	      start += task_step;
+	      ((TYPE *)arg)[1] = start;
+	      if (i == nfirst)
+		task_step -= step;
+	      fn (arg);
+	      arg += arg_size;
+	      if (task[i].children != NULL)
+		{
+		  gomp_mutex_lock (&team->task_lock);
+		  gomp_clear_parent (task[i].children);
+		  gomp_mutex_unlock (&team->task_lock);
+		}
+	      gomp_end_task ();
+	    }
+	}
+      else
+	for (i = 0; i < num_tasks; i++)
+	  {
+	    struct gomp_task task;
+
+	    gomp_init_task (&task, thr->task, gomp_icv (false));
+	    task.kind = GOMP_TASK_IFFALSE;
+	    task.final_task = (thr->task && thr->task->final_task)
+			      || (flags & GOMP_TASK_FLAG_FINAL);
+	    if (thr->task)
+	      {
+		task.in_tied_task = thr->task->in_tied_task;
+		task.taskgroup = thr->task->taskgroup;
+	      }
+	    thr->task = &task;
+	    ((TYPE *)data)[0] = start;
+	    start += task_step;
+	    ((TYPE *)data)[1] = start;
+	    if (i == nfirst)
+	      task_step -= step;
+	    fn (data);
+	    if (task.children != NULL)
+	      {
+		gomp_mutex_lock (&team->task_lock);
+		gomp_clear_parent (task.children);
+		gomp_mutex_unlock (&team->task_lock);
+	      }
+	    gomp_end_task ();
+	  }
+    }
+  else
+    {
+      struct gomp_task *tasks[num_tasks];
+      struct gomp_task *parent = thr->task;
+      struct gomp_taskgroup *taskgroup = parent->taskgroup;
+      char *arg;
+      int do_wake;
+      unsigned long i;
+
+      for (i = 0; i < num_tasks; i++)
+	{
+	  struct gomp_task *task
+	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
+	  tasks[i] = task;
+	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+			  & ~(uintptr_t) (arg_align - 1));
+	  gomp_init_task (task, parent, gomp_icv (false));
+	  task->kind = GOMP_TASK_IFFALSE;
+	  task->in_tied_task = parent->in_tied_task;
+	  task->taskgroup = taskgroup;
+	  thr->task = task;
+	  if (cpyfn)
+	    {
+	      cpyfn (arg, data);
+	      task->copy_ctors_done = true;
+	    }
+	  else
+	    memcpy (arg, data, arg_size);
+	  ((TYPE *)arg)[0] = start;
+	  start += task_step;
+	  ((TYPE *)arg)[1] = start;
+	  if (i == nfirst)
+	    task_step -= step;
+	  thr->task = parent;
+	  task->kind = GOMP_TASK_WAITING;
+	  task->fn = fn;
+	  task->fn_data = arg;
+	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
+	}
+      gomp_mutex_lock (&team->task_lock);
+      /* If parallel or taskgroup has been cancelled, don't start new
+	 tasks.  */
+      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+			     || (taskgroup && taskgroup->cancelled))
+			    && cpyfn == NULL, 0))
+	{
+	  gomp_mutex_unlock (&team->task_lock);
+	  for (i = 0; i < num_tasks; i++)
+	    {
+	      gomp_finish_task (tasks[i]);
+	      free (tasks[i]);
+	    }
+	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+	    ialias_call (GOMP_taskgroup_end) ();
+	  return;
+	}
+      if (taskgroup)
+	taskgroup->num_children += num_tasks;
+      for (i = 0; i < num_tasks; i++)
+	{
+	  struct gomp_task *task = tasks[i];
+	  if (parent->children)
+	    {
+	      task->next_child = parent->children;
+	      task->prev_child = parent->children->prev_child;
+	      task->next_child->prev_child = task;
+	      task->prev_child->next_child = task;
+	    }
+	  else
+	    {
+	      task->next_child = task;
+	      task->prev_child = task;
+	    }
+	  parent->children = task;
+	  if (taskgroup)
+	    {
+	      if (taskgroup->children)
+		{
+		  task->next_taskgroup = taskgroup->children;
+		  task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+		  task->next_taskgroup->prev_taskgroup = task;
+		  task->prev_taskgroup->next_taskgroup = task;
+		}
+	      else
+		{
+		  task->next_taskgroup = task;
+		  task->prev_taskgroup = task;
+		}
+	      taskgroup->children = task;
+	    }
+	  if (team->task_queue)
+	    {
+	      task->next_queue = team->task_queue;
+	      task->prev_queue = team->task_queue->prev_queue;
+	      task->next_queue->prev_queue = task;
+	      task->prev_queue->next_queue = task;
+	    }
+	  else
+	    {
+	      task->next_queue = task;
+	      task->prev_queue = task;
+	      team->task_queue = task;
+	    }
+	  ++team->task_count;
+	  ++team->task_queued_count;
+	}
+      gomp_team_barrier_set_task_pending (&team->barrier);
+      if (team->task_running_count + !parent->in_tied_task
+	  < team->nthreads)
+	{
+	  do_wake = team->nthreads - team->task_running_count
+		    - !parent->in_tied_task;
+	  if ((unsigned long) do_wake > num_tasks)
+	    do_wake = num_tasks;
+	}
+      else
+	do_wake = 0;
+      gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+	gomp_team_barrier_wake (&team->barrier, do_wake);
+    }
+  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+    ialias_call (GOMP_taskgroup_end) ();
+}
--- libgomp/testsuite/libgomp.c/for-4.c.jj	2015-05-19 19:04:52.491759830 +0200
+++ libgomp/testsuite/libgomp.c/for-4.c	2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,42 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort (void);
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F taskloop
+#define G taskloop
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F taskloop simd
+#define G taskloop_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+int
+main ()
+{
+  int err = 0;
+  #pragma omp parallel reduction(|:err)
+    #pragma omp single
+      {
+	if (test_taskloop_normal ()
+	    || test_taskloop_simd_normal ())
+	  err = 1;
+      }
+  if (err)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-1.c.jj	2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-1.c	2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+int q, r, e;
+
+__attribute__((noinline, noclone)) void
+foo (long a, long b)
+{
+  #pragma omp taskloop lastprivate (q) nogroup
+    for (long d = a; d < b; d += 2)
+      {
+	q = d;
+	if (d < 2 || d > 6 || (d & 1))
+	  #pragma omp atomic
+	    e |= 1;
+      }
+}
+
+__attribute__((noinline, noclone)) int
+bar (int a, int b)
+{
+  int q = 7;
+  #pragma omp taskloop lastprivate (q)
+    for (int d = a; d < b; d++)
+      {
+	if (d < 12 || d > 17)
+	  #pragma omp atomic
+	    e |= 1;
+	q = d;
+      }
+  return q;
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+    #pragma omp single
+      {
+	foo (2, 7);
+	r = bar (12, 18);
+      }
+  if (q != 6 || r != 17 || e)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-2.c.jj	2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-2.c	2015-05-20 18:37:38.582454311 +0200
@@ -0,0 +1,147 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=c99" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+int u[1024], v[1024], w[1024], m;
+
+__attribute__((noinline, noclone)) void
+f1 (long a, long b)
+{
+  #pragma omp taskloop simd default(none) shared(u, v, w) nogroup
+  for (long d = a; d < b; d++)
+    u[d] = v[d] + w[d];
+}
+
+__attribute__((noinline, noclone)) int
+f2 (long a, long b, long c)
+{
+  int d, e;
+  #pragma omp taskloop simd default(none) shared(u, v, w) linear(d:1) linear(c:5) lastprivate(e)
+  for (d = a; d < b; d++)
+    {
+      u[d] = v[d] + w[d];
+      c = c + 5;
+      e = c + 9;
+    }
+  return d + c + e;
+}
+
+__attribute__((noinline, noclone)) int
+f3 (long a, long b)
+{
+  int d;
+  #pragma omp taskloop simd default(none) shared(u, v, w)
+  for (d = a; d < b; d++)
+    {
+      int *p = &d;
+      u[d] = v[d] + w[d];
+    }
+  return d;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (long a, long b, long c, long d)
+{
+  int e, f, g;
+  #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) lastprivate(g)
+  for (e = a; e < b; e++)
+    for (f = c; f < d; f++)
+      {
+	int *p = &e;
+	int *q = &f;
+	int r = 32 * e + f;
+	u[r] = v[r] + w[r];
+	g = r;
+      }
+  return e + f + g;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (long a, long b, long c, long d)
+{
+  int e, f;
+  #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2)
+  for (e = a; e < b; e++)
+    for (f = c; f < d; f++)
+      {
+	int r = 32 * e + f;
+	u[r] = v[r] + w[r];
+      }
+  return e + f;
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    {
+      v[i] = i;
+      w[i] = i + 1;
+    }
+  #pragma omp parallel
+    #pragma omp single
+      f1 (0, 1024);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+	v[i] = 1024 - i;
+	w[i] = 512 - i;
+      }
+  #pragma omp parallel
+    #pragma omp single
+      m = f2 (2, 1022, 17);
+  for (i = 0; i < 1024; i++)
+    if ((i < 2 || i >= 1022) ? u[i] != 2 * i + 1 : u[i] != 1536 - 2 * i)
+      __builtin_abort ();
+    else
+      {
+	v[i] = i;
+	w[i] = i + 1;
+      }
+  if (m != 1022 + 2 * (1020 * 5 + 17) + 9)
+    __builtin_abort ();
+  #pragma omp parallel
+    #pragma omp single
+      m = f3 (0, 1024);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+	v[i] = 1024 - i;
+	w[i] = 512 - i;
+      }
+  if (m != 1024)
+    __builtin_abort ();
+  #pragma omp parallel
+    #pragma omp single
+      m = f4 (0, 32, 0, 32);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 1536 - 2 * i)
+      __builtin_abort ();
+    else
+      {
+	v[i] = i;
+	w[i] = i + 1;
+      }
+  if (m != 32 + 32 + 1023)
+    __builtin_abort ();
+  #pragma omp parallel
+    #pragma omp single
+      m = f5 (0, 32, 0, 32);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+	v[i] = 1024 - i;
+	w[i] = 512 - i;
+      }
+  if (m != 32 + 32)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-3.c.jj	2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-3.c	2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+int g;
+int a[1024];
+
+__attribute__((noinline, noclone)) int
+f1 (int x)
+{
+  #pragma omp taskloop firstprivate (x) lastprivate (x)
+  for (int i = 0; i < 64; i++)
+    {
+      if (x != 74)
+	__builtin_abort ();
+      if (i == 63)
+	x = i + 4;
+    }
+  return x;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  #pragma omp taskloop firstprivate (g) lastprivate (g) nogroup
+  for (int i = 0; i < 64; i++)
+    {
+      if (g != 77)
+	__builtin_abort ();
+      if (i == 63)
+	g = i + 9;
+    }
+}
+
+__attribute__((noinline, noclone)) long long
+f3 (long long a, long long b, long long c)
+{
+  long long i;
+  int l;
+  #pragma omp taskloop default (none) lastprivate (i, l)
+  for (i = a; i < b; i += c)
+    l = i;
+  return l * 7 + i;
+}
+
+__attribute__((noinline, noclone)) long long
+f4 (long long a, long long b, long long c, long long d,
+    long long e, long long f, int k)
+{
+  long long i, j;
+  int l;
+  #pragma omp taskloop default (none) collapse(2) \
+	      firstprivate (k) lastprivate (i, j, k, l)
+  for (i = a; i < b; i += e)
+    for (j = c; j < d; j += f)
+      {
+	if (k != 73)
+	  __builtin_abort ();
+	if (i == 31 && j == 46)
+	  k = i;
+	l = j;
+      }
+  return i + 5 * j + 11 * k + 17 * l;
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+    #pragma omp single
+      {
+	if (f1 (74) != 63 + 4)
+	  __builtin_abort ();
+	g = 77;
+	f2 ();
+	#pragma omp taskwait
+	if (g != 63 + 9)
+	  __builtin_abort ();
+	if (f3 (7, 12, 2) != 11 * 7 + 13)
+	  __builtin_abort ();
+	if (f4 (0, 32, 16, 48, 1, 2, 73) != 32 + 5 * 48 + 11 * 31 + 17 * 46)
+	  __builtin_abort ();
+      }
+  return 0;
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]