[gomp] fix 26651

Richard Henderson rth@redhat.com
Thu Apr 13 23:55:00 GMT 2006


The bug is that we forgot to use the dedicated GOMP_loop_end
functions and friends instead of just a plain GOMP_barrier to
synchronize at the end of the workshare.  That's how the memory
for the workshare was supposed to get freed, as well as 
preventing the bookkeeping associated with workshares from
growing without bound.

Fixing that immediately led to the question of how to retain
the extraneous barrier removal optimization; now that we no
longer are looking for GOMP_barrier calls and eliding ones we
can show aren't needed.  The thought here was to use OMP_RETURN
with a bit (OMP_RETURN_NOWAIT) set or unset, allowing us to 
emit the call to the proper function (GOMP_loop_end{,_nowait}).

Which led to the discovery that the OMP_RETURN was *not* placed
at the end of the construct, as one would be led to believe by
its name and documentation, for the FOR and SECTIONS constructs.
Instead, they were placed in the middle, where some internal
looping bits were supposed to be emitted.  Which led to the
creation of the OMP_CONTINUE node, and moving the OMP_RETURN
node to where it belonged.

Add to that the normal set of mistakes when attempting to change
too many things at once, and this took much longer than it ought 
to have done.  But it does pass all the tests, so, whew.


r~



	PR 26651
	* gimple-low.c (lower_omp_directive): Remove dead code.
	(lower_stmt): Do nothing except for openmp, except for OMP_PARALLEL.
	* gimplify.c (gimplify_expr): Update for OMP_RETURN, OMP_CONTINUE.
	* omp-low.c (struct omp_region): Move to tree-flow.h.
	(root_omp_region): Export.
	(omp_regions, lookup_omp_region): Remove.
	(determine_parallel_type): Update for struct omp_region changes.
	(dump_omp_region): Dump regions with block numbers.
	(new_omp_region): Take type and block instead of stmt; malloc
	instead of ggc.
	(free_omp_region_1, free_omp_regions): New.
	(expand_parallel_call): Take entry_stmt as argument; update for
	changes to omp_region.
	(remove_exit_barrier): Rewrite to update OMP_RETURN_NOWAIT.
	(remove_exit_barriers): New.
	(expand_omp_parallel): Update for struct omp_region changes.
	(expand_omp_synch): Likewise.
	(expand_omp): Likewise.
	(expand_omp_for_static_nochunk): Likewise; update for OMP_CONTINUE.
	(expand_omp_for_static_chunk): Likewise.
	(expand_omp_for): Likewise.
	(expand_omp_for_generic): Likewise; emit the proper GOMP_loop_end call.
	(expand_omp_sections): Similarly, with GOMP_sections_end.
	(expand_omp_single): New.
	(build_omp_regions_1): Update for OMP_CONTINUE.
	(execute_expand_omp): Call remove_exit_barriers, free_omp_regions.
	(lower_omp_sections): Expand with OMP_CONTINUE, do not collect
	OMP_SECTIONS_SECTIONS.
	(lower_omp_single_simple): Don't emit barrier here.
	(lower_omp_single_copy): Likewise.
	(lower_omp_single): Fix bindings, and lower to straightline now.
	(lower_omp_master, lower_omp_ordered): Likewise.
	(lower_omp_critical): Likewise.
	(lower_omp_for): Likewise.  Emit OMP_CONTINUE.
	* tree-cfg.c (make_edges): Collect an omp_region tree, use it for
	omp edges, free it afterward.
	(make_omp_sections_edges): Remove.
	(is_ctrl_altering_stmt): Don't bother checking flag_openmp.
	(move_stmt_r): Handle OMP_CONTINUE.
	* tree-flow.h (struct omp_region): Move from omp-low.c.  Switch 
	statement pointers to basic blocks.  Add type member.
	(root_omp_region, new_omp_region, free_omp_regions): Declare.
	* tree-gimple.c (is_gimple_stmt): Handle OMP_RETURN, OMP_CONTINUE.
	* tree-inline.c (estimate_num_insns_1): Likewise.
	* tree-pretty-print.c (dump_generic_node): Likewise.
	* tree-ssa-operands.c (get_expr_operands): Likewise.
	* tree.def (OMP_SECTIONS): Remove OMP_SECTIONS_SECTIONS operand.
	(OMP_RETURN): Rename from OMP_RETURN_EXPR.
	(OMP_CONTINUE): New.
	* tree.h (OMP_DIRECTIVE_P): Add OMP_CONTINUE.
	(OMP_SECTIONS_SECTIONS): Remove.
	(OMP_SECTION_LAST): New.
	(OMP_RETURN_NOWAIT): New.
fortran/
        * trans-openmp.c (gfc_trans_omp_sections): Adjust for changed
        number of operands to OMP_SECTIONS.
testsuite/
        * g++.dg/gomp/block-0.C: Update expected matches.

--- fortran/trans-openmp.c	(revision 112944)
+++ fortran/trans-openmp.c	(local)
@@ -1161,7 +1161,7 @@ gfc_trans_omp_sections (gfc_code *code, 
     }
   stmt = gfc_finish_block (&body);
 
-  stmt = build3_v (OMP_SECTIONS, stmt, omp_clauses, NULL);
+  stmt = build2_v (OMP_SECTIONS, stmt, omp_clauses);
   gfc_add_expr_to_block (&block, stmt);
 
   return gfc_finish_block (&block);
--- gimple-low.c	(revision 112944)
+++ gimple-low.c	(local)
@@ -159,14 +159,10 @@ lower_stmt_body (tree expr, struct lower
 static void
 lower_omp_directive (tree_stmt_iterator *tsi, struct lower_data *data)
 {
-  tree clause, stmt;
+  tree stmt;
   
   stmt = tsi_stmt (*tsi);
 
-  clause = (TREE_CODE (stmt) >= OMP_PARALLEL && TREE_CODE (stmt) <= OMP_SINGLE)
-	   ? OMP_CLAUSES (stmt)
-	   : NULL_TREE;
-
   lower_stmt_body (OMP_BODY (stmt), data);
   tsi_link_before (tsi, stmt, TSI_SAME_STMT);
   tsi_link_before (tsi, OMP_BODY (stmt), TSI_SAME_STMT);
@@ -216,10 +212,6 @@ lower_stmt (tree_stmt_iterator *tsi, str
     case GOTO_EXPR:
     case LABEL_EXPR:
     case SWITCH_EXPR:
-    case OMP_RETURN_EXPR:
-      break;
-
-    case OMP_PARALLEL:
     case OMP_FOR:
     case OMP_SECTIONS:
     case OMP_SECTION:
@@ -227,6 +219,11 @@ lower_stmt (tree_stmt_iterator *tsi, str
     case OMP_MASTER:
     case OMP_ORDERED:
     case OMP_CRITICAL:
+    case OMP_RETURN:
+    case OMP_CONTINUE:
+      break;
+
+    case OMP_PARALLEL:
       lower_omp_directive (tsi, data);
       return;
 
--- gimplify.c	(revision 112944)
+++ gimplify.c	(local)
@@ -5587,7 +5587,8 @@ gimplify_expr (tree *expr_p, tree *pre_p
 	  ret = gimplify_omp_atomic (expr_p, pre_p);
 	  break;
 
-	case OMP_RETURN_EXPR:
+	case OMP_RETURN:
+	case OMP_CONTINUE:
 	  ret = GS_ALL_DONE;
 	  break;
 
--- omp-low.c	(revision 112944)
+++ omp-low.c	(local)
@@ -54,40 +54,6 @@ Software Foundation, 51 Franklin Street,
    scanned for parallel regions which are then moved to a new
    function, to be invoked by the thread library.  */
 
-/* Parallel region information.  Every parallel and workshare
-   directive is enclosed between two markers, the OMP_* directive
-   and a corresponding OMP_RETURN_EXPR statement.  */
-
-struct omp_region GTY(())
-{
-  /* The enclosing region.  */
-  struct omp_region *outer;
-
-  /* First child region.  */
-  struct omp_region *inner;
-
-  /* Next peer region.  */
-  struct omp_region *next;
-
-  /* Entry point to this region.  */
-  tree entry;
-
-  /* Exit label from this region.  */
-  tree exit;
-
-  /* Region number.  */
-  int num;
-
-  /* True if this is a combined parallel+workshare region.  */
-  bool is_combined_parallel;
-
-  /* If this is a combined parallel+workshare region, this is a list
-     of additional arguments needed by the combined parallel+workshare
-     library call.  */
-  tree ws_args;
-};
-
-
 /* Context structure.  Used to store information about each parallel
    directive in the code.  */
 
@@ -142,8 +108,7 @@ struct omp_for_data
 
 static splay_tree all_contexts;
 static int parallel_nesting_level;
-static splay_tree omp_regions;
-static struct omp_region *root_omp_region;
+struct omp_region *root_omp_region;
 
 static void scan_omp (tree *, omp_context *);
 static void lower_omp (tree *, omp_context *);
@@ -402,24 +367,25 @@ determine_parallel_type (struct omp_regi
     return;
 
   /* We only support parallel+for and parallel+sections.  */
-  if (TREE_CODE (region->entry) != OMP_PARALLEL
-      || (TREE_CODE (region->inner->entry) != OMP_FOR
-	  && TREE_CODE (region->inner->entry) != OMP_SECTIONS))
+  if (region->type != OMP_PARALLEL
+      || (region->inner->type != OMP_FOR
+	  && region->inner->type != OMP_SECTIONS))
     return;
 
   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
      WS_EXIT_BB -> PAR_EXIT_BB.  */
-  par_entry_bb = bb_for_stmt (region->entry);
-  par_exit_bb = bb_for_stmt (region->exit);
-
-  ws_entry_bb = bb_for_stmt (region->inner->entry);
-  ws_exit_bb = bb_for_stmt (region->inner->exit);
+  par_entry_bb = region->entry;
+  par_exit_bb = region->exit;
+  ws_entry_bb = region->inner->entry;
+  ws_exit_bb = region->inner->exit;
 
   if (single_succ (par_entry_bb) == ws_entry_bb
       && single_succ (ws_exit_bb) == par_exit_bb
       && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb))
     {
-      if (TREE_CODE (region->inner->entry) == OMP_FOR)
+      tree ws_stmt = last_stmt (region->inner->entry);
+
+      if (region->inner->type == OMP_FOR)
 	{
 	  /* If this is a combined parallel loop, we need to determine
 	     whether or not to use the combined library calls.  There
@@ -430,7 +396,7 @@ determine_parallel_type (struct omp_regi
 	     parallel loop call would still need extra synchronization
 	     to implement ordered semantics, so there would not be any
 	     gain in using the combined call.  */
-	  tree clauses = OMP_FOR_CLAUSES (region->inner->entry);
+	  tree clauses = OMP_FOR_CLAUSES (ws_stmt);
 	  tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
 	  if (c == NULL
 	      || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC
@@ -444,7 +410,7 @@ determine_parallel_type (struct omp_regi
 
       region->is_combined_parallel = true;
       region->inner->is_combined_parallel = true;
-      region->ws_args = get_ws_args_for (region->inner->entry);
+      region->ws_args = get_ws_args_for (ws_stmt);
     }
 }
 
@@ -735,14 +701,6 @@ omp_copy_decl (tree var, copy_body_data 
 
 /* Return the parallel region associated with STMT.  */
 
-static inline struct omp_region *
-lookup_omp_region (tree stmt)
-{
-  splay_tree_node n = splay_tree_lookup (omp_regions, (splay_tree_key) stmt);
-  return n ? (struct omp_region *) n->value : NULL;
-}
-
-
 /* Debugging dumps for parallel regions.  */
 void dump_omp_region (FILE *, struct omp_region *, int);
 void debug_omp_region (struct omp_region *);
@@ -753,23 +711,26 @@ void debug_all_omp_regions (void);
 void
 dump_omp_region (FILE *file, struct omp_region *region, int indent)
 {
-  fprintf (file, "%*s", indent, "");
-  print_generic_stmt (file, region->entry, TDF_SLIM);
+  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
+	   tree_code_name[region->type]);
 
   if (region->inner)
     dump_omp_region (file, region->inner, indent + 4);
 
-  fprintf (file, "%*s", indent, "");
+  if (region->cont)
+    {
+      fprintf (file, "%*sbb %d: OMP_CONTINUE", indent, "",
+	       region->cont->index);
+    }
+    
   if (region->exit)
-    print_generic_stmt (file, region->exit, 0);
+    fprintf (file, "%*sbb: %d: OMP_RETURN", indent, "",
+	     region->exit->index);
   else
-    fprintf (file, "[no exit marker]\n");
+    fprintf (file, "%*s[no exit marker]\n", indent, "");
 
   if (region->next)
-    {
-      fprintf (file, "\n");
-      dump_omp_region (file, region->next, indent);
-    }
+    dump_omp_region (file, region->next, indent);
 }
 
 void
@@ -787,15 +748,14 @@ debug_all_omp_regions (void)
 
 /* Create a new parallel region starting at STMT inside region PARENT.  */
 
-static struct omp_region *
-new_omp_region (tree stmt, struct omp_region *parent)
+struct omp_region *
+new_omp_region (basic_block bb, enum tree_code type, struct omp_region *parent)
 {
-  struct omp_region *region = ggc_alloc_cleared (sizeof (*region));
-  static int num = 0;
+  struct omp_region *region = xcalloc (1, sizeof (*region));
 
   region->outer = parent;
-  region->entry = stmt;
-  region->num = num++;
+  region->entry = bb;
+  region->type = type;
 
   if (parent)
     {
@@ -804,24 +764,45 @@ new_omp_region (tree stmt, struct omp_re
       region->next = parent->inner;
       parent->inner = region;
     }
-  else if (omp_regions)
+  else
     {
       /* This is a toplevel region.  Add it to the list of toplevel
 	 regions in ROOT_OMP_REGION.  */
       region->next = root_omp_region;
       root_omp_region = region;
     }
-  else
+
+  return region;
+}
+
+/* Release the memory associated with the region tree rooted at REGION.  */
+
+static void
+free_omp_region_1 (struct omp_region *region)
+{
+  struct omp_region *i, *n;
+
+  for (i = region->inner; i ; i = n)
     {
-      /* Create a new root region with the first region we find.  */
-      root_omp_region = region;
-      omp_regions = splay_tree_new (splay_tree_compare_pointers, 0, 0);
+      n = i->next;
+      free_omp_region_1 (i);
     }
 
-  splay_tree_insert (omp_regions, (splay_tree_key) stmt,
-		     (splay_tree_value) region);
+  free (region);
+}
 
-  return region;
+/* Release the memory for the entire omp region tree.  */
+
+void
+free_omp_regions (void)
+{
+  struct omp_region *r, *n;
+  for (r = root_omp_region; r ; r = n)
+    {
+      n = r->next;
+      free_omp_region_1 (r);
+    }
+  root_omp_region = NULL;
 }
 
 
@@ -2033,13 +2014,14 @@ lower_send_shared_vars (tree *ilist, tre
    the workshare construct.  */
 
 static void
-expand_parallel_call (struct omp_region *region, basic_block bb, tree ws_args)
+expand_parallel_call (struct omp_region *region, basic_block bb,
+		      tree entry_stmt, tree ws_args)
 {
   tree t, args, val, cond, c, list, clauses;
   block_stmt_iterator si;
   int start_ix;
 
-  clauses = OMP_PARALLEL_CLAUSES (region->entry);
+  clauses = OMP_PARALLEL_CLAUSES (entry_stmt);
   push_gimplify_context ();
 
   /* Determine what flavor of GOMP_parallel_start we will be
@@ -2047,18 +2029,23 @@ expand_parallel_call (struct omp_region 
   start_ix = BUILT_IN_GOMP_PARALLEL_START;
   if (is_combined_parallel (region))
     {
-      tree stmt = region->inner->entry;
-
-      if (TREE_CODE (stmt) == OMP_FOR)
+      switch (region->inner->type)
 	{
-	  struct omp_for_data fd;
-	  extract_omp_for_data (stmt, &fd);
-	  start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START + fd.sched_kind;
+	case OMP_FOR:
+	  {
+	    tree stmt = last_stmt (region->inner->entry);
+	    struct omp_for_data fd;
+	    extract_omp_for_data (stmt, &fd);
+	    start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
+	      + fd.sched_kind;
+	  }
+	  break;
+	case OMP_SECTIONS:
+	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
+	  break;
+	default:
+	  gcc_unreachable ();
 	}
-      else if (TREE_CODE (stmt) == OMP_SECTIONS)
-	start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
-      else
-	gcc_unreachable ();
     }
 
   /* By default, the value of NUM_THREADS is zero (selected at run time)
@@ -2142,13 +2129,13 @@ expand_parallel_call (struct omp_region 
 
   list = NULL_TREE;
   args = tree_cons (NULL, val, NULL);
-  t = OMP_PARALLEL_DATA_ARG (region->entry);
+  t = OMP_PARALLEL_DATA_ARG (entry_stmt);
   if (t == NULL)
     t = null_pointer_node;
   else
     t = build_fold_addr_expr (t);
   args = tree_cons (NULL, t, args);
-  t = build_fold_addr_expr (OMP_PARALLEL_FN (region->entry));
+  t = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt));
   args = tree_cons (NULL, t, args);
 
   if (ws_args)
@@ -2158,13 +2145,13 @@ expand_parallel_call (struct omp_region 
   t = build_function_call_expr (t, args);
   gimplify_and_add (t, &list);
 
-  t = OMP_PARALLEL_DATA_ARG (region->entry);
+  t = OMP_PARALLEL_DATA_ARG (entry_stmt);
   if (t == NULL)
     t = null_pointer_node;
   else
     t = build_fold_addr_expr (t);
   args = tree_cons (NULL, t, NULL);
-  t = build_function_call_expr (OMP_PARALLEL_FN (region->entry), args);
+  t = build_function_call_expr (OMP_PARALLEL_FN (entry_stmt), args);
   gimplify_and_add (t, &list);
 
   t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END];
@@ -2239,27 +2226,51 @@ remove_exit_barrier (struct omp_region *
 {
   block_stmt_iterator si;
   basic_block exit_bb;
+  edge_iterator ei;
+  edge e;
   tree t;
 
-  gcc_assert (TREE_CODE (region->entry) == OMP_PARALLEL);
-
-  exit_bb = bb_for_stmt (region->exit);
+  exit_bb = region->exit;
 
-  /* The barrier should be immediately before OMP_RETURN_EXPR.
-     Otherwise, we cannot remove it.  */
+  /* The last insn in the block will be the parallel's OMP_RETURN.  The
+     workshare's OMP_RETURN will be in a preceeding block.  The kinds of
+     statements that can appear in between are extremely limited -- no
+     memory operations at all.  Here, we allow nothing at all, so the
+     only thing we allow to preceed this OMP_RETURN is a label.  */
   si = bsi_last (exit_bb);
-  t = bsi_stmt (si);
-  gcc_assert (TREE_CODE (t) == OMP_RETURN_EXPR);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
   bsi_prev (&si);
-  if (bsi_end_p (si))
+  if (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) != LABEL_EXPR)
     return;
 
-  t = bsi_stmt (si);
-  if (TREE_CODE (t) == CALL_EXPR
-      && get_callee_fndecl (t) == built_in_decls[BUILT_IN_GOMP_BARRIER])
-    bsi_remove (&si, true);
+  FOR_EACH_EDGE (e, ei, exit_bb->preds)
+    {
+      si = bsi_last (e->src);
+      if (bsi_end_p (si))
+	continue;
+      t = bsi_stmt (si);
+      if (TREE_CODE (t) == OMP_RETURN)
+	OMP_RETURN_NOWAIT (t) = 1;
+    }
 }
 
+static void
+remove_exit_barriers (struct omp_region *region)
+{
+  if (region->type == OMP_PARALLEL)
+    remove_exit_barrier (region);
+
+  if (region->inner)
+    {
+      region = region->inner;
+      remove_exit_barriers (region);
+      while (region->next)
+	{
+	  region = region->next;
+	  remove_exit_barriers (region);
+	}
+    }
+}
 
 /* Expand the OpenMP parallel directive starting at REGION.  */
 
@@ -2270,29 +2281,23 @@ expand_omp_parallel (struct omp_region *
   struct function *child_cfun, *saved_cfun;
   tree child_fn, block, t, ws_args;
   block_stmt_iterator si;
+  tree entry_stmt;
   edge e;
 
-  child_fn = OMP_PARALLEL_FN (region->entry);
+  entry_stmt = last_stmt (region->entry);
+  child_fn = OMP_PARALLEL_FN (entry_stmt);
   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
   saved_cfun = cfun;
 
-  entry_bb = bb_for_stmt (region->entry);
-  exit_bb = bb_for_stmt (region->exit);
+  entry_bb = region->entry;
+  exit_bb = region->exit;
 
   if (is_combined_parallel (region))
-    {
-      ws_args = region->ws_args;
-
-      /* For combined parallel+workshare calls, barriers at the end of
-	 the function are not necessary and can be removed.  Since the
-	 caller will have a barrier of its own, the workshare barrier is
-	 superfluous.  */
-      remove_exit_barrier (region);
-    }
+    ws_args = region->ws_args;
   else
     ws_args = NULL_TREE;
 
-  if (DECL_STRUCT_FUNCTION (OMP_PARALLEL_FN (region->entry))->cfg)
+  if (child_cfun->cfg)
     {
       /* Due to inlining, it may happen that we have already outlined
 	 the region, in which case all we need to do is make the
@@ -2304,7 +2309,7 @@ expand_omp_parallel (struct omp_region *
       exit_succ_e = single_succ_edge (exit_bb);
 
       si = bsi_last (entry_bb);
-      gcc_assert (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
+      gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
       bsi_remove (&si, true);
 
       new_bb = entry_bb;
@@ -2325,7 +2330,7 @@ expand_omp_parallel (struct omp_region *
 	 a function call that has been inlined, the original PARM_DECL
 	 .OMP_DATA_I may have been converted into a different local
 	 variable.  In which case, we need to keep the assignment.  */
-      if (OMP_PARALLEL_DATA_ARG (region->entry))
+      if (OMP_PARALLEL_DATA_ARG (entry_stmt))
 	{
 	  basic_block entry_succ_bb = single_succ (entry_bb);
 	  block_stmt_iterator si = bsi_start (entry_succ_bb);
@@ -2337,7 +2342,7 @@ expand_omp_parallel (struct omp_region *
 	  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR
 		      && TREE_CODE (TREE_OPERAND (stmt, 1)) == ADDR_EXPR
 		      && TREE_OPERAND (TREE_OPERAND (stmt, 1), 0)
-			 == OMP_PARALLEL_DATA_ARG (region->entry));
+			 == OMP_PARALLEL_DATA_ARG (entry_stmt));
 
 	  if (TREE_OPERAND (stmt, 0) == DECL_ARGUMENTS (child_fn))
 	    bsi_remove (&si, true);
@@ -2377,15 +2382,14 @@ expand_omp_parallel (struct omp_region *
 
       /* Convert OMP_RETURN into a RETURN_EXPR.  */
       si = bsi_last (exit_bb);
-      gcc_assert (!bsi_end_p (si)
-	          && TREE_CODE (bsi_stmt (si)) == OMP_RETURN_EXPR);
+      gcc_assert (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
       t = build1 (RETURN_EXPR, void_type_node, NULL);
       bsi_insert_after (&si, t, TSI_SAME_STMT);
       bsi_remove (&si, true);
     }
 
   /* Emit a library call to launch the children threads.  */
-  expand_parallel_call (region, new_bb, ws_args);
+  expand_parallel_call (region, new_bb, entry_stmt, ws_args);
 }
 
 
@@ -2412,7 +2416,7 @@ expand_omp_parallel (struct omp_region *
     If this is a combined omp parallel loop, instead of the call to
     GOMP_loop_foo_start, we emit 'goto L3'.  */
 
-static basic_block
+static void
 expand_omp_for_generic (struct omp_region *region,
 			struct omp_for_data *fd,
 			enum built_in_function start_fn,
@@ -2421,8 +2425,7 @@ expand_omp_for_generic (struct omp_regio
   tree l0, l1, l2, l3;
   tree type, istart0, iend0, iend;
   tree t, args, list;
-  basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb;
-  edge exit_edge;
+  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l3_bb;
   block_stmt_iterator si;
   bool in_combined_parallel = is_combined_parallel (region);
 
@@ -2430,28 +2433,30 @@ expand_omp_for_generic (struct omp_regio
 
   istart0 = create_tmp_var (long_integer_type_node, ".istart0");
   iend0 = create_tmp_var (long_integer_type_node, ".iend0");
+  iend = create_tmp_var (type, NULL);
   TREE_ADDRESSABLE (istart0) = 1;
   TREE_ADDRESSABLE (iend0) = 1;
 
-  l0 = create_artificial_label ();
-  l1 = create_artificial_label ();
-  l2 = create_artificial_label ();
-  l3 = create_artificial_label ();
-  iend = create_tmp_var (type, NULL);
-
-  entry_bb = bb_for_stmt (region->entry);
+  entry_bb = region->entry;
+  l0_bb = create_empty_bb (entry_bb);
   l1_bb = single_succ (entry_bb);
-  exit_bb = bb_for_stmt (region->exit);
+  cont_bb = region->cont;
+  l2_bb = create_empty_bb (cont_bb);
+  l3_bb = single_succ (cont_bb);
+  exit_bb = region->exit;
+
+  l0 = tree_block_label (l0_bb);
+  l1 = tree_block_label (l1_bb);
+  l2 = tree_block_label (l2_bb);
+  l3 = tree_block_label (l3_bb);
 
   si = bsi_last (entry_bb);
-  gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR);
-  bsi_remove (&si, true);
-  list = alloc_stmt_list ();
-
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
   if (!in_combined_parallel)
     {
       /* If this is not a combined parallel loop, emit a call to
 	 GOMP_loop_foo_start in ENTRY_BB.  */
+      list = alloc_stmt_list ();
       t = build_fold_addr_expr (iend0);
       args = tree_cons (NULL, t, NULL);
       t = build_fold_addr_expr (istart0);
@@ -2472,15 +2477,12 @@ expand_omp_for_generic (struct omp_regio
       t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
 		  build_and_jump (&l3));
       append_to_statement_list (t, &list);
-      si = bsi_last (entry_bb);
-      bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+      bsi_insert_after (&si, list, BSI_SAME_STMT);
     }
+  bsi_remove (&si, true);
 
   /* Iteration setup for sequential loop goes in L0_BB.  */
   list = alloc_stmt_list ();
-  t = build1 (LABEL_EXPR, void_type_node, l0);
-  gimplify_and_add (t, &list);
-
   t = fold_convert (type, istart0);
   t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
   gimplify_and_add (t, &list);
@@ -2489,16 +2491,9 @@ expand_omp_for_generic (struct omp_regio
   t = build2 (MODIFY_EXPR, void_type_node, iend, t);
   gimplify_and_add (t, &list);
 
-  l0_bb = create_empty_bb (entry_bb);
   si = bsi_start (l0_bb);
   bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
-  /* Loop body goes in L1_BB.  */
-  list = alloc_stmt_list ();
-  si = bsi_start (l1_bb);
-  bsi_insert_before (&si, build1 (LABEL_EXPR, void_type_node, l1),
-		     BSI_CONTINUE_LINKING);
-
   /* Code to control the increment and predicate for the sequential
      loop goes in the first half of EXIT_BB (we split EXIT_BB so
      that we can inherit all the edges going out of the loop
@@ -2515,20 +2510,13 @@ expand_omp_for_generic (struct omp_regio
 	      build_and_jump (&l2));
   append_to_statement_list (t, &list);
 
-  si = bsi_last (exit_bb);
-  t = bsi_stmt (si);
-  gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
+  si = bsi_last (cont_bb);
+  bsi_insert_after (&si, list, BSI_SAME_STMT);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
   bsi_remove (&si, true);
-  exit_edge = split_block (exit_bb, t);
-  exit_edge->flags = EDGE_FALSE_VALUE;
-
-  si = bsi_last (exit_bb);
-  bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
   /* Emit code to get the next parallel iteration in L2_BB.  */
   list = alloc_stmt_list ();
-  t = build1 (LABEL_EXPR, void_type_node, l2);
-  gimplify_and_add (t, &list);
 
   t = build_fold_addr_expr (iend0);
   args = tree_cons (NULL, t, NULL);
@@ -2540,15 +2528,18 @@ expand_omp_for_generic (struct omp_regio
 	      build_and_jump (&l3));
   append_to_statement_list (t, &list);
   
-  l2_bb = exit_edge->dest;
   si = bsi_start (l2_bb);
   bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
-  /* Insert exit label on EXIT_EDGE.  */
-  exit_edge = single_succ_edge (l2_bb);
-  t = build1 (LABEL_EXPR, void_type_node, l3);
-  bsi_insert_on_edge_immediate (exit_edge, t);
-  exit_edge->flags = EDGE_FALSE_VALUE;
+  /* Add the loop cleanup function.  */
+  si = bsi_last (exit_bb);
+  if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
+    t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT];
+  else
+    t = built_in_decls[BUILT_IN_GOMP_LOOP_END];
+  t = build_function_call_expr (t, NULL);
+  bsi_insert_after (&si, t, BSI_SAME_STMT);
+  bsi_remove (&si, true);
 
   /* Connect the new blocks.  */
   remove_edge (single_succ_edge (entry_bb));
@@ -2557,14 +2548,17 @@ expand_omp_for_generic (struct omp_regio
   else
     {
       make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE);
-      make_edge (entry_bb, exit_edge->dest, EDGE_FALSE_VALUE);
+      make_edge (entry_bb, l3_bb, EDGE_FALSE_VALUE);
     }
 
   make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
-  make_edge (exit_bb, l1_bb, EDGE_TRUE_VALUE);
-  make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
 
-  return exit_edge->dest;
+  remove_edge (single_succ_edge (cont_bb));
+  make_edge (cont_bb, l1_bb, EDGE_TRUE_VALUE);
+  make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
+
+  make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
+  make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
 }
 
 
@@ -2596,29 +2590,33 @@ expand_omp_for_generic (struct omp_regio
     L2:
 */
 
-static basic_block
+static void
 expand_omp_for_static_nochunk (struct omp_region *region,
 			       struct omp_for_data *fd)
 {
   tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid;
   tree type, utype, list;
-  basic_block entry_bb, exit_bb, seq_start_bb, body_bb, new_exit_bb;
+  basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
+  basic_block fin_bb;
   block_stmt_iterator si;
-  edge exit_edge;
 
-  l0 = create_artificial_label ();
-  l1 = create_artificial_label ();
-  l2 = create_artificial_label ();
-  
   type = TREE_TYPE (fd->v);
   utype = lang_hooks.types.unsigned_type (type);
 
-  entry_bb = bb_for_stmt (region->entry);
+  entry_bb = region->entry;
+  seq_start_bb = create_empty_bb (entry_bb);
   body_bb = single_succ (entry_bb);
-  exit_bb = bb_for_stmt (region->exit);
+  cont_bb = region->cont;
+  fin_bb = single_succ (cont_bb);
+  exit_bb = region->exit;
+
+  l0 = tree_block_label (seq_start_bb);
+  l1 = tree_block_label (body_bb);
+  l2 = tree_block_label (fin_bb);
 
   /* Iteration space partitioning goes in ENTRY_BB.  */
   list = alloc_stmt_list ();
+
   t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
   t = build_function_call_expr (t, NULL);
   t = fold_convert (utype, t);
@@ -2673,17 +2671,13 @@ expand_omp_for_static_nochunk (struct om
   append_to_statement_list (t, &list);
 
   si = bsi_last (entry_bb);
-  gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
+  bsi_insert_after (&si, list, BSI_SAME_STMT);
   bsi_remove (&si, true);
-  si = bsi_last (entry_bb);
-  bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
   list = alloc_stmt_list ();
 
-  t = build1 (LABEL_EXPR, void_type_node, l0);
-  gimplify_and_add (t, &list);
-
   t = fold_convert (type, s0);
   t = build2 (MULT_EXPR, type, t, fd->step);
   t = build2 (PLUS_EXPR, type, t, fd->n1);
@@ -2695,24 +2689,10 @@ expand_omp_for_static_nochunk (struct om
   t = build2 (PLUS_EXPR, type, t, fd->n1);
   e = get_formal_tmp_var (t, &list);
 
-  seq_start_bb = create_empty_bb (entry_bb);
   si = bsi_start (seq_start_bb);
   bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
-  /* Original body goes in BODY_BB.  */
-  si = bsi_start (body_bb);
-  t = build1 (LABEL_EXPR, void_type_node, l1);
-  bsi_insert_before (&si, t, BSI_CONTINUE_LINKING);
-
-  /* Split EXIT_BB at the OMP_RETURN.  The code controlling the
-     sequential loop goes in the original EXIT_BB.  The exit out of
-     the parallel loop goes in the new block (NEW_EXIT_BB).  */
-  si = bsi_last (exit_bb);
-  t = bsi_stmt (si);
-  bsi_remove (&si, true);
-  gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
-  exit_edge = split_block (exit_bb, t);
-  new_exit_bb = exit_edge->dest;
+  /* The code controlling the sequential loop replaces the OMP_CONTINUE.  */
   list = alloc_stmt_list ();
 
   t = build2 (PLUS_EXPR, type, fd->v, fd->step);
@@ -2725,26 +2705,30 @@ expand_omp_for_static_nochunk (struct om
 	      build_and_jump (&l2));
   append_to_statement_list (t, &list);
 
-  si = bsi_last (exit_bb);
-  bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+  si = bsi_last (cont_bb);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
+  bsi_insert_after (&si, list, BSI_SAME_STMT);
+  bsi_remove (&si, true);
 
-  /* Add the exit label to NEW_EXIT_BB.  */
-  si = bsi_start (new_exit_bb);
-  t = build1 (LABEL_EXPR, void_type_node, l2);
-  bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
-  single_succ_edge (new_exit_bb)->flags = EDGE_FALLTHRU;
+  /* Replace the OMP_RETURN with a barrier, or nothing.  */
+  si = bsi_last (exit_bb);
+  if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
+    {
+      list = alloc_stmt_list ();
+      build_omp_barrier (&list);
+      bsi_insert_after (&si, list, BSI_SAME_STMT);
+    }
+  bsi_remove (&si, true);
 
   /* Connect all the blocks.  */
   make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
 
   remove_edge (single_succ_edge (entry_bb));
-  make_edge (entry_bb, new_exit_bb, EDGE_TRUE_VALUE);
+  make_edge (entry_bb, fin_bb, EDGE_TRUE_VALUE);
   make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE);
 
-  make_edge (exit_bb, body_bb, EDGE_TRUE_VALUE);
-  find_edge (exit_bb, new_exit_bb)->flags = EDGE_FALSE_VALUE;
-
-  return new_exit_bb;
+  make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
+  find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
 }
 
 
@@ -2779,31 +2763,34 @@ expand_omp_for_static_nochunk (struct om
     L4:
 */
 
-static basic_block
+static void
 expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
 {
   tree l0, l1, l2, l3, l4, n, s0, e0, e, t;
   tree trip, nthreads, threadid;
   tree type, utype;
   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
-  basic_block trip_update_bb, new_exit_bb;
-  edge exit_edge;
+  basic_block trip_update_bb, cont_bb, fin_bb;
   tree list;
   block_stmt_iterator si;
 
-  l0 = create_artificial_label ();
-  l1 = create_artificial_label ();
-  l2 = create_artificial_label ();
-  l3 = create_artificial_label ();
-  l4 = create_artificial_label ();
-  
   type = TREE_TYPE (fd->v);
   utype = lang_hooks.types.unsigned_type (type);
 
-  entry_bb = bb_for_stmt (region->entry);
+  entry_bb = region->entry;
+  iter_part_bb = create_empty_bb (entry_bb);
+  seq_start_bb = create_empty_bb (iter_part_bb);
   body_bb = single_succ (entry_bb);
-
-  exit_bb = bb_for_stmt (region->exit);
+  cont_bb = region->cont;
+  trip_update_bb = create_empty_bb (cont_bb);
+  fin_bb = single_succ (cont_bb);
+  exit_bb = region->exit;
+
+  l0 = tree_block_label (iter_part_bb);
+  l1 = tree_block_label (seq_start_bb);
+  l2 = tree_block_label (body_bb);
+  l3 = tree_block_label (trip_update_bb);
+  l4 = tree_block_label (fin_bb);
 
   /* Trip and adjustment setup goes in ENTRY_BB.  */
   list = alloc_stmt_list ();
@@ -2849,17 +2836,13 @@ expand_omp_for_static_chunk (struct omp_
   trip = get_initialized_tmp_var (t, &list, NULL);
 
   si = bsi_last (entry_bb);
-  gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
+  bsi_insert_after (&si, list, BSI_SAME_STMT);
   bsi_remove (&si, true);
-  si = bsi_last (entry_bb);
-  bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
   /* Iteration space partitioning goes in ITER_PART_BB.  */
   list = alloc_stmt_list ();
 
-  t = build1 (LABEL_EXPR, void_type_node, l0);
-  gimplify_and_add (t, &list);
-
   t = build2 (MULT_EXPR, utype, trip, nthreads);
   t = build2 (PLUS_EXPR, utype, t, threadid);
   t = build2 (MULT_EXPR, utype, t, fd->chunk_size);
@@ -2874,16 +2857,12 @@ expand_omp_for_static_chunk (struct omp_
 	      build_and_jump (&l1), build_and_jump (&l4));
   append_to_statement_list (t, &list);
 
-  iter_part_bb = create_empty_bb (entry_bb);
   si = bsi_start (iter_part_bb);
   bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
   list = alloc_stmt_list ();
 
-  t = build1 (LABEL_EXPR, void_type_node, l1);
-  gimplify_and_add (t, &list);
-
   t = fold_convert (type, s0);
   t = build2 (MULT_EXPR, type, t, fd->step);
   t = build2 (PLUS_EXPR, type, t, fd->n1);
@@ -2895,18 +2874,11 @@ expand_omp_for_static_chunk (struct omp_
   t = build2 (PLUS_EXPR, type, t, fd->n1);
   e = get_formal_tmp_var (t, &list);
 
-  seq_start_bb = create_empty_bb (iter_part_bb);
   si = bsi_start (seq_start_bb);
   bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
-  /* Main loop body goes in BODY_BB.  */
-  si = bsi_start (body_bb);
-  t = build1 (LABEL_EXPR, void_type_node, l2);
-  bsi_insert_before (&si, t, BSI_CONTINUE_LINKING);
-
-  /* Split EXIT_BB.  The code controlling the sequential loop goes in
-     the first half.  The trip update code goes into the second half
-     (TRIP_UPDATE_BB).  */
+  /* The code controlling the sequential loop goes in CONT_BB,
+     replacing the OMP_CONTINUE.  */
   list = alloc_stmt_list ();
 
   t = build2 (PLUS_EXPR, type, fd->v, fd->step);
@@ -2919,21 +2891,14 @@ expand_omp_for_static_chunk (struct omp_
 	      build_and_jump (&l2), build_and_jump (&l3));
   append_to_statement_list (t, &list);
   
-  si = bsi_last (exit_bb);
-  t = bsi_stmt (si);
-  gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
+  si = bsi_last (cont_bb);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
+  bsi_insert_after (&si, list, BSI_SAME_STMT);
   bsi_remove (&si, true);
-  exit_edge = split_block (exit_bb, t);
-  si = bsi_last (exit_bb);
-  bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
 
   /* Trip update code goes into TRIP_UPDATE_BB.  */
-  trip_update_bb = exit_edge->dest;
   list = alloc_stmt_list ();
 
-  t = build1 (LABEL_EXPR, void_type_node, l3);
-  gimplify_and_add (t, &list);
-
   t = build_int_cst (utype, 1);
   t = build2 (PLUS_EXPR, utype, trip, t);
   t = build2 (MODIFY_EXPR, void_type_node, trip, t);
@@ -2941,30 +2906,31 @@ expand_omp_for_static_chunk (struct omp_
 
   si = bsi_start (trip_update_bb);
   bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
-  exit_edge = single_succ_edge (trip_update_bb);
-  exit_edge->flags = EDGE_FALLTHRU;
-  new_exit_bb = exit_edge->dest;
-
-  /* Insert exit label on EXIT_EDGE.  */
-  t = build1 (LABEL_EXPR, void_type_node, l4);
-  bsi_insert_on_edge_immediate (exit_edge, t);
+
+  /* Replace the OMP_RETURN with a barrier, or nothing.  */
+  si = bsi_last (exit_bb);
+  if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
+    {
+      list = alloc_stmt_list ();
+      build_omp_barrier (&list);
+      bsi_insert_after (&si, list, BSI_SAME_STMT);
+    }
+  bsi_remove (&si, true);
 
   /* Connect the new blocks.  */
   remove_edge (single_succ_edge (entry_bb));
   make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU);
 
   make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE);
-  make_edge (iter_part_bb, new_exit_bb, EDGE_FALSE_VALUE);
-  remove_edge (exit_edge);
+  make_edge (iter_part_bb, fin_bb, EDGE_FALSE_VALUE);
 
   make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
 
-  make_edge (exit_bb, body_bb, EDGE_TRUE_VALUE);
-  find_edge (exit_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE;
+  remove_edge (single_succ_edge (cont_bb));
+  make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
+  make_edge (cont_bb, trip_update_bb, EDGE_FALSE_VALUE);
 
   make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU);
-
-  return new_exit_bb;
 }
 
 
@@ -2974,25 +2940,24 @@ static void
 expand_omp_for (struct omp_region *region)
 {
   struct omp_for_data fd;
-  basic_block last_bb = NULL;
 
   push_gimplify_context ();
 
-  extract_omp_for_data (region->entry, &fd);
+  extract_omp_for_data (last_stmt (region->entry), &fd);
 
   if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !fd.have_ordered)
     {
       if (fd.chunk_size == NULL)
-	last_bb = expand_omp_for_static_nochunk (region, &fd);
+	expand_omp_for_static_nochunk (region, &fd);
       else
-	last_bb = expand_omp_for_static_chunk (region, &fd);
+	expand_omp_for_static_chunk (region, &fd);
     }
   else
     {
       int fn_index = fd.sched_kind + fd.have_ordered * 4;
       int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
       int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
-      last_bb = expand_omp_for_generic (region, &fd, start_ix, next_ix);
+      expand_omp_for_generic (region, &fd, start_ix, next_ix);
     }
 
   pop_gimplify_context (NULL);
@@ -3029,19 +2994,23 @@ expand_omp_for (struct omp_region *regio
 static void
 expand_omp_sections (struct omp_region *region)
 {
-  tree label_vec, l0, l1, l2, t, u, v;
+  tree label_vec, l0, l1, l2, t, u, v, sections_stmt;
   unsigned i, len;
-  basic_block entry_bb, exit_bb, l0_bb, l1_bb, default_bb;
-  edge e, entry_edge, exit_edge;
-  edge_iterator ei;
+  basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb, default_bb;
   block_stmt_iterator si;
+  struct omp_region *inner;
+  edge e;
 
-  entry_bb = bb_for_stmt (region->entry);
-  exit_bb = bb_for_stmt (region->exit);
-
-  l0 = create_artificial_label ();
-  l1 = create_artificial_label ();
-  l2 = create_artificial_label ();
+  entry_bb = region->entry;
+  l0_bb = create_empty_bb (entry_bb);
+  l1_bb = region->cont;
+  l2_bb = single_succ (l1_bb);
+  default_bb = create_empty_bb (l1_bb->prev_bb);
+  exit_bb = region->exit;
+
+  l0 = tree_block_label (l0_bb);
+  l1 = tree_block_label (l1_bb);
+  l2 = tree_block_label (l2_bb);
 
   v = create_tmp_var (unsigned_type_node, ".section");
 
@@ -3051,15 +3020,11 @@ expand_omp_sections (struct omp_region *
   len = EDGE_COUNT (entry_bb->succs);
   label_vec = make_tree_vec (len + 2);
 
-  /* Split ENTRY_BB.  The call to GOMP_sections_start goes in the
-     first half.  The second half contains the switch().  */
+  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
+     OMP_SECTIONS statement.  */
   si = bsi_last (entry_bb);
-  t = bsi_stmt (si);
-  gcc_assert (t && TREE_CODE (t) == OMP_SECTIONS);
-  bsi_remove (&si, true);
-  entry_edge = split_block (entry_bb, t);
-  l0_bb = entry_edge->dest;
-
+  sections_stmt = bsi_stmt (si);
+  gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS);
   if (!is_combined_parallel (region))
     {
       /* If we are not inside a combined parallel+sections region,
@@ -3069,15 +3034,12 @@ expand_omp_sections (struct omp_region *
       u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
       t = build_function_call_expr (u, t);
       t = build2 (MODIFY_EXPR, void_type_node, v, t);
-      si = bsi_last (entry_bb);
-      bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+      bsi_insert_after (&si, t, BSI_SAME_STMT);
     }
+  bsi_remove (&si, true);
 
   /* The switch() statement replacing OMP_SECTIONS goes in L0_BB.  */
-  si = bsi_last (l0_bb);
-
-  t = build1 (LABEL_EXPR, void_type_node, l0);
-  bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+  si = bsi_start (l0_bb);
 
   t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec);
   bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
@@ -3085,119 +3047,145 @@ expand_omp_sections (struct omp_region *
   t = build3 (CASE_LABEL_EXPR, void_type_node,
 	      build_int_cst (unsigned_type_node, 0), NULL, l2);
   TREE_VEC_ELT (label_vec, 0) = t;
+  make_edge (l0_bb, l2_bb, 0);
   
   /* Convert each OMP_SECTION into a CASE_LABEL_EXPR.  */
-  i = 1;
-  FOR_EACH_EDGE (e, ei, l0_bb->succs)
+  for (inner = region->inner, i = 1; inner; inner = inner->next, ++i)
     {
       basic_block s_entry_bb, s_exit_bb;
 
-      e->flags = 0;
-      s_entry_bb = e->dest;
-      si = bsi_last (s_entry_bb);
-      t = bsi_stmt (si);
-      gcc_assert (t && TREE_CODE (t) == OMP_SECTION);
-      s_exit_bb = bb_for_stmt (lookup_omp_region (t)->exit);
-      bsi_remove (&si, true);
+      s_entry_bb = inner->entry;
+      s_exit_bb = inner->exit;
 
-      t = create_artificial_label ();
+      t = tree_block_label (s_entry_bb);
       u = build_int_cst (unsigned_type_node, i);
       u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
       TREE_VEC_ELT (label_vec, i) = u;
-      t = build1 (LABEL_EXPR, void_type_node, t);
+
       si = bsi_last (s_entry_bb);
-      bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
-      i++;
-      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
+      gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION);
+      gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si)));
+      bsi_remove (&si, true);
 
       si = bsi_last (s_exit_bb);
-      t = bsi_stmt (si);
-      gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
+      gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
       bsi_remove (&si, true);
+
+      e = single_pred_edge (s_entry_bb);
+      e->flags = 0;
+      redirect_edge_pred (e, l0_bb);
+
+      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
     }
 
   /* Error handling code goes in DEFAULT_BB.  */
-  default_bb = create_empty_bb (entry_bb);
-  si = bsi_start (default_bb);
-  t = create_artificial_label ();
+  t = tree_block_label (default_bb);
   u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
   TREE_VEC_ELT (label_vec, len + 1) = u;
-  t = build1 (LABEL_EXPR, void_type_node, t);
-  bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+  make_edge (l0_bb, default_bb, 0);
 
+  si = bsi_start (default_bb);
   t = built_in_decls[BUILT_IN_TRAP];
   t = build_function_call_expr (t, NULL);
   bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
 
-  make_edge (l0_bb, default_bb, 0);
-
   /* Code to get the next section goes in L1_BB.  */
-  si = bsi_last (exit_bb);
-  t = bsi_stmt (si);
-  gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
-  bsi_remove (&si, true);
-  exit_edge = split_block (exit_bb, t);
-  l1_bb = exit_edge->src;
-  exit_bb = exit_edge->dest;
-  si = bsi_start (l1_bb);
-  t = build1 (LABEL_EXPR, void_type_node, l1);
-  bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+  si = bsi_last (l1_bb);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
 
   t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
   t = build_function_call_expr (t, NULL);
   t = build2 (MODIFY_EXPR, void_type_node, v, t);
-  bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
-
-  remove_edge (single_succ_edge (l1_bb));
-  make_edge (l1_bb, l0_bb, EDGE_FALLTHRU);
+  bsi_insert_after (&si, t, BSI_SAME_STMT);
+  bsi_remove (&si, true);
 
-  /* Exit label in EXIT_BB.  */
+  /* Cleanup function replaces OMP_RETURN in EXIT_BB.  */
   si = bsi_last (exit_bb);
-  t = build1 (LABEL_EXPR, void_type_node, l2);
-  bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
-
-  make_edge (l0_bb, exit_bb, 0);
-  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
+  if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
+    t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT];
+  else
+    t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END];
+  t = build_function_call_expr (t, NULL);
+  bsi_insert_after (&si, t, BSI_SAME_STMT);
+  bsi_remove (&si, true);
 
+  /* Connect the new blocks.  */
   if (is_combined_parallel (region))
     {
       /* If this was a combined parallel+sections region, we did not
 	 emit a GOMP_sections_start in the entry block, so we just
 	 need to jump to L1_BB to get the next section.  */
-      remove_edge (single_succ_edge (entry_bb));
       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
     }
+  else
+    make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
+
+  e = single_succ_edge (l1_bb);
+  redirect_edge_succ (e, l0_bb);
+  e->flags = EDGE_FALLTHRU;
 }
 
 
-/* Generic expansion for OpenMP synchronization directives: single,
-   master, ordered and critical.  All we need to do here is remove the
-   entry and exit markers for REGION.  */
+/* Expand code for an OpenMP single directive.  We've already expanded
+   much of the code, here we simply place the GOMP_barrier call.  */
+
+static void
+expand_omp_single (struct omp_region *region)
+{
+  basic_block entry_bb, exit_bb;
+  block_stmt_iterator si;
+  bool need_barrier = false;
+
+  entry_bb = region->entry;
+  exit_bb = region->exit;
+
+  si = bsi_last (entry_bb);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE);
+  /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
+     be removed.  We need to ensure that the thread that entered the single
+     does not exit before the data is copied out by the other threads.  */
+  if (find_omp_clause (OMP_SINGLE_CLAUSES (bsi_stmt (si)),
+		       OMP_CLAUSE_COPYPRIVATE))
+    need_barrier = true;
+  bsi_remove (&si, true);
+  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+  si = bsi_last (exit_bb);
+  if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier)
+    {
+      tree t = alloc_stmt_list ();
+      build_omp_barrier (&t);
+      bsi_insert_after (&si, t, BSI_SAME_STMT);
+    }
+  bsi_remove (&si, true);
+  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
+}
+
+
+/* Generic expansion for OpenMP synchronization directives: master,
+   ordered and critical.  All we need to do here is remove the entry
+   and exit markers for REGION.  */
 
 static void
 expand_omp_synch (struct omp_region *region)
 {
   basic_block entry_bb, exit_bb;
   block_stmt_iterator si;
-  tree t;
 
-  entry_bb = bb_for_stmt (region->entry);
-  exit_bb = bb_for_stmt (region->exit);
+  entry_bb = region->entry;
+  exit_bb = region->exit;
 
   si = bsi_last (entry_bb);
-  t = bsi_stmt (si);
-  gcc_assert (t
-              && (TREE_CODE (t) == OMP_SINGLE
-		  || TREE_CODE (t) == OMP_MASTER
-		  || TREE_CODE (t) == OMP_ORDERED
-		  || TREE_CODE (t) == OMP_CRITICAL));
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
+	      || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
+	      || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
+	      || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
   bsi_remove (&si, true);
   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
 
   si = bsi_last (exit_bb);
-  t = bsi_stmt (si);
-  gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
+  gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
   bsi_remove (&si, true);
   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
 }
@@ -3214,47 +3202,41 @@ expand_omp (struct omp_region *region)
 {
   while (region)
     {
-      enum tree_code code = TREE_CODE (region->entry);
-
       if (region->inner)
 	expand_omp (region->inner);
 
-      switch (code)
+      switch (region->type)
 	{
-	  case OMP_PARALLEL:
-	    expand_omp_parallel (region);
-	    break;
+	case OMP_PARALLEL:
+	  expand_omp_parallel (region);
+	  break;
 
-	  case OMP_FOR:
-	    expand_omp_for (region);
-	    break;
+	case OMP_FOR:
+	  expand_omp_for (region);
+	  break;
 
-	  case OMP_SECTIONS:
-	    expand_omp_sections (region);
-	    break;
+	case OMP_SECTIONS:
+	  expand_omp_sections (region);
+	  break;
 
-	  case OMP_SECTION:
-	    /* Individual omp sections are handled together with their
-	       parent OMP_SECTIONS region.  */
-	    break;
+	case OMP_SECTION:
+	  /* Individual omp sections are handled together with their
+	     parent OMP_SECTIONS region.  */
+	  break;
 
-	  case OMP_SINGLE:
-	  case OMP_MASTER:
-	  case OMP_ORDERED:
-	  case OMP_CRITICAL:
-	    expand_omp_synch (region);
-	    break;
+	case OMP_SINGLE:
+	  expand_omp_single (region);
+	  break;
 
-	  default:
-	    gcc_unreachable ();
-	}
+	case OMP_MASTER:
+	case OMP_ORDERED:
+	case OMP_CRITICAL:
+	  expand_omp_synch (region);
+	  break;
 
-      /* Expansion adds and removes basic block, edges, creates
-	 and exposes unreachable regions that need to be cleaned up
-	 before proceeding.  */
-      free_dominance_info (CDI_DOMINATORS);
-      free_dominance_info (CDI_POST_DOMINATORS);
-      cleanup_tree_cfg ();
+	default:
+	  gcc_unreachable ();
+	}
 
       region = region->next;
     }
@@ -3275,41 +3257,38 @@ build_omp_regions_1 (basic_block bb, str
   if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si)))
     {
       struct omp_region *region;
+      enum tree_code code;
 
       stmt = bsi_stmt (si);
+      code = TREE_CODE (stmt);
 
-      if (TREE_CODE (stmt) == OMP_RETURN_EXPR)
+      if (code == OMP_RETURN)
 	{
 	  /* STMT is the return point out of region PARENT.  Mark it
 	     as the exit point and make PARENT the immediately
 	     enclosing region.  */
 	  gcc_assert (parent);
 	  region = parent;
-	  region->exit = stmt;
+	  region->exit = bb;
 	  parent = parent->outer;
 
 	  /* If REGION is a parallel region, determine whether it is
 	     a combined parallel+workshare region.  */
-	  if (TREE_CODE (region->entry) == OMP_PARALLEL)
+	  if (region->type == OMP_PARALLEL)
 	    determine_parallel_type (region);
 	}
+      else if (code == OMP_CONTINUE)
+	{
+	  gcc_assert (parent);
+	  parent->cont = bb;
+	}
       else
 	{
 	  /* Otherwise, this directive becomes the parent for a new
 	     region.  */
-	  region = new_omp_region (stmt, parent);
+	  region = new_omp_region (bb, code, parent);
 	  parent = region;
 	}
-
-      gcc_assert (region);
-      if (omp_regions == NULL)
-	{
-	  omp_regions = splay_tree_new (splay_tree_compare_pointers, 0, 0);
-	  root_omp_region = region;
-	}
-
-      splay_tree_insert (omp_regions, (splay_tree_key) stmt,
-	                 (splay_tree_value) region);
     }
 
   for (son = first_dom_son (CDI_DOMINATORS, bb);
@@ -3325,7 +3304,7 @@ build_omp_regions_1 (basic_block bb, str
 static void
 build_omp_regions (void)
 {
-  gcc_assert (omp_regions == NULL && root_omp_region == NULL);
+  gcc_assert (root_omp_region == NULL);
   calculate_dominance_info (CDI_DOMINATORS);
   build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
 }
@@ -3338,20 +3317,26 @@ execute_expand_omp (void)
 {
   build_omp_regions ();
 
-  if (root_omp_region)
-    {
-      if (dump_file)
-	{
-	  fprintf (dump_file, "\nOMP region tree\n\n");
-	  dump_omp_region (dump_file, root_omp_region, 0);
-	  fprintf (dump_file, "\n");
-	}
+  if (!root_omp_region)
+    return 0;
 
-      expand_omp (root_omp_region);
-      splay_tree_delete (omp_regions);
-      root_omp_region = NULL;
-      omp_regions = NULL;
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nOMP region tree\n\n");
+      dump_omp_region (dump_file, root_omp_region, 0);
+      fprintf (dump_file, "\n");
     }
+
+  remove_exit_barriers (root_omp_region);
+
+  expand_omp (root_omp_region);
+
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+  cleanup_tree_cfg ();
+
+  free_omp_regions ();
+
   return 0;
 }
 
@@ -3386,14 +3371,12 @@ static void
 lower_omp_sections (tree *stmt_p, omp_context *ctx)
 {
   tree new_stmt, stmt, body, bind, block, ilist, olist, new_body;
-  tree dlist, region_exit;
+  tree t, dlist;
   tree_stmt_iterator tsi;
   unsigned i, len;
 
   stmt = *stmt_p;
 
-  gcc_assert (OMP_SECTIONS_SECTIONS (stmt) == NULL_TREE);
-
   push_gimplify_context ();
 
   dlist = NULL;
@@ -3404,41 +3387,34 @@ lower_omp_sections (tree *stmt_p, omp_co
   for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi))
     continue;
 
-  /* There are two markers per section and one end marker for the
-     whole construct.  */
-  OMP_SECTIONS_SECTIONS (stmt) = make_tree_vec (2 * len + 1);
-
   tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
   body = alloc_stmt_list ();
   for (i = 0; i < len; i++, tsi_next (&tsi))
     {
       omp_context *sctx;
-      tree sec_start, sec_end, sec_body;
+      tree sec_start, sec_end;
 
       sec_start = tsi_stmt (tsi);
-      sec_body = alloc_stmt_list ();
       sctx = maybe_lookup_ctx (sec_start);
       gcc_assert (sctx);
 
+      append_to_statement_list (sec_start, &body);
+
       lower_omp (&OMP_SECTION_BODY (sec_start), sctx);
-      append_to_statement_list (OMP_SECTION_BODY (sec_start), &sec_body);
+      append_to_statement_list (OMP_SECTION_BODY (sec_start), &body);
+      OMP_SECTION_BODY (sec_start) = NULL;
 
       if (i == len - 1)
 	{
 	  tree l = alloc_stmt_list ();
 	  lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL,
 				     &l, ctx);
-	  append_to_statement_list (l, &sec_body);
+	  append_to_statement_list (l, &body);
+	  OMP_SECTION_LAST (sec_start) = 1;
 	}
       
-      sec_end = make_node (OMP_RETURN_EXPR);
-
-      OMP_SECTION_BODY (sec_start) = sec_body;
-      append_to_statement_list (sec_start, &body);
+      sec_end = make_node (OMP_RETURN);
       append_to_statement_list (sec_end, &body);
-
-      TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), i * 2) = sec_start;
-      TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), i * 2 + 1) = sec_end;
     }
 
   block = make_node (BLOCK);
@@ -3448,31 +3424,30 @@ lower_omp_sections (tree *stmt_p, omp_co
   olist = NULL_TREE;
   lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx);
 
-  /* Unless there's a nowait clause, add a barrier afterward.  */
-  if (!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt), OMP_CLAUSE_NOWAIT))
-    build_omp_barrier (&olist);
-
   pop_gimplify_context (NULL_TREE);
   record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
 
   new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
   TREE_SIDE_EFFECTS (new_stmt) = 1;
-  OMP_SECTIONS_BODY (stmt) = body;
-
-  region_exit = make_node (OMP_RETURN_EXPR);
 
   new_body = alloc_stmt_list ();
   append_to_statement_list (ilist, &new_body);
   append_to_statement_list (stmt, &new_body);
-  /* ??? The OMP_RETURN doesn't logically belong here, but in
-     expand_omp_sections we expect this marker to be where the
-     individual sections join after completing the loop.  */
-  append_to_statement_list (region_exit, &new_body);
+  append_to_statement_list (bind, &new_body);
+
+  t = make_node (OMP_CONTINUE);
+  append_to_statement_list (t, &new_body);
+
   append_to_statement_list (olist, &new_body);
   append_to_statement_list (dlist, &new_body);
-  BIND_EXPR_BODY (new_stmt) = new_body;
 
-  TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), 2 * len) = region_exit;
+  t = make_node (OMP_RETURN);
+  OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt),
+					     OMP_CLAUSE_NOWAIT);
+  append_to_statement_list (t, &new_body);
+
+  BIND_EXPR_BODY (new_stmt) = new_body;
+  OMP_SECTIONS_BODY (stmt) = NULL;
 
   *stmt_p = new_stmt;
 }
@@ -3499,9 +3474,6 @@ lower_omp_single_simple (tree single_stm
   t = build3 (COND_EXPR, void_type_node, t,
 	      OMP_SINGLE_BODY (single_stmt), NULL);
   gimplify_and_add (t, pre_p);
-
-  if (!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt), OMP_CLAUSE_NOWAIT))
-    build_omp_barrier (pre_p);
 }
 
 
@@ -3585,8 +3557,6 @@ lower_omp_single_copy (tree single_stmt,
 
   t = build1 (LABEL_EXPR, void_type_node, l2);
   gimplify_and_add (t, pre_p);
-
-  build_omp_barrier (pre_p);
 }
 
 
@@ -3600,29 +3570,34 @@ lower_omp_single (tree *stmt_p, omp_cont
   push_gimplify_context ();
 
   block = make_node (BLOCK);
-  bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
+  *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
   TREE_SIDE_EFFECTS (bind) = 1;
 
   lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt),
 			   &BIND_EXPR_BODY (bind), &dlist, ctx);
   lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx);
+  maybe_catch_exception (&OMP_SINGLE_BODY (single_stmt));
+
+  append_to_statement_list (single_stmt, &BIND_EXPR_BODY (bind));
 
   if (ctx->record_type)
     lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx);
   else
     lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind));
 
+  OMP_SINGLE_BODY (single_stmt) = NULL;
+
   append_to_statement_list (dlist, &BIND_EXPR_BODY (bind));
-  maybe_catch_exception (&BIND_EXPR_BODY (bind));
-  t = make_node (OMP_RETURN_EXPR);
+
+  t = make_node (OMP_RETURN);
+  OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt),
+					     OMP_CLAUSE_NOWAIT);
   append_to_statement_list (t, &BIND_EXPR_BODY (bind));
+
   pop_gimplify_context (bind);
 
   BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
   BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
-
-  OMP_SINGLE_BODY (single_stmt) = alloc_stmt_list ();
-  append_to_statement_list (bind, &OMP_SINGLE_BODY (single_stmt));
 }
 
 
@@ -3636,9 +3611,11 @@ lower_omp_master (tree *stmt_p, omp_cont
   push_gimplify_context ();
 
   block = make_node (BLOCK);
-  bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
+  *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
   TREE_SIDE_EFFECTS (bind) = 1;
 
+  append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
+
   x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
   x = build_function_call_expr (x, NULL);
   x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
@@ -3646,20 +3623,21 @@ lower_omp_master (tree *stmt_p, omp_cont
   gimplify_and_add (x, &BIND_EXPR_BODY (bind));
 
   lower_omp (&OMP_MASTER_BODY (stmt), ctx);
+  maybe_catch_exception (&OMP_MASTER_BODY (stmt));
   append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind));
+  OMP_MASTER_BODY (stmt) = NULL;
 
   x = build1 (LABEL_EXPR, void_type_node, lab);
   gimplify_and_add (x, &BIND_EXPR_BODY (bind));
-  maybe_catch_exception (&BIND_EXPR_BODY (bind));
-  x = make_node (OMP_RETURN_EXPR);
+
+  x = make_node (OMP_RETURN);
+  OMP_RETURN_NOWAIT (x) = 1;
   append_to_statement_list (x, &BIND_EXPR_BODY (bind));
+
   pop_gimplify_context (bind);
 
   BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
   BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
-
-  OMP_MASTER_BODY (stmt) = alloc_stmt_list ();
-  append_to_statement_list (bind, &OMP_MASTER_BODY (stmt));
 }
 
 
@@ -3673,29 +3651,32 @@ lower_omp_ordered (tree *stmt_p, omp_con
   push_gimplify_context ();
 
   block = make_node (BLOCK);
-  bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
+  *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
   TREE_SIDE_EFFECTS (bind) = 1;
 
+  append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
+
   x = built_in_decls[BUILT_IN_GOMP_ORDERED_START];
   x = build_function_call_expr (x, NULL);
   gimplify_and_add (x, &BIND_EXPR_BODY (bind));
 
   lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
+  maybe_catch_exception (&OMP_ORDERED_BODY (stmt));
   append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));
+  OMP_ORDERED_BODY (stmt) = NULL;
 
   x = built_in_decls[BUILT_IN_GOMP_ORDERED_END];
   x = build_function_call_expr (x, NULL);
   gimplify_and_add (x, &BIND_EXPR_BODY (bind));
-  maybe_catch_exception (&BIND_EXPR_BODY (bind));
-  x = make_node (OMP_RETURN_EXPR);
+
+  x = make_node (OMP_RETURN);
+  OMP_RETURN_NOWAIT (x) = 1;
   append_to_statement_list (x, &BIND_EXPR_BODY (bind));
+
   pop_gimplify_context (bind);
 
   BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
   BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
-
-  OMP_ORDERED_BODY (stmt) = alloc_stmt_list ();
-  append_to_statement_list (bind, &OMP_ORDERED_BODY (stmt));
 }
 
 
@@ -3766,25 +3747,27 @@ lower_omp_critical (tree *stmt_p, omp_co
   push_gimplify_context ();
 
   block = make_node (BLOCK);
-  bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
+  *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
   TREE_SIDE_EFFECTS (bind) = 1;
 
+  append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
+
   gimplify_and_add (lock, &BIND_EXPR_BODY (bind));
 
   lower_omp (&OMP_CRITICAL_BODY (stmt), ctx);
   maybe_catch_exception (&OMP_CRITICAL_BODY (stmt));
   append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind));
+  OMP_CRITICAL_BODY (stmt) = NULL;
 
   gimplify_and_add (unlock, &BIND_EXPR_BODY (bind));
-  t = make_node (OMP_RETURN_EXPR);
+
+  t = make_node (OMP_RETURN);
+  OMP_RETURN_NOWAIT (t) = 1;
   append_to_statement_list (t, &BIND_EXPR_BODY (bind));
 
   pop_gimplify_context (bind);
   BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
   BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
-
-  OMP_CRITICAL_BODY (stmt) = alloc_stmt_list ();
-  append_to_statement_list (bind, &OMP_CRITICAL_BODY (stmt));
 }
 
 
@@ -3871,30 +3854,28 @@ lower_omp_for (tree *stmt_p, omp_context
   /* Once lowered, extract the bounds and clauses.  */
   extract_omp_for_data (stmt, &fd);
 
-  /* Region exit marker goes at the end of the loop body.  */
-  t = make_node (OMP_RETURN_EXPR);
-  append_to_statement_list (t, &OMP_FOR_BODY (stmt));
-  maybe_catch_exception (&OMP_FOR_BODY (stmt));
   append_to_statement_list (stmt, body_p);
 
+  maybe_catch_exception (&OMP_FOR_BODY (stmt));
+  append_to_statement_list (OMP_FOR_BODY (stmt), body_p);
+
+  t = make_node (OMP_CONTINUE);
+  append_to_statement_list (t, body_p);
+
   /* After the loop, add exit clauses.  */
   lower_omp_for_lastprivate (&fd, &dlist, ctx);
   lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
   append_to_statement_list (dlist, body_p);
 
-  /* Add a barrier unless the user specified NOWAIT.  Note that if
-     this is a combined parallel+loop construct, the barrier will be
-     optimized away during expansion (see expand_omp_for).  */
-  if (!fd.have_nowait)
-    {
-      tree stmt = alloc_stmt_list ();
-      build_omp_barrier (&stmt);
-      append_to_statement_list (stmt, body_p);
-    }
+  /* Region exit marker goes at the end of the loop body.  */
+  t = make_node (OMP_RETURN);
+  OMP_RETURN_NOWAIT (t) = fd.have_nowait;
+  append_to_statement_list (t, body_p);
 
   pop_gimplify_context (NULL_TREE);
   record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
 
+  OMP_FOR_BODY (stmt) = NULL_TREE;
   OMP_FOR_PRE_BODY (stmt) = NULL_TREE;
   *stmt_p = new_stmt;
 }
@@ -3959,7 +3940,7 @@ lower_omp_parallel (tree *stmt_p, omp_co
   append_to_statement_list (par_ilist, &new_body);
   append_to_statement_list (par_body, &new_body);
   append_to_statement_list (par_olist, &new_body);
-  t = make_node (OMP_RETURN_EXPR);
+  t = make_node (OMP_RETURN);
   append_to_statement_list (t, &new_body);
   OMP_PARALLEL_BODY (stmt) = new_body;
 
@@ -4155,8 +4136,8 @@ struct tree_opt_pass pass_lower_omp = 
 };
 
 /* The following is a utility to diagnose OpenMP structured block violations.
-   It's part of the "omplower" pass, as that's invoked too late.  It should
-   be invoked by the respective front ends after gimplification.  */
+   It is not part of the "omplower" pass, as that's invoked too late.  It
+   should be invoked by the respective front ends after gimplification.  */
 
 static splay_tree all_labels;
 
--- testsuite/g++.dg/gomp/block-0.C	(revision 112944)
+++ testsuite/g++.dg/gomp/block-0.C	(local)
@@ -29,5 +29,5 @@ void foo()
     }
 }
 
-// { dg-final { scan-tree-dump-times "terminate" 8 "omplower" } }
+// { dg-final { scan-tree-dump-times "terminate" 10 "omplower" } }
 // { dg-final { cleanup-tree-dump "omplower" } }
--- tree-cfg.c	(revision 112944)
+++ tree-cfg.c	(local)
@@ -103,7 +103,6 @@ static void make_edges (void);
 static void make_cond_expr_edges (basic_block);
 static void make_switch_expr_edges (basic_block);
 static void make_goto_expr_edges (basic_block);
-static void make_omp_sections_edges (basic_block);
 static edge tree_redirect_edge_and_branch (edge, basic_block);
 static edge tree_try_redirect_by_replacing_jump (edge, basic_block);
 static unsigned int split_critical_edges (void);
@@ -447,6 +446,7 @@ static void
 make_edges (void)
 {
   basic_block bb;
+  struct omp_region *cur_region = NULL;
 
   /* Create an edge from entry to the first block with executable
      statements in it.  */
@@ -460,7 +460,8 @@ make_edges (void)
 
       if (last)
 	{
-	  switch (TREE_CODE (last))
+	  enum tree_code code = TREE_CODE (last);
+	  switch (code)
 	    {
 	    case GOTO_EXPR:
 	      make_goto_expr_edges (bb);
@@ -522,20 +523,55 @@ make_edges (void)
 	    case OMP_ORDERED:
 	    case OMP_CRITICAL:
 	    case OMP_SECTION:
+	      cur_region = new_omp_region (bb, code, cur_region);
 	      fallthru = true;
 	      break;
 
-	    case OMP_RETURN_EXPR:
-	      /* In the case of an OMP_SECTION, we may have already made
-		 an edge in make_omp_sections_edges.  */
-	      fallthru = EDGE_COUNT (bb->succs) == 0;
-	      break;
-
 	    case OMP_SECTIONS:
-	      make_omp_sections_edges (bb);
+	      cur_region = new_omp_region (bb, code, cur_region);
 	      fallthru = false;
 	      break;
 
+	    case OMP_RETURN:
+	      /* In the case of an OMP_SECTION, the edge will go somewhere
+		 other than the next block.  This will be created later.  */
+	      cur_region->exit = bb;
+	      fallthru = cur_region->type != OMP_SECTION;
+	      cur_region = cur_region->outer;
+	      break;
+
+	    case OMP_CONTINUE:
+	      cur_region->cont = bb;
+	      switch (cur_region->type)
+		{
+		case OMP_FOR:
+		  /* ??? Technically there should be some sort of loopback
+		     edge here, but it goes to a block that doesn't exist yet,
+		     and without it, updating the ssa form would be a real
+		     bear.  Fortunately, we don't yet do ssa before expanding
+		     these nodes.  */
+		  break;
+
+		case OMP_SECTIONS:
+		  /* Wire up the edges into and out of the nested sections.  */
+		  /* ??? Similarly wrt loopback.  */
+		  {
+		    struct omp_region *i;
+		    for (i = cur_region->inner; i ; i = i->next)
+		      {
+			gcc_assert (i->type == OMP_SECTION);
+			make_edge (cur_region->entry, i->entry, 0);
+			make_edge (i->exit, bb, EDGE_FALLTHRU);
+		      }
+		  }
+		  break;
+		     
+		default:
+		  gcc_unreachable ();
+		}
+	      fallthru = true;
+	      break;
+
 	    default:
 	      gcc_assert (!stmt_ends_bb_p (last));
 	      fallthru = true;
@@ -548,6 +584,9 @@ make_edges (void)
 	make_edge (bb, bb->next_bb, EDGE_FALLTHRU);
     }
 
+  if (root_omp_region)
+    free_omp_regions ();
+
   /* Fold COND_EXPR_COND of each COND_EXPR.  */
   fold_cond_expr_cond ();
 
@@ -556,35 +595,6 @@ make_edges (void)
 }
 
 
-/* Link an OMP_SECTIONS block to all the OMP_SECTION blocks in its body.  */
-
-static void
-make_omp_sections_edges (basic_block bb)
-{
-  basic_block exit_bb;
-  size_t i, n;
-  tree vec, stmt;
-
-  stmt = last_stmt (bb);
-  vec = OMP_SECTIONS_SECTIONS (stmt);
-  n = TREE_VEC_LENGTH (vec);
-  exit_bb = bb_for_stmt (TREE_VEC_ELT (vec, n - 1));
-
-  for (i = 0; i < n - 1; i += 2)
-    {
-      basic_block start_bb = bb_for_stmt (TREE_VEC_ELT (vec, i));
-      basic_block end_bb = bb_for_stmt (TREE_VEC_ELT (vec, i + 1));
-      make_edge (bb, start_bb, 0);
-      make_edge (end_bb, exit_bb, EDGE_FALLTHRU);
-    }
-
-  /* Once the CFG has been built, the vector of sections is no longer
-     useful.  The region can be easily obtained with build_omp_regions.
-     Furthermore, this sharing of tree expressions is not allowed by the
-     statement verifier.  */
-  OMP_SECTIONS_SECTIONS (stmt) = NULL_TREE;
-}
-
 /* Create the edges for a COND_EXPR starting at block BB.
    At this point, both clauses must contain only simple gotos.  */
 
@@ -2498,7 +2508,7 @@ is_ctrl_altering_stmt (tree t)
     }
 
   /* OpenMP directives alter control flow.  */
-  if (flag_openmp && OMP_DIRECTIVE_P (t))
+  if (OMP_DIRECTIVE_P (t))
     return true;
 
   /* If a statement can throw, it alters control flow.  */
@@ -4549,7 +4559,9 @@ move_stmt_r (tree *tp, int *walk_subtree
   if (p->block && IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (TREE_CODE (t))))
     TREE_BLOCK (t) = p->block;
 
-  if (OMP_DIRECTIVE_P (t) && TREE_CODE (t) != OMP_RETURN_EXPR)
+  if (OMP_DIRECTIVE_P (t)
+      && TREE_CODE (t) != OMP_RETURN
+      && TREE_CODE (t) != OMP_CONTINUE)
     {
       /* Do not remap variables inside OMP directives.  Variables
 	 referenced in clauses and directive header belong to the
--- tree-flow.h	(revision 112944)
+++ tree-flow.h	(local)
@@ -470,6 +470,51 @@ extern void bsi_insert_after (block_stmt
 extern void bsi_replace (const block_stmt_iterator *, tree, bool);
 
 /*---------------------------------------------------------------------------
+			      OpenMP Region Tree
+---------------------------------------------------------------------------*/
+
+/* Parallel region information.  Every parallel and workshare
+   directive is enclosed between two markers, the OMP_* directive
+   and a corresponding OMP_RETURN statement.  */
+
+struct omp_region
+{
+  /* The enclosing region.  */
+  struct omp_region *outer;
+
+  /* First child region.  */
+  struct omp_region *inner;
+
+  /* Next peer region.  */
+  struct omp_region *next;
+
+  /* Block containing the omp directive as its last stmt.  */
+  basic_block entry;
+
+  /* Block containing the OMP_RETURN as its last stmt.  */
+  basic_block exit;
+
+  /* Block containing the OMP_CONTINUE as its last stmt.  */
+  basic_block cont;
+
+  /* If this is a combined parallel+workshare region, this is a list
+     of additional arguments needed by the combined parallel+workshare
+     library call.  */
+  tree ws_args;
+
+  /* The code for the omp directive of this region.  */
+  enum tree_code type;
+
+  /* True if this is a combined parallel+workshare region.  */
+  bool is_combined_parallel;
+};
+
+extern struct omp_region *root_omp_region;
+extern struct omp_region *new_omp_region (basic_block, enum tree_code,
+					  struct omp_region *);
+extern void free_omp_regions (void);
+
+/*---------------------------------------------------------------------------
 			      Function prototypes
 ---------------------------------------------------------------------------*/
 /* In tree-cfg.c  */
--- tree-gimple.c	(revision 112944)
+++ tree-gimple.c	(local)
@@ -224,7 +224,8 @@ is_gimple_stmt (tree t)
     case OMP_MASTER:
     case OMP_ORDERED:
     case OMP_CRITICAL:
-    case OMP_RETURN_EXPR:
+    case OMP_RETURN:
+    case OMP_CONTINUE:
       /* These are always void.  */
       return true;
 
--- tree-inline.c	(revision 112944)
+++ tree-inline.c	(local)
@@ -1603,7 +1603,8 @@ estimate_num_insns_1 (tree *tp, int *wal
     case PHI_NODE:
     case WITH_SIZE_EXPR:
     case OMP_CLAUSE:
-    case OMP_RETURN_EXPR:
+    case OMP_RETURN:
+    case OMP_CONTINUE:
       break;
 
     /* We don't account constants for now.  Assume that the cost is amortized
--- tree-pretty-print.c	(revision 112944)
+++ tree-pretty-print.c	(local)
@@ -1822,8 +1822,15 @@ dump_generic_node (pretty_printer *buffe
       dump_omp_clauses (buffer, OMP_SINGLE_CLAUSES (node), spc, flags);
       goto dump_omp_body;
 
-    case OMP_RETURN_EXPR:
+    case OMP_RETURN:
       pp_string (buffer, "OMP_RETURN");
+      if (OMP_RETURN_NOWAIT (node))
+	pp_string (buffer, " [nowait]");
+      is_expr = false;
+      break;
+
+    case OMP_CONTINUE:
+      pp_string (buffer, "OMP_CONTINUE");
       is_expr = false;
       break;
 
--- tree-ssa-operands.c	(revision 112944)
+++ tree-ssa-operands.c	(local)
@@ -2096,11 +2096,12 @@ get_expr_operands (tree stmt, tree *expr
     case OMP_PARALLEL:
     case OMP_SECTIONS:
     case OMP_FOR:
-    case OMP_RETURN_EXPR:
     case OMP_SINGLE:
     case OMP_MASTER:
     case OMP_ORDERED:
     case OMP_CRITICAL:
+    case OMP_RETURN:
+    case OMP_CONTINUE:
       /* Expressions that make no memory references.  */
       return;
 
--- tree.def	(revision 112944)
+++ tree.def	(local)
@@ -987,11 +987,8 @@ DEFTREECODE (OMP_FOR, "omp_for", tcc_sta
 
 /* OpenMP - #pragma omp sections [clause1 ... clauseN]
    Operand 0: OMP_SECTIONS_BODY: Sections body.
-   Operand 1: OMP_SECTIONS_CLAUSES: List of clauses.
-   Operand 2: OMP_SECTIONS_SECTIONS: Vector of the different sections
-	      in the body.  Only valid after lowering and destroyed
-	      after the CFG has been built.  */
-DEFTREECODE (OMP_SECTIONS, "omp_sections", tcc_statement, 3)
+   Operand 1: OMP_SECTIONS_CLAUSES: List of clauses.  */
+DEFTREECODE (OMP_SECTIONS, "omp_sections", tcc_statement, 2)
 
 /* OpenMP - #pragma omp single
    Operand 0: OMP_SINGLE_BODY: Single section body.
@@ -1015,6 +1012,13 @@ DEFTREECODE (OMP_ORDERED, "omp_ordered",
    Operand 1: OMP_CRITICAL_NAME: Identifier for critical section.  */
 DEFTREECODE (OMP_CRITICAL, "omp_critical", tcc_statement, 2)
 
+/* Return from an OpenMP directive.  */
+DEFTREECODE (OMP_RETURN, "omp_return", tcc_statement, 0)
+
+/* OpenMP - An intermediate tree code to mark the location of the
+   loop or sections iteration in the partially lowered code.  */
+DEFTREECODE (OMP_CONTINUE, "omp_continue", tcc_statement, 0)
+
 /* OpenMP - #pragma omp atomic
    Operand 0: The address at which the atomic operation is to be performed.
 	This address should be stabilized with save_expr.
@@ -1026,9 +1030,6 @@ DEFTREECODE (OMP_ATOMIC, "omp_atomic", t
 /* OpenMP clauses.  */
 DEFTREECODE (OMP_CLAUSE, "omp_clause", tcc_exceptional, 0)
 
-/* Return from an OpenMP directive.  */
-DEFTREECODE (OMP_RETURN_EXPR, "omp_return", tcc_statement, 0)
-
 /* Reduction operations. 
    Operations that take a vector of elements and "reduce" it to a scalar
    result (e.g. summing the elements of the vector, finding the minimum over
--- tree.h	(revision 112944)
+++ tree.h	(local)
@@ -170,14 +170,15 @@ extern const enum tree_code_class tree_c
 
 #define OMP_DIRECTIVE_P(NODE)				\
     (TREE_CODE (NODE) == OMP_PARALLEL			\
-     || TREE_CODE (NODE) == OMP_SECTIONS		\
-     || TREE_CODE (NODE) == OMP_SECTION			\
      || TREE_CODE (NODE) == OMP_FOR			\
-     || TREE_CODE (NODE) == OMP_RETURN_EXPR		\
+     || TREE_CODE (NODE) == OMP_SECTIONS		\
      || TREE_CODE (NODE) == OMP_SINGLE			\
+     || TREE_CODE (NODE) == OMP_SECTION			\
      || TREE_CODE (NODE) == OMP_MASTER			\
      || TREE_CODE (NODE) == OMP_ORDERED			\
-     || TREE_CODE (NODE) == OMP_CRITICAL)
+     || TREE_CODE (NODE) == OMP_CRITICAL		\
+     || TREE_CODE (NODE) == OMP_RETURN			\
+     || TREE_CODE (NODE) == OMP_CONTINUE)
 
 /* Number of argument-words in each kind of tree-node.  */
 
@@ -437,6 +438,10 @@ struct tree_common GTY(())
            CALL_EXPR
        DECL_BY_REFERENCE in
            PARM_DECL, RESULT_DECL
+       OMP_RETURN_NOWAIT in
+	   OMP_RETURN
+       OMP_SECTION_LAST in
+	   OMP_SECTION
 
    protected_flag:
 
@@ -1546,7 +1551,6 @@ struct tree_constructor GTY(())
 
 #define OMP_SECTIONS_BODY(NODE)    TREE_OPERAND (OMP_SECTIONS_CHECK (NODE), 0)
 #define OMP_SECTIONS_CLAUSES(NODE) TREE_OPERAND (OMP_SECTIONS_CHECK (NODE), 1)
-#define OMP_SECTIONS_SECTIONS(NODE) TREE_OPERAND (OMP_SECTIONS_CHECK (NODE), 2)
 
 #define OMP_SECTION_BODY(NODE)	   TREE_OPERAND (OMP_SECTION_CHECK (NODE), 0)
 
@@ -1566,6 +1570,18 @@ struct tree_constructor GTY(())
 					      OMP_CLAUSE_PRIVATE,	\
 	                                      OMP_CLAUSE_COPYPRIVATE), 0)
 
+/* True on an OMP_SECTION statement that was the last lexical member.
+   This status is meaningful in the implementation of lastprivate.  */
+#define OMP_SECTION_LAST(NODE) \
+  TREE_PRIVATE (OMP_SECTION_CHECK (NODE))
+
+/* True on an OMP_RETURN statement if the return does not require a
+   thread synchronization via some sort of barrier.  The exact barrier
+   that would otherwise be emitted is dependent on the OMP statement
+   with which this return is associated.  */
+#define OMP_RETURN_NOWAIT(NODE) \
+  TREE_PRIVATE (OMP_RETURN_CHECK (NODE))
+
 /* True on a PRIVATE clause if its decl is kept around for debugging
    information only and its DECL_VALUE_EXPR is supposed to point
    to what it has been remapped to.  */



More information about the Gcc-patches mailing list