This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp] handle firstprivate, lastprivate, reduction
- From: Richard Henderson <rth at twiddle dot net>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 27 Sep 2005 22:46:49 -0700
- Subject: [gomp] handle firstprivate, lastprivate, reduction
Previously, we hadn't bothered actually emitting code for these
clauses. Do so now. Also fix two bugs in the testsuite.
r~
* c-omp.c (c_split_parallel_clauses): Add lastprivate to parallel.
(c_finish_omp_bindings): Check for firstprivate+lastprivate and
do not create two decls.
* gimplify.c (gimplify_omp_reduction): New.
(gimplify_omp_for_lastprivate): New.
(gimplify_omp_for_generic): Use it.
(gimplify_omp_for_static_nochunk): Likewise.
(gimplify_omp_for_static_chunk): Likewise.
(gimplify_omp_for): Emit VAR_INIT and VAR_REDUC.
(gimplify_omp_sections): Likewise.
* omp-low.c (add_omp_data_field): Don't create duplicate fields.
(get_lastprivate_sequence): New.
(lower_omp_parallel): Use it.
* testsuite/libgomp.dg/omp-loop03.c: Add initial barrier.
* testsuite/libgomp.dg/omp-parallel-for.c: Specify static schedule.
Index: gcc/c-omp.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/c-omp.c,v
retrieving revision 1.1.2.8
diff -u -p -r1.1.2.8 c-omp.c
--- gcc/c-omp.c 27 Sep 2005 23:35:30 -0000 1.1.2.8
+++ gcc/c-omp.c 28 Sep 2005 05:37:14 -0000
@@ -199,6 +199,7 @@ c_split_parallel_clauses (tree clauses,
case OMP_CLAUSE_PRIVATE:
case OMP_CLAUSE_SHARED:
case OMP_CLAUSE_FIRSTPRIVATE:
+ case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_COPYIN:
case OMP_CLAUSE_IF:
@@ -219,7 +220,6 @@ c_split_parallel_clauses (tree clauses,
/* FALLTHRU */
case OMP_CLAUSE_ORDERED:
- case OMP_CLAUSE_LASTPRIVATE:
TREE_CHAIN (clauses) = *ws_clauses;
*ws_clauses = clauses;
break;
@@ -399,6 +399,7 @@ c_finish_omp_bindings (tree omp_clauses,
{
tree old, new;
bool decl_ok = true;
+ bool existing_remap = false;
bitmap update_map;
old = TREE_PURPOSE (*plist);
@@ -409,12 +410,20 @@ c_finish_omp_bindings (tree omp_clauses,
firstprivate and lastprivate clauses. */
if (bitmap_bit_p (&spr_head, DECL_UID (old)))
decl_ok = false;
- if (kind != OMP_CLAUSE_LASTPRIVATE
- && bitmap_bit_p (&fp_head, DECL_UID (old)))
- decl_ok = false;
- if (kind != OMP_CLAUSE_FIRSTPRIVATE
- && bitmap_bit_p (&lp_head, DECL_UID (old)))
- decl_ok = false;
+ if (bitmap_bit_p (&fp_head, DECL_UID (old)))
+ {
+ if (kind == OMP_CLAUSE_LASTPRIVATE)
+ existing_remap = true;
+ else
+ decl_ok = false;
+ }
+ if (bitmap_bit_p (&lp_head, DECL_UID (old)))
+ {
+ if (kind == OMP_CLAUSE_FIRSTPRIVATE)
+ existing_remap = true;
+ else
+ decl_ok = false;
+ }
if (!decl_ok)
{
if (!bitmap_bit_p (&error_head, DECL_UID (old)))
@@ -426,9 +435,12 @@ c_finish_omp_bindings (tree omp_clauses,
continue;
}
+ if (existing_remap)
+ new = lookup_name (DECL_NAME (old));
+
/* OpenMP 2.5 section 2.8.1.1: Variables with predetermined
sharing attributes may not be listed in data-sharing clauses. */
- if (c_omp_sharing_predetermined (old))
+ else if (c_omp_sharing_predetermined (old))
{
if (!bitmap_bit_p (&error_head, DECL_UID (old)))
{
@@ -438,8 +450,9 @@ c_finish_omp_bindings (tree omp_clauses,
*plist = TREE_CHAIN (*plist);
continue;
}
+ else
+ new = c_omp_remap_decl (old, kind == OMP_CLAUSE_SHARED);
- new = c_omp_remap_decl (old, kind != OMP_CLAUSE_SHARED);
TREE_VALUE (*plist) = new;
/* Shared variables can be remapped to themselves. When this
Index: gcc/gimplify.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/gimplify.c,v
retrieving revision 2.135.4.20
diff -u -p -r2.135.4.20 gimplify.c
--- gcc/gimplify.c 27 Sep 2005 15:52:12 -0000 2.135.4.20
+++ gcc/gimplify.c 28 Sep 2005 05:37:14 -0000
@@ -3911,6 +3911,37 @@ gimplify_to_stmt_list (tree *stmt_p)
}
}
+/* A subroutine of gimplify_omp_for and gimplify_omp_sections. We need to
+ do something about synchronization during reductions. There are lots
+ of plausible-sounding possibilities. One thing to be careful about
+ though, is that atomic operations are expensive, so if there are more
+ than a couple of reduction variables, it's probably cheaper to take a
+ mutex around the lot.
+
+ ??? For now, just assume we'll be fine reusing a critical section. */
+
+static void
+gimplify_omp_reduction (tree expr, tree *pre_p)
+{
+ expr = build2 (OMP_CRITICAL, void_type_node, NULL, expr);
+ gimplify_and_add (expr, pre_p);
+}
+
+/* A subroutine of gimplify_omp_for. Generate the code for a
+ lastprivate clause. Given a loop control predicate
+ of (V cond N2), we gate the clause on (!(V cond N2)). */
+
+static void
+gimplify_omp_for_lastprivate (tree v, tree n2, enum tree_code cond_code,
+ tree body, tree *pre_p)
+{
+ tree t;
+
+ t = build2 (cond_code, boolean_type_node, v, n2);
+ t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, body);
+ gimplify_and_add (t, pre_p);
+}
+
/* A subroutine of gimplify_omp_for. Generate code for a parallel
loop with any schedule. Given parameters:
@@ -3926,21 +3957,21 @@ gimplify_to_stmt_list (tree *stmt_p)
L1:
BODY;
V += STEP;
- if (V cond iend) goto L1; else goto L2;
- L2:
+ if (V cond iend) goto L1;
more = GOMP_loop_foo_next (&istart0, &iend0);
- if (more) goto L0; else goto L3;
+ if (more) goto L0;
+ lastprivate;
L3:
*/
static void
gimplify_omp_for_generic (tree v, tree n1, tree n2, tree step,
- tree chunk_size, tree body,
+ tree chunk_size, tree for_stmt,
enum tree_code cond_code, tree *pre_p,
enum built_in_function start_fn,
enum built_in_function next_fn)
{
- tree l0, l1, l2, l3;
+ tree l0, l1, l3;
tree type, istart0, iend0, iend;
tree t, args;
@@ -3961,7 +3992,6 @@ gimplify_omp_for_generic (tree v, tree n
l0 = create_artificial_label ();
l1 = create_artificial_label ();
- l2 = create_artificial_label ();
l3 = create_artificial_label ();
iend = create_tmp_var (type, NULL);
@@ -3999,18 +4029,14 @@ gimplify_omp_for_generic (tree v, tree n
t = build1 (LABEL_EXPR, void_type_node, l1);
gimplify_and_add (t, pre_p);
- gimplify_and_add (body, pre_p);
+ gimplify_and_add (OMP_FOR_BODY (for_stmt), pre_p);
t = build2 (PLUS_EXPR, type, v, step);
t = build2 (MODIFY_EXPR, void_type_node, v, t);
gimplify_and_add (t, pre_p);
t = build2 (cond_code, boolean_type_node, v, iend);
- t = build3 (COND_EXPR, void_type_node, t,
- build_and_jump (&l1), build_and_jump (&l2));
- gimplify_and_add (t, pre_p);
-
- t = build1 (LABEL_EXPR, void_type_node, l2);
+ t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), NULL);
gimplify_and_add (t, pre_p);
t = build_fold_addr_expr (iend0);
@@ -4018,9 +4044,12 @@ gimplify_omp_for_generic (tree v, tree n
t = build_fold_addr_expr (istart0);
args = tree_cons (NULL, t, args);
t = build_function_call_expr (built_in_decls[next_fn], args);
- t = build3 (COND_EXPR, void_type_node, t,
- build_and_jump (&l0), build_and_jump (&l3));
+ t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), NULL);
gimplify_and_add (t, pre_p);
+
+ if (OMP_FOR_VAR_LAST (for_stmt))
+ gimplify_omp_for_lastprivate (v, n2, cond_code,
+ OMP_FOR_VAR_LAST (for_stmt), pre_p);
t = build1 (LABEL_EXPR, void_type_node, l3);
gimplify_and_add (t, pre_p);
@@ -4049,13 +4078,14 @@ gimplify_omp_for_generic (tree v, tree n
L1:
BODY;
V += STEP;
- if (V cond e) goto L1; else goto L2;
+ if (V cond e) goto L1;
+ lastprivate;
L2:
*/
static void
gimplify_omp_for_static_nochunk (tree v, tree n1, tree n2, tree step,
- tree body, enum tree_code cond_code,
+ tree for_stmt, enum tree_code cond_code,
tree *pre_p)
{
tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid;
@@ -4138,17 +4168,20 @@ gimplify_omp_for_static_nochunk (tree v,
t = build1 (LABEL_EXPR, void_type_node, l1);
gimplify_and_add (t, pre_p);
- gimplify_and_add (body, pre_p);
+ gimplify_and_add (OMP_FOR_BODY (for_stmt), pre_p);
t = build2 (PLUS_EXPR, type, v, step);
t = build2 (MODIFY_EXPR, void_type_node, v, t);
gimplify_and_add (t, pre_p);
t = build2 (cond_code, boolean_type_node, v, e);
- t = build3 (COND_EXPR, void_type_node, t,
- build_and_jump (&l1), build_and_jump (&l2));
+ t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), NULL);
gimplify_and_add (t, pre_p);
+ if (OMP_FOR_VAR_LAST (for_stmt))
+ gimplify_omp_for_lastprivate (v, n2, cond_code,
+ OMP_FOR_VAR_LAST (for_stmt), pre_p);
+
t = build1 (LABEL_EXPR, void_type_node, l2);
gimplify_and_add (t, pre_p);
}
@@ -4181,14 +4214,17 @@ gimplify_omp_for_static_nochunk (tree v,
trip += 1;
goto L0;
L4:
+ if (trip == 0) goto L5;
+ lastprivate;
+ L5:
*/
static void
gimplify_omp_for_static_chunk (tree v, tree n1, tree n2, tree step,
- tree body, enum tree_code cond_code,
+ tree for_stmt, enum tree_code cond_code,
tree chunk, tree *pre_p)
{
- tree l0, l1, l2, l3, l4, n, s0, e0, e, t;
+ tree l0, l1, l2, l3, l4, l5, n, s0, e0, e, t;
tree trip, nthreads, threadid;
tree type, utype;
@@ -4197,6 +4233,7 @@ gimplify_omp_for_static_chunk (tree v, t
l2 = create_artificial_label ();
l3 = create_artificial_label ();
l4 = create_artificial_label ();
+ l5 = create_artificial_label ();
type = TREE_TYPE (v);
utype = lang_hooks.types.unsigned_type (type);
@@ -4275,7 +4312,7 @@ gimplify_omp_for_static_chunk (tree v, t
t = build1 (LABEL_EXPR, void_type_node, l2);
gimplify_and_add (t, pre_p);
- gimplify_and_add (body, pre_p);
+ gimplify_and_add (OMP_FOR_BODY (for_stmt), pre_p);
t = build2 (PLUS_EXPR, type, v, step);
t = build2 (MODIFY_EXPR, void_type_node, v, t);
@@ -4299,6 +4336,17 @@ gimplify_omp_for_static_chunk (tree v, t
t = build1 (LABEL_EXPR, void_type_node, l4);
gimplify_and_add (t, pre_p);
+
+ t = build_int_cst (utype, 0);
+ t = build2 (EQ_EXPR, boolean_type_node, trip, t);
+ t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l5), NULL);
+
+ if (OMP_FOR_VAR_LAST (for_stmt))
+ gimplify_omp_for_lastprivate (v, n2, cond_code,
+ OMP_FOR_VAR_LAST (for_stmt), pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, l5);
+ gimplify_and_add (t, pre_p);
}
/* Gimplify a OMP_FOR statement. */
@@ -4400,15 +4448,16 @@ gimplify_omp_for (tree *expr_p, tree *pr
break;
}
+ if (OMP_FOR_VAR_INIT (for_stmt))
+ gimplify_and_add (OMP_FOR_VAR_INIT (for_stmt), pre_p);
+
if (sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !have_ordered)
{
if (chunk_size == NULL)
- gimplify_omp_for_static_nochunk (v, n1, n2, step,
- OMP_FOR_BODY (for_stmt),
+ gimplify_omp_for_static_nochunk (v, n1, n2, step, for_stmt,
cond_code, pre_p);
else
- gimplify_omp_for_static_chunk (v, n1, n2, step,
- OMP_FOR_BODY (for_stmt), cond_code,
+ gimplify_omp_for_static_chunk (v, n1, n2, step, for_stmt, cond_code,
chunk_size, pre_p);
}
else
@@ -4423,11 +4472,14 @@ gimplify_omp_for (tree *expr_p, tree *pr
fn_index = sched_kind + have_ordered * 4;
gimplify_omp_for_generic (v, n1, n2, step, chunk_size,
- OMP_FOR_BODY (for_stmt), cond_code, pre_p,
+ for_stmt, cond_code, pre_p,
BUILT_IN_GOMP_LOOP_STATIC_START + fn_index,
BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index);
}
+ if (OMP_FOR_VAR_REDUC (for_stmt))
+ gimplify_omp_reduction (OMP_FOR_VAR_REDUC (for_stmt), pre_p);
+
if (have_nowait)
{
*expr_p = NULL;
@@ -4442,6 +4494,7 @@ gimplify_omp_for (tree *expr_p, tree *pr
/* Gimplify an OMP_SECTIONS statement. In pseudo code, we generate
+ VAR_INIT;
v = GOMP_sections_start (n);
L0:
switch (v)
@@ -4453,6 +4506,9 @@ gimplify_omp_for (tree *expr_p, tree *pr
goto L1;
case 2:
...
+ case n:
+ ...
+ VAR_LAST;
default:
abort ();
}
@@ -4460,17 +4516,35 @@ gimplify_omp_for (tree *expr_p, tree *pr
v = GOMP_sections_next ();
goto L0;
L2:
+ VAR_REDUC;
*/
static enum gimplify_status
gimplify_omp_sections (tree *expr_p, tree *pre_p)
{
VEC(tree,heap) *labels, *saved_labels;
- tree saved_exit, label_vec;
+ tree sec_stmt, saved_exit, label_vec;
tree l0, l1, l2, default_label;
tree t, u, v;
size_t i, len;
+ sec_stmt = *expr_p;
+
+ if (OMP_SECTIONS_VAR_INIT (sec_stmt))
+ gimplify_and_add (OMP_SECTIONS_VAR_INIT (sec_stmt), pre_p);
+
+ if (OMP_SECTIONS_VAR_LAST (sec_stmt))
+ {
+ tree last = expr_last (OMP_SECTIONS_BODY (*expr_p));
+ gcc_assert (TREE_CODE (last) == OMP_SECTION);
+
+ t = OMP_SECTION_BODY (last);
+ OMP_SECTION_BODY (last) = NULL;
+ append_to_statement_list (t, &OMP_SECTION_BODY (last));
+ append_to_statement_list (OMP_SECTIONS_VAR_LAST (sec_stmt),
+ &OMP_SECTION_BODY (last));
+ }
+
l0 = create_artificial_label ();
l1 = create_artificial_label ();
l2 = create_artificial_label ();
@@ -4546,6 +4620,9 @@ gimplify_omp_sections (tree *expr_p, tre
t = build1 (LABEL_EXPR, void_type_node, l2);
gimplify_and_add (t, pre_p);
+ if (OMP_SECTIONS_VAR_REDUC (sec_stmt))
+ gimplify_omp_reduction (OMP_SECTIONS_VAR_REDUC (sec_stmt), pre_p);
+
/* Unless there's a nowait clause, add a barrier afterward. */
for (t = OMP_SECTIONS_CLAUSES (*expr_p); t ; t = TREE_CHAIN (t))
Index: gcc/omp-low.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/omp-low.c,v
retrieving revision 1.1.2.6
diff -u -p -r1.1.2.6 omp-low.c
--- gcc/omp-low.c 27 Sep 2005 23:35:35 -0000 1.1.2.6
+++ gcc/omp-low.c 28 Sep 2005 05:37:14 -0000
@@ -153,6 +153,11 @@ add_omp_data_field (tree var, bool by_re
{
tree field, type;
+ /* We can have both firstprivate and lastprivate on a parallel.
+ Avoid creating two fields. */
+ if (splay_tree_lookup (ri_p->map, (splay_tree_key)var) != NULL)
+ return;
+
type = TREE_TYPE (var);
if (by_ref)
type = build_pointer_type (type);
@@ -163,7 +168,6 @@ add_omp_data_field (tree var, bool by_re
create_data_decl (ri_p);
insert_field_into_struct (TREE_TYPE (ri_p->omp_data_send), field);
- gcc_assert (splay_tree_lookup (ri_p->map, (splay_tree_key)var) == NULL);
splay_tree_insert (ri_p->map, (splay_tree_key)var, (splay_tree_value)field);
}
@@ -633,6 +637,33 @@ remap_labels_child (tree fn)
}
+/* Grr. The sequence for lastprivate on a parallel for is not in the
+ parallel node itself. Nor should it be, since it needs to be placed
+ by the loop code. But we need to invoke remap_variables_receiver on it.
+
+ Grovel it out. At this point the child function consists of a root
+ BIND_EXPR, and within that the original BIND_EXPR of the parallel. So
+ if this is a combined parallel for (or sections), the single component of
+ that second BIND_EXPR should be an OMP_FOR (or OMP_SECTIONS) node. */
+
+static tree *
+get_lastprivate_sequence (tree_stmt_iterator *tsi)
+{
+ tree t;
+
+ t = tsi_stmt (*tsi);
+ t = expr_only (BIND_EXPR_BODY (t));
+
+ if (t == NULL)
+ return NULL;
+ if (TREE_CODE (t) == OMP_FOR)
+ return &OMP_FOR_VAR_LAST (t);
+ else if (TREE_CODE (t) == OMP_SECTIONS)
+ return &OMP_SECTIONS_VAR_LAST (t);
+
+ return NULL;
+}
+
/* Lower the OpenMP parallel directive pointed by TSI. Build a new
function with the body of the pragma and emit the appropriate
runtime call. */
@@ -641,7 +672,7 @@ static void
lower_omp_parallel (tree *stmt_p)
{
tree par_stmt = *stmt_p;
- tree bind_stmt, fn;
+ tree bind_stmt, fn, *lastpriv;
struct remap_info_d *ri_p;
tree_stmt_iterator fn_tsi;
@@ -659,6 +690,10 @@ lower_omp_parallel (tree *stmt_p)
fn = ri_p->omp_fn;
fn_tsi = tsi_start (BIND_EXPR_BODY (DECL_SAVED_TREE (fn)));
+ lastpriv = get_lastprivate_sequence (&fn_tsi);
+ if (lastpriv)
+ walk_tree (lastpriv, remap_variables_receiver, ri_p, NULL);
+
if (OMP_PARALLEL_VAR_INIT (par_stmt))
{
walk_tree (&OMP_PARALLEL_VAR_INIT (par_stmt),
Index: libgomp/testsuite/libgomp.dg/omp-loop03.c
===================================================================
RCS file: /cvs/gcc/gcc/libgomp/testsuite/libgomp.dg/Attic/omp-loop03.c,v
retrieving revision 1.1.2.1
diff -u -p -r1.1.2.1 omp-loop03.c
--- libgomp/testsuite/libgomp.dg/omp-loop03.c 27 Sep 2005 17:37:13 -0000 1.1.2.1
+++ libgomp/testsuite/libgomp.dg/omp-loop03.c 28 Sep 2005 05:39:26 -0000
@@ -6,6 +6,7 @@ foo ()
{
int i;
a = 30;
+#pragma omp barrier
#pragma omp for lastprivate (a)
for (i = 0; i < 1024; i++)
{
Index: libgomp/testsuite/libgomp.dg/omp-parallel-for.c
===================================================================
RCS file: /cvs/gcc/gcc/libgomp/testsuite/libgomp.dg/Attic/omp-parallel-for.c,v
retrieving revision 1.1.2.1
diff -u -p -r1.1.2.1 omp-parallel-for.c
--- libgomp/testsuite/libgomp.dg/omp-parallel-for.c 27 Sep 2005 16:43:27 -0000 1.1.2.1
+++ libgomp/testsuite/libgomp.dg/omp-parallel-for.c 28 Sep 2005 05:39:26 -0000
@@ -6,7 +6,8 @@ main()
a = 30;
-#pragma omp parallel for firstprivate (a) lastprivate (a) num_threads (2)
+#pragma omp parallel for firstprivate (a) lastprivate (a) \
+ num_threads (2) schedule(static)
for (i = 0; i < 10; i++)
a = a + i;