[gomp4.1] fold ordered depend(sink) clauses
Aldy Hernandez
aldyh@redhat.com
Fri Jul 31 19:28:00 GMT 2015
On 07/31/2015 09:38 AM, Jakub Jelinek wrote:
> On Thu, Jul 30, 2015 at 07:27:51PM -0700, Aldy Hernandez wrote:
>> +static gimple
>> +gimplify_omp_ordered (tree expr, gimple_seq body)
>> +{
>> + tree c, decls;
>> + int failures = 0;
>> + unsigned int i;
>> +
>> + if (gimplify_omp_ctxp)
>> + for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
>> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
>> + && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
>> + {
>> + bool fail = false;
>> + for (decls = OMP_CLAUSE_DECL (c), i = 0;
>> + decls && TREE_CODE (decls) == TREE_LIST;
>> + decls = TREE_CHAIN (decls), ++i)
>> + if (i < gimplify_omp_ctxp->loop_iter_var.length ()
>> + && TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
>> + {
>> + error_at (OMP_CLAUSE_LOCATION (c),
>> + "variable %qE is not an iteration "
>> + "of outermost loop %d, expected %qE",
>> + TREE_VALUE (decls), i + 1,
>> + gimplify_omp_ctxp->loop_iter_var[i]);
>> + fail = true;
>> + failures++;
>> + }
>> + /* Avoid being too redundant. */
>> + if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
>> + {
>> + error_at (OMP_CLAUSE_LOCATION (c),
>> + "number of variables in depend(sink) "
>> + "clause does not match number of "
>> + "iteration variables");
>> + failures++;
>> + }
>
> failures seems to be a write only variable.
> Perhaps if fail is true (set it to true after this error too),
> don't create the ordered at all? Or drop the bogus clauses.
Oops. Left over from when we were doing everything in the gimplifier.
I think we can just emit a NOP when error here. There's no sense in
removing individual clauses and all that since lower_omp_1 is going to
ignore sources with error anyhow:
/* If we have issued syntax errors, avoid doing any heavy lifting.
Just replace the OMP directives with a NOP to avoid
confusing RTL expansion. */
if (seen_error () && is_gimple_omp (stmt))
{
gsi_replace (gsi_p, gimple_build_nop (), true);
return;
}
>
>> +
>> + /* ?? This is stupid. We need to call extract_omp_for_data just
>> + to get the number of ordered loops... */
>> + extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, NULL);
>> + if (!fd.ordered)
>> + return;
>> + struct omp_for_data_loop *loops
>> + = (struct omp_for_data_loop *)
>> + alloca (fd.ordered * sizeof (struct omp_for_data_loop));
>
> You can do just what expand_omp_for does:
> struct omp_for_data fd;
> struct omp_for_data_loop *loops;
>
> loops
> = (struct omp_for_data_loop *)
> alloca (gimple_omp_for_collapse (ctx->outer->stmt)
> * sizeof (struct omp_for_data_loop));
> extract_omp_for_data (as_a <gomp_for *> (ctx->outer_stmt), &fd, loops);
Ah! The collapse of the the outer statement! Done.
>
>> + For example:
>> +
>> + for (i=0; i < N; ++i)
>> + depend(sink:i-8,j-1)
>> + depend(sink:i,j-2) // Completely ignored because i+0.
>> + depend(sink:i-4,j+3)
>> + depend(sink:i-6,j+2)
>
> Even when writing comments, it is better to make it valid:
> #pragma omp for ordered(2)
> for (i=0; i < N; ++i)
> for (j=0; j < M; ++j)
> {
> #pragma omp ordered \
> depend(sink:i-8,j-1) \
> depend(sink:i,j-2) \ // Completely ignored because i+0.
> depend(sink:i-4,j+3) \
> depend(sink:i-6,j+2)
> #pragma omp ordered depend(source)
> }
You have _got_ to be kidding? Really? Oh well...done.
>
>> +
>> + Folded clause is:
>> +
>> + depend(sink:-gcd(8,4,6),min(-1,3,2))
>
> Spaces here instead of desirable tab.
Done.
>
>> + -or-
>> + depend(sink:-2,-1)
>> + */
>> +
>> + /* FIXME: Computing GCD's where the first element is zero is
>> + non-trivial in the presence of collapsed loops. Do this later. */
>> + if (fd.collapse > 1)
>> + return;
>
> Better ad an gcc_assert for now.
Done.
>
>> + enum {
>> + DIR_UNKNOWN,
>> + DIR_FORWARD,
>> + DIR_BACKWARD
>> + } loop_dir;
>> + switch (fd.loops[i].cond_code)
>> + {
>> + case LT_EXPR:
>> + case LE_EXPR:
>> + loop_dir = DIR_FORWARD;
>> + break;
>> + case GT_EXPR:
>> + case GE_EXPR:
>> + loop_dir = DIR_BACKWARD;
>> + break;
>> + default:
>> + loop_dir = DIR_UNKNOWN;
>> + gcc_unreachable ();
>> + }
>
> I think there is no point in doing this, extract_omp_for_data
> canonicalizes cond_code already, so it is always
> LT_EXPR or GT_EXPR. So just gcc_assert it is these two and
Well, [LG]E_EXPR can sneak through for i >= fd->collapse in
extract_omp_for_data:
case LE_EXPR:
if (i >= fd->collapse)
break;
if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
else
loop->n2 = fold_build2_loc (loc,
PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
build_int_cst (TREE_TYPE (loop->n2), 1));
loop->cond_code = LT_EXPR;
break;
How about we assert that it's LE/LT/GT/GE and set a `bool forward' as
suggested?
> compare it directly, or add bool forward = fd.loops[i].cond_code == LT_EXPR;
> ?
>> +
>> + /* While the committee makes up its mind, bail if we have any
>> + non-constant steps. */
>> + if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
>> + goto gimplify_omp_ordered_ret;
>
> Misnamed label. lower instead?
Fixed.
>
>> + wide_int offset = TREE_PURPOSE (decls);
>> + if (!iter_vars[i])
>> + iter_vars[i] = TREE_VALUE (decls);
>> +
>> + /* Ignore invalid offsets that are not multiples of the step. */
>> + if (!wi::multiple_of_p
>> + (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
>> + UNSIGNED))
>
> What does wi::abs do with very large unsigned integers?
> #pragma omp ordered(2)
> for (unsigned int i = 64; i > 32; i += -1)
> for (unsigned int j = 0; j < 32; j++)
> ?
> step of i is 0xffffffff and cond_code is GT_EXPR. I suppose for unsigned
> steps you don't want to use wi::abs, but just negate the step if
> iterating downward?
Seems to work just fine for:
#pragma omp parallel for ordered(2)
for (unsigned int i = N; i > 32; i += -1)
for (unsigned int j = 0; j < 32; j++)
{
#pragma omp ordered depend(sink:i+1,j-2) depend(sink:i+2,j-3)
...
...
(gdb) call debug_generic_stmt(fd.loops[i].step)
4294967295
(gdb) p/x 4294967295
$17 = 0xffffffff
The conversion from tree -> wide-int correctly interprets this:
(gdb) print x
$18 = (const generic_wide_int<wide_int_storage> &) @0x7fffffffd5d0:
{<wide_int_storage> = {val = {-1, 29799175824, 140737488344576}, len = 1,
precision = 32}, static is_sign_extended = <optimized out>}
(gdb) call x.dump()
[0xffffffffffffffff], precision = 32
..and then negates it correctly. For the i+1,j-2, we end up with
(correctly):
(gdb) call x.dump()
[0x2], precision = 32
(gdb) call y.dump()
[0x1], precision = 32
Then the folded dependence is depend(sink:i+1,j+4294967293), which is -3
and what I would expect. Unless I'm missing something...
How does the attached look?
Aldy
-------------- next part --------------
commit 92e33750f31c7954b8a92763d20630f4e1af9b6b
Author: Aldy Hernandez <aldyh@redhat.com>
Date: Wed Jul 29 13:39:06 2015 -0700
* wide-int.h (wi::gcd): New.
* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
loop_iter_var.
(delete_omp_context): Same.
(gimplify_expr): Move code handling OMP_ORDERED into...
(gimplify_omp_ordered): ...here. New.
* omp-low.c (lower_omp_ordered_clauses): New.
(lower_omp_ordered): Call lower_omp_ordered_clauses.
testsuite/
* gcc.dg/gomp/sink-fold-1.c: New.
* gcc.dg/gomp/sink-fold-2.c: New.
* gcc.dg/gomp/sink-fold-3.c: New.
* c-c++-common/gomp/sink-4.c: Look in omplower dump file instead.
* g++.dg/gomp/sink-3.C: Have sink offset be in the opposite
direction. Make variables more readable.
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 2331001..250442f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -153,7 +153,7 @@ struct gimplify_omp_ctx
splay_tree variables;
hash_set<tree> *privatized_types;
/* Iteration variables in an OMP_FOR. */
- vec<tree> iter_vars;
+ vec<tree> loop_iter_var;
location_t location;
enum omp_clause_default_kind default_kind;
enum omp_region_type region_type;
@@ -392,7 +392,7 @@ delete_omp_context (struct gimplify_omp_ctx *c)
{
splay_tree_delete (c->variables);
delete c->privatized_types;
- c->iter_vars.release ();
+ c->loop_iter_var.release ();
XDELETE (c);
}
@@ -7490,8 +7490,15 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
== TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
== TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
- gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
- (OMP_FOR_INIT (for_stmt)));
+
+ tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
+ bool is_doacross = false;
+ if (c && OMP_CLAUSE_ORDERED_EXPR (c))
+ {
+ is_doacross = true;
+ gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
+ (OMP_FOR_INIT (for_stmt)));
+ }
for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
{
t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
@@ -7500,11 +7507,14 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
gcc_assert (DECL_P (decl));
gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
|| POINTER_TYPE_P (TREE_TYPE (decl)));
- if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
- gimplify_omp_ctxp->iter_vars.quick_push
- (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
- else
- gimplify_omp_ctxp->iter_vars.quick_push (decl);
+ if (is_doacross)
+ {
+ if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
+ gimplify_omp_ctxp->loop_iter_var.quick_push
+ (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+ else
+ gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
+ }
/* Make sure the iteration variable is private. */
tree c = NULL_TREE;
@@ -8387,6 +8397,56 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
return GS_ALL_DONE;
}
+/* Gimplify an OMP_ORDERED construct. EXPR is the tree version. BODY
+ is the OMP_BODY of the original EXPR (which has already been
+ gimplified so it's not present in the EXPR).
+
+ Return the gimplified GIMPLE_OMP_ORDERED tuple. */
+
+static gimple
+gimplify_omp_ordered (tree expr, gimple_seq body)
+{
+ tree c, decls;
+ int failures = 0;
+ unsigned int i;
+
+ if (gimplify_omp_ctxp)
+ for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+ && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+ {
+ bool fail = false;
+ for (decls = OMP_CLAUSE_DECL (c), i = 0;
+ decls && TREE_CODE (decls) == TREE_LIST;
+ decls = TREE_CHAIN (decls), ++i)
+ if (i < gimplify_omp_ctxp->loop_iter_var.length ()
+ && TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "variable %qE is not an iteration "
+ "of outermost loop %d, expected %qE",
+ TREE_VALUE (decls), i + 1,
+ gimplify_omp_ctxp->loop_iter_var[i]);
+ fail = true;
+ failures++;
+ }
+ /* Avoid being too redundant. */
+ if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "number of variables in depend(sink) "
+ "clause does not match number of "
+ "iteration variables");
+ fail = true;
+ failures++;
+ }
+ }
+
+ if (failures)
+ return gimple_build_nop ();
+ return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+}
+
/* Convert the GENERIC expression tree *EXPR_P to GIMPLE. If the
expression produces a value to be used as an operand inside a GIMPLE
statement, the value will be stored back in *EXPR_P. This value will
@@ -9200,38 +9260,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
}
break;
case OMP_ORDERED:
- if (gimplify_omp_ctxp)
- for (tree c = OMP_ORDERED_CLAUSES (*expr_p);
- c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
- && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
- {
- unsigned int n = 0;
- bool fail = false;
- for (tree decls = OMP_CLAUSE_DECL (c);
- decls && TREE_CODE (decls) == TREE_LIST;
- decls = TREE_CHAIN (decls), ++n)
- if (n < gimplify_omp_ctxp->iter_vars.length ()
- && TREE_VALUE (decls)
- != gimplify_omp_ctxp->iter_vars[n])
- {
- error_at (OMP_CLAUSE_LOCATION (c),
- "variable %qE is not an iteration "
- "of outermost loop %d, expected %qE",
- TREE_VALUE (decls), n + 1,
- gimplify_omp_ctxp->iter_vars[n]);
- fail = true;
- }
- /* Avoid being too redundant. */
- if (!fail
- && n != gimplify_omp_ctxp->iter_vars.length ())
- error_at (OMP_CLAUSE_LOCATION (c),
- "number of variables in depend(sink) clause "
- "does not match number of iteration variables");
- }
-
- g = gimple_build_omp_ordered (body,
- OMP_ORDERED_CLAUSES (*expr_p));
+ g = gimplify_omp_ordered (*expr_p, body);
break;
case OMP_CRITICAL:
gimplify_scan_omp_clauses (&OMP_CRITICAL_CLAUSES (*expr_p),
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e7d21ea..489e939 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11969,6 +11969,219 @@ lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
}
+/* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in STMT if possible. */
+
+static void
+lower_omp_ordered_clauses (gomp_ordered *ord_stmt, omp_context *ctx)
+{
+ struct omp_for_data fd;
+ if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
+ return;
+
+ unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
+ if (!len)
+ return;
+ struct omp_for_data_loop *loops
+ = (struct omp_for_data_loop *)
+ alloca (len * sizeof (struct omp_for_data_loop));
+ extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
+ if (!fd.ordered)
+ return;
+
+ /* Canonicalize sink dependence clauses into one folded clause if
+ possible.
+
+ The basic algorithm is to create a sink vector whose first
+ element is the GCD of all the first elements, and whose remaining
+ elements are the minimum of the subsequent columns.
+
+ We ignore dependence vectors whose first element is zero because
+ such dependencies are known to be executed by the same thread.
+
+ We take into account the direction of the loop, so a minimum
+ becomes a maximum if the loop is iterating backwards. We also
+ ignore sink clauses where the loop direction is unknown, or where
+ the offsets are clearly invalid because they are not a multiple
+ of the loop increment.
+
+ For example:
+
+ #pragma omp for ordered(2)
+ for (i=0; i < N; ++i)
+ for (j=0; j < M; ++j)
+ {
+ #pragma omp ordered \
+ depend(sink:i-8,j-1) \
+ depend(sink:i,j-2) \ // Completely ignored because i+0.
+ depend(sink:i-4,j+3) \
+ depend(sink:i-6,j+2)
+ #pragma omp ordered depend(source)
+ }
+
+ Folded clause is:
+
+ depend(sink:-gcd(8,4,6),min(-1,3,2))
+ -or-
+ depend(sink:-2,-1)
+ */
+
+ /* FIXME: Computing GCD's where the first element is zero is
+ non-trivial in the presence of collapsed loops. Do this later. */
+ gcc_assert (fd.collapse <= 1);
+
+ vec<wide_int> folded_deps;
+ folded_deps.create (len);
+ folded_deps.quick_grow_cleared (len);
+ /* Bitmap representing dimensions in the final dependency vector that
+ have been set. */
+ sbitmap folded_deps_used = sbitmap_alloc (len);
+ bitmap_clear (folded_deps_used);
+ /* TRUE if the first dimension's offset is negative. */
+ bool neg_offset_p = false;
+
+ /* ?? We need to save the original iteration variables stored in the
+ depend clauses, because those in fd.loops[].v have already been
+ gimplified. Perhaps we should use the gimplified versions. ?? */
+ tree *iter_vars = (tree *) alloca (sizeof (tree) * len);
+ memset (iter_vars, 0, sizeof (tree) * len);
+
+ tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
+ tree c;
+ unsigned int i;
+ while ((c = *list_p) != NULL)
+ {
+ bool remove = false;
+
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
+ || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
+ goto next_ordered_clause;
+
+ tree decls;
+ for (decls = OMP_CLAUSE_DECL (c), i = 0;
+ decls && TREE_CODE (decls) == TREE_LIST;
+ decls = TREE_CHAIN (decls), ++i)
+ {
+ gcc_assert (i < len);
+
+ /* extract_omp_for_data has canonicalized the condition. */
+ gcc_assert (fd.loops[i].cond_code == LT_EXPR
+ || fd.loops[i].cond_code == LE_EXPR
+ || fd.loops[i].cond_code == GT_EXPR
+ || fd.loops[i].cond_code == GE_EXPR);
+ bool forward = fd.loops[i].cond_code == LT_EXPR
+ || fd.loops[i].cond_code == LE_EXPR;
+
+ /* While the committee makes up its mind, bail if we have any
+ non-constant steps. */
+ if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
+ goto lower_omp_ordered_ret;
+
+ wide_int offset = TREE_PURPOSE (decls);
+ if (!iter_vars[i])
+ iter_vars[i] = TREE_VALUE (decls);
+
+ /* Ignore invalid offsets that are not multiples of the step. */
+ if (!wi::multiple_of_p
+ (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
+ UNSIGNED))
+ {
+ warning_at (OMP_CLAUSE_LOCATION (c), 0,
+ "ignoring sink clause with offset that is not "
+ "a multiple of the loop step");
+ remove = true;
+ goto next_ordered_clause;
+ }
+
+ /* Calculate the first dimension. The first dimension of
+ the folded dependency vector is the GCD of the first
+ elements, while ignoring any first elements whose offset
+ is 0. */
+ if (i == 0)
+ {
+ /* Ignore dependence vectors whose first dimension is 0. */
+ if (offset == 0)
+ {
+ remove = true;
+ goto next_ordered_clause;
+ }
+ else
+ {
+ neg_offset_p =
+ wi::neg_p (offset,
+ TYPE_SIGN (TREE_TYPE (TREE_PURPOSE (decls))));
+ if ((forward && !neg_offset_p)
+ || (!forward && neg_offset_p))
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "first offset must be in opposite direction "
+ "of loop iterations");
+ goto lower_omp_ordered_ret;
+ }
+ /* Initialize the first time around. */
+ if (!bitmap_bit_p (folded_deps_used, 0))
+ {
+ bitmap_set_bit (folded_deps_used, 0);
+ folded_deps[0] = wi::abs (offset);
+ }
+ else
+ folded_deps[i] = wi::gcd (folded_deps[0], offset, UNSIGNED);
+ }
+ }
+ /* Calculate minimum for the remaining dimensions. */
+ else
+ {
+ if (!bitmap_bit_p (folded_deps_used, i))
+ {
+ bitmap_set_bit (folded_deps_used, i);
+ folded_deps[i] = offset;
+ }
+ else if ((forward && wi::lts_p (offset, folded_deps[i]))
+ || (!forward && wi::gts_p (offset, folded_deps[i])))
+ folded_deps[i] = offset;
+ }
+ }
+
+ remove = true;
+
+ next_ordered_clause:
+ if (remove)
+ *list_p = OMP_CLAUSE_CHAIN (c);
+ else
+ list_p = &OMP_CLAUSE_CHAIN (c);
+ }
+
+ for (i = 0; i < len; ++i)
+ if (!bitmap_bit_p (folded_deps_used, i))
+ break;
+ if (i == len)
+ {
+ if (neg_offset_p)
+ folded_deps[0] = -folded_deps[0];
+
+ tree vec = NULL;
+ i = len;
+ do
+ {
+ i--;
+ vec = tree_cons (wide_int_to_tree (TREE_TYPE (fd.loops[i].v),
+ folded_deps[i]),
+ iter_vars[i], vec);
+ }
+ while (i);
+
+ c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+ OMP_CLAUSE_DEPEND_KIND (c) = OMP_CLAUSE_DEPEND_SINK;
+ OMP_CLAUSE_DECL (c) = vec;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_ordered_clauses (ord_stmt);
+ *gimple_omp_ordered_clauses_ptr (ord_stmt) = c;
+ }
+
+ lower_omp_ordered_ret:
+ sbitmap_free (folded_deps_used);
+ folded_deps.release ();
+}
+
+
/* Expand code for an OpenMP ordered directive. */
static void
@@ -11979,6 +12192,8 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gcall *x;
gbind *bind;
+ lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx);
+
push_gimplify_context ();
block = make_node (BLOCK);
diff --git a/gcc/testsuite/c-c++-common/gomp/sink-4.c b/gcc/testsuite/c-c++-common/gomp/sink-4.c
index 7934de2..111178b 100644
--- a/gcc/testsuite/c-c++-common/gomp/sink-4.c
+++ b/gcc/testsuite/c-c++-common/gomp/sink-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
/* Test that we adjust pointer offsets for sink variables
correctly. */
@@ -22,4 +22,4 @@ funk (foo *begin, foo *end)
}
}
-/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400.\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/sink-1.C b/gcc/testsuite/g++.dg/gomp/sink-1.C
index a83adbf..982d36c 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-1.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-1.C
@@ -12,6 +12,7 @@ void baz ()
{
#pragma omp ordered depend(sink:i-3,j)
bar (i, j, 0);
+#pragma omp ordered depend(source)
}
}
diff --git a/gcc/testsuite/g++.dg/gomp/sink-3.C b/gcc/testsuite/g++.dg/gomp/sink-3.C
index 83a742e..4271d66 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-3.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-3.C
@@ -8,7 +8,7 @@ typedef struct {
char stuff[400];
} foo;
-foo *p, *q, *r;
+foo *end, *begin, *p;
template<int N>
void
@@ -16,7 +16,7 @@ funk ()
{
int i,j;
#pragma omp parallel for ordered(1)
- for (p=q; p < q; p--)
+ for (p=end; p > begin; p--)
{
#pragma omp ordered depend(sink:p+1)
void bar ();
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
new file mode 100644
index 0000000..f2961b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test depend(sink) clause folding. */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+ for (i=0; i < N; i++)
+ for (j=0; j < N; ++j)
+ {
+/* We remove the (sink:i,j-2) by virtue of it the i+0. The remaining
+ clauses get folded with a GCD of -2 for `i' and a minimum of -1 for
+ 'j'. */
+#pragma omp ordered \
+ depend(sink:i-8,j-1) \
+ depend(sink:i, j-2) \
+ depend(sink:i-4,j+3) \
+ depend(sink:i-6,j+2)
+ bar();
+#pragma omp ordered depend(source)
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "omp ordered depend\\(sink:i-2,j-1\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
new file mode 100644
index 0000000..b3b4ac7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+ for (i=0; i < N; i += 3)
+ for (j=0; j < N; ++j)
+ {
+#pragma omp ordered depend(sink:i-8,j-1) /* { dg-warning "ignoring sink clause with offset that is not a multiple" } */
+#pragma omp ordered depend(sink:i+3,j-1) /* { dg-error "first offset must be in opposite direction" } */
+ bar();
+#pragma omp ordered depend(source)
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
new file mode 100644
index 0000000..4d6293c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test that we fold sink offsets correctly while taking into account
+ pointer sizes. */
+
+typedef struct {
+ char stuff[400];
+} foo;
+
+void
+funk (foo *begin, foo *end)
+{
+ foo *p;
+#pragma omp parallel for ordered(1)
+ for (p=end; p > begin; p--)
+ {
+#pragma omp ordered depend(sink:p+2) depend(sink:p+4)
+ void bar ();
+ bar();
+#pragma omp ordered depend(source)
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+800B\\)" 1 "omplower" } } */
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index d8f7b46..c20db61 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -514,6 +514,7 @@ namespace wi
BINARY_FUNCTION div_round (const T1 &, const T2 &, signop, bool * = 0);
BINARY_FUNCTION divmod_trunc (const T1 &, const T2 &, signop,
WI_BINARY_RESULT (T1, T2) *);
+ BINARY_FUNCTION gcd (const T1 &, const T2 &, signop = UNSIGNED);
BINARY_FUNCTION mod_trunc (const T1 &, const T2 &, signop, bool * = 0);
BINARY_FUNCTION smod_trunc (const T1 &, const T2 &);
BINARY_FUNCTION umod_trunc (const T1 &, const T2 &);
@@ -2653,6 +2654,27 @@ wi::divmod_trunc (const T1 &x, const T2 &y, signop sgn,
return quotient;
}
+/* Compute the greatest common divisor of two numbers A and B using
+ Euclid's algorithm. */
+template <typename T1, typename T2>
+inline WI_BINARY_RESULT (T1, T2)
+wi::gcd (const T1 &a, const T2 &b, signop sgn)
+{
+ T1 x, y, z;
+
+ x = wi::abs (a);
+ y = wi::abs (b);
+
+ while (gt_p (x, 0, sgn))
+ {
+ z = mod_trunc (y, x, sgn);
+ y = x;
+ x = z;
+ }
+
+ return y;
+}
+
/* Compute X / Y, rouding towards 0, and return the remainder.
Treat X and Y as having the signedness given by SGN. Indicate
in *OVERFLOW if the division overflows. */
More information about the Gcc-patches
mailing list