This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[committed] Fix #pragma omp simd reductions (PR libgomp/58756)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 16 Dec 2013 11:39:47 +0100
- Subject: [committed] Fix #pragma omp simd reductions (PR libgomp/58756)
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
For simd reductions when we force safelen(1) (either the user did, or we punt),
my thinking was that there is just one private var to merge and that people
should already have initialized the reduction var to the right initialization
value, so we could just copy it over instead of merging. But as the
testcase shows, we can have nested reductions and at that point it makes a
difference. On x86_64/i686 this is reproducible with explicit safelen(1);
on other targets, where vectorization is not on by default, it is
reproducible even on the original testcase.
Regtested on x86_64-linux, committed to trunk.
2013-12-16 Jakub Jelinek <jakub@redhat.com>
PR libgomp/58756
* omp-low.c (lower_rec_input_clauses) <case OMP_CLAUSE_REDUCTION>: For
reductions without placeholder if is_simd, but when not using
GOMP_SIMD* internal calls, also perform the reduction operation
on the outer var rather than simple assignment.
* testsuite/libgomp.c/pr58756.c: New test.
--- gcc/omp-low.c.jj 2013-12-10 08:52:06.000000000 +0100
+++ gcc/omp-low.c 2013-12-16 11:07:20.716421854 +0100
@@ -3565,20 +3565,21 @@ lower_rec_input_clauses (tree clauses, g
{
x = omp_reduction_init (c, TREE_TYPE (new_var));
gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
+ enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+
+ /* reduction(-:var) sums up the partial results, so it
+ acts identically to reduction(+:var). */
+ if (code == MINUS_EXPR)
+ code = PLUS_EXPR;
+
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, max_vf,
idx, lane, ivar, lvar))
{
- enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
tree ref = build_outer_var_ref (var, ctx);
gimplify_assign (unshare_expr (ivar), x, &llist[0]);
- /* reduction(-:var) sums up the partial results, so it
- acts identically to reduction(+:var). */
- if (code == MINUS_EXPR)
- code = PLUS_EXPR;
-
x = build2 (code, TREE_TYPE (ref), ref, ivar);
ref = build_outer_var_ref (var, ctx);
gimplify_assign (ref, x, &llist[1]);
@@ -3587,8 +3588,13 @@ lower_rec_input_clauses (tree clauses, g
{
gimplify_assign (new_var, x, ilist);
if (is_simd)
- gimplify_assign (build_outer_var_ref (var, ctx),
- new_var, dlist);
+ {
+ tree ref = build_outer_var_ref (var, ctx);
+
+ x = build2 (code, TREE_TYPE (ref), ref, new_var);
+ ref = build_outer_var_ref (var, ctx);
+ gimplify_assign (ref, x, dlist);
+ }
}
}
break;
--- libgomp/testsuite/libgomp.c/pr58756.c.jj 2013-12-16 11:09:55.910617308 +0100
+++ libgomp/testsuite/libgomp.c/pr58756.c 2013-12-16 11:10:25.647462546 +0100
@@ -0,0 +1,58 @@
+/* PR libgomp/58756 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort (void);
+int d[32 * 32];
+
+__attribute__((noinline, noclone)) int
+foo (int a, int b)
+{
+ int j, c = 0;
+ #pragma omp parallel for reduction(+: c)
+ for (j = 0; j < a; j += 32)
+ {
+ int l;
+ #pragma omp simd reduction(+: c) safelen(1)
+ for (l = 0; l < b; ++l)
+ c += d[j + l];
+ }
+ return c;
+}
+
+__attribute__((noinline, noclone)) int
+bar (int a)
+{
+ int j, c = 0;
+ #pragma omp parallel for simd reduction(+: c) safelen(1)
+ for (j = 0; j < a; ++j)
+ c += d[j];
+ return c;
+}
+
+__attribute__((noinline)) static int
+baz (int a)
+{
+ int j, c = 0;
+ #pragma omp simd reduction(+: c) safelen(1)
+ for (j = 0; j < a; ++j)
+ c += d[j];
+ return c;
+}
+
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 32 * 32; i++)
+ d[i] = (i & 31);
+ if (foo (32 * 32, 32) != (31 * 32 / 2) * 32)
+ abort ();
+ if (bar (32 * 32) != (31 * 32 / 2) * 32)
+ abort ();
+ if (baz (32 * 32) != (31 * 32 / 2) * 32)
+ abort ();
+ return 0;
+}
Jakub