This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[committed] Fix #pragma omp simd reductions (PR libgomp/58756)


Hi!

For simd reductions when safelen(1) is forced (either the user specified it,
or we punt), my thinking was that there is just one private var to merge and
that people should already have initialized the reduction var to the right
initialization value, so we could just copy it over instead of merging.  But
as the testcase shows, we can have nested reductions, and at that point it
makes a difference.  On x86_64/i686 this is reproducible with an explicit
safelen(1); on other targets, where vectorization is not on by default, it is
reproducible even with the original testcase.

Regtested on x86_64-linux, committed to trunk.

2013-12-16  Jakub Jelinek  <jakub@redhat.com>

	PR libgomp/58756
	* omp-low.c (lower_rec_input_clauses) <case OMP_CLAUSE_REDUCTION>: For
	reductions without placeholder if is_simd, but when not using
	GOMP_SIMD* internal calls, also perform the reduction operation
	on the outer var rather than simple assignment.

	* testsuite/libgomp.c/pr58756.c: New test.

--- gcc/omp-low.c.jj	2013-12-10 08:52:06.000000000 +0100
+++ gcc/omp-low.c	2013-12-16 11:07:20.716421854 +0100
@@ -3565,20 +3565,21 @@ lower_rec_input_clauses (tree clauses, g
 		{
 		  x = omp_reduction_init (c, TREE_TYPE (new_var));
 		  gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
+		  enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+
+		  /* reduction(-:var) sums up the partial results, so it
+		     acts identically to reduction(+:var).  */
+		  if (code == MINUS_EXPR)
+		    code = PLUS_EXPR;
+
 		  if (is_simd
 		      && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
 						       idx, lane, ivar, lvar))
 		    {
-		      enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
 		      tree ref = build_outer_var_ref (var, ctx);
 
 		      gimplify_assign (unshare_expr (ivar), x, &llist[0]);
 
-		      /* reduction(-:var) sums up the partial results, so it
-			 acts identically to reduction(+:var).  */
-		      if (code == MINUS_EXPR)
-			code = PLUS_EXPR;
-
 		      x = build2 (code, TREE_TYPE (ref), ref, ivar);
 		      ref = build_outer_var_ref (var, ctx);
 		      gimplify_assign (ref, x, &llist[1]);
@@ -3587,8 +3588,13 @@ lower_rec_input_clauses (tree clauses, g
 		    {
 		      gimplify_assign (new_var, x, ilist);
 		      if (is_simd)
-			gimplify_assign (build_outer_var_ref (var, ctx),
-					 new_var, dlist);
+			{
+			  tree ref = build_outer_var_ref (var, ctx);
+
+			  x = build2 (code, TREE_TYPE (ref), ref, new_var);
+			  ref = build_outer_var_ref (var, ctx);
+			  gimplify_assign (ref, x, dlist);
+			}
 		    }
 		}
 	      break;
--- libgomp/testsuite/libgomp.c/pr58756.c.jj	2013-12-16 11:09:55.910617308 +0100
+++ libgomp/testsuite/libgomp.c/pr58756.c	2013-12-16 11:10:25.647462546 +0100
@@ -0,0 +1,58 @@
+/* PR libgomp/58756 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort (void);
+int d[32 * 32];
+
+__attribute__((noinline, noclone)) int
+foo (int a, int b)
+{
+  int j, c = 0;
+  #pragma omp parallel for reduction(+: c)
+    for (j = 0; j < a; j += 32)
+      {
+	int l;
+	#pragma omp simd reduction(+: c) safelen(1)
+	  for (l = 0; l < b; ++l)
+	    c += d[j + l];
+      }
+  return c;
+}
+
+__attribute__((noinline, noclone)) int
+bar (int a)
+{
+  int j, c = 0;
+  #pragma omp parallel for simd reduction(+: c) safelen(1)
+    for (j = 0; j < a; ++j)
+      c += d[j];
+  return c;
+}
+
+__attribute__((noinline)) static int
+baz (int a)
+{
+  int j, c = 0;
+  #pragma omp simd reduction(+: c) safelen(1)
+    for (j = 0; j < a; ++j)
+      c += d[j];
+  return c;
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 32 * 32; i++)
+    d[i] = (i & 31);
+  if (foo (32 * 32, 32) != (31 * 32 / 2) * 32)
+    abort ();
+  if (bar (32 * 32) != (31 * 32 / 2) * 32)
+    abort ();
+  if (baz (32 * 32) != (31 * 32 / 2) * 32)
+    abort ();
+  return 0;
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]