[committed] Fix handling of references in reduction(inscan, ...) and inclusive clauses

Jakub Jelinek jakub@redhat.com
Wed Jun 19 08:33:00 GMT 2019


Hi!

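References are a nightmare to support :(.  This patch fixes the handling
of reference-typed variables in reduction(inscan, ...) clauses and in
inclusive/exclusive clauses.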

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-19  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (lower_rec_input_clauses): Handle references properly
	in inscan clauses.
	(lower_omp_scan): Likewise.
cp/
	* cp-gimplify.c (cp_genericize_r): Handle OMP_CLAUSE_{IN,EX}CLUSIVE
	like OMP_CLAUSE_SHARED.
testsuite/
	* g++.dg/vect/simd-3.cc: New test.
	* g++.dg/vect/simd-4.cc: New test.
	* g++.dg/vect/simd-5.cc: New test.

--- gcc/omp-low.c.jj	2019-06-17 23:18:53.614850166 +0200
+++ gcc/omp-low.c	2019-06-18 12:21:25.911696625 +0200
@@ -5237,10 +5237,13 @@ lower_rec_input_clauses (tree clauses, g
 			      if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
 				{
 				  tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
-				  x = DECL_VALUE_EXPR (new_var);
-				  SET_DECL_VALUE_EXPR (new_var, nv);
+				  x = DECL_VALUE_EXPR (new_vard);
+				  tree vexpr = nv;
+				  if (new_vard != new_var)
+				    vexpr = build_fold_addr_expr (nv);
+				  SET_DECL_VALUE_EXPR (new_vard, vexpr);
 				  lower_omp (&tseq, ctx);
-				  SET_DECL_VALUE_EXPR (new_var, x);
+				  SET_DECL_VALUE_EXPR (new_vard, x);
 				  gimple_seq_add_seq (ilist, tseq);
 				  OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
 				}
@@ -5366,20 +5369,23 @@ lower_rec_input_clauses (tree clauses, g
 		    {
 		      if (x)
 			{
-			  tree nv = create_tmp_var_raw (TREE_TYPE (new_vard));
+			  tree nv = create_tmp_var_raw (TREE_TYPE (new_var));
 			  gimple_add_tmp_var (nv);
-			  ctx->cb.decl_map->put (new_var, nv);
+			  ctx->cb.decl_map->put (new_vard, nv);
 			  x = lang_hooks.decls.omp_clause_default_ctor
 				(c, nv, build_outer_var_ref (var, ctx));
 			  gimplify_and_add (x, ilist);
 			  if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
 			    {
 			      tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
-			      SET_DECL_VALUE_EXPR (new_var, nv);
-			      DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+			      tree vexpr = nv;
+			      if (new_vard != new_var)
+				vexpr = build_fold_addr_expr (nv);
+			      SET_DECL_VALUE_EXPR (new_vard, vexpr);
+			      DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
 			      lower_omp (&tseq, ctx);
-			      SET_DECL_VALUE_EXPR (new_var, NULL_TREE);
-			      DECL_HAS_VALUE_EXPR_P (new_var) = 0;
+			      SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
+			      DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
 			      gimple_seq_add_seq (ilist, tseq);
 			    }
 			  OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
@@ -5523,10 +5529,12 @@ lower_rec_input_clauses (tree clauses, g
 		      gimplify_assign (ref, x, &llist[1]);
 
 		    }
-		  else if (rvarp == NULL)
+		  else
 		    {
 		      if (omp_is_reference (var) && is_simd)
 			handle_simd_reference (clause_loc, new_vard, ilist);
+		      if (rvarp)
+			break;
 		      gimplify_assign (new_var, x, ilist);
 		      if (is_simd)
 			{
@@ -8586,14 +8594,26 @@ lower_omp_scan (gimple_stmt_iterator *gs
 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
 	    && OMP_CLAUSE_REDUCTION_INSCAN (c))
 	  {
+	    location_t clause_loc = OMP_CLAUSE_LOCATION (c);
 	    tree var = OMP_CLAUSE_DECL (c);
 	    tree new_var = lookup_decl (var, octx);
 	    tree val = new_var;
 	    tree var2 = NULL_TREE;
 	    tree var3 = NULL_TREE;
-	    if (DECL_HAS_VALUE_EXPR_P (new_var))
+	    tree new_vard = new_var;
+	    if (omp_is_reference (var))
+	      {
+		new_var = build_simple_mem_ref_loc (clause_loc, new_var);
+		val = new_var;
+	      }
+	    if (DECL_HAS_VALUE_EXPR_P (new_vard))
 	      {
-		val = DECL_VALUE_EXPR (new_var);
+		val = DECL_VALUE_EXPR (new_vard);
+		if (omp_is_reference (var))
+		  {
+		    gcc_assert (TREE_CODE (val) == ADDR_EXPR);
+		    val = TREE_OPERAND (val, 0);
+		  }
 		if (TREE_CODE (val) == ARRAY_REF
 		    && VAR_P (TREE_OPERAND (val, 0)))
 		  {
@@ -8617,14 +8637,15 @@ lower_omp_scan (gimple_stmt_iterator *gs
 			  var2 = val;
 		      }
 		  }
+		gcc_assert (var2);
 	      }
 	    else
 	      {
 		var2 = build_outer_var_ref (var, octx);
 		if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
 		  {
-		    var3 = maybe_lookup_decl (new_var, octx);
-		    if (var3 == new_var)
+		    var3 = maybe_lookup_decl (new_vard, octx);
+		    if (var3 == new_vard)
 		      var3 = NULL_TREE;
 		  }
 	      }
@@ -8645,18 +8666,22 @@ lower_omp_scan (gimple_stmt_iterator *gs
 		      {
 			/* Otherwise, assign to it the identity element.  */
 			gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
-			tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
-				  ? DECL_VALUE_EXPR (new_var) : NULL_TREE);
 			tree ref = build_outer_var_ref (var, octx);
-			SET_DECL_VALUE_EXPR (new_var, val);
+			tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
+				  ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
+			if (x)
+			  {
+			    if (omp_is_reference (var))
+			      val = build_fold_addr_expr_loc (clause_loc, val);
+			    SET_DECL_VALUE_EXPR (new_vard, val);
+			  }
 			SET_DECL_VALUE_EXPR (placeholder, ref);
 			DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
 			lower_omp (&tseq, octx);
-			SET_DECL_VALUE_EXPR (new_var, x);
+			if (x)
+			  SET_DECL_VALUE_EXPR (new_vard, x);
 			SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
 			DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
-			if (x == NULL_TREE)
-			  DECL_HAS_VALUE_EXPR_P (new_var) = 0;
 			gimple_seq_add_seq (&before, tseq);
 			OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
 		      }
@@ -8664,15 +8689,20 @@ lower_omp_scan (gimple_stmt_iterator *gs
 		else
 		  {
 		    gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
-		    tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
-			      ? DECL_VALUE_EXPR (new_var) : NULL_TREE);
-		    SET_DECL_VALUE_EXPR (new_var, val);
+		    tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
+			      ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
+		    tree vexpr = val;
+		    if (x && omp_is_reference (var))
+		      vexpr = build_fold_addr_expr_loc (clause_loc, val);
+		    if (x)
+		      SET_DECL_VALUE_EXPR (new_vard, vexpr);
 		    SET_DECL_VALUE_EXPR (placeholder, var2);
 		    DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
 		    lower_omp (&tseq, octx);
 		    gimple_seq_add_seq (&before, tseq);
 		    OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
-		    SET_DECL_VALUE_EXPR (new_var, x);
+		    if (x)
+		      SET_DECL_VALUE_EXPR (new_vard, x);
 		    SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
 		    DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
 		    x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
--- gcc/cp/cp-gimplify.c.jj	2019-06-10 18:17:59.000000000 +0200
+++ gcc/cp/cp-gimplify.c	2019-06-18 11:27:17.376467414 +0200
@@ -1238,6 +1238,8 @@ cp_genericize_r (tree *stmt_p, int *walk
 	case OMP_CLAUSE_FIRSTPRIVATE:
 	case OMP_CLAUSE_COPYIN:
 	case OMP_CLAUSE_COPYPRIVATE:
+	case OMP_CLAUSE_INCLUSIVE:
+	case OMP_CLAUSE_EXCLUSIVE:
 	  /* Don't dereference an invisiref in OpenMP clauses.  */
 	  if (is_invisiref_parm (OMP_CLAUSE_DECL (stmt)))
 	    *walk_subtrees = 0;
--- gcc/testsuite/g++.dg/vect/simd-3.cc.jj	2019-06-18 10:43:04.339992008 +0200
+++ gcc/testsuite/g++.dg/vect/simd-3.cc	2019-06-18 10:45:35.918828020 +0200
@@ -0,0 +1,120 @@
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int r, a[1024], b[1024], q;
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, +:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+  #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, +:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = 25;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -1;
+    }
+  r = 0;
+  baz (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -25;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+	abort ();
+    }
+  return 0;
+}
--- gcc/testsuite/g++.dg/vect/simd-4.cc.jj	2019-06-18 10:44:53.858392831 +0200
+++ gcc/testsuite/g++.dg/vect/simd-4.cc	2019-06-18 10:45:42.131744592 +0200
@@ -0,0 +1,122 @@
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int r, a[1024], b[1024], q;
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+  #pragma omp simd reduction (inscan, foo:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, foo:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = 25;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -1;
+    }
+  r = 0;
+  baz (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -25;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+	abort ();
+    }
+  return 0;
+}
--- gcc/testsuite/g++.dg/vect/simd-5.cc.jj	2019-06-18 12:26:44.124630462 +0200
+++ gcc/testsuite/g++.dg/vect/simd-5.cc	2019-06-18 12:26:38.869714120 +0200
@@ -0,0 +1,153 @@
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+struct S {
+  inline S ();
+  inline ~S ();
+  inline S (const S &);
+  inline S & operator= (const S &);
+  int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+  s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+  s = x.s;
+  return *this;
+}
+
+static inline void
+ini (S &x)
+{
+  x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b, S &r)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      r.s += a[i].s;
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+  S s;
+  #pragma omp simd reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s.s += 2 * a[i].s;
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b, S &r)
+{
+  #pragma omp simd reduction (inscan, +:r) simdlen(1)
+  for (int i = 0; i < 1024; i++)
+    {
+      r.s += a[i].s;
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+  S s;
+  #pragma omp simd if (0) reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s.s += 2 * a[i].s;
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+int
+main ()
+{
+  S s;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i].s = i;
+      b[i].s = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, r);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += i;
+      if (b[i].s != s.s)
+	abort ();
+      else
+	b[i].s = 25;
+    }
+  if (bar ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += 2 * i;
+      if (b[i].s != s.s)
+	abort ();
+    }
+  r.s = 0;
+  baz (a, b, r);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += i;
+      if (b[i].s != s.s)
+	abort ();
+      else
+	b[i].s = 25;
+    }
+  if (qux ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += 2 * i;
+      if (b[i].s != s.s)
+	abort ();
+    }
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list