This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, PR46032] Handle BUILT_IN_GOMP_PARALLEL in ipa-pta


On 30/11/15 14:24, Richard Biener wrote:
On Mon, 30 Nov 2015, Tom de Vries wrote:

On 30/11/15 10:16, Richard Biener wrote:
On Mon, 30 Nov 2015, Tom de Vries wrote:

Hi,

this patch fixes PR46032.

It handles a call:
...
    __builtin_GOMP_parallel (fn, data, num_threads, flags)
...
as:
...
    fn (data)
...
in ipa-pta.

This improves ipa-pta alias analysis in the parallelized function fn, and
allows vectorization in the testcase without a runtime alias test.

Bootstrapped and reg-tested on x86_64.

OK for stage3 trunk?

+             /* Assign the passed argument to the appropriate incoming
+                parameter of the function.  */
+             struct constraint_expr lhs ;
+             lhs = get_function_part_constraint (fi, fi_parm_base + 0);
+             auto_vec<ce_s, 2> rhsc;
+             struct constraint_expr *rhsp;
+             get_constraint_for_rhs (arg, &rhsc);
+             while (rhsc.length () != 0)
+               {
+                 rhsp = &rhsc.last ();
+                 process_constraint (new_constraint (lhs, *rhsp));
+                 rhsc.pop ();
+               }

please use style used elsewhere with

               FOR_EACH_VEC_ELT (rhsc, j, rhsp)
                 process_constraint (new_constraint (lhs, *rhsp));
               rhsc.truncate (0);


That code was copied from find_func_aliases_for_call.
I've factored out the bit that I copied as find_func_aliases_for_call_arg, and
fixed the style there (and dropped 'rhsc.truncate (0)' since AFAIU it's
redundant at the end of a function).

+             /* Parameter passed by value is used.  */
+             lhs = get_function_part_constraint (fi, fi_uses);
+             struct constraint_expr *rhsp;
+             get_constraint_for_address_of (arg, &rhsc);

This isn't correct - you want to use get_constraint_for (arg, &rhsc).
After all rhs is already an ADDR_EXPR.


Can we add an assert somewhere to detect this incorrect usage?

+             FOR_EACH_VEC_ELT (rhsc, j, rhsp)
+               process_constraint (new_constraint (lhs, *rhsp));
+             rhsc.truncate (0);
+
+             /* The caller clobbers what the callee does.  */
+             lhs = get_function_part_constraint (fi, fi_clobbers);
+             rhs = get_function_part_constraint (cfi, fi_clobbers);
+             process_constraint (new_constraint (lhs, rhs));
+
+             /* The caller uses what the callee does.  */
+             lhs = get_function_part_constraint (fi, fi_uses);
+             rhs = get_function_part_constraint (cfi, fi_uses);
+             process_constraint (new_constraint (lhs, rhs));

I don't see why you need those.  The solver should compute these
in even better precision (context sensitive on the call side).

The same is true for the function parameter.  That is, the only
needed part of the patch should be that making sure we see
the "direct" call and assign parameters correctly.


Dropped this bit.

OK for stage3 trunk if bootstrap and reg-test succeeds?

-                        || node->address_taken);
+                        || (node->address_taken
+                            && !node->parallelized_function));

please add a comment here on why this is safe.

Ok with this change.

Updated with comment, committed as attached.

Thanks,
- Tom


Handle BUILT_IN_GOMP_PARALLEL in ipa-pta

2015-11-30  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/46032
	* tree-ssa-structalias.c (find_func_aliases_for_call_arg): New function,
	factored out of ...
	(find_func_aliases_for_call): ... here.
	(find_func_aliases_for_builtin_call, find_func_clobbers): Handle
	BUILT_IN_GOMP_PARALLEL.
	(ipa_pta_execute): Same.  Handle node->parallelized_function as a local
	function.

	* gcc.dg/pr46032.c: New test.

	* testsuite/libgomp.c/pr46032.c: New test.

---
 gcc/testsuite/gcc.dg/pr46032.c        | 47 +++++++++++++++++++++++
 gcc/tree-ssa-structalias.c            | 71 ++++++++++++++++++++++++++++-------
 libgomp/testsuite/libgomp.c/pr46032.c | 44 ++++++++++++++++++++++
 3 files changed, 149 insertions(+), 13 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr46032.c b/gcc/testsuite/gcc.dg/pr46032.c
new file mode 100644
index 0000000..b91190e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr46032.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp -ftree-vectorize -std=c99 -fipa-pta -fdump-tree-vect-all" } */
+
+extern void abort (void);
+
+#define nEvents 1000
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+init (unsigned *results, unsigned *pData)
+{
+  unsigned int i;
+  for (i = 0; i < nEvents; ++i)
+    pData[i] = i % 3;
+}
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+check (unsigned *results)
+{
+  unsigned sum = 0;
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    sum += results[idx];
+
+  if (sum != 1998)
+    abort ();
+}
+
+int
+main (void)
+{
+  unsigned results[nEvents];
+  unsigned pData[nEvents];
+  unsigned coeff = 2;
+
+  init (&results[0], &pData[0]);
+
+#pragma omp parallel for
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    results[idx] = coeff * pData[idx];
+
+  check (&results[0]);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "versioning for alias required" "vect" } } */
+
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index f24ebeb..7f4a8ad 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -4139,6 +4139,24 @@ get_fi_for_callee (gcall *call)
   return get_vi_for_tree (fn);
 }
 
+/* Create constraints for assigning call argument ARG to the incoming parameter
+   INDEX of function FI.  */
+
+static void
+find_func_aliases_for_call_arg (varinfo_t fi, unsigned index, tree arg)
+{
+  struct constraint_expr lhs;
+  lhs = get_function_part_constraint (fi, fi_parm_base + index);
+
+  auto_vec<ce_s, 2> rhsc;
+  get_constraint_for_rhs (arg, &rhsc);
+
+  unsigned j;
+  struct constraint_expr *rhsp;
+  FOR_EACH_VEC_ELT (rhsc, j, rhsp)
+    process_constraint (new_constraint (lhs, *rhsp));
+}
+
 /* Create constraints for the builtin call T.  Return true if the call
    was handled, otherwise false.  */
 
@@ -4488,6 +4506,25 @@ find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
 	    }
 	  return true;
 	}
+      case BUILT_IN_GOMP_PARALLEL:
+	{
+	  /* Handle __builtin_GOMP_parallel (fn, data, num_threads, flags) as
+	     fn (data).  */
+	  if (in_ipa_mode)
+	    {
+	      tree fnarg = gimple_call_arg (t, 0);
+	      gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR);
+	      tree fndecl = TREE_OPERAND (fnarg, 0);
+	      tree arg = gimple_call_arg (t, 1);
+	      gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
+
+	      varinfo_t fi = get_vi_for_tree (fndecl);
+	      find_func_aliases_for_call_arg (fi, 0, arg);
+	      return true;
+	    }
+	  /* Else fallthru to generic call handling.  */
+	  break;
+	}
       /* printf-style functions may have hooks to set pointers to
 	 point to somewhere into the generated string.  Leave them
 	 for a later exercise...  */
@@ -4546,18 +4583,8 @@ find_func_aliases_for_call (struct function *fn, gcall *t)
 	 parameters of the function.  */
       for (j = 0; j < gimple_call_num_args (t); j++)
 	{
-	  struct constraint_expr lhs ;
-	  struct constraint_expr *rhsp;
 	  tree arg = gimple_call_arg (t, j);
-
-	  get_constraint_for_rhs (arg, &rhsc);
-	  lhs = get_function_part_constraint (fi, fi_parm_base + j);
-	  while (rhsc.length () != 0)
-	    {
-	      rhsp = &rhsc.last ();
-	      process_constraint (new_constraint (lhs, *rhsp));
-	      rhsc.pop ();
-	    }
+	  find_func_aliases_for_call_arg (fi, j, arg);
 	}
 
       /* If we are returning a value, assign it to the result.  */
@@ -5036,6 +5063,8 @@ find_func_clobbers (struct function *fn, gimple *origt)
 	  case BUILT_IN_VA_START:
 	  case BUILT_IN_VA_END:
 	    return;
+	  case BUILT_IN_GOMP_PARALLEL:
+	    return;
 	  /* printf-style functions may have hooks to set pointers to
 	     point to somewhere into the generated string.  Leave them
 	     for a later exercise...  */
@@ -7345,6 +7374,18 @@ ipa_pta_execute (void)
 
       gcc_assert (!node->clone_of);
 
+      /* When parallelizing a code region, we split the region off into a
+	 separate function, to be run by several threads in parallel.  So for a
+	 function foo, we split off a region into a function
+	 foo._0 (void *foodata), and replace the region with some variant of a
+	 function call run_on_threads (&foo._0, data).  The '&foo._0' sets the
+	 address_taken bit for function foo._0, which would make it non-local.
+	 But for the purpose of ipa-pta, we can regard the run_on_threads call
+	 as a local call foo._0 (data),  so we ignore address_taken on nodes
+	 with parallelized_function set.  */
+      bool node_address_taken = (node->address_taken
+				 && !node->parallelized_function);
+
       /* For externally visible or attribute used annotated functions use
 	 local constraints for their arguments.
 	 For local functions we see all callers and thus do not need initial
@@ -7352,7 +7393,7 @@ ipa_pta_execute (void)
       bool nonlocal_p = (node->used_from_other_partition
 			 || node->externally_visible
 			 || node->force_output
-			 || node->address_taken);
+			 || node_address_taken);
 
       vi = create_function_info_for (node->decl,
 				     alias_get_name (node->decl), false,
@@ -7504,7 +7545,11 @@ ipa_pta_execute (void)
 		continue;
 
 	      /* Handle direct calls to functions with body.  */
-	      decl = gimple_call_fndecl (stmt);
+	      if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL))
+		decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
+	      else
+		decl = gimple_call_fndecl (stmt);
+
 	      if (decl
 		  && (fi = lookup_vi_for_tree (decl))
 		  && fi->is_fn_info)
diff --git a/libgomp/testsuite/libgomp.c/pr46032.c b/libgomp/testsuite/libgomp.c/pr46032.c
new file mode 100644
index 0000000..2178aa7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr46032.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -std=c99 -fipa-pta" } */
+
+
+extern void abort (void);
+
+#define nEvents 1000
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+init (unsigned *results, unsigned *pData)
+{
+  unsigned int i;
+  for (i = 0; i < nEvents; ++i)
+    pData[i] = i % 3;
+}
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+check (unsigned *results)
+{
+  unsigned sum = 0;
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    sum += results[idx];
+
+  if (sum != 1998)
+    abort ();
+}
+
+int
+main (void)
+{
+  unsigned results[nEvents];
+  unsigned pData[nEvents];
+  unsigned coeff = 2;
+
+  init (&results[0], &pData[0]);
+
+#pragma omp parallel for
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    results[idx] = coeff * pData[idx];
+
+  check (&results[0]);
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]