This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp, rfc] What can be considered combined parallel (PR libgomp/32468)


Hi!

ATM determine_parallel_type will consider all cases in
the attached testcase as combined parallel for or sections.
But before doing the inner omp for or sections or after
it the code calls some function and that surely should be
able e.g. to use #pragma omp sections or #pragma omp for.
This patch does some combined parallel detection already before
omp lowering and then just handles easily detectable cases
(where there is no other code in WS_ENTRY_BB resp. PAR_EXIT_BB than
the omp directives).
This patch changes 3 gomp testcases from combined parallels to separate
ones:
gcc.dg/gomp/pr27388-2.c
void
bar (void)
{
  int j = 0;
#pragma omp parallel shared (j)
  {
    j++;
#pragma omp for
    for (j = 0; j < 2; j++)
      baz (j);
  }
}
libgomp.c/omp_matvec.c
/* Create a team of threads and scope variables */
#pragma omp parallel shared(A,b,c,total) private(tid,i)
  {
  tid = omp_get_thread_num();
/* Loop work-sharing construct - distribute rows of matrix */
#pragma omp for private(j)
  for (i=0; i < SIZE; i++)
    {
    }
  }
libgomp.c/shared-1.c
  #pragma omp parallel shared (a, b, A) num_threads (5)
    {
      int i, j;
      #pragma omp atomic
      a += omp_get_num_threads ();
      #pragma omp atomic
      b += omp_get_num_threads ();
      #pragma omp for private (j)
      for (i = 0; i < 10; i++)
        for (j = 0; j < 5; j++)
          A[i][j].y.l[3][3] += 20;
    }


The question is when we can use the combined
parallel workshare routines (GOMP_parallel_{loop,sections}*).

In the strict sense none of the above ones is a combined parallel,
but GOMP_parallel_{loop,sections}* means only that the workshare is
already created when the omp_fn.* function is entered.

So, perhaps we could change determine_parallel_type
in the patch below, allow any statements before #pragma omp {for,sections}
resp. after OMP_RETURN that don't call any non-pure non-const function
(or call only a few selected builtin functions, like __builtin_alloca (),
__builtin_omp_get_num_threads (), __builtin_omp_get_thread_num ()).

Looking at OMP_PARALLEL_COMBINED and setting it up in lower_omp_parallel
if not explicit in the program is IMHO still desirable, as WS_ENTRY_BB
or PAR_EXIT_BB can have e.g. ctors and dtors function calls from combined
parallels and without looking at OMP_PARALLEL_COMBINED we would just
reject them.

2007-06-25  Jakub Jelinek  <jakub@redhat.com>

	PR libgomp/32468
	* omp-low.c (check_combined_parallel): New function.
	(lower_omp_parallel): Call it via walk_stmts, set
	OMP_PARALLEL_COMBINED if appropriate.
	(determine_parallel_type): If OMP_FOR resp. OMP_SECTIONS
	isn't the only statement in WS_ENTRY_BB or OMP_RETURN
	the only one in PAR_EXIT_BB and not OMP_PARALLEL_COMBINED,
	don't consider it as combined parallel.

--- gcc/omp-low.c.jj	2007-06-21 13:38:10.000000000 +0200
+++ gcc/omp-low.c	2007-06-25 19:21:35.000000000 +0200
@@ -385,10 +385,15 @@ determine_parallel_type (struct omp_regi
 
   if (single_succ (par_entry_bb) == ws_entry_bb
       && single_succ (ws_exit_bb) == par_exit_bb
-      && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb))
+      && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb)
+      && (OMP_PARALLEL_COMBINED (last_stmt (par_entry_bb))
+	  || (last_and_only_stmt (ws_entry_bb)
+	      && last_and_only_stmt (par_exit_bb))))
     {
-      tree ws_stmt = last_stmt (region->inner->entry);
+      tree ws_stmt = last_stmt (ws_entry_bb);
 
+      /* Make sure there aren't any side-effects before the OMP directive
+	 in WS_ENTRY_BB and in PAR_EXIT_BB.  */
       if (region->inner->type == OMP_FOR)
 	{
 	  /* If this is a combined parallel loop, we need to determine
@@ -4060,6 +4065,28 @@ lower_omp_for (tree *stmt_p, omp_context
   *stmt_p = new_stmt;
 }
 
+/* Callback for walk_stmts.  Check if *TP only contains OMP_FOR
+   or OMP_PARALLEL.  */
+
+static tree
+check_combined_parallel (tree *tp, int *walk_subtrees, void *data)
+{
+  struct walk_stmt_info *wi = data;
+  int *info = wi->info;
+
+  *walk_subtrees = 0;
+  switch (TREE_CODE (*tp))
+    {
+    case OMP_FOR:
+    case OMP_SECTIONS:
+      *info = *info == 0 ? 1 : -1;
+      break;
+    default:
+      *info = -1;
+      break;
+    }
+  return NULL;
+}
 
 /* Lower the OpenMP parallel directive in *STMT_P.  CTX holds context
    information for the directive.  */
@@ -4077,6 +4104,19 @@ lower_omp_parallel (tree *stmt_p, omp_co
   par_bind = OMP_PARALLEL_BODY (stmt);
   par_body = BIND_EXPR_BODY (par_bind);
   child_fn = ctx->cb.dst_fn;
+  if (!OMP_PARALLEL_COMBINED (stmt))
+    {
+      struct walk_stmt_info wi;
+      int ws_num = 0;
+
+      memset (&wi, 0, sizeof (wi));
+      wi.callback = check_combined_parallel;
+      wi.info = &ws_num;
+      wi.val_only = true;
+      walk_stmts (&wi, &par_bind);
+      if (ws_num == 1)
+	OMP_PARALLEL_COMBINED (stmt) = 1;
+    }
 
   push_gimplify_context ();
 
--- gcc/testsuite/gcc.dg/gomp/pr32468-1.c.jj	2007-06-25 21:04:31.000000000 +0200
+++ gcc/testsuite/gcc.dg/gomp/pr32468-1.c	2007-06-25 21:07:35.000000000 +0200
@@ -0,0 +1,100 @@
+/* PR libgomp/32468 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp -fdump-tree-ompexp" } */
+
+extern int printf (const char *, ...);
+extern int omp_get_thread_num (void), omp_get_num_threads (void);
+extern int bar (void);
+extern int baz (const char *, ...);
+
+void
+f1 (void)
+{
+#pragma omp parallel
+  {
+    baz ("%d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+    #pragma omp sections
+      {
+	#pragma omp section
+	printf ("section1 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+	#pragma omp section
+	printf ("section2 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+      }
+  }
+}
+
+void
+f2 (void)
+{
+#pragma omp parallel
+  {
+    #pragma omp sections
+      {
+	#pragma omp section
+	printf ("section1 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+	#pragma omp section
+	printf ("section2 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+      }
+    baz ("%d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+  }
+}
+
+void
+f3 (void)
+{
+#pragma omp parallel
+  {
+    int bb = bar ();
+    #pragma omp sections
+      {
+	#pragma omp section
+	printf ("section1 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+	#pragma omp section
+	printf ("section2 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+      }
+  }
+}
+
+void
+f4 (void)
+{
+  int i;
+#pragma omp parallel
+  {
+    baz ("%d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+    #pragma omp for schedule (dynamic, 15)
+    for (i = 0; i < 10000; i++)
+      printf ("section1 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+  }
+}
+
+void
+f5 (void)
+{
+  int i;
+#pragma omp parallel
+  {
+    #pragma omp for schedule (dynamic, 15)
+    for (i = 0; i < 10000; i++)
+      printf ("section1 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+    baz ("%d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+  }
+}
+
+void
+f6 (void)
+{
+  int i;
+#pragma omp parallel
+  {
+    int bb = bar ();
+    #pragma omp for schedule (runtime)
+    for (i = 0; i < 10000; i++)
+      printf ("section1 %d/%d\n", omp_get_thread_num (), omp_get_num_threads ());
+  }
+}
+
+/* There should not be a GOMP_parallel_{loop,sections}* call.  */
+/* { dg-final { scan-tree-dump-times "GOMP_parallel_loop" 0 "ompexp"} } */
+/* { dg-final { scan-tree-dump-times "GOMP_parallel_sections" 0 "ompexp"} } */
+/* { dg-final { cleanup-tree-dump "ompexp" } } */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]