[committed] openmp: Add support for strict modifier on grainsize/num_tasks clauses

Jakub Jelinek jakub@redhat.com
Mon Aug 23 08:25:44 GMT 2021


Hi!

With strict: modifier on these clauses, the standard is explicit about
how many iterations (and which) each generated task of taskloop directive
should contain.  For num_tasks it actually matches what we were already
implementing, but for grainsize it does not (and even violates the old
rule - without strict it requires that the number of iterations (unspecified
which exactly) handled by each generated task is >= grainsize argument and
< 2 * grainsize argument, with strict: it requires that each generated
task handles exactly == grainsize argument iterations, except for the
generated task handling the last iteration which can handle <= grainsize
iterations).

The following patch implements it for C and C++.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-08-23  Jakub Jelinek  <jakub@redhat.com>

gcc/
	* tree.h (OMP_CLAUSE_GRAINSIZE_STRICT): Define.
	(OMP_CLAUSE_NUM_TASKS_STRICT): Define.
	* tree-pretty-print.c (dump_omp_clause) <case OMP_CLAUSE_GRAINSIZE,
	case OMP_CLAUSE_NUM_TASKS>: Print strict: modifier.
	* omp-expand.c (expand_task_call): Use GOMP_TASK_FLAG_STRICT in iflags
	if either grainsize or num_tasks clause has the strict modifier.
gcc/c/
	* c-parser.c (c_parser_omp_clause_num_tasks,
	c_parser_omp_clause_grainsize): Parse the optional strict: modifier.
gcc/cp/
	* parser.c (cp_parser_omp_clause_num_tasks,
	cp_parser_omp_clause_grainsize): Parse the optional strict: modifier.
include/
	* gomp-constants.h (GOMP_TASK_FLAG_STRICT): Define.
libgomp/
	* taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_STRICT.
	* testsuite/libgomp.c-c++-common/taskloop-4.c (main): Fix up comment.
	* testsuite/libgomp.c-c++-common/taskloop-5.c: New test.

--- gcc/tree.h.jj	2021-08-19 11:42:27.458421107 +0200
+++ gcc/tree.h	2021-08-20 18:22:28.743682537 +0200
@@ -1612,6 +1612,11 @@ class auto_suppress_location_wrappers
 #define OMP_CLAUSE_PRIORITY_EXPR(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PRIORITY),0)
 
+#define OMP_CLAUSE_GRAINSIZE_STRICT(NODE) \
+  TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GRAINSIZE))
+#define OMP_CLAUSE_NUM_TASKS_STRICT(NODE) \
+  TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_NUM_TASKS))
+
 /* OpenACC clause expressions  */
 #define OMP_CLAUSE_EXPR(NODE, CLAUSE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, CLAUSE), 0)
--- gcc/tree-pretty-print.c.jj	2021-08-17 09:29:41.391205129 +0200
+++ gcc/tree-pretty-print.c	2021-08-20 18:23:32.522804918 +0200
@@ -1066,6 +1066,8 @@ dump_omp_clause (pretty_printer *pp, tre
 
     case OMP_CLAUSE_GRAINSIZE:
       pp_string (pp, "grainsize(");
+      if (OMP_CLAUSE_GRAINSIZE_STRICT (clause))
+	pp_string (pp, "strict:");
       dump_generic_node (pp, OMP_CLAUSE_GRAINSIZE_EXPR (clause),
 			 spc, flags, false);
       pp_right_paren (pp);
@@ -1073,6 +1075,8 @@ dump_omp_clause (pretty_printer *pp, tre
 
     case OMP_CLAUSE_NUM_TASKS:
       pp_string (pp, "num_tasks(");
+      if (OMP_CLAUSE_NUM_TASKS_STRICT (clause))
+	pp_string (pp, "strict:");
       dump_generic_node (pp, OMP_CLAUSE_NUM_TASKS_EXPR (clause),
 			 spc, flags, false);
       pp_right_paren (pp);
--- gcc/omp-expand.c.jj	2021-08-17 09:29:41.398205034 +0200
+++ gcc/omp-expand.c	2021-08-20 18:49:35.779449914 +0200
@@ -791,13 +791,19 @@ expand_task_call (struct omp_region *reg
       tree tclauses = gimple_omp_for_clauses (g);
       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
       if (num_tasks)
-	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
+	{
+	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
+	    iflags |= GOMP_TASK_FLAG_STRICT;
+	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
+	}
       else
 	{
 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
 	  if (num_tasks)
 	    {
 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
+	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
+		iflags |= GOMP_TASK_FLAG_STRICT;
 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
 	    }
 	  else
--- gcc/c/c-parser.c.jj	2021-08-20 11:36:30.964244616 +0200
+++ gcc/c/c-parser.c	2021-08-20 18:33:52.145278707 +0200
@@ -13786,7 +13786,10 @@ c_parser_omp_clause_num_threads (c_parse
 }
 
 /* OpenMP 4.5:
-   num_tasks ( expression ) */
+   num_tasks ( expression )
+
+   OpenMP 5.1:
+   num_tasks ( strict : expression ) */
 
 static tree
 c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
@@ -13795,6 +13798,17 @@ c_parser_omp_clause_num_tasks (c_parser
   matching_parens parens;
   if (parens.require_open (parser))
     {
+      bool strict = false;
+      if (c_parser_next_token_is (parser, CPP_NAME)
+	  && c_parser_peek_2nd_token (parser)->type == CPP_COLON
+	  && strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value),
+		     "strict") == 0)
+	{
+	  strict = true;
+	  c_parser_consume_token (parser);
+	  c_parser_consume_token (parser);
+	}
+
       location_t expr_loc = c_parser_peek_token (parser)->location;
       c_expr expr = c_parser_expr_no_commas (parser, NULL);
       expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
@@ -13824,6 +13838,7 @@ c_parser_omp_clause_num_tasks (c_parser
 
       c = build_omp_clause (num_tasks_loc, OMP_CLAUSE_NUM_TASKS);
       OMP_CLAUSE_NUM_TASKS_EXPR (c) = t;
+      OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict;
       OMP_CLAUSE_CHAIN (c) = list;
       list = c;
     }
@@ -13832,7 +13847,10 @@ c_parser_omp_clause_num_tasks (c_parser
 }
 
 /* OpenMP 4.5:
-   grainsize ( expression ) */
+   grainsize ( expression )
+
+   OpenMP 5.1:
+   grainsize ( strict : expression ) */
 
 static tree
 c_parser_omp_clause_grainsize (c_parser *parser, tree list)
@@ -13841,6 +13859,17 @@ c_parser_omp_clause_grainsize (c_parser
   matching_parens parens;
   if (parens.require_open (parser))
     {
+      bool strict = false;
+      if (c_parser_next_token_is (parser, CPP_NAME)
+	  && c_parser_peek_2nd_token (parser)->type == CPP_COLON
+	  && strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value),
+		     "strict") == 0)
+	{
+	  strict = true;
+	  c_parser_consume_token (parser);
+	  c_parser_consume_token (parser);
+	}
+
       location_t expr_loc = c_parser_peek_token (parser)->location;
       c_expr expr = c_parser_expr_no_commas (parser, NULL);
       expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
@@ -13870,6 +13899,7 @@ c_parser_omp_clause_grainsize (c_parser
 
       c = build_omp_clause (grainsize_loc, OMP_CLAUSE_GRAINSIZE);
       OMP_CLAUSE_GRAINSIZE_EXPR (c) = t;
+      OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict;
       OMP_CLAUSE_CHAIN (c) = list;
       list = c;
     }
--- gcc/cp/parser.c.jj	2021-08-20 11:36:30.968244560 +0200
+++ gcc/cp/parser.c	2021-08-20 18:46:20.945085317 +0200
@@ -37237,7 +37237,10 @@ cp_parser_omp_clause_num_threads (cp_par
 }
 
 /* OpenMP 4.5:
-   num_tasks ( expression ) */
+   num_tasks ( expression )
+
+   OpenMP 5.1:
+   num_tasks ( strict : expression ) */
 
 static tree
 cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
@@ -37249,6 +37252,19 @@ cp_parser_omp_clause_num_tasks (cp_parse
   if (!parens.require_open (parser))
     return list;
 
+  bool strict = false;
+  if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
+      && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
+    {
+      tree id = cp_lexer_peek_token (parser->lexer)->u.value;
+      if (!strcmp (IDENTIFIER_POINTER (id), "strict"))
+	{
+	  strict = true;
+	  cp_lexer_consume_token (parser->lexer);
+	  cp_lexer_consume_token (parser->lexer);
+	}
+    }
+
   t = cp_parser_assignment_expression (parser);
 
   if (t == error_mark_node
@@ -37262,13 +37278,17 @@ cp_parser_omp_clause_num_tasks (cp_parse
 
   c = build_omp_clause (location, OMP_CLAUSE_NUM_TASKS);
   OMP_CLAUSE_NUM_TASKS_EXPR (c) = t;
+  OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict;
   OMP_CLAUSE_CHAIN (c) = list;
 
   return c;
 }
 
 /* OpenMP 4.5:
-   grainsize ( expression ) */
+   grainsize ( expression )
+
+   OpenMP 5.1:
+   grainsize ( strict : expression ) */
 
 static tree
 cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
@@ -37280,6 +37300,19 @@ cp_parser_omp_clause_grainsize (cp_parse
   if (!parens.require_open (parser))
     return list;
 
+  bool strict = false;
+  if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
+      && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
+    {
+      tree id = cp_lexer_peek_token (parser->lexer)->u.value;
+      if (!strcmp (IDENTIFIER_POINTER (id), "strict"))
+	{
+	  strict = true;
+	  cp_lexer_consume_token (parser->lexer);
+	  cp_lexer_consume_token (parser->lexer);
+	}
+    }
+
   t = cp_parser_assignment_expression (parser);
 
   if (t == error_mark_node
@@ -37293,6 +37326,7 @@ cp_parser_omp_clause_grainsize (cp_parse
 
   c = build_omp_clause (location, OMP_CLAUSE_GRAINSIZE);
   OMP_CLAUSE_GRAINSIZE_EXPR (c) = t;
+  OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict;
   OMP_CLAUSE_CHAIN (c) = list;
 
   return c;
--- include/gomp-constants.h.jj	2021-01-16 22:52:33.673413185 +0100
+++ include/gomp-constants.h	2021-08-20 18:17:39.316666260 +0200
@@ -222,6 +222,7 @@ enum gomp_map_kind
 #define GOMP_TASK_FLAG_NOGROUP		(1 << 11)
 #define GOMP_TASK_FLAG_REDUCTION	(1 << 12)
 #define GOMP_TASK_FLAG_DETACH		(1 << 13)
+#define GOMP_TASK_FLAG_STRICT		(1 << 14)
 
 /* GOMP_target{_ext,update_ext,enter_exit_data} flags argument.  */
 #define GOMP_TARGET_FLAG_NOWAIT		(1 << 0)
--- libgomp/taskloop.c.jj	2021-05-11 23:40:52.744338169 +0200
+++ libgomp/taskloop.c	2021-08-22 14:37:56.859984138 +0200
@@ -97,6 +97,7 @@ GOMP_taskloop (void (*fn) (void *), void
 #endif
 
   TYPE task_step = step;
+  TYPE nfirst_task_step = step;
   unsigned long nfirst = n;
   if (flags & GOMP_TASK_FLAG_GRAINSIZE)
     {
@@ -109,7 +110,22 @@ GOMP_taskloop (void (*fn) (void *), void
       if (num_tasks != ndiv)
 	num_tasks = ~0UL;
 #endif
-      if (num_tasks <= 1)
+      if ((flags & GOMP_TASK_FLAG_STRICT)
+	  && num_tasks != ~0ULL)
+	{
+	  UTYPE mod = n % grainsize;
+	  task_step = (TYPE) grainsize * step;
+	  if (mod)
+	    {
+	      num_tasks++;
+	      nfirst_task_step = (TYPE) mod * step;
+	      if (num_tasks == 1)
+		task_step = nfirst_task_step;
+	      else
+		nfirst = num_tasks - 2;
+	    }
+	}
+      else if (num_tasks <= 1)
 	{
 	  num_tasks = 1;
 	  task_step = end - start;
@@ -124,6 +140,7 @@ GOMP_taskloop (void (*fn) (void *), void
 	  task_step = (TYPE) grainsize * step;
 	  if (mul != n)
 	    {
+	      nfirst_task_step = task_step;
 	      task_step += step;
 	      nfirst = n - mul - 1;
 	    }
@@ -135,6 +152,7 @@ GOMP_taskloop (void (*fn) (void *), void
 	  task_step = (TYPE) div * step;
 	  if (mod)
 	    {
+	      nfirst_task_step = task_step;
 	      task_step += step;
 	      nfirst = mod - 1;
 	    }
@@ -153,6 +171,7 @@ GOMP_taskloop (void (*fn) (void *), void
 	  task_step = (TYPE) div * step;
 	  if (mod)
 	    {
+	      nfirst_task_step = task_step;
 	      task_step += step;
 	      nfirst = mod - 1;
 	    }
@@ -225,7 +244,7 @@ GOMP_taskloop (void (*fn) (void *), void
 	      start += task_step;
 	      ((TYPE *)arg)[1] = start;
 	      if (i == nfirst)
-		task_step -= step;
+		task_step = nfirst_task_step;
 	      fn (arg);
 	      arg += arg_size;
 	      if (!priority_queue_empty_p (&task[i].children_queue,
@@ -258,7 +277,7 @@ GOMP_taskloop (void (*fn) (void *), void
 	    start += task_step;
 	    ((TYPE *)data)[1] = start;
 	    if (i == nfirst)
-	      task_step -= step;
+	      task_step = nfirst_task_step;
 	    fn (data);
 	    if (!priority_queue_empty_p (&task.children_queue,
 					 MEMMODEL_RELAXED))
@@ -303,7 +322,7 @@ GOMP_taskloop (void (*fn) (void *), void
 	  start += task_step;
 	  ((TYPE *)arg)[1] = start;
 	  if (i == nfirst)
-	    task_step -= step;
+	    task_step = nfirst_task_step;
 	  thr->task = parent;
 	  task->kind = GOMP_TASK_WAITING;
 	  task->fn = fn;
--- libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c.jj	2020-01-12 11:54:39.029373941 +0100
+++ libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c	2021-08-20 19:19:27.613993520 +0200
@@ -85,7 +85,8 @@ main ()
 	if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7
 	    || ntasks != 1 || min_iters != 7 || max_iters != 7)
 	  __builtin_abort ();
-	/* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks).  */
+	/* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks)
+	   and each task has at least one iteration.  */
 	if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54
 	    || ntasks != 9)
 	  __builtin_abort ();
--- libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c.jj	2021-08-20 18:58:21.594313604 +0200
+++ libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c	2021-08-22 14:14:55.859105770 +0200
@@ -0,0 +1,135 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+int u[64], v, w[64];
+
+__attribute__((noinline, noclone)) int
+test (int a, int b, int c, int d, void (*fn) (int, int, int, int),
+      int *num_tasks, int *min_iters, int *max_iters, int *sep)
+{
+  int i, j, t = 0;
+  __builtin_memset (u, 0, sizeof u);
+  v = 0;
+  fn (a, b, c, d);
+  *min_iters = 0;
+  *max_iters = 0;
+  *num_tasks = v;
+  *sep = v;
+  if (v)
+    {
+      *min_iters = u[0];
+      *max_iters = u[0];
+      t = u[0];
+      for (i = 1; i < v; i++)
+	{
+	  if (*min_iters > u[i])
+	    *min_iters = u[i];
+	  if (*max_iters < u[i])
+	    *max_iters = u[i];
+	  t += u[i];
+	}
+      if (*min_iters != *max_iters)
+	{
+	  for (i = 0; i < v - 1; i++)
+	    {
+	      int min_idx = i;
+	      for (j = i + 1; j < v; j++)
+		if (w[min_idx] > w[j])
+		  min_idx = j;
+	      if (min_idx != i)
+		{
+		  int tem = u[i];
+		  u[i] = u[min_idx];
+		  u[min_idx] = tem;
+		  tem = w[i];
+		  w[i] = w[min_idx];
+		  w[min_idx] = tem;
+		}
+	    }
+	  if (u[0] != *max_iters)
+	    __builtin_abort ();
+	  for (i = 1; i < v; i++)
+	    if (u[i] != u[i - 1])
+	      {
+		if (*sep != v || u[i] != *min_iters)
+		  __builtin_abort ();
+		*sep = i;
+	      }
+	}
+    }
+  return t;
+}
+
+void
+grainsize (int a, int b, int c, int d)
+{
+  int i, j = 0, k = 0;
+  #pragma omp taskloop firstprivate (j, k) grainsize(strict:d)
+  for (i = a; i < b; i += c)
+    {
+      if (j == 0)
+	{
+	  #pragma omp atomic capture
+	    k = v++;
+	  if (k >= 64)
+	    __builtin_abort ();
+	  w[k] = i;
+	}
+      u[k] = ++j;
+    }
+}
+
+void
+num_tasks (int a, int b, int c, int d)
+{
+  int i, j = 0, k = 0;
+  #pragma omp taskloop firstprivate (j, k) num_tasks(strict:d)
+  for (i = a; i < b; i += c)
+    {
+      if (j == 0)
+	{
+	  #pragma omp atomic capture
+	    k = v++;
+	  if (k >= 64)
+	    __builtin_abort ();
+	  w[k] = i;
+	}
+      u[k] = ++j;
+    }
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+    #pragma omp single
+      {
+	int min_iters, max_iters, ntasks, sep;
+	/* If grainsize is present and has strict modifier, # of task loop iters is == grainsize,
+	   except that it can be smaller on the last task.  */
+	if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 79
+	    || ntasks != 5 || min_iters != 11 || max_iters != 17 || sep != 4)
+	  __builtin_abort ();
+	if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 370
+	    || ntasks != 14 || min_iters != 6 || max_iters != 28 || sep != 13)
+	  __builtin_abort ();
+	if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 7
+	    || ntasks != 1 || min_iters != 7 || max_iters != 7 || sep != 1)
+	  __builtin_abort ();
+	/* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks)
+	   and each task has at least one iteration.  If strict modifier is present,
+	   first set of tasks has ceil (# of loop iters / num_tasks) iterations,
+	   followed by possibly empty set of tasks with floor (# of loop iters / num_tasks)
+	   iterations.  */
+	if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 54
+	    || ntasks != 9 || min_iters != 6 || max_iters != 6 || sep != 9)
+	  __builtin_abort ();
+	if (test (0, 57, 1, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 57
+	    || ntasks != 9 || min_iters != 6 || max_iters != 7 || sep != 3)
+	  __builtin_abort ();
+	if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 13
+	    || ntasks != 13 || min_iters != 1 || max_iters != 1 || sep != 13)
+	  __builtin_abort ();
+      }
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list