[committed] openmp, v2: Honor OpenMP 5.1 num_teams lower bound
Jakub Jelinek
jakub@redhat.com
Fri Nov 12 11:44:37 GMT 2021
On Thu, Nov 11, 2021 at 08:03:13PM +0100, Jakub Jelinek via Gcc-patches wrote:
> The following patch implements what I've been talking about earlier,
> honor that for explicit num_teams clause we create at least the
> lower-bound (if not specified, upper-bound) teams in the league.
> For host fallback, it still means we only have one thread doing all the
> teams, sequentially one after another.
> For PTX and GCN, I think the new teams-2.c test and maybe teams-4.c too
> will or might fail.
> For these offloads, I think it is ok to remove symbols no longer used
> from libgomp.a.
> If num_teams_lower is bigger than the provided num_blocks or num_workgroups,
> we should arrange for gomp_num_teams_var to be num_teams_lower - 1,
> stop using the %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num ()
> and instead use for it some .shared var that GOMP_teams4 initializes to
> %ctaid.x or __builtin_gcn_dim_pos (0) when first and for !first
> increment that by num_blocks or num_workgroups each time and only
> return false when we are above num_teams_lower.
> Any help with actually implementing this for the 2 architectures highly
> appreciated.
Testing found many Fortran failures, due to BT_BOOL in fortran actually
not being boolean_type_node.
The following updated version fixes that, bootstrapped/regtested on
x86_64-linux and i686-linux, committed to trunk.
2021-11-12 Jakub Jelinek <jakub@redhat.com>
gcc/
* omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove.
(BUILT_IN_GOMP_TEAMS4): New.
* builtin-types.def (BT_FN_VOID_UINT_UINT): Remove.
(BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
* omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of
GOMP_teams, pass to it also num_teams lower-bound expression
or a dup of upper-bound if it is missing and a flag whether
it is the first call or not.
gcc/fortran/
* types.def (BT_FN_VOID_UINT_UINT): Remove.
(BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
libgomp/
* libgomp_g.h (GOMP_teams4): Declare.
* libgomp.map (GOMP_5.1): Export GOMP_teams4.
* target.c (GOMP_teams4): New function.
* config/nvptx/target.c (GOMP_teams): Remove.
(GOMP_teams4): New function.
* config/gcn/target.c (GOMP_teams): Remove.
(GOMP_teams4): New function.
* testsuite/libgomp.c/teams-4.c (main): Expect exactly 2
teams instead of <= 2.
* testsuite/libgomp.c-c++-common/teams-2.c: New test.
--- gcc/omp-builtins.def.jj 2021-08-20 11:36:30.961244658 +0200
+++ gcc/omp-builtins.def 2021-11-11 17:53:44.092433139 +0100
@@ -442,8 +442,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_U
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA,
"GOMP_target_enter_exit_data",
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, ATTR_NOTHROW_LIST)
-DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS, "GOMP_teams",
- BT_FN_VOID_UINT_UINT, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4",
+ BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg",
BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER,
--- gcc/builtin-types.def.jj 2021-02-04 18:15:05.253113955 +0100
+++ gcc/builtin-types.def 2021-11-11 17:54:12.693023370 +0100
@@ -489,7 +489,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
BT_CONST_VOLATILE_PTR)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
-DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_PTR, BT_UINT, BT_UINT, BT_PTR)
DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_CONST_PTR, BT_UINT, BT_UINT, BT_CONST_PTR)
DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_SIZE)
@@ -680,6 +679,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULL
BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
BT_INT, BT_PTR)
+DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
+ BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
--- gcc/omp-low.c.jj 2021-11-11 14:35:37.633348092 +0100
+++ gcc/omp-low.c 2021-11-12 11:26:58.872919086 +0100
@@ -13902,14 +13902,24 @@ lower_omp_teams (gimple_stmt_iterator *g
tree num_teams = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
OMP_CLAUSE_NUM_TEAMS);
+ tree num_teams_lower = NULL_TREE;
if (num_teams == NULL_TREE)
num_teams = build_int_cst (unsigned_type_node, 0);
else
{
+ num_teams_lower = OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (num_teams);
+ if (num_teams_lower)
+ {
+ num_teams_lower = fold_convert (unsigned_type_node, num_teams_lower);
+ gimplify_expr (&num_teams_lower, &bind_body, NULL, is_gimple_val,
+ fb_rvalue);
+ }
num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
num_teams = fold_convert (unsigned_type_node, num_teams);
gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
}
+ if (num_teams_lower == NULL_TREE)
+ num_teams_lower = num_teams;
tree thread_limit = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
OMP_CLAUSE_THREAD_LIMIT);
if (thread_limit == NULL_TREE)
@@ -13921,6 +13931,30 @@ lower_omp_teams (gimple_stmt_iterator *g
gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
fb_rvalue);
}
+ location_t loc = gimple_location (teams_stmt);
+ tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS4);
+ tree rettype = TREE_TYPE (TREE_TYPE (decl));
+ tree first = create_tmp_var (rettype);
+ gimple_seq_add_stmt (&bind_body,
+ gimple_build_assign (first, build_one_cst (rettype)));
+ tree llabel = create_artificial_label (loc);
+ gimple_seq_add_stmt (&bind_body, gimple_build_label (llabel));
+ gimple *call
+ = gimple_build_call (decl, 4, num_teams_lower, num_teams, thread_limit,
+ first);
+ gimple_set_location (call, loc);
+ tree temp = create_tmp_var (rettype);
+ gimple_call_set_lhs (call, temp);
+ gimple_seq_add_stmt (&bind_body, call);
+
+ tree tlabel = create_artificial_label (loc);
+ tree flabel = create_artificial_label (loc);
+ gimple *cond = gimple_build_cond (NE_EXPR, temp, build_zero_cst (rettype),
+ tlabel, flabel);
+ gimple_seq_add_stmt (&bind_body, cond);
+ gimple_seq_add_stmt (&bind_body, gimple_build_label (tlabel));
+ gimple_seq_add_stmt (&bind_body,
+ gimple_build_assign (first, build_zero_cst (rettype)));
lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
&bind_body, &dlist, ctx, NULL);
@@ -13929,17 +13963,13 @@ lower_omp_teams (gimple_stmt_iterator *g
NULL, ctx);
gimple_seq_add_stmt (&bind_body, teams_stmt);
- location_t loc = gimple_location (teams_stmt);
- tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
- gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
- gimple_set_location (call, loc);
- gimple_seq_add_stmt (&bind_body, call);
-
gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
gimple_omp_set_body (teams_stmt, NULL);
gimple_seq_add_seq (&bind_body, olist);
gimple_seq_add_seq (&bind_body, dlist);
gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
+ gimple_seq_add_stmt (&bind_body, gimple_build_goto (llabel));
+ gimple_seq_add_stmt (&bind_body, gimple_build_label (flabel));
gimple_bind_set_body (bind, bind_body);
pop_gimplify_context (bind);
--- gcc/fortran/types.def.jj 2021-08-20 11:36:30.969244547 +0200
+++ gcc/fortran/types.def 2021-11-11 17:54:35.838691751 +0100
@@ -117,7 +117,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
BT_CONST_VOLATILE_PTR)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
-DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTRMODE,
BT_VOID, BT_PTR, BT_PTRMODE)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_CONST_PTR_SIZE, BT_VOID, BT_CONST_PTR, BT_SIZE)
@@ -173,6 +172,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULL
BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
BT_INT, BT_PTR)
+DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
+ BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
DEF_FUNCTION_TYPE_5 (BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT,
--- libgomp/libgomp_g.h.jj 2021-08-20 11:36:30.970244532 +0200
+++ libgomp/libgomp_g.h 2021-11-11 17:52:35.342418147 +0100
@@ -355,6 +355,7 @@ extern void GOMP_target_enter_exit_data
unsigned short *, unsigned int,
void **);
extern void GOMP_teams (unsigned int, unsigned int);
+extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
/* teams.c */
--- libgomp/libgomp.map.jj 2021-10-11 12:20:21.926063118 +0200
+++ libgomp/libgomp.map 2021-11-11 19:45:27.472110004 +0100
@@ -399,6 +399,7 @@ GOMP_5.1 {
GOMP_error;
GOMP_scope_start;
GOMP_warning;
+ GOMP_teams4;
} GOMP_5.0.1;
OACC_2.0 {
--- libgomp/target.c.jj 2021-10-13 09:55:49.777781910 +0200
+++ libgomp/target.c 2021-11-11 18:00:17.465797133 +0100
@@ -3088,6 +3088,32 @@ GOMP_teams (unsigned int num_teams, unsi
(void) num_teams;
}
+bool
+GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
+ unsigned int thread_limit, bool first)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ if (first)
+ {
+ if (thread_limit)
+ {
+ struct gomp_task_icv *icv = gomp_icv (true);
+ icv->thread_limit_var
+ = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+ }
+ (void) num_teams_high;
+ if (num_teams_low == 0)
+ num_teams_low = 1;
+ thr->num_teams = num_teams_low - 1;
+ thr->team_num = 0;
+ }
+ else if (thr->team_num == thr->num_teams)
+ return false;
+ else
+ ++thr->team_num;
+ return true;
+}
+
void *
omp_target_alloc (size_t size, int device_num)
{
--- libgomp/config/nvptx/target.c.jj 2021-05-26 11:28:42.064386868 +0200
+++ libgomp/config/nvptx/target.c 2021-11-11 19:16:46.368611557 +0100
@@ -26,9 +26,12 @@
#include "libgomp.h"
#include <limits.h>
-void
-GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+bool
+GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
+ unsigned int thread_limit, bool first)
{
+ if (!first)
+ return false;
if (thread_limit)
{
struct gomp_task_icv *icv = gomp_icv (true);
@@ -38,14 +41,15 @@ GOMP_teams (unsigned int num_teams, unsi
unsigned int num_blocks, block_id;
asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
- if (!num_teams || num_teams >= num_blocks)
- num_teams = num_blocks;
- else if (block_id >= num_teams)
- {
- gomp_free_thread (nvptx_thrs);
- asm ("exit;");
- }
- gomp_num_teams_var = num_teams - 1;
+ /* FIXME: If num_teams_lower > num_blocks, we want to loop multiple
+ times for some CTAs. */
+ (void) num_teams_lower;
+ if (!num_teams_upper || num_teams_upper >= num_blocks)
+ num_teams_upper = num_blocks;
+ else if (block_id >= num_teams_upper)
+ return false;
+ gomp_num_teams_var = num_teams_upper - 1;
+ return true;
}
int
--- libgomp/config/gcn/target.c.jj 2021-05-26 11:28:42.064386868 +0200
+++ libgomp/config/gcn/target.c 2021-11-11 19:18:35.873053653 +0100
@@ -26,9 +26,12 @@
#include "libgomp.h"
#include <limits.h>
-void
-GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+bool
+GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
+ unsigned int thread_limit, bool first)
{
+ if (!first)
+ return false;
if (thread_limit)
{
struct gomp_task_icv *icv = gomp_icv (true);
@@ -38,14 +41,15 @@ GOMP_teams (unsigned int num_teams, unsi
unsigned int num_workgroups, workgroup_id;
num_workgroups = __builtin_gcn_dim_size (0);
workgroup_id = __builtin_gcn_dim_pos (0);
- if (!num_teams || num_teams >= num_workgroups)
- num_teams = num_workgroups;
- else if (workgroup_id >= num_teams)
- {
- gomp_free_thread (gcn_thrs ());
- exit (0);
- }
- gomp_num_teams_var = num_teams - 1;
+ /* FIXME: If num_teams_lower > num_workgroups, we want to loop
+ multiple times at least for some workgroups. */
+ (void) num_teams_lower;
+ if (!num_teams_upper || num_teams_upper >= num_workgroups)
+ num_teams_upper = num_workgroups;
+ else if (workgroup_id >= num_teams_upper)
+ return false;
+ gomp_num_teams_var = num_teams_upper - 1;
+ return true;
}
int
--- libgomp/testsuite/libgomp.c/teams-4.c.jj 2021-11-11 14:35:37.700347127 +0100
+++ libgomp/testsuite/libgomp.c/teams-4.c 2021-11-11 19:27:07.198779107 +0100
@@ -20,7 +20,7 @@ main ()
#pragma omp parallel if (0)
#pragma omp target
#pragma omp teams num_teams (2)
- if (omp_get_num_teams () > 2
+ if (omp_get_num_teams () != 2
|| (unsigned) omp_get_team_num () >= 2U)
abort ();
if (omp_get_num_teams () != 4 || (unsigned) team >= 4U)
--- libgomp/testsuite/libgomp.c-c++-common/teams-2.c.jj 2021-11-11 19:28:33.066557475 +0100
+++ libgomp/testsuite/libgomp.c-c++-common/teams-2.c 2021-11-11 19:44:03.837301688 +0100
@@ -0,0 +1,70 @@
+#include <omp.h>
+#include <stdlib.h>
+
+int
+foo ()
+{
+ return 934;
+}
+
+int
+main ()
+{
+ int a[934] = {};
+ int k, e;
+ #pragma omp target map(a)
+ #pragma omp teams num_teams (foo ())
+ {
+ int i = omp_get_team_num ();
+ if (omp_get_num_teams () != 934
+ || (unsigned) i >= 934U
+ || a[i] != 0)
+ abort ();
+ ++a[i];
+ }
+ #pragma omp target map(a)
+ #pragma omp teams num_teams (foo () - 50 : foo ())
+ {
+ int i = omp_get_team_num ();
+ int j = omp_get_num_teams ();
+ if (j < 884
+ || j > 934
+ || (unsigned) i >= (unsigned) j
+ || a[i] != 1)
+ abort ();
+ ++a[i];
+ }
+ #pragma omp target teams map(a) num_teams (foo () / 2)
+ {
+ int i = omp_get_team_num ();
+ if (omp_get_num_teams () != 467
+ || (unsigned) i >= 467U
+ || a[i] != 2)
+ abort ();
+ ++a[i];
+ }
+ #pragma omp target teams map(a) num_teams (foo () / 2 - 50 : foo () / 2)
+ {
+ int i = omp_get_team_num ();
+ int j = omp_get_num_teams ();
+ if (j < 417
+ || j > 467
+ || (unsigned) i >= (unsigned) j
+ || a[i] != 3)
+ abort ();
+ ++a[i];
+ }
+ e = 4;
+ for (k = 0; k < 934; k++)
+ {
+ if (k >= 417 && k < 467 && a[k] == 3)
+ e = 3;
+ else if (k == 467)
+ e = 2;
+ else if (k >= 884 && a[k] == 1)
+ e = 1;
+ if (a[k] != e)
+ abort ();
+ }
+ return 0;
+}
Jakub
More information about the Gcc-patches
mailing list