[Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices
Marcel Vollweiler
marcel@codesourcery.com
Wed Aug 3 12:40:13 GMT 2022
Hi Jakub,
This patch was reduced a bit and most of your comments were considered in the
last submission of the environment variable syntax extension patch
(https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html). This patch
also builds on that envvar patch version.
The nteams-var related content was moved from this patch to the envvar patch as
that is closely connected. However, additional testing and testing of copy back
device-specific nteams-var ICV values is still included in this patch together
with the teams-thread-limit-var content.
>> --- a/gcc/gimplify.cc
>> +++ b/gcc/gimplify.cc
>> @@ -13994,7 +13994,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
>> struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
>>
>> if (teams == NULL_TREE)
>> - num_teams_upper = integer_one_node;
>> + num_teams_upper = integer_minus_two_node;
>
> No, please don't introduce this, it is quite costly to have a GC trees
> like integer_one_node, so they should stay for the most commonly used
> numbers, -2 isn't like that. Just build_int_cst (integer_type_node, -2).
integer_minus_two_node was replaced by "build_int_cst (integer_type_node, -2)".
>
>> --- a/gcc/tree-core.h
>> +++ b/gcc/tree-core.h
>> @@ -642,6 +642,7 @@ enum tree_index {
>> TI_INTEGER_ONE,
>> TI_INTEGER_THREE,
>> TI_INTEGER_MINUS_ONE,
>> + TI_INTEGER_MINUS_TWO,
>> TI_NULL_POINTER,
>>
>> TI_SIZE_ZERO,
>> diff --git a/gcc/tree.cc b/gcc/tree.cc
>> index 8f83ea1..8cb474d 100644
>> --- a/gcc/tree.cc
>> +++ b/gcc/tree.cc
>> @@ -9345,6 +9345,7 @@ build_common_tree_nodes (bool signed_char)
>> integer_one_node = build_int_cst (integer_type_node, 1);
>> integer_three_node = build_int_cst (integer_type_node, 3);
>> integer_minus_one_node = build_int_cst (integer_type_node, -1);
>> + integer_minus_two_node = build_int_cst (integer_type_node, -2);
>>
>> size_zero_node = size_int (0);
>> size_one_node = size_int (1);
>> diff --git a/gcc/tree.h b/gcc/tree.h
>> index cea49a5..1aeb009 100644
>> --- a/gcc/tree.h
>> +++ b/gcc/tree.h
>> @@ -4206,6 +4206,7 @@ tree_strip_any_location_wrapper (tree exp)
>> #define integer_one_node global_trees[TI_INTEGER_ONE]
>> #define integer_three_node global_trees[TI_INTEGER_THREE]
>> #define integer_minus_one_node global_trees[TI_INTEGER_MINUS_ONE]
>> +#define integer_minus_two_node global_trees[TI_INTEGER_MINUS_TWO]
>> #define size_zero_node global_trees[TI_SIZE_ZERO]
>> #define size_one_node global_trees[TI_SIZE_ONE]
>> #define bitsize_zero_node global_trees[TI_BITSIZE_ZERO]
>
> And drop the above 3 hunks.
Removed.
>
>> --- a/libgomp/config/gcn/icv-device.c
>> +++ b/libgomp/config/gcn/icv-device.c
>> @@ -37,6 +37,7 @@ volatile int GOMP_DEFAULT_DEVICE_VAR;
>> volatile int GOMP_MAX_ACTIVE_LEVELS_VAR;
>> volatile omp_proc_bind_t GOMP_BIND_VAR;
>> volatile int GOMP_NTEAMS_VAR;
>> +volatile int GOMP_TEAMS_THREAD_LIMIT_VAR;
>
> I really don't like this copying of individual ICVs one by one to the
> device, copy a struct containing them and access fields in that struct.
I recently changed this in
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html. So there is
one struct containing all ICVs that are copied from host to the device and back.
>
>> --- a/libgomp/libgomp-plugin.h
>> +++ b/libgomp/libgomp-plugin.h
>> @@ -116,6 +116,7 @@ struct addr_pair
>> #define GOMP_MAX_ACTIVE_LEVELS_VAR __gomp_max_active_levels
>> #define GOMP_BIND_VAR __gomp_bind
>> #define GOMP_NTEAMS_VAR __gomp_nteams
>> +#define GOMP_TEAMS_THREAD_LIMIT_VAR __gomp_teams_thread_limit_var
>
> Likewise here.
Those were all removed.
>
>> @@ -527,13 +538,19 @@ struct gomp_icv_list {
>>
>> extern void *gomp_get_icv_value_ptr (struct gomp_icv_list **list,
>> int device_num);
>> -extern struct gomp_icv_list *gomp_run_sched_var_dev_list;
>> -extern struct gomp_icv_list *gomp_run_sched_chunk_size_dev_list;
>> +extern struct gomp_icv_list* gomp_add_device_specific_icv (int dev_num,
>> + size_t size,
>> + struct gomp_icv_list **list);
>> +extern struct gomp_icv_list *gomp_initial_run_sched_var_dev_list;
>> +extern struct gomp_icv_list *gomp_initial_run_sched_chunk_size_dev_list;
>> +extern struct gomp_icv_list *gomp_initial_max_active_levels_var_dev_list;
>> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_dev_list;
>> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_dev_list;
>> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_len_dev_list;
>> +extern struct gomp_icv_list *gomp_initial_nteams_var_dev_list;
>> +
>> extern struct gomp_icv_list *gomp_nteams_var_dev_list;
>> -extern struct gomp_icv_list *gomp_max_active_levels_var_dev_list;
>> -extern struct gomp_icv_list *gomp_proc_bind_var_dev_list;
>> -extern struct gomp_icv_list *gomp_proc_bind_var_list_dev_list;
>> -extern struct gomp_icv_list *gomp_proc_bind_var_list_len_dev_list;
>> +extern struct gomp_icv_list *gomp_teams_thread_limit_var_dev_list;
>
> Nor these per-var lists. For a specific device, walk the list with
> all the vars in it, start with the most specific (matching dev number),
> then just dev and then all and fill in from it what is going to be copied.
The above lists were removed and instead one list for device-specific ICV
structs was introduced in the above mentioned patch.
>> --- a/libgomp/plugin/plugin-gcn.c
>> +++ b/libgomp/plugin/plugin-gcn.c
>> @@ -572,7 +572,8 @@ static char *GOMP_ICV_STRINGS[] =
>> XSTRING (GOMP_DYN_VAR),
>> XSTRING (GOMP_MAX_ACTIVE_LEVELS_VAR),
>> XSTRING (GOMP_BIND_VAR),
>> - XSTRING (GOMP_NTEAMS_VAR)
>> + XSTRING (GOMP_NTEAMS_VAR),
>> + XSTRING (GOMP_TEAMS_THREAD_LIMIT_VAR)
>
> Then you don't need to e.g. track the names of the individual vars, just
> one for the whole ICV block.
That array was also removed.
The patch was tested on x86_64-linux with nvptx and
amdgcn offloading without regression.
Marcel
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
-------------- next part --------------
This patch adds support for omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of
device-specific ICV values (specified as environment variables or changed on a
device). In order to reuse device-specific ICV values, a copy back mechanism is
implemented that copies ICV values back from device to the host.
gcc/ChangeLog:
* gimplify.cc (optimize_target_teams): Set initial num_teams_upper
to "-2" instead of "1" for non-existing num_teams clause in order to
disambiguate from the case of an existing num_teams clause with value 1.
libgomp/ChangeLog:
* config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to
allow processing of device-specific values.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* icv-device.c (omp_get_teams_thread_limit): Likewise.
(ialias): Likewise.
(omp_set_teams_thread_limit): Likewise.
* icv.c (omp_set_teams_thread_limit): Removed.
(omp_get_teams_thread_limit): Likewise.
(ialias): Likewise.
* target.c (get_gomp_offload_icvs): Added teams_thread_limit_var
handling.
(gomp_load_image_to_device): Added a size check for the ICVs struct
variable.
(gomp_copy_back_icvs): New function that is used in GOMP_target_ext to
copy back the ICV values from device to host.
(GOMP_target_ext): Update the number of teams and threads in the kernel
args also considering device-specific values.
* testsuite/libgomp.c-c++-common/icv-4.c: Bugfix.
* testsuite/libgomp.c-c++-common/icv-5.c: Extended.
* testsuite/libgomp.c-c++-common/icv-6.c: Extended.
* testsuite/libgomp.c-c++-common/icv-7.c: Extended.
* testsuite/libgomp.c-c++-common/icv-8.c: Extended.
* testsuite/libgomp.c-c++-common/icv-9.c: New test.
* testsuite/libgomp.fortran/icv-5.f90: New test.
* testsuite/libgomp.fortran/icv-6.f90: New test.
gcc/testsuite/ChangeLog:
* c-c++-common/gomp/target-teams-1.c: Adapt expected values for
num_teams from "1" to "-2" in cases without num_teams clause.
* g++.dg/gomp/target-teams-1.C: Likewise.
* gfortran.dg/gomp/defaultmap-4.f90: Likewise.
* gfortran.dg/gomp/defaultmap-5.f90: Likewise.
* gfortran.dg/gomp/defaultmap-6.f90: Likewise.
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 2ac7ca0..468fc2b 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -14103,7 +14103,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
if (teams == NULL_TREE)
- num_teams_upper = integer_one_node;
+ num_teams_upper = build_int_cst (integer_type_node, -2);
else
for (c = OMP_TEAMS_CLAUSES (teams); c; c = OMP_CLAUSE_CHAIN (c))
{
diff --git a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
index 51b8d48..74d60e1 100644
--- a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
@@ -81,5 +81,5 @@ foo (int a, int b, long c, long d)
/* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */
/* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */
-/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/target-teams-1.C b/gcc/testsuite/g++.dg/gomp/target-teams-1.C
index f78a608..29e5597 100644
--- a/gcc/testsuite/g++.dg/gomp/target-teams-1.C
+++ b/gcc/testsuite/g++.dg/gomp/target-teams-1.C
@@ -88,5 +88,5 @@ foo (int a, int b, long c, long d)
/* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */
/* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */
-/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */
diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90 b/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
index 7b182b5..9081159 100644
--- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
@@ -141,5 +141,5 @@ end
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxparr \\\[len:" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(alloc\\)" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(alloc:scalar\\) defaultmap\\(to:aggregate\\) defaultmap\\(tofrom:allocatable\\) defaultmap\\(firstprivate:pointer\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(alloc\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(alloc:scalar\\) defaultmap\\(to:aggregate\\) defaultmap\\(tofrom:allocatable\\) defaultmap\\(firstprivate:pointer\\)" 1 "gimple" } }
diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90 b/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
index 1391274..91566ed 100644
--- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
@@ -141,5 +141,5 @@ end
! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\*strxp \\\[len:" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(to\\)" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(to:scalar\\) defaultmap\\(tofrom:aggregate\\) defaultmap\\(firstprivate:allocatable\\) defaultmap\\(default:pointer\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(to\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(to:scalar\\) defaultmap\\(tofrom:aggregate\\) defaultmap\\(firstprivate:allocatable\\) defaultmap\\(default:pointer\\)" 1 "gimple" } }
diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90 b/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
index 9a81d0f..867e41a 100644
--- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
@@ -101,4 +101,4 @@ end
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxparr \\\[len:" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(default\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(default\\)" 1 "gimple" } }
diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c
index bf757ba..eb68881 100644
--- a/libgomp/config/gcn/icv-device.c
+++ b/libgomp/config/gcn/icv-device.c
@@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams)
GOMP_ADDITIONAL_ICVS.nteams = num_teams;
}
+int
+omp_get_teams_thread_limit (void)
+{
+ return GOMP_ADDITIONAL_ICVS.teams_thread_limit;
+}
+
+void
+omp_set_teams_thread_limit (int thread_limit)
+{
+ if (thread_limit >= 0)
+ GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit;
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
@@ -89,3 +102,5 @@ ialias (omp_is_initial_device)
ialias (omp_get_device_num)
ialias (omp_get_max_teams)
ialias (omp_set_num_teams)
+ialias (omp_get_teams_thread_limit)
+ialias (omp_set_teams_thread_limit)
diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c
index 6f869be..a3f00cf 100644
--- a/libgomp/config/nvptx/icv-device.c
+++ b/libgomp/config/nvptx/icv-device.c
@@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams)
GOMP_ADDITIONAL_ICVS.nteams = num_teams;
}
+int
+omp_get_teams_thread_limit (void)
+{
+ return GOMP_ADDITIONAL_ICVS.teams_thread_limit;
+}
+
+void
+omp_set_teams_thread_limit (int thread_limit)
+{
+ if (thread_limit >= 0)
+ GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit;
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
@@ -89,3 +102,5 @@ ialias (omp_is_initial_device)
ialias (omp_get_device_num)
ialias (omp_get_max_teams)
ialias (omp_set_num_teams)
+ialias (omp_get_teams_thread_limit)
+ialias (omp_set_teams_thread_limit)
diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c
index d8acf0e..48607ce 100644
--- a/libgomp/icv-device.c
+++ b/libgomp/icv-device.c
@@ -97,3 +97,20 @@ omp_set_num_teams (int num_teams)
}
ialias (omp_set_num_teams)
+
+int
+omp_get_teams_thread_limit (void)
+{
+ return gomp_teams_thread_limit_var;
+}
+
+ialias (omp_get_teams_thread_limit)
+
+void
+omp_set_teams_thread_limit (int thread_limit)
+{
+ if (thread_limit >= 0)
+ gomp_teams_thread_limit_var = thread_limit;
+}
+
+ialias (omp_set_teams_thread_limit)
diff --git a/libgomp/icv.c b/libgomp/icv.c
index df423c0..9aef91c 100644
--- a/libgomp/icv.c
+++ b/libgomp/icv.c
@@ -148,19 +148,6 @@ omp_get_supported_active_levels (void)
return gomp_supported_active_levels;
}
-void
-omp_set_teams_thread_limit (int thread_limit)
-{
- if (thread_limit >= 0)
- gomp_teams_thread_limit_var = thread_limit;
-}
-
-int
-omp_get_teams_thread_limit (void)
-{
- return gomp_teams_thread_limit_var;
-}
-
int
omp_get_cancellation (void)
{
@@ -261,8 +248,6 @@ ialias (omp_get_thread_limit)
ialias (omp_set_max_active_levels)
ialias (omp_get_max_active_levels)
ialias (omp_get_supported_active_levels)
-ialias (omp_set_teams_thread_limit)
-ialias (omp_get_teams_thread_limit)
ialias (omp_get_cancellation)
ialias (omp_get_proc_bind)
ialias (omp_get_max_task_priority)
diff --git a/libgomp/target.c b/libgomp/target.c
index 1624938..6160deb 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2143,6 +2143,19 @@ get_gomp_offload_icvs (int dev_num)
new->icvs.nteams = gomp_default_icv_values.nteams_var;
if (dev_x != NULL
+ && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
+ new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
+ else if (dev != NULL
+ && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
+ new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
+ else if (all != NULL
+ && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
+ new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
+ else
+ new->icvs.teams_thread_limit
+ = gomp_default_icv_values.teams_thread_limit_var;
+
+ if (dev_x != NULL
&& gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
new->icvs.default_device = dev_x->icvs.default_device_var;
else if (dev != NULL
@@ -2278,24 +2291,31 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
int dev_num = (int) (devicep - &devices[0]);
struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
size_t var_size = var->end - var->start;
-
+ if (var_size != sizeof (struct gomp_offload_icvs))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ if (is_register_lock)
+ gomp_mutex_unlock (®ister_lock);
+ gomp_fatal ("offload plugin managed 'icv struct' not of expected "
+ "format");
+ }
/* Copy the ICVs variable to place on device memory, hereby
actually designating its device number into effect. */
gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
var_size, false, NULL);
- splay_tree_key k = &array->key;
- k->host_start = (uintptr_t) icvs;
- k->host_end =
- k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
- k->tgt = tgt;
- k->tgt_offset = var->start;
- k->refcount = REFCOUNT_INFINITY;
- k->dynamic_refcount = 0;
- k->aux = NULL;
- array->left = NULL;
- array->right = NULL;
- splay_tree_insert (&devicep->mem_map, array);
- array++;
+ splay_tree_key k = &array->key;
+ k->host_start = (uintptr_t) icvs;
+ k->host_end =
+ k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
+ k->tgt = tgt;
+ k->tgt_offset = var->start;
+ k->refcount = REFCOUNT_INFINITY;
+ k->dynamic_refcount = 0;
+ k->aux = NULL;
+ array->left = NULL;
+ array->right = NULL;
+ splay_tree_insert (&devicep->mem_map, array);
+ array++;
}
}
@@ -2757,6 +2777,20 @@ clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
return flags;
}
+static void
+gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
+{
+ struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
+ if (item == NULL)
+ return;
+
+ void *host_ptr = &item->icvs;
+ void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
+ if (dev_ptr != NULL)
+ gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
+ sizeof (struct gomp_offload_icvs));
+}
+
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2789,6 +2823,142 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
size_t tgt_align = 0, tgt_size = 0;
bool fpc_done = false;
+ /* Obtain the original TEAMS and THREADS values from ARGS. */
+ intptr_t orig_teams = 1, orig_threads = 0;
+ size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
+ void **tmpargs = args;
+ while (*tmpargs)
+ {
+ intptr_t id = (intptr_t) *tmpargs++, val;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ {
+ val = (intptr_t) *tmpargs++;
+ len = 2;
+ }
+ else
+ {
+ val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
+ len = 1;
+ }
+ num_args += len;
+ if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
+ continue;
+ val = val > INT_MAX ? INT_MAX : val;
+ if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
+ {
+ orig_teams = val;
+ teams_len = len;
+ }
+ else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
+ {
+ orig_threads = val;
+ threads_len = len;
+ }
+ }
+
+ intptr_t new_teams = orig_teams, new_threads = orig_threads;
+ /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
+ ORIG_TEAMS == -1: Teams construct with NUM_TEAMS clause specified, but the
+ value could not be specified. No Change.
+ ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
+ Set device-specific value.
+ ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
+ No change. */
+ if (orig_teams == -2)
+ new_teams = 1;
+ else if (orig_teams == 0)
+ {
+ struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
+ if (item != NULL)
+ new_teams = item->icvs.nteams;
+ }
+ /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
+ region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
+ e.g. a THREAD_LIMIT clause. */
+ if (orig_teams >= -2 && orig_threads == 0)
+ {
+ struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
+ if (item != NULL)
+ new_threads = item->icvs.teams_thread_limit;
+ }
+
+ /* Copy and change the arguments list only if TEAMS or THREADS need to be
+ updated. */
+ void **new_args = args;
+ if (orig_teams != new_teams || orig_threads != new_threads)
+ {
+ size_t tms_len = (orig_teams == new_teams
+ ? teams_len
+ : (new_teams > -(1 << 15) && new_teams < (1 << 15)
+ ? 1 : 2));
+ size_t ths_len = (orig_threads == new_threads
+ ? threads_len
+ : (new_threads > -(1 << 15) && new_threads < (1 << 15)
+ ? 1 : 2));
+ /* One additional item after the last arg must be NULL. */
+ size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
+ + ths_len + 1;
+ new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
+
+ tmpargs = args;
+ void **tmp_new_args = new_args;
+ while (*tmpargs)
+ {
+ intptr_t id = (intptr_t) *tmpargs;
+ if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
+ && orig_teams != new_teams)
+ || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
+ && orig_threads != new_threads))
+ {
+ tmpargs++;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ tmpargs++;
+ }
+ else
+ {
+ *tmp_new_args++ = *tmpargs++;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ *tmp_new_args++ = *tmpargs++;
+ }
+ }
+
+ if (orig_teams != new_teams)
+ {
+ intptr_t new_val = new_teams;
+ if (tms_len == 1)
+ {
+ new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
+ | GOMP_TARGET_ARG_NUM_TEAMS;
+ *tmp_new_args++ = (void *) new_val;
+ }
+ else
+ {
+ *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
+ | GOMP_TARGET_ARG_NUM_TEAMS);
+ *tmp_new_args++ = (void *) new_val;
+ }
+ }
+
+ if (orig_threads != new_threads)
+ {
+ intptr_t new_val = new_threads;
+ if (ths_len == 1)
+ {
+ new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
+ | GOMP_TARGET_ARG_THREAD_LIMIT;
+ *tmp_new_args++ = (void *) new_val;
+ }
+ else
+ {
+ *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
+ | GOMP_TARGET_ARG_THREAD_LIMIT);
+ *tmp_new_args++ = (void *) new_val;
+ }
+ }
+
+ *tmp_new_args = NULL;
+ }
+
flags = clear_unsupported_flags (devicep, flags);
if (flags & GOMP_TARGET_FLAG_NOWAIT)
@@ -2827,7 +2997,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
&& !thr->task->final_task)
{
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
- sizes, kinds, flags, depend, args,
+ sizes, kinds, flags, depend, new_args,
GOMP_TARGET_TASK_BEFORE_MAP);
return;
}
@@ -2873,7 +3043,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
tgt_align, tgt_size);
}
}
- gomp_target_fallback (fn, hostaddrs, devicep, args);
+ gomp_target_fallback (fn, hostaddrs, devicep, new_args);
return;
}
@@ -2903,7 +3073,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
}
devicep->run_func (devicep->target_id, fn_addr,
tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
- args);
+ new_args);
if (tgt_vars)
{
htab_clear (refcount_set);
@@ -2911,6 +3081,12 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
}
if (refcount_set)
htab_free (refcount_set);
+
+ /* Copy back ICVs from device to host.
+ HOST_PTR is expected to exist since it was added in
+ gomp_load_image_to_device if not already available. */
+ gomp_copy_back_icvs (devicep, device);
+
}
/* Host fallback for GOMP_target_data{,_ext} routines. */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
index b987a33..9da0d63 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
@@ -16,7 +16,7 @@ main ()
}
else
omp_set_num_teams (6);
- if (getenv ("OMP_TEAMS_THREAD_LIMIT") == NULL
+ if (getenv ("OMP_TEAMS_THREAD_LIMIT") != NULL
&& strcmp (getenv ("OMP_TEAMS_THREAD_LIMIT"), "12") == 0)
{
if (omp_get_teams_thread_limit () != 12)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
index 82108bce..82d8e76 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
@@ -1,25 +1,205 @@
+/* { dg-additional-options "-DAMD" { target offload_target_amdgcn } } */
/* { dg-do run } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "42" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "43" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "44" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "45" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "46" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS "47" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS "5" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" } */
#include <omp.h>
#include <stdlib.h>
+#include <unistd.h>
int
main (int argc, char *const *argv)
{
- if (omp_get_max_teams () != 47)
+ if (omp_get_max_teams () != 5
+ || omp_get_teams_thread_limit () != 4)
abort ();
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 5
+ || omp_get_team_num () >= 5)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 4
+ || omp_get_thread_num () >= 4)
+ abort ();
+ }
+
+ omp_set_num_teams (4);
+ omp_set_teams_thread_limit (3);
+ if (omp_get_max_teams () != 4
+ || omp_get_teams_thread_limit () != 3)
+ abort ();
+
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 4
+ || omp_get_team_num () >= 4)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3
+ || omp_get_thread_num () >= 3)
+ abort ();
+ }
+
+ #pragma omp teams num_teams(3) thread_limit(2)
+ {
+ if (omp_get_num_teams () != 3
+ || omp_get_team_num () >= 3)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 2
+ || omp_get_thread_num () >= 2)
+ abort ();
+ }
+
+ #pragma omp teams num_teams(5) thread_limit(4)
+ {
+ if (omp_get_num_teams () != 5
+ || omp_get_team_num () >= 5)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 4
+ || omp_get_thread_num () >= 4)
+ abort ();
+ }
+
int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
+ if (num_devices <= 0)
+ return 0;
+
for (int i=0; i < num_devices; i++)
- #pragma omp target device (i)
- if (omp_get_max_teams () != 42 + i)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 6 + i
+ || omp_get_teams_thread_limit () != 5 + i)
abort ();
+ #pragma omp target device (i)
+ #pragma omp teams
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 5 + i
+ || omp_get_thread_num () >= 5 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (5 + i);
+ omp_set_teams_thread_limit (4 + i);
+ if (omp_get_max_teams () != 5 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams and omp_set_teams_thread_limit above set the value
+ of nteams-var and teams-thread-limit-var ICVs on device 'i', which has
+ scope 'device' and should be avaible in subsequent target regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 5 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 5 + i
+ || omp_get_team_num () >= 5 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 4 + i
+ || omp_get_thread_num () >= 4 + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(6 + i) thread_limit(5 + i)
+ {
+ if (omp_get_num_teams () > 6 + i
+ || omp_get_team_num () >= 6 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 5 + i
+ || omp_get_thread_num () >= 5 + i
+ || omp_get_num_teams () > 6 + i
+ || omp_get_team_num () >= 6 + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(4 + i) thread_limit(3 + i)
+ {
+ if (omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3 + i
+ || omp_get_thread_num () >= 3 + i
+ || omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ #pragma omp teams thread_limit(3 + i) num_teams(4 + i)
+ {
+ if (omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3 + i
+ || omp_get_thread_num () >= 3 + i
+ || omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ }
+
+ /* This tests a large number of teams and threads. If it is larger than
+ 2^15+1 then the according argument in the kernels arguments list
+ is encoded with two items instead of one. On NVIDIA there is an
+ adjustment for too large teams and threads. For AMD such adjustment
+ exists only for threads and will cause runtime errors with a two large
+ number of teams. */
+ intptr_t large_num_teams = 66000;
+#ifdef AMD
+ large_num_teams = 8;
+#endif
+ intptr_t large_threads_limit = 67000;
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (large_num_teams + i);
+ omp_set_teams_thread_limit (large_threads_limit + i);
+ if (omp_get_max_teams () != large_num_teams + i
+ || omp_get_teams_thread_limit () != large_threads_limit + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != large_num_teams + i
+ || omp_get_teams_thread_limit () != large_threads_limit + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > large_num_teams + i
+ || omp_get_team_num () >= large_num_teams + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > large_threads_limit + i
+ || omp_get_thread_num () >= large_threads_limit + i)
+ abort ();
+ }
+
+ }
return 0;
}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
index 05f07c7..7fdcaf0 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
@@ -1,9 +1,10 @@
/* { dg-do run } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "43" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */
/* { dg-set-target-env-var OMP_SCHEDULE_ALL "guided,4" } */
/* { dg-set-target-env-var OMP_DYNAMIC_ALL "true" } */
-/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "44" } */
/* { dg-set-target-env-var OMP_THREAD_LIMIT_ALL "45" } */
/* { dg-set-target-env-var OMP_NUM_THREADS_ALL "46,3,2" } */
/* { dg-set-target-env-var OMP_MAX_ACTIVE_LEVELS_ALL "47" } */
@@ -12,7 +13,8 @@
/* This tests the hierarchical usage of ICVs on the device, i.e. if
OMP_NUM_TEAMS_DEV_<device_num> is not configured, then the value of
- OMP_NUM_TEAMS_DEV should be used. And if */
+ OMP_NUM_TEAMS_DEV should be used. And if OMP_NUM_TEAMS (without suffix) is
+ not defined, then OMP_NUM_TEAMS_ALL should be used for the host. */
#include <omp.h>
#include <stdlib.h>
@@ -24,10 +26,10 @@ main (int argc, char *const *argv)
int chunk_size;
omp_get_schedule(&kind, &chunk_size);
- if (omp_get_max_teams () != 42
+ if (omp_get_max_teams () != 3
|| !omp_get_dynamic ()
|| kind != 3 || chunk_size != 4
- || omp_get_teams_thread_limit () != 44
+ || omp_get_teams_thread_limit () != 2
|| omp_get_thread_limit () != 45
|| omp_get_max_threads () != 46
|| omp_get_proc_bind () != omp_proc_bind_spread
@@ -36,9 +38,52 @@ main (int argc, char *const *argv)
int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
for (int i=0; i < num_devices; i++)
- #pragma omp target device (i)
- if (omp_get_max_teams () != 43)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 4
+ || omp_get_teams_thread_limit () != 3)
abort ();
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 4
+ || omp_get_team_num () >= 4)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3
+ || omp_get_thread_num () >= 3)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (3 + i);
+ omp_set_teams_thread_limit (2 + i);
+ if (omp_get_max_teams () != 3 + i
+ || omp_get_teams_thread_limit () != 2 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams above set the value of nteams-var ICV on device 'i',
+ which has scope 'device' and should be avaible in subsequent target
+ regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 3 + i
+ || omp_get_teams_thread_limit () != 2 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 3 + i
+ || omp_get_team_num () >= 3 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 2 + i
+ || omp_get_thread_num () >= 2 + i)
+ abort ();
+ }
+ }
return 0;
}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
index 67081dc..ffceaf3 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "7" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */
/* This tests the hierarchical usage of ICVs on the host and on devices, i.e. if
OMP_NUM_TEAMS_DEV_<device_num>, OMP_NUM_TEAMS_DEV, and
@@ -13,14 +14,73 @@
int
main (int argc, char *const *argv)
{
- if (omp_get_max_teams () != 42)
+ if (omp_get_max_teams () != 7
+ || omp_get_teams_thread_limit () != 2)
+ abort ();
+
+ #pragma omp teams
+ if (omp_get_num_teams () > 7
+ || omp_get_team_num () >= 7)
+ abort ();
+
+ omp_set_num_teams (9);
+ omp_set_teams_thread_limit (3);
+ if (omp_get_max_teams () != 9
+ || omp_get_teams_thread_limit () != 3)
+ abort ();
+
+ #pragma omp teams
+ if (omp_get_num_teams () > 9
+ || omp_get_team_num () >= 9)
+ abort ();
+
+ #pragma omp teams num_teams(5)
+ if (omp_get_num_teams () > 5
+ || omp_get_team_num () >= 5)
abort ();
int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
for (int i=0; i < num_devices; i++)
- #pragma omp target device (i)
- if (omp_get_max_teams () != 42)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 7
+ || omp_get_teams_thread_limit () != 2)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ if (omp_get_num_teams () > 7
+ || omp_get_team_num () >= 7)
+ abort ();
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (8 + i);
+ omp_set_teams_thread_limit (4 + i);
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams above set the value of nteams-var ICV on device 'i',
+ which has scope 'device' and should be avaible in subsequent target
+ regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ if (omp_get_num_teams () > 8 + i
+ || omp_get_team_num () >= 8 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(5 + i)
+ if (omp_get_num_teams () != 5 + i)
abort ();
+ }
return 0;
}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-8.c b/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
index adaff5a..ad1dbfc 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
@@ -19,4 +19,4 @@ main (int argc, char *const *argv)
/* { dg-output ".*Invalid device number in OMP_NUM_TEAMS_DEV_01=44 (leading zero).*" } */
/* { dg-output ".*Invalid device number in OMP_NUM_TEAMS_DEV_a=45.*" } */
/* { dg-output ".*Invalid device number in OMP_NUM_TEAMS_DEV_12345678901=46 (too long).*" } */
-/* { dg-output ".*Non-negative device number expected in OMP_NUM_TEAMS_DEV_-1=47.*" } */
+/* { dg-output ".*Non-negative device number expected in OMP_NUM_TEAMS_DEV_-1=47.*" } */
\ No newline at end of file
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-9.c b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c
new file mode 100644
index 0000000..c850342
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+
+/* This tests usage of ICVs on the host and on devices if no corresponding
+ environment variables are configured. */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *const *argv)
+{
+ if (omp_get_max_teams () != 0
+ || omp_get_teams_thread_limit () != 0)
+ abort ();
+
+ omp_set_num_teams (9);
+ omp_set_teams_thread_limit (2);
+ if (omp_get_max_teams () != 9
+ || omp_get_teams_thread_limit () != 2)
+ abort ();
+
+ #pragma omp teams
+ if (omp_get_num_teams () > 9
+ || omp_get_team_num () >= 9)
+ abort ();
+
+ #pragma omp teams num_teams(5)
+ if (omp_get_num_teams () > 5
+ || omp_get_team_num () >= 5)
+ abort ();
+
+ int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
+ for (int i=0; i < num_devices; i++)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 0
+ || omp_get_teams_thread_limit () != 0)
+ abort ();
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (8 + i);
+ omp_set_teams_thread_limit (3 + i);
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 3 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams above set the value of nteams-var ICV on device 'i',
+ which has scope 'device' and should be avaible in subsequent target
+ regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 3 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ if (omp_get_num_teams () > 8 + i
+ || omp_get_team_num () >= 8 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(5 + i)
+ if (omp_get_num_teams () > 5 + i
+ || omp_get_team_num () >= 5 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ ;
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/icv-5.f90 b/libgomp/testsuite/libgomp.fortran/icv-5.f90
new file mode 100644
index 0000000..05a35fa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/icv-5.f90
@@ -0,0 +1,226 @@
+! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" }
+! { dg-set-target-env-var OMP_NUM_TEAMS "5" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" }
+
+use omp_lib
+implicit none (type, external)
+ integer :: num_devices, i, large_num_teams, large_threads_limit
+ logical :: err
+
+ if (omp_get_num_devices () > 3) then
+ num_devices = 3
+ else
+ num_devices = omp_get_num_devices ()
+ end if
+
+ do i=0,num_devices-1
+
+ ! Testing NUM_TEAMS.
+ if (env_is_set_dev ("OMP_NUM_TEAMS_DEV_", i, 6 + i)) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 6 + i) err = .true.
+ !$omp end target
+ if (err) stop 1
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 2
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_num_teams (5 + i)
+ if (omp_get_max_teams () /= 5 + i) err = .true.
+ !$omp end target
+ if (err) stop 3
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 5 + i) err = .true.
+ !$omp end target
+ if (err) stop 4
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 5 + i .or. omp_get_team_num () >= 5 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 5
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams num_teams(6 + i)
+ if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 6
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams num_teams(4 + i)
+ if (omp_get_num_teams () > 4 + i .or. omp_get_team_num () >= 4 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 7
+
+ large_num_teams = 66000
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_num_teams (large_num_teams + i)
+ if (omp_get_max_teams () /= large_num_teams + i) err = .true.
+ !$omp end target
+ if (err) stop 8
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= large_num_teams + i) err = .true.
+ !$omp end target
+ if (err) stop 9
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > large_num_teams + i &
+ .or. omp_get_team_num () >= large_num_teams + i) err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 10
+ end if
+
+ ! Testing TEAMS-THREAD-LIMIT
+ if (env_is_set_dev ("OMP_TEAMS_THREAD_LIMIT_DEV_", i, 5 + i)) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 5 + i) err = .true.
+ !$omp end target
+ if (err) stop 11
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 12
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_teams_thread_limit (4 + i)
+ if (omp_get_teams_thread_limit () /= 4 + i) err = .true.
+ !$omp end target
+ if (err) stop 13
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 4 + i) err = .true.
+ !$omp end target
+ if (err) stop 14
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 4 + i .or. omp_get_thread_num () >= 4 + i) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 15
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams thread_limit(5 + i)
+ !$omp parallel
+ if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 16
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams thread_limit(3 + i)
+ !$omp parallel
+ if (omp_get_thread_limit () > 3 + i .or. omp_get_thread_num () >= 3 + i) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 17
+
+ large_threads_limit = 67000
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_teams_thread_limit (large_threads_limit + i)
+ if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true.
+ !$omp end target
+ if (err) stop 18
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true.
+ !$omp end target
+ if (err) stop 19
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > large_threads_limit + i &
+ .or. omp_get_thread_num () >= large_threads_limit + i) err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 20
+ end if
+
+ end do
+
+contains
+ logical function env_is_set (name, val)
+ character(len=*) :: name, val
+ character(len=40) :: val2
+ integer :: stat
+ call get_environment_variable (name, val2, status=stat)
+ if (stat == 0) then
+ if (val == val2) then
+ env_is_set = .true.
+ return
+ end if
+ else if (stat /= 1) then
+ error stop 30
+ endif
+ env_is_set = .false.
+ end
+ logical function env_is_set_dev (name, dev_num, val)
+ character(len=*) :: name
+ integer :: dev_num, val
+ character(len=64) :: dev_num_str, env_var, val_str
+ dev_num_str = ADJUSTL(dev_num_str)
+ env_var = name // dev_num_str
+ val_str = ADJUSTL(val_str)
+ env_is_set_dev = env_is_set (TRIM(env_var), TRIM(val_str))
+ end
+end
diff --git a/libgomp/testsuite/libgomp.fortran/icv-6.f90 b/libgomp/testsuite/libgomp.fortran/icv-6.f90
new file mode 100644
index 0000000..c8e6a0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/icv-6.f90
@@ -0,0 +1,140 @@
+! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" }
+
+! This test considers the hierarchical usage of ICVs on the device, i.e. if
+! e.g. OMP_NUM_TEAMS_DEV_<device_num> is not configured, then the value of
+! OMP_NUM_TEAMS_DEV should be used for the targets.
+
+use omp_lib
+implicit none (type, external)
+ integer :: num_devices, i, stat, tmp
+ logical :: err
+ character(len=40) :: val
+
+ ! The following environment variables should not be set.
+ call get_environment_variable ("OMP_NUM_TEAMS_DEV_0", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_NUM_TEAMS_DEV_1", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_NUM_TEAMS_DEV_2", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_0", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_1", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_2", val, status=stat)
+ if (stat /= 1) return
+
+ if (omp_get_num_devices () > 3) then
+ num_devices = 3
+ else
+ num_devices = omp_get_num_devices ()
+ end if
+
+ do i=0,num_devices-1
+
+ ! Testing NUM_TEAMS.
+ if (env_is_set ("OMP_NUM_TEAMS_DEV", "4")) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 4) err = .true.
+ !$omp end target
+ if (err) stop 1
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 4 .or. omp_get_team_num () >= 4) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 2
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_num_teams (3 + i)
+ if (omp_get_max_teams () /= 3 + i) err = .true.
+ !$omp end target
+ if (err) stop 3
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 3 + i) err = .true.
+ !$omp end target
+ if (err) stop 4
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 3 + i .or. omp_get_team_num () >= 3 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 5
+ end if
+
+ ! Testing TEAMS-THREAD-LIMIT
+ if (env_is_set ("OMP_TEAMS_THREAD_LIMIT_DEV", "3")) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 3) err = .true.
+ !$omp end target
+ if (err) stop 6
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 3 .or. omp_get_thread_num () >= 3) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 7
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_teams_thread_limit (2 + i)
+ if (omp_get_teams_thread_limit () /= 2 + i) err = .true.
+ !$omp end target
+ if (err) stop 8
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 2 + i) err = .true.
+ !$omp end target
+ if (err) stop 9
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 2 + i .or. omp_get_thread_num () >= 2 + i) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 10
+ end if
+
+ end do
+
+contains
+ logical function env_is_set (name, val)
+ character(len=*) :: name, val
+ character(len=40) :: val2
+ integer :: stat
+ call get_environment_variable (name, val2, status=stat)
+ if (stat == 0) then
+ if (val == val2) then
+ env_is_set = .true.
+ return
+ end if
+ else if (stat /= 1) then
+ error stop 10
+ endif
+ env_is_set = .false.
+ end
+end
More information about the Gcc-patches
mailing list