This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [gomp4] optimize GOMP_MAP_TO_PSET
- From: Thomas Schwinge <thomas at codesourcery dot com>
- To: Cesar Philippidis <cesar at codesourcery dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Fortran List <fortran at gcc dot gnu dot org>
- Date: Mon, 30 Jan 2017 11:26:31 +0100
- Subject: Re: [gomp4] optimize GOMP_MAP_TO_PSET
- Authentication-results: sourceware.org; auth=none
- References: <551a81a0-8ad4-9250-d8cd-68378e06d8fc@codesourcery.com>
Hi!
On Fri, 27 Jan 2017 08:06:22 -0800, Cesar Philippidis <cesar@codesourcery.com> wrote:
> This patch optimizes GOMP_MAP_TO_PSET in libgomp by installing the
> remapped pointer to the array data directly in the PSET, instead of
> uploading it separately with GOMP_MAP_POINTER. Effectively this
> eliminates the GOMP_MAP_POINTER that is associated with the PSET,
> thereby eliminating an additional host2dev data transfer.
>
> While it does work, it's not quite as effective as I had hope it would
> be. I'm only observing about 0.05s, if that, in CloverLeaf, and arguably
> that's statistical noise.
Ah, too bad.
> This is probably because CloverLeaf makes use
> of ACC DATA regions in the critical sections, so all of those PSETs and
> POINTERs are already preset on the accelerator.
>
> One thing I don't like about this patch is that I'm updating the host's
> copy of the PSET prior to uploading it. The host's PSET does get
> restored prior to returning from gomp_map_vars, however that might
> impact things if the host were to run in multi-threaded applications.
> Maybe I'll drop this patch from gomp4 since it's not very effective.
... also there is some bug somewhere; I see:
PASS: libgomp.fortran/examples-4/async_target-2.f90 -O0 (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O0 execution test
PASS: libgomp.fortran/examples-4/async_target-2.f90 -O1 (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O1 execution test
PASS: libgomp.fortran/examples-4/async_target-2.f90 -O2 (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O2 execution test
PASS: libgomp.fortran/examples-4/async_target-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test
PASS: libgomp.fortran/examples-4/async_target-2.f90 -O3 -g (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O3 -g execution test
PASS: libgomp.fortran/examples-4/async_target-2.f90 -Os (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -Os execution test
..., and:
PASS: libgomp.fortran/target3.f90 -O0 (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O0 execution test
PASS: libgomp.fortran/target3.f90 -O1 (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O1 execution test
PASS: libgomp.fortran/target3.f90 -O2 (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O2 execution test
PASS: libgomp.fortran/target3.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test
PASS: libgomp.fortran/target3.f90 -O3 -g (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O3 -g execution test
PASS: libgomp.fortran/target3.f90 -Os (test for excess errors)
[-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -Os execution test
In all cases, the run-time error message is:
libgomp: Pointer target of array section wasn't mapped
For reference, I'm appending the patch, which wasn't included in the
original email.
Grüße
Thomas
commit 29f783a0e2162fea3e21e7f4295bc16f252e1c1f
Author: cesar <cesar@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Fri Jan 27 15:51:52 2017 +0000
Optimize GOMP_MAP_TO_PSET.
libgomp/
* target.c (gomp_map_pset): New function.
(gomp_map_vars): Update GOMP_MAP_POINTER with GOMP_MAP_TO_PSET, when
possible.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@244984 138bc75d-0d04-0410-961f-82ee72b054a4
---
libgomp/ChangeLog.gomp | 6 +++
libgomp/target.c | 101 ++++++++++++++++++++++++++++++++++---------------
2 files changed, 77 insertions(+), 30 deletions(-)
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 40b536f..0fb5297 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,5 +1,11 @@
2017-01-27 Cesar Philippidis <cesar@codesourcery.com>
+ * target.c (gomp_map_pset): New function.
+ (gomp_map_vars): Update GOMP_MAP_POINTER with GOMP_MAP_TO_PSET, when
+ possible.
+
+2017-01-27 Cesar Philippidis <cesar@codesourcery.com>
+
* plugin/plugin-nvptx.c (nvptx_exec): Make aware of
GOMP_MAP_FIRSTPRIVATE_INT host addresses.
* testsuite/libgomp.oacc-c++/firstprivate-int.C: New test.
diff --git libgomp/target.c libgomp/target.c
index 557f565..590b7dd 100644
--- libgomp/target.c
+++ libgomp/target.c
@@ -295,6 +295,37 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
(void *) &cur_node.tgt_offset, sizeof (void *));
}
+static uintptr_t
+gomp_map_pset (struct target_mem_desc *tgt, uintptr_t host_ptr,
+ uintptr_t target_offset, uintptr_t bias)
+{
+ struct gomp_device_descr *devicep = tgt->device_descr;
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+
+ cur_node.host_start = host_ptr;
+
+ /* Add bias to the pointer value. */
+ cur_node.host_start += bias;
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Pointer target of array section wasn't mapped");
+ }
+
+ cur_node.host_start -= n->host_start;
+ cur_node.tgt_offset
+ = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
+ /* At this point tgt_offset is target address of the
+ array section. Now subtract bias to get what we want
+ to initialize the pointer with. */
+ cur_node.tgt_offset -= bias;
+
+ return cur_node.tgt_offset;
+}
+
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
size_t first, size_t i, void **hostaddrs,
@@ -984,36 +1015,46 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
case GOMP_MAP_TO_PSET:
/* FIXME: see above FIXME comment. */
- gomp_copy_host2dev (devicep,
- (void *) (tgt->tgt_start
- + k->tgt_offset),
- (void *) k->host_start,
- k->host_end - k->host_start);
-
- for (j = i + 1; j < mapnum; j++)
- if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
- j)
- & typemask))
- break;
- else if ((uintptr_t) hostaddrs[j] < k->host_start
- || ((uintptr_t) hostaddrs[j] + sizeof (void *)
- > k->host_end))
- break;
- else
- {
- tgt->list[j].key = k;
- tgt->list[j].copy_from = false;
- tgt->list[j].always_copy_from = false;
- if (k->refcount != REFCOUNT_INFINITY)
- k->refcount++;
- gomp_map_pointer (tgt,
- (uintptr_t) *(void **) hostaddrs[j],
- k->tgt_offset
- + ((uintptr_t) hostaddrs[j]
- - k->host_start),
- sizes[j]);
- i++;
- }
+ {
+ bool found_pointer = false;
+ for (j = i + 1; j < mapnum; j++)
+ if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
+ j)
+ & typemask))
+ break;
+ else if ((uintptr_t) hostaddrs[j] < k->host_start
+ || ((uintptr_t) hostaddrs[j] + sizeof (void *)
+ > k->host_end))
+ break;
+ else
+ {
+ uintptr_t tptr = 0;
+ uintptr_t toffset =
+ gomp_map_pset (tgt,
+ (uintptr_t) *(void **)
+ hostaddrs[j],
+ k->tgt_offset
+ + ((uintptr_t) hostaddrs[j]
+ - k->host_start),
+ sizes[j]);
+ tptr = *(uintptr_t *) hostaddrs[i];
+ *(uintptr_t *) hostaddrs[i]= toffset;
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start
+ + k->tgt_offset),
+ (void *) k->host_start,
+ k->host_end - k->host_start);
+ *(uintptr_t *) hostaddrs[i] = tptr;
+ i++;
+ found_pointer = true;
+ }
+ if (!found_pointer)
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start
+ + k->tgt_offset),
+ (void *) k->host_start,
+ k->host_end - k->host_start);
+ }
break;
case GOMP_MAP_FORCE_PRESENT:
{