[gcc r14-9629] openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls

Kwok Yeung kcy@gcc.gnu.org
Fri Mar 22 18:10:54 GMT 2024


https://gcc.gnu.org/g:637e76b90e8b045c5e25206a41e3be55deace8d5

commit r14-9629-g637e76b90e8b045c5e25206a41e3be55deace8d5
Author: Kwok Cheung Yeung <kcyeung@baylibre.com>
Date:   Fri Mar 22 18:07:43 2024 +0000

    openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls
    
    A splay-tree was previously used to lookup equivalent target addresses
    for a given host address on offload targets. However, as splay-trees can
    modify their structure on lookup, they are not suitable for concurrent
    access from separate teams/threads without some form of locking.  This
    patch changes the lookup data structure to a hashtab instead, which does
    not have these issues.
    
    The call to build_indirect_map to initialize the data structure is now
    called from just the first thread of the first team to avoid redundant
    calls to this function.
    
    2024-03-22  Kwok Cheung Yeung  <kcyeung@baylibre.com>
    
            libgomp/
            * config/accel/target-indirect.c: Include string.h and hashtab.h.
            Remove include of splay-tree.h.  Update comments.
            (splay_tree_prefix, splay_tree_c): Delete.
            (struct indirect_map_t): New.
            (hash_entry_type, htab_alloc, htab_free, htab_hash, htab_eq): New.
            (GOMP_INDIRECT_ADD_MAP): Remove volatile qualifier.
            (USE_SPLAY_TREE_LOOKUP): Rename to...
            (USE_HASHTAB_LOOKUP): ..this.
            (indirect_map, indirect_array): Delete.
            (indirect_htab): New.
            (build_indirect_map): Remove locking.  Build indirect map using
            hashtab.
            (GOMP_target_map_indirect_ptr): Use indirect_htab to lookup target
            address.
            (GOMP_target_map_indirect_ptr): Remove volatile qualifier.
            * config/gcn/team.c (gomp_gcn_enter_kernel): Call build_indirect_map
            from first thread of first team only.
            * config/nvptx/team.c (gomp_nvptx_main): Likewise.
            * testsuite/libgomp.c-c++-common/declare-target-indirect-2.c (main):
            Add missing break statements.
            * testsuite/libgomp.fortran/declare-target-indirect-2.f90: Remove
            xfail.

Diff:
---
 libgomp/config/accel/target-indirect.c             | 83 ++++++++++++----------
 libgomp/config/gcn/team.c                          |  7 +-
 libgomp/config/nvptx/team.c                        |  9 ++-
 .../declare-target-indirect-2.c                    | 14 ++--
 .../libgomp.fortran/declare-target-indirect-2.f90  |  1 -
 5 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c
index c60fd547cb6..cfef1ddbc49 100644
--- a/libgomp/config/accel/target-indirect.c
+++ b/libgomp/config/accel/target-indirect.c
@@ -25,60 +25,73 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "libgomp.h"
 
-#define splay_tree_prefix indirect
-#define splay_tree_c
-#include "splay-tree.h"
+struct indirect_map_t
+{
+  void *host_addr;
+  void *target_addr;
+};
+
+typedef struct indirect_map_t *hash_entry_type;
+
+static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
+static inline void htab_free (void *ptr) { free (ptr); }
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  return hash_pointer (element->host_addr);
+}
 
-volatile void **GOMP_INDIRECT_ADDR_MAP = NULL;
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return x->host_addr == y->host_addr;
+}
 
-/* Use a splay tree to lookup the target address instead of using a
-   linear search.  */
-#define USE_SPLAY_TREE_LOOKUP
+void **GOMP_INDIRECT_ADDR_MAP = NULL;
 
-#ifdef USE_SPLAY_TREE_LOOKUP
+/* Use a hashtab to lookup the target address instead of using a linear
+   search.  */
+#define USE_HASHTAB_LOOKUP
 
-static struct indirect_splay_tree_s indirect_map;
-static indirect_splay_tree_node indirect_array = NULL;
+#ifdef USE_HASHTAB_LOOKUP
 
-/* Build the splay tree used for host->target address lookups.  */
+static htab_t indirect_htab = NULL;
+
+/* Build the hashtab used for host->target address lookups.  */
 
 void
 build_indirect_map (void)
 {
   size_t num_ind_funcs = 0;
-  volatile void **map_entry;
-  static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */
+  void **map_entry;
 
   if (!GOMP_INDIRECT_ADDR_MAP)
     return;
 
-  gomp_mutex_lock (&lock);
-
-  if (!indirect_array)
+  if (!indirect_htab)
     {
       /* Count the number of entries in the NULL-terminated address map.  */
       for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
 	   map_entry += 2, num_ind_funcs++);
 
-      /* Build splay tree for address lookup.  */
-      indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array));
-      indirect_splay_tree_node array = indirect_array;
+      /* Build hashtab for address lookup.  */
+      indirect_htab = htab_create (num_ind_funcs);
       map_entry = GOMP_INDIRECT_ADDR_MAP;
 
-      for (int i = 0; i < num_ind_funcs; i++, array++)
+      for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
 	{
-	  indirect_splay_tree_key k = &array->key;
-	  k->host_addr = (uint64_t) *map_entry++;
-	  k->target_addr = (uint64_t) *map_entry++;
-	  array->left = NULL;
-	  array->right = NULL;
-	  indirect_splay_tree_insert (&indirect_map, array);
+	  struct indirect_map_t element = { *map_entry, NULL };
+	  hash_entry_type *slot = htab_find_slot (&indirect_htab, &element,
+						  INSERT);
+	  *slot = (hash_entry_type) map_entry;
 	}
     }
-
-  gomp_mutex_unlock (&lock);
 }
 
 void *
@@ -88,15 +101,11 @@ GOMP_target_map_indirect_ptr (void *ptr)
   if (!ptr)
     return ptr;
 
-  assert (indirect_array);
-
-  struct indirect_splay_tree_key_s k;
-  indirect_splay_tree_key node = NULL;
-
-  k.host_addr = (uint64_t) ptr;
-  node = indirect_splay_tree_lookup (&indirect_map, &k);
+  assert (indirect_htab);
 
-  return node ? (void *) node->target_addr : ptr;
+  struct indirect_map_t element = { ptr, NULL };
+  hash_entry_type entry = htab_find (indirect_htab, &element);
+  return entry ? entry->target_addr : ptr;
 }
 
 #else
@@ -115,7 +124,7 @@ GOMP_target_map_indirect_ptr (void *ptr)
 
   assert (GOMP_INDIRECT_ADDR_MAP);
 
-  for (volatile void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
+  for (void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
        map_entry += 2)
     if (*map_entry == ptr)
       return (void *) *(map_entry + 1);
diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c
index 61e9c616b67..bd3df448b52 100644
--- a/libgomp/config/gcn/team.c
+++ b/libgomp/config/gcn/team.c
@@ -52,14 +52,15 @@ gomp_gcn_enter_kernel (void)
 {
   int threadid = __builtin_gcn_dim_pos (1);
 
-  /* Initialize indirect function support.  */
-  build_indirect_map ();
-
   if (threadid == 0)
     {
       int numthreads = __builtin_gcn_dim_size (1);
       int teamid = __builtin_gcn_dim_pos(0);
 
+      /* Initialize indirect function support.  */
+      if (teamid == 0)
+	build_indirect_map ();
+
       /* Set up the global state.
 	 Every team will do this, but that should be harmless.  */
       gomp_global_icv.nthreads_var = 16;
diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c
index 0cf5dad39ca..d5361917a24 100644
--- a/libgomp/config/nvptx/team.c
+++ b/libgomp/config/nvptx/team.c
@@ -60,9 +60,6 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
   asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
   asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
 
-  /* Initialize indirect function support.  */
-  build_indirect_map ();
-
   if (tid == 0)
     {
       gomp_global_icv.nthreads_var = ntids;
@@ -74,6 +71,12 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
       nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
       memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
 
+      /* Initialize indirect function support.  */
+      unsigned int block_id;
+      asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
+      if (block_id == 0)
+	build_indirect_map ();
+
       /* Find the low-latency heap details ....  */
       uint32_t *shared_pool;
       uint32_t shared_pool_size = 0;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c
index 9fe190efce8..545f1a9fcbf 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c
@@ -17,17 +17,17 @@ int main (void)
     {
       switch (i % 3)
 	{
-	case 0: fn_ptr[i] = &foo;
-	case 1: fn_ptr[i] = &bar;
-	case 2: fn_ptr[i] = &baz;
+	case 0: fn_ptr[i] = &foo; break;
+	case 1: fn_ptr[i] = &bar; break;
+	case 2: fn_ptr[i] = &baz; break;
 	}
       expected += (*fn_ptr[i]) ();
     }
 
-#pragma omp target teams distribute parallel for reduction(+: x) \
-		map (to: fn_ptr) map (tofrom: x)
-  for (int i = 0; i < N; i++)
-    x += (*fn_ptr[i]) ();
+  #pragma omp target teams distribute parallel for \
+	reduction (+: x) map (to: fn_ptr) map (tofrom: x)
+    for (int i = 0; i < N; i++)
+      x += (*fn_ptr[i]) ();
 
   return x - expected;
 }
diff --git a/libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90 b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90
index 34dd2778893..d3baa81dd07 100644
--- a/libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90
+++ b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90
@@ -1,5 +1,4 @@
 ! { dg-do run }
-! { dg-xfail-run-if "Requires libgomp bug fix pending review" { offload_device } }
 
 module m
 contains


More information about the Gcc-cvs mailing list