[PATCH] libgomp: Introduce gomp_thread::spare_team

Sebastian Huber <sebastian.huber@embedded-brains.de>
Tue Jul 7 12:18:00 GMT 2015


Try to re-use the previous team to avoid calls to malloc() and free()
in the common case where the number of threads stays the same.  This
also avoids the superfluous destruction and re-initialization of the
team synchronization objects.
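
The core idea in standalone form (illustrative C only, not libgomp
code; in the actual patch the just-finished team is first parked in
pool->last_team and the previously parked team becomes the spare, so
re-use is delayed by one parallel region):

  /* Illustration of the caching idea: keep the last team-like object
     around and hand it back when the requested size matches, so the
     hot path skips malloc()/free() and the relatively expensive
     re-initialization of the synchronization objects.  */

  #include <stdlib.h>
  #include <pthread.h>

  struct team
  {
    unsigned nthreads;
    pthread_barrier_t barrier;	/* stands in for the team sync objects */
  };

  static __thread struct team *spare_team; /* thr->spare_team in the patch */

  static struct team *
  new_team (unsigned nthreads)
  {
    struct team *team = spare_team;

    if (team && team->nthreads == nthreads)
      spare_team = NULL;	/* fast path: re-use, nothing to initialize */
    else
      {
        team = malloc (sizeof (*team));
        team->nthreads = nthreads;
        pthread_barrier_init (&team->barrier, NULL, nthreads);
      }
    return team;
  }

  static void
  end_team (struct team *team)
  {
    if (spare_team)
      {
        /* At most one spare is kept; drop the older one.  */
        pthread_barrier_destroy (&spare_team->barrier);
        free (spare_team);
      }
    spare_team = team;		/* cache for the next parallel region */
  }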

Using the microbenchmark posted here

https://gcc.gnu.org/ml/gcc-patches/2008-03/msg00930.html

shows an improvement of roughly 4% in the parallel bench test case
(target x86_64-unknown-linux-gnu, median of 9 test runs, iteration
count increased to 200000).

Before the patch:

parallel bench 11.2284 seconds

After the patch:

parallel bench 10.7575 seconds
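
For reference, a minimal stand-in for that benchmark (a sketch, not
the exact code from the linked post) simply times repeated empty
parallel regions, which is exactly the fork/join path the spare team
short-circuits; build with -fopenmp:

  #include <stdio.h>
  #include <omp.h>

  int
  main (void)
  {
    double start = omp_get_wtime ();

    for (int i = 0; i < 200000; i++)
      {
  #pragma omp parallel
        {
          /* Empty region: the measured cost is team creation and
             teardown, i.e. gomp_new_team ()/gomp_team_end ().  */
        }
      }

    printf ("parallel bench %.4f seconds\n", omp_get_wtime () - start);
    return 0;
  }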

libgomp/ChangeLog
2015-07-07  Sebastian Huber  <sebastian.huber@embedded-brains.de>

	* libgomp.h (gomp_thread): Add spare_team field.
	* team.c (gomp_thread_start): Initialize spare team for non-TLS
	targets.
	(gomp_new_team): Use spare team if possible.
	(free_team): Destroy more team objects.
	(gomp_free_thread): Free spare team if necessary.
	(free_non_nested_team): New.
	(gomp_team_end): Move some team object destructions to
	free_team().  Use free_non_nested_team().
---
 libgomp/libgomp.h |  3 +++
 libgomp/team.c    | 63 ++++++++++++++++++++++++++++++++++++-------------------
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 5ed0f78..563c1e2 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -448,6 +448,9 @@ struct gomp_thread
 
   /* User pthread thread pool */
   struct gomp_thread_pool *thread_pool;
+
+  /* Spare team ready for re-use in gomp_new_team()  */
+  struct gomp_team *spare_team;
 };
 
 
diff --git a/libgomp/team.c b/libgomp/team.c
index b98b233..cc19eb0 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -77,6 +77,7 @@ gomp_thread_start (void *xdata)
   struct gomp_thread local_thr;
   thr = &local_thr;
   pthread_setspecific (gomp_tls_key, thr);
+  thr->spare_team = NULL;
 #endif
   gomp_sem_init (&thr->release, 0);
 
@@ -140,19 +141,35 @@ gomp_thread_start (void *xdata)
 struct gomp_team *
 gomp_new_team (unsigned nthreads)
 {
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *spare_team = thr->spare_team;
   struct gomp_team *team;
-  size_t size;
   int i;
 
-  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
-				      + sizeof (team->implicit_task[0]));
-  team = gomp_malloc (size);
+  if (spare_team && spare_team->nthreads == nthreads)
+    {
+      thr->spare_team = NULL;
+      team = spare_team;
+    }
+  else
+    {
+      size_t extra = sizeof (team->ordered_release[0])
+                     + sizeof (team->implicit_task[0]);
+      team = gomp_malloc (sizeof (*team) + nthreads * extra);
+
+#ifndef HAVE_SYNC_BUILTINS
+      gomp_mutex_init (&team->work_share_list_free_lock);
+#endif
+      gomp_barrier_init (&team->barrier, nthreads);
+      gomp_sem_init (&team->master_release, 0);
+      gomp_mutex_init (&team->task_lock);
+
+      team->nthreads = nthreads;
+    }
 
   team->work_share_chunk = 8;
 #ifdef HAVE_SYNC_BUILTINS
   team->single_count = 0;
-#else
-  gomp_mutex_init (&team->work_share_list_free_lock);
 #endif
   team->work_shares_to_free = &team->work_shares[0];
   gomp_init_work_share (&team->work_shares[0], false, nthreads);
@@ -163,14 +180,9 @@ gomp_new_team (unsigned nthreads)
     team->work_shares[i].next_free = &team->work_shares[i + 1];
   team->work_shares[i].next_free = NULL;
 
-  team->nthreads = nthreads;
-  gomp_barrier_init (&team->barrier, nthreads);
-
-  gomp_sem_init (&team->master_release, 0);
   team->ordered_release = (void *) &team->implicit_task[nthreads];
   team->ordered_release[0] = &team->master_release;
 
-  gomp_mutex_init (&team->task_lock);
   team->task_queue = NULL;
   team->task_count = 0;
   team->task_queued_count = 0;
@@ -187,6 +199,10 @@ gomp_new_team (unsigned nthreads)
 static void
 free_team (struct gomp_team *team)
 {
+  gomp_sem_destroy (&team->master_release);
+#ifndef HAVE_SYNC_BUILTINS
+  gomp_mutex_destroy (&team->work_share_list_free_lock);
+#endif
   gomp_barrier_destroy (&team->barrier);
   gomp_mutex_destroy (&team->task_lock);
   free (team);
@@ -225,6 +241,8 @@ gomp_free_thread (void *arg __attribute__((unused)))
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_thread_pool *pool = thr->thread_pool;
+  if (thr->spare_team)
+    free_team (thr->spare_team);
   if (pool)
     {
       if (pool->threads_used > 0)
@@ -835,6 +853,18 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
     free (affinity_thr);
 }
 
+static void
+free_non_nested_team (struct gomp_team *team, struct gomp_thread *thr)
+{
+  struct gomp_thread_pool *pool = thr->thread_pool;
+  if (pool->last_team)
+    {
+      if (thr->spare_team)
+	free_team (thr->spare_team);
+      thr->spare_team = pool->last_team;
+    }
+  pool->last_team = team;
+}
 
 /* Terminate the current team.  This is only to be called by the master
    thread.  We assume that we must wait for the other threads.  */
@@ -894,21 +924,12 @@ gomp_team_end (void)
 	}
       while (ws != NULL);
     }
-  gomp_sem_destroy (&team->master_release);
-#ifndef HAVE_SYNC_BUILTINS
-  gomp_mutex_destroy (&team->work_share_list_free_lock);
-#endif
 
   if (__builtin_expect (thr->ts.team != NULL, 0)
       || __builtin_expect (team->nthreads == 1, 0))
     free_team (team);
   else
-    {
-      struct gomp_thread_pool *pool = thr->thread_pool;
-      if (pool->last_team)
-	free_team (pool->last_team);
-      pool->last_team = team;
-    }
+    free_non_nested_team (team, thr);
 }
 
 
-- 
1.8.4.5


