This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] [gomp] Recycle non-nested team if possible
- From: Sebastian Huber <sebastian dot huber at embedded-brains dot de>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Sebastian Huber <sebastian dot huber at embedded-brains dot de>
- Date: Mon, 13 Jul 2015 13:15:44 +0200
- Subject: [PATCH] [gomp] Recycle non-nested team if possible
- Authentication-results: sourceware.org; auth=none
Try to recycle the last non-nested team to avoid calls to malloc() and
free() in the common case where the number of threads stays the same.
Avoid superfluous destruction and initialization of team synchronization
objects.
The microbenchmark posted at
https://gcc.gnu.org/ml/gcc-patches/2008-03/msg00930.html
shows an improvement in the parallel bench test case (target
x86_64-unknown-linux-gnu, median of 9 test runs, iteration count
increased to 200000).
Before the patch:
parallel bench 11.2284 seconds
After the patch:
parallel bench 10.5912 seconds
libgomp/ChangeLog
2015-07-13 Sebastian Huber <sebastian.huber@embedded-brains.de>
* team.c (get_recycable_team): New.
(gomp_new_team): Recycle last non-nested team if possible.
(free_team): Destroy more team synchronization objects.
(gomp_team_end): Move some team synchronization object
destructions to free_team().
---
libgomp/team.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 15 deletions(-)
diff --git a/libgomp/team.c b/libgomp/team.c
index b98b233..0bcbaf8 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -134,6 +134,25 @@ gomp_thread_start (void *xdata)
return NULL;
}
+static struct gomp_team *
+get_recycable_team (unsigned nthreads)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->ts.team == NULL)
+ {
+ struct gomp_thread_pool *pool = thr->thread_pool;
+ if (pool != NULL)
+ {
+ struct gomp_team *last_team = pool->last_team;
+ if (last_team != NULL && last_team->nthreads == nthreads)
+ {
+ pool->last_team = NULL;
+ return last_team;
+ }
+ }
+ }
+ return NULL;
+}
/* Create a new team data structure. */
@@ -141,18 +160,28 @@ struct gomp_team *
gomp_new_team (unsigned nthreads)
{
struct gomp_team *team;
- size_t size;
int i;
- size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
- + sizeof (team->implicit_task[0]));
- team = gomp_malloc (size);
+ team = get_recycable_team (nthreads);
+ if (team == NULL)
+ {
+ size_t extra = sizeof (team->ordered_release[0])
+ + sizeof (team->implicit_task[0]);
+ team = gomp_malloc (sizeof (*team) + nthreads * extra);
+
+#ifndef HAVE_SYNC_BUILTINS
+ gomp_mutex_init (&team->work_share_list_free_lock);
+#endif
+ gomp_barrier_init (&team->barrier, nthreads);
+ gomp_sem_init (&team->master_release, 0);
+ gomp_mutex_init (&team->task_lock);
+
+ team->nthreads = nthreads;
+ }
team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
team->single_count = 0;
-#else
- gomp_mutex_init (&team->work_share_list_free_lock);
#endif
team->work_shares_to_free = &team->work_shares[0];
gomp_init_work_share (&team->work_shares[0], false, nthreads);
@@ -163,14 +192,9 @@ gomp_new_team (unsigned nthreads)
team->work_shares[i].next_free = &team->work_shares[i + 1];
team->work_shares[i].next_free = NULL;
- team->nthreads = nthreads;
- gomp_barrier_init (&team->barrier, nthreads);
-
- gomp_sem_init (&team->master_release, 0);
team->ordered_release = (void *) &team->implicit_task[nthreads];
team->ordered_release[0] = &team->master_release;
- gomp_mutex_init (&team->task_lock);
team->task_queue = NULL;
team->task_count = 0;
team->task_queued_count = 0;
@@ -187,6 +211,10 @@ gomp_new_team (unsigned nthreads)
static void
free_team (struct gomp_team *team)
{
+ gomp_sem_destroy (&team->master_release);
+#ifndef HAVE_SYNC_BUILTINS
+ gomp_mutex_destroy (&team->work_share_list_free_lock);
+#endif
gomp_barrier_destroy (&team->barrier);
gomp_mutex_destroy (&team->task_lock);
free (team);
@@ -894,10 +922,6 @@ gomp_team_end (void)
}
while (ws != NULL);
}
- gomp_sem_destroy (&team->master_release);
-#ifndef HAVE_SYNC_BUILTINS
- gomp_mutex_destroy (&team->work_share_list_free_lock);
-#endif
if (__builtin_expect (thr->ts.team != NULL, 0)
|| __builtin_expect (team->nthreads == 1, 0))
--
1.8.4.5