This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] Avoid unnecessary futex_wake syscalls when all threads are busy waiting


Hi!

If lock contention is high, but all locks are held for relatively short time
and no threads actually goes into futex_wait, we still completely
unnecessarily do lots of futex_wake syscalls.

On Linux with futex, our mutexes have 3 states:
0 - unlocked
1 - locked, uncontended
2 - locked, contended
When the initial atomic 0 -> 1 change fails, gomp_mutex_slow is called and
from that point onwards the lock is changed into state 2 and handled as
contended, until it is unlocked.  State 2 in particular means that
gomp_mutex_unlock will do a futex_wake on it.
The following patch changes it, so that when 0 -> 1 change fails, because
the mutex is already in state 1, it will do the userland spinning.  If the
spinning times out with no change, it will enter state 2, futex_wait and
continue as before, while if during the spinning the lock became 0, it will
try to change it again atomically from 0 -> 1.  If that succeeds, it
returns, if it fails because another thread made it into 1, it will do the
spinning again, and any time the mutex becomes 2, it will fall thru into the
old loop.  The advantage of this is that if no thread needed to go to sleep,
no futex_wakes will be done.

Additionally I've improved the old loop, from 
  do 
   {
     int oldval = __sync_val_compare_and_swap (mutex, 1, 2);
     if (oldval != 0)
	do_wait (mutex, 2);
    }
  while (!__sync_bool_compare_and_swap (mutex, 0, 2));  
into:
  while ((oldval = __sync_lock_test_and_set (mutex, 2)))
    do_wait (mutex, 2);
which should have the advantage that there is just one atomic insn instead
of two.  While __sync_lock_test_and_set isn't a full barrier on all targets,
I hope it doesn't matter, because the inline caller has already done one
__sync_val_compare_and_swap which is a full barrier.

Tested with libgomp testsuite and looked at performance numbers of Sho's
omp_fib.c and libgomp.c/sort-1.c, unfortunately the differences looked to be
in the noise.  But, e.g. on omp_fib.c strace -f shows that the number of
futex FUTEX_WAKE syscalls went down a lot (from ~ 75000 for omp_fib 32 down 
to ~ 50 with the default wait policy, of course for OMP_WAIT_POLICY=passive
it remained roughly the same).

Any comments?  Can anyone see meassurable differences on some benchmark?

2011-07-15  Jakub Jelinek  <jakub@redhat.com>

	* config/linux/wait.h (do_spin): New inline, largely copied
	from do_wait, just don't do futex_wait here, instead return true if
	it should be done.
	(do_wait): Implement using do_spin.
	* config/linux/mutex.h (gomp_mutex_lock_slow): Add an int argument
	to prototype.
	(gomp_mutex_lock): Use __sync_val_compare_and_swap instead of
	__sync_bool_compare_and_swap, pass the oldval to
	gomp_mutex_lock_slow.
	* config/linux/mutex.c (gomp_mutex_lock_slow): Add oldval argument.
	If all mutex contenders are just spinning and not sleeping, don't
	change state to 2 unnecessarily.  Optimize the loop when state has
	already become 2 to use just one atomic operation per loop instead
	of two.
	* config/linux/ia64/mutex.h (gomp_mutex_lock_slow): Add an int argument
	to prototype.
	(gomp_mutex_lock): Use __sync_val_compare_and_swap instead of
	__sync_bool_compare_and_swap, pass the oldval to
	gomp_mutex_lock_slow.

--- libgomp/config/linux/wait.h.jj	2011-02-15 15:26:05.000000000 +0100
+++ libgomp/config/linux/wait.h	2011-07-15 09:56:14.000000000 +0200
@@ -44,7 +44,7 @@ extern long int gomp_futex_wait, gomp_fu
 
 #include <futex.h>
 
-static inline void do_wait (int *addr, int val)
+static inline int do_spin (int *addr, int val)
 {
   unsigned long long i, count = gomp_spin_count_var;
 
@@ -52,10 +52,16 @@ static inline void do_wait (int *addr, i
     count = gomp_throttled_spin_count_var;
   for (i = 0; i < count; i++)
     if (__builtin_expect (*addr != val, 0))
-      return;
+      return 0;
     else
       cpu_relax ();
-  futex_wait (addr, val);
+  return 1;
+}
+
+static inline void do_wait (int *addr, int val)
+{
+  if (do_spin (addr, val))
+    futex_wait (addr, val);
 }
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
--- libgomp/config/linux/mutex.h.jj	2009-08-05 11:47:08.000000000 +0200
+++ libgomp/config/linux/mutex.h	2011-07-15 09:29:04.000000000 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2009, 2011 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU OpenMP Library (libgomp).
@@ -38,11 +38,12 @@ static inline void gomp_mutex_init (gomp
   *mutex = 0;
 }
 
-extern void gomp_mutex_lock_slow (gomp_mutex_t *mutex);
+extern void gomp_mutex_lock_slow (gomp_mutex_t *mutex, int);
 static inline void gomp_mutex_lock (gomp_mutex_t *mutex)
 {
-  if (!__sync_bool_compare_and_swap (mutex, 0, 1))
-    gomp_mutex_lock_slow (mutex);
+  int oldval = __sync_val_compare_and_swap (mutex, 0, 1);
+  if (__builtin_expect (oldval, 0))
+    gomp_mutex_lock_slow (mutex, oldval);
 }
 
 extern void gomp_mutex_unlock_slow (gomp_mutex_t *mutex);
--- libgomp/config/linux/mutex.c.jj	2009-04-14 16:33:07.000000000 +0200
+++ libgomp/config/linux/mutex.c	2011-07-15 10:18:21.000000000 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU OpenMP Library (libgomp).
@@ -32,15 +32,27 @@ long int gomp_futex_wake = FUTEX_WAKE | 
 long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
 
 void
-gomp_mutex_lock_slow (gomp_mutex_t *mutex)
+gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval)
 {
-  do
+  while (oldval == 1)
     {
-      int oldval = __sync_val_compare_and_swap (mutex, 1, 2);
-      if (oldval != 0)
-	do_wait (mutex, 2);
+      if (do_spin (mutex, 1))
+	{
+	  oldval = __sync_lock_test_and_set (mutex, 2);
+	  if (oldval == 0)
+	    return;
+	  futex_wait (mutex, 2);
+	  break;
+	}
+      else
+	{
+	  oldval = __sync_val_compare_and_swap (mutex, 0, 1);
+	  if (oldval == 0)
+	    return;
+	}
     }
-  while (!__sync_bool_compare_and_swap (mutex, 0, 2));
+  while ((oldval = __sync_lock_test_and_set (mutex, 2)))
+    do_wait (mutex, 2);
 }
 
 void
--- libgomp/config/linux/ia64/mutex.h.jj	2009-04-14 16:33:07.000000000 +0200
+++ libgomp/config/linux/ia64/mutex.h	2011-07-15 10:39:25.000000000 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU OpenMP Library (libgomp).
@@ -38,11 +38,12 @@ static inline void gomp_mutex_init (gomp
   *mutex = 0;
 }
 
-extern void gomp_mutex_lock_slow (gomp_mutex_t *mutex);
+extern void gomp_mutex_lock_slow (gomp_mutex_t *mutex, int);
 static inline void gomp_mutex_lock (gomp_mutex_t *mutex)
 {
-  if (!__sync_bool_compare_and_swap (mutex, 0, 1))
-    gomp_mutex_lock_slow (mutex);
+  int oldval = __sync_val_compare_and_swap (mutex, 0, 1);
+  if (__builtin_expect (oldval, 0))
+    gomp_mutex_lock_slow (mutex, oldval);
 }
 
 extern void gomp_mutex_unlock_slow (gomp_mutex_t *mutex);

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]