This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch preview] libstdc++/24704


Hi,

upon Andrew insistence ;) I decided to return to this issue, and well, apparently now I can imagine how to deal with it. An essential part of the idea is the observation (originally made by Rth) that atomics are in general *very* expensive, apparently very much so compared to __gthread_active_p(), which is the important point of reference here.

Thus the below which I'm going to carefully regtest and performance test on a few arches (besides x86-linux, which is fine), but I want to show you early (maybe Bert can help with the tests, for example... and *sorry* for not believing at all in the very feasibility of implementing something similar).

Most of the patch consistes of mechanical renames, the "beef" being in bits/atomicity.h where there are new dispatches to appropriate code: either out of line, "full blown", __exchange_and_add and __atomic_add or inline, trivial, __exchange_and_add_single and __atomic_add_single.

To give you an idea about the performance, a stupid program like this:

int main()
{
 string a("1");

 for (int i = 0; i < 10000000; ++i)
   {
     string ss1(a);
     string ss2(ss1);
     string ss3(ss2);
     string ss4(ss3);
     string ss5(ss4);
   }
}

which does not involve threads, of course, goes (on my ready at hand P4-2400):

mainline
6.380u 0.004s 0:06.38 100.0%    0+0k 0+0io 0pf+0w

patched
0.752u 0.004s 0:00.75 100.0%    0+0k 0+0io 0pf+0w

From the binary compatibility point of view, everything seems fine to me, because we are still exporting untouched __exchange_and_add and __atomic_add, and using them exactly like we were doing, when necessary (__GTHREADS && __gthread_active_p()).

Pitfalls which I'm not seeing? Suggestions?

Thanks for now,
Paolo.

//////////////////////
Index: include/ext/pool_allocator.h
===================================================================
--- include/ext/pool_allocator.h	(revision 113915)
+++ include/ext/pool_allocator.h	(working copy)
@@ -206,9 +206,9 @@
 	  if (_S_force_new == 0)
 	    {
 	      if (std::getenv("GLIBCXX_FORCE_NEW"))
-		__atomic_add(&_S_force_new, 1);
+		__atomic_add_dispatch(&_S_force_new, 1);
 	      else
-		__atomic_add(&_S_force_new, -1);
+		__atomic_add_dispatch(&_S_force_new, -1);
 	    }
 
 	  const size_t __bytes = __n * sizeof(_Tp);	      
Index: include/ext/rc_string_base.h
===================================================================
--- include/ext/rc_string_base.h	(revision 113915)
+++ include/ext/rc_string_base.h	(working copy)
@@ -1,6 +1,6 @@
 // Reference-counted versatile string base -*- C++ -*-
 
-// Copyright (C) 2005 Free Software Foundation, Inc.
+// Copyright (C) 2005, 2006 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
@@ -134,7 +134,7 @@
 	_CharT*
 	_M_refcopy() throw()
 	{
-	  __atomic_add(&_M_info._M_refcount, 1);
+	  __atomic_add_dispatch(&_M_info._M_refcount, 1);
 	  return _M_refdata();
 	}  // XXX MT
 	
@@ -202,7 +202,8 @@
       void
       _M_dispose()
       {
-	if (__exchange_and_add(&_M_rep()->_M_info._M_refcount, -1) <= 0)
+	if (__exchange_and_add_dispatch(&_M_rep()->_M_info._M_refcount,
+					-1) <= 0)
 	  _M_rep()->_M_destroy(_M_get_allocator());
       }  // XXX MT
 
Index: include/bits/locale_classes.h
===================================================================
--- include/bits/locale_classes.h	(revision 113973)
+++ include/bits/locale_classes.h	(working copy)
@@ -400,12 +400,12 @@
   private:
     inline void
     _M_add_reference() const throw()
-    { __gnu_cxx::__atomic_add(&_M_refcount, 1); }
+    { __gnu_cxx::__atomic_add_dispatch(&_M_refcount, 1); }
 
     inline void
     _M_remove_reference() const throw()
     {
-      if (__gnu_cxx::__exchange_and_add(&_M_refcount, -1) == 1)
+      if (__gnu_cxx::__exchange_and_add_dispatch(&_M_refcount, -1) == 1)
 	{
 	  try
 	    { delete this; }
@@ -505,12 +505,12 @@
 
     inline void
     _M_add_reference() throw()
-    { __gnu_cxx::__atomic_add(&_M_refcount, 1); }
+    { __gnu_cxx::__atomic_add_dispatch(&_M_refcount, 1); }
 
     inline void
     _M_remove_reference() throw()
     {
-      if (__gnu_cxx::__exchange_and_add(&_M_refcount, -1) == 1)
+      if (__gnu_cxx::__exchange_and_add_dispatch(&_M_refcount, -1) == 1)
 	{
 	  try
 	    { delete this; }
Index: include/bits/atomicity.h
===================================================================
--- include/bits/atomicity.h	(revision 113980)
+++ include/bits/atomicity.h	(working copy)
@@ -1,6 +1,6 @@
 // Low-level functions for atomic operations -*- C++ -*-
 
-// Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+// Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
@@ -36,11 +36,12 @@
 #define _GLIBCXX_ATOMICITY_H	1
 
 #include <bits/c++config.h>
+#include <bits/gthr.h>
 #include <bits/atomic_word.h>
 
 _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
 
-  _Atomic_word 
+  _Atomic_word
   __attribute__ ((__unused__))
   __exchange_and_add(volatile _Atomic_word* __mem, int __val);
 
@@ -48,6 +49,54 @@
   __attribute__ ((__unused__))
   __atomic_add(volatile _Atomic_word* __mem, int __val);
 
+  static inline _Atomic_word
+  __exchange_and_add_single(volatile _Atomic_word* __mem, int __val)
+  {
+    _Atomic_word __result = *__mem;
+    *__mem += __val;
+    return __result;
+  }
+
+  static inline void
+  __atomic_add_single(volatile _Atomic_word* __mem, int __val)
+  { *__mem += __val; }
+
+  static inline _Atomic_word
+  __attribute__ ((__unused__))
+  __exchange_and_add_dispatch(volatile _Atomic_word* __mem, int __val)
+  {
+#ifdef __GTHREADS
+
+    if (__gthread_active_p())
+      return __exchange_and_add(__mem, __val);
+    else
+      return __exchange_and_add_single(__mem, __val);
+
+#else
+
+    return __exchange_and_add_single(__mem, __val);
+
+#endif
+  }
+
+  static inline void
+  __attribute__ ((__unused__))
+  __atomic_add_dispatch(volatile _Atomic_word* __mem, int __val)
+  {
+#ifdef __GTHREADS
+
+    if (__gthread_active_p())
+      __atomic_add(__mem, __val);
+    else
+      __atomic_add_single(__mem, __val);
+
+#else
+
+    __atomic_add_single(__mem, __val);
+
+#endif
+  }
+
 _GLIBCXX_END_NAMESPACE
 
 /* Even if the CPU doesn't need a memory barrier, we need to ensure that
Index: include/bits/basic_string.h
===================================================================
--- include/bits/basic_string.h	(revision 113973)
+++ include/bits/basic_string.h	(working copy)
@@ -232,7 +232,8 @@
 #ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
 	  if (__builtin_expect(this != &_S_empty_rep(), false))
 #endif
-	    if (__gnu_cxx::__exchange_and_add(&this->_M_refcount, -1) <= 0)
+	    if (__gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount,
+						       -1) <= 0)
 	      _M_destroy(__a);
 	}  // XXX MT
 
@@ -245,7 +246,7 @@
 #ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
 	  if (__builtin_expect(this != &_S_empty_rep(), false))
 #endif
-            __gnu_cxx::__atomic_add(&this->_M_refcount, 1);
+            __gnu_cxx::__atomic_add_dispatch(&this->_M_refcount, 1);
 	  return _M_refdata();
 	}  // XXX MT
 
Index: include/bits/ios_base.h
===================================================================
--- include/bits/ios_base.h	(revision 113973)
+++ include/bits/ios_base.h	(working copy)
@@ -476,12 +476,12 @@
       : _M_next(__cb), _M_fn(__fn), _M_index(__index), _M_refcount(0) { }
 
       void
-      _M_add_reference() { __gnu_cxx::__atomic_add(&_M_refcount, 1); }
+      _M_add_reference() { __gnu_cxx::__atomic_add_dispatch(&_M_refcount, 1); }
 
       // 0 => OK to delete.
       int
       _M_remove_reference() 
-      { return __gnu_cxx::__exchange_and_add(&_M_refcount, -1); }
+      { return __gnu_cxx::__exchange_and_add_dispatch(&_M_refcount, -1); }
     };
 
      _Callback_list*	_M_callbacks;
Index: include/tr1/boost_shared_ptr.h
===================================================================
--- include/tr1/boost_shared_ptr.h	(revision 113915)
+++ include/tr1/boost_shared_ptr.h	(working copy)
@@ -130,14 +130,14 @@
   void
   add_ref_copy()
   {
-    __gnu_cxx::__atomic_add(&_M_use_count, 1);
+    __gnu_cxx::__atomic_add_dispatch(&_M_use_count, 1);
   }
 
   void
   add_ref_lock()
   {
     __gnu_cxx::lock lock(_M_mutex);
-    if (__gnu_cxx::__exchange_and_add(&_M_use_count, 1) == 0)
+    if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, 1) == 0)
       {
 	_M_use_count = 0;
 	__throw_bad_weak_ptr();
@@ -147,14 +147,14 @@
   void
   release() // nothrow
   {
-    if (__gnu_cxx::__exchange_and_add(&_M_use_count, -1) == 1)
+    if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
       {
 	dispose();
 #ifdef __GTHREADS	
 	_GLIBCXX_READ_MEM_BARRIER;
 	_GLIBCXX_WRITE_MEM_BARRIER;
 #endif
-	if (__gnu_cxx::__exchange_and_add(&_M_weak_count, -1) == 1)
+	if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count, -1) == 1)
 	  destroy();
       }
   }
@@ -162,13 +162,13 @@
   void
   weak_add_ref() // nothrow
   {
-    __gnu_cxx::__atomic_add(&_M_weak_count, 1);
+    __gnu_cxx::__atomic_add_dispatch(&_M_weak_count, 1);
   }
 
   void
   weak_release() // nothrow
   {
-    if (__gnu_cxx::__exchange_and_add(&_M_weak_count, -1) == 1)
+    if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count, -1) == 1)
       {
 #ifdef __GTHREADS
 	_GLIBCXX_READ_MEM_BARRIER;
Index: src/ios.cc
===================================================================
--- src/ios.cc	(revision 113915)
+++ src/ios.cc	(working copy)
@@ -107,7 +107,7 @@
     // Implementation note: Initialize top to zero to ensure that
     // initialization occurs before main() is started.
     static _Atomic_word _S_top = 0; 
-    return __gnu_cxx::__exchange_and_add(&_S_top, 1) + 4;
+    return __gnu_cxx::__exchange_and_add_dispatch(&_S_top, 1) + 4;
   }
 
   void 
Index: src/locale.cc
===================================================================
--- src/locale.cc	(revision 113915)
+++ src/locale.cc	(working copy)
@@ -433,7 +433,8 @@
 	  _M_index = 1 + f->_M_id();
 	else
 #endif
-	  _M_index = 1 + __gnu_cxx::__exchange_and_add(&_S_refcount, 1);
+	  _M_index = 1 + __gnu_cxx::__exchange_and_add_dispatch(&_S_refcount,
+								1);
       }
     return _M_index - 1;
   }
Index: src/ios_init.cc
===================================================================
--- src/ios_init.cc	(revision 113915)
+++ src/ios_init.cc	(working copy)
@@ -82,7 +82,7 @@
 
   ios_base::Init::Init()
   {
-    if (__gnu_cxx::__exchange_and_add(&_S_refcount, 1) == 0)
+    if (__gnu_cxx::__exchange_and_add_dispatch(&_S_refcount, 1) == 0)
       {
 	// Standard streams default to synced with "C" operations.
 	_S_synced_with_stdio = true;
@@ -121,13 +121,13 @@
 	// streams are not re-initialized with uses of ios_base::Init
 	// besides <iostream> static object, ie just using <ios> with
 	// ios_base::Init objects.
-	__gnu_cxx::__atomic_add(&_S_refcount, 1);
+	__gnu_cxx::__atomic_add_dispatch(&_S_refcount, 1);
       }
   }
 
   ios_base::Init::~Init()
   {
-    if (__gnu_cxx::__exchange_and_add(&_S_refcount, -1) == 2)
+    if (__gnu_cxx::__exchange_and_add_dispatch(&_S_refcount, -1) == 2)
       {
 	// Catch any exceptions thrown by basic_ostream::flush()
 	try

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]