This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[Patch preview] libstdc++/24704
- From: Paolo Carlini <pcarlini at suse dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>
- Cc: ahu at ds9a dot nl
- Date: Mon, 22 May 2006 18:47:32 +0200
- Subject: [Patch preview] libstdc++/24704
Hi,
upon Andrew insistence ;) I decided to return to this issue, and well,
apparently now I can imagine how to deal with it. An essential part of
the idea is the observation (originally made by Rth) that atomics are in
general *very* expensive, apparently very much so compared to
__gthread_active_p(), which is the important point of reference here.
Thus the below which I'm going to carefully regtest and performance test
on a few arches (besides x86-linux, which is fine), but I want to show
you early (maybe Bert can help with the tests, for example... and
*sorry* for not believing at all in the very feasibility of implementing
something similar).
Most of the patch consistes of mechanical renames, the "beef" being in
bits/atomicity.h where there are new dispatches to appropriate code:
either out of line, "full blown", __exchange_and_add and __atomic_add or
inline, trivial, __exchange_and_add_single and __atomic_add_single.
To give you an idea about the performance, a stupid program like this:
int main()
{
string a("1");
for (int i = 0; i < 10000000; ++i)
{
string ss1(a);
string ss2(ss1);
string ss3(ss2);
string ss4(ss3);
string ss5(ss4);
}
}
which does not involve threads, of course, goes (on my ready at hand
P4-2400):
mainline
6.380u 0.004s 0:06.38 100.0% 0+0k 0+0io 0pf+0w
patched
0.752u 0.004s 0:00.75 100.0% 0+0k 0+0io 0pf+0w
From the binary compatibility point of view, everything seems fine to
me, because we are still exporting untouched __exchange_and_add and
__atomic_add, and using them exactly like we were doing, when necessary
(__GTHREADS && __gthread_active_p()).
Pitfalls which I'm not seeing? Suggestions?
Thanks for now,
Paolo.
//////////////////////
Index: include/ext/pool_allocator.h
===================================================================
--- include/ext/pool_allocator.h (revision 113915)
+++ include/ext/pool_allocator.h (working copy)
@@ -206,9 +206,9 @@
if (_S_force_new == 0)
{
if (std::getenv("GLIBCXX_FORCE_NEW"))
- __atomic_add(&_S_force_new, 1);
+ __atomic_add_dispatch(&_S_force_new, 1);
else
- __atomic_add(&_S_force_new, -1);
+ __atomic_add_dispatch(&_S_force_new, -1);
}
const size_t __bytes = __n * sizeof(_Tp);
Index: include/ext/rc_string_base.h
===================================================================
--- include/ext/rc_string_base.h (revision 113915)
+++ include/ext/rc_string_base.h (working copy)
@@ -1,6 +1,6 @@
// Reference-counted versatile string base -*- C++ -*-
-// Copyright (C) 2005 Free Software Foundation, Inc.
+// Copyright (C) 2005, 2006 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
@@ -134,7 +134,7 @@
_CharT*
_M_refcopy() throw()
{
- __atomic_add(&_M_info._M_refcount, 1);
+ __atomic_add_dispatch(&_M_info._M_refcount, 1);
return _M_refdata();
} // XXX MT
@@ -202,7 +202,8 @@
void
_M_dispose()
{
- if (__exchange_and_add(&_M_rep()->_M_info._M_refcount, -1) <= 0)
+ if (__exchange_and_add_dispatch(&_M_rep()->_M_info._M_refcount,
+ -1) <= 0)
_M_rep()->_M_destroy(_M_get_allocator());
} // XXX MT
Index: include/bits/locale_classes.h
===================================================================
--- include/bits/locale_classes.h (revision 113973)
+++ include/bits/locale_classes.h (working copy)
@@ -400,12 +400,12 @@
private:
inline void
_M_add_reference() const throw()
- { __gnu_cxx::__atomic_add(&_M_refcount, 1); }
+ { __gnu_cxx::__atomic_add_dispatch(&_M_refcount, 1); }
inline void
_M_remove_reference() const throw()
{
- if (__gnu_cxx::__exchange_and_add(&_M_refcount, -1) == 1)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_M_refcount, -1) == 1)
{
try
{ delete this; }
@@ -505,12 +505,12 @@
inline void
_M_add_reference() throw()
- { __gnu_cxx::__atomic_add(&_M_refcount, 1); }
+ { __gnu_cxx::__atomic_add_dispatch(&_M_refcount, 1); }
inline void
_M_remove_reference() throw()
{
- if (__gnu_cxx::__exchange_and_add(&_M_refcount, -1) == 1)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_M_refcount, -1) == 1)
{
try
{ delete this; }
Index: include/bits/atomicity.h
===================================================================
--- include/bits/atomicity.h (revision 113980)
+++ include/bits/atomicity.h (working copy)
@@ -1,6 +1,6 @@
// Low-level functions for atomic operations -*- C++ -*-
-// Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+// Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
@@ -36,11 +36,12 @@
#define _GLIBCXX_ATOMICITY_H 1
#include <bits/c++config.h>
+#include <bits/gthr.h>
#include <bits/atomic_word.h>
_GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
- _Atomic_word
+ _Atomic_word
__attribute__ ((__unused__))
__exchange_and_add(volatile _Atomic_word* __mem, int __val);
@@ -48,6 +49,54 @@
__attribute__ ((__unused__))
__atomic_add(volatile _Atomic_word* __mem, int __val);
+ static inline _Atomic_word
+ __exchange_and_add_single(volatile _Atomic_word* __mem, int __val)
+ {
+ _Atomic_word __result = *__mem;
+ *__mem += __val;
+ return __result;
+ }
+
+ static inline void
+ __atomic_add_single(volatile _Atomic_word* __mem, int __val)
+ { *__mem += __val; }
+
+ static inline _Atomic_word
+ __attribute__ ((__unused__))
+ __exchange_and_add_dispatch(volatile _Atomic_word* __mem, int __val)
+ {
+#ifdef __GTHREADS
+
+ if (__gthread_active_p())
+ return __exchange_and_add(__mem, __val);
+ else
+ return __exchange_and_add_single(__mem, __val);
+
+#else
+
+ return __exchange_and_add_single(__mem, __val);
+
+#endif
+ }
+
+ static inline void
+ __attribute__ ((__unused__))
+ __atomic_add_dispatch(volatile _Atomic_word* __mem, int __val)
+ {
+#ifdef __GTHREADS
+
+ if (__gthread_active_p())
+ __atomic_add(__mem, __val);
+ else
+ __atomic_add_single(__mem, __val);
+
+#else
+
+ __atomic_add_single(__mem, __val);
+
+#endif
+ }
+
_GLIBCXX_END_NAMESPACE
/* Even if the CPU doesn't need a memory barrier, we need to ensure that
Index: include/bits/basic_string.h
===================================================================
--- include/bits/basic_string.h (revision 113973)
+++ include/bits/basic_string.h (working copy)
@@ -232,7 +232,8 @@
#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
if (__builtin_expect(this != &_S_empty_rep(), false))
#endif
- if (__gnu_cxx::__exchange_and_add(&this->_M_refcount, -1) <= 0)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount,
+ -1) <= 0)
_M_destroy(__a);
} // XXX MT
@@ -245,7 +246,7 @@
#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
if (__builtin_expect(this != &_S_empty_rep(), false))
#endif
- __gnu_cxx::__atomic_add(&this->_M_refcount, 1);
+ __gnu_cxx::__atomic_add_dispatch(&this->_M_refcount, 1);
return _M_refdata();
} // XXX MT
Index: include/bits/ios_base.h
===================================================================
--- include/bits/ios_base.h (revision 113973)
+++ include/bits/ios_base.h (working copy)
@@ -476,12 +476,12 @@
: _M_next(__cb), _M_fn(__fn), _M_index(__index), _M_refcount(0) { }
void
- _M_add_reference() { __gnu_cxx::__atomic_add(&_M_refcount, 1); }
+ _M_add_reference() { __gnu_cxx::__atomic_add_dispatch(&_M_refcount, 1); }
// 0 => OK to delete.
int
_M_remove_reference()
- { return __gnu_cxx::__exchange_and_add(&_M_refcount, -1); }
+ { return __gnu_cxx::__exchange_and_add_dispatch(&_M_refcount, -1); }
};
_Callback_list* _M_callbacks;
Index: include/tr1/boost_shared_ptr.h
===================================================================
--- include/tr1/boost_shared_ptr.h (revision 113915)
+++ include/tr1/boost_shared_ptr.h (working copy)
@@ -130,14 +130,14 @@
void
add_ref_copy()
{
- __gnu_cxx::__atomic_add(&_M_use_count, 1);
+ __gnu_cxx::__atomic_add_dispatch(&_M_use_count, 1);
}
void
add_ref_lock()
{
__gnu_cxx::lock lock(_M_mutex);
- if (__gnu_cxx::__exchange_and_add(&_M_use_count, 1) == 0)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, 1) == 0)
{
_M_use_count = 0;
__throw_bad_weak_ptr();
@@ -147,14 +147,14 @@
void
release() // nothrow
{
- if (__gnu_cxx::__exchange_and_add(&_M_use_count, -1) == 1)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
{
dispose();
#ifdef __GTHREADS
_GLIBCXX_READ_MEM_BARRIER;
_GLIBCXX_WRITE_MEM_BARRIER;
#endif
- if (__gnu_cxx::__exchange_and_add(&_M_weak_count, -1) == 1)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count, -1) == 1)
destroy();
}
}
@@ -162,13 +162,13 @@
void
weak_add_ref() // nothrow
{
- __gnu_cxx::__atomic_add(&_M_weak_count, 1);
+ __gnu_cxx::__atomic_add_dispatch(&_M_weak_count, 1);
}
void
weak_release() // nothrow
{
- if (__gnu_cxx::__exchange_and_add(&_M_weak_count, -1) == 1)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count, -1) == 1)
{
#ifdef __GTHREADS
_GLIBCXX_READ_MEM_BARRIER;
Index: src/ios.cc
===================================================================
--- src/ios.cc (revision 113915)
+++ src/ios.cc (working copy)
@@ -107,7 +107,7 @@
// Implementation note: Initialize top to zero to ensure that
// initialization occurs before main() is started.
static _Atomic_word _S_top = 0;
- return __gnu_cxx::__exchange_and_add(&_S_top, 1) + 4;
+ return __gnu_cxx::__exchange_and_add_dispatch(&_S_top, 1) + 4;
}
void
Index: src/locale.cc
===================================================================
--- src/locale.cc (revision 113915)
+++ src/locale.cc (working copy)
@@ -433,7 +433,8 @@
_M_index = 1 + f->_M_id();
else
#endif
- _M_index = 1 + __gnu_cxx::__exchange_and_add(&_S_refcount, 1);
+ _M_index = 1 + __gnu_cxx::__exchange_and_add_dispatch(&_S_refcount,
+ 1);
}
return _M_index - 1;
}
Index: src/ios_init.cc
===================================================================
--- src/ios_init.cc (revision 113915)
+++ src/ios_init.cc (working copy)
@@ -82,7 +82,7 @@
ios_base::Init::Init()
{
- if (__gnu_cxx::__exchange_and_add(&_S_refcount, 1) == 0)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_S_refcount, 1) == 0)
{
// Standard streams default to synced with "C" operations.
_S_synced_with_stdio = true;
@@ -121,13 +121,13 @@
// streams are not re-initialized with uses of ios_base::Init
// besides <iostream> static object, ie just using <ios> with
// ios_base::Init objects.
- __gnu_cxx::__atomic_add(&_S_refcount, 1);
+ __gnu_cxx::__atomic_add_dispatch(&_S_refcount, 1);
}
}
ios_base::Init::~Init()
{
- if (__gnu_cxx::__exchange_and_add(&_S_refcount, -1) == 2)
+ if (__gnu_cxx::__exchange_and_add_dispatch(&_S_refcount, -1) == 2)
{
// Catch any exceptions thrown by basic_ostream::flush()
try