This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch] Nathan's improvements to basic_string


Hi,

below you will find attached the long standing patch
contributed by Nathan, rediffed against mainline, which
finally I and Loren got around to test for performance
(besides the obvious regression testing).

Indeed, the improvements are easily measurable even on
single processor machines, like my P4-2400/linux.

Consider this testcase:

#include <string>
main()
{
 for (int i = 0; i < 1000; ++i) {
   std::string a[100000];
 }
}

Current mainline:
-O0: 11.900u 0.000s 0:11.98 99.3%    0+0k 0+0io 204pf+0w
-O2: 10.420u 0.010s 0:10.51 99.2%    0+0k 0+0io 204pf+0w

Current + Nathan's:
-O0: 3.310u 0.000s 0:03.32 99.6%     0+0k 0+0io 201pf+0w
-O2: 0.350u 0.000s 0:00.35 100.0%    0+0k 0+0io 201pf+0w

As you can see, besides a great improvement at -O0, it
becomse also easier for the compiler to optimize away
dead instances (I would appreciate some feedback from
Nathan's about this additional nice feature)

Everything considered we believe with Loren (more expert
than me of MT topics) that the patch should go in as soon
as possible, but nevertheless will wait some additional
24 hours, just in case...

Thanks,
Paolo.

////////////
2003-06-12  Nathan C. Myers <ncm-nospam@cantrip.org>

	Avoid multi-processor bus contention on increment/decrement-and-
	test of the reference count in the empty-string object, by comparing 
        addresses first, and never touching the reference count of the empty-
        string object.  
	* include/bits/basic_string.h:
	(_S_empty_rep_storage): Move into basic_string<>::_Rep for use by its
	members.
	(_Rep::_S_empty_rep()): New accessor. 
	(_Rep::_M_length, _Rep::_M_capacity, _Rep::_M_references): Move to
	a base class _Rep_base.
	(_Rep::_M_dispose, _Rep::_M_refcopy): Check for the empty string.
	(basic_string()): Change to use _M_refdata() in place of _M_refcopy(),
	since no longer must increment its refcount.
	* include/bits/basic_string.tcc:
	(_Rep::_M_destroy, _M_leak_hard): Check for the empty string and 
        return immediately.  The former might be unnecessary.  The latter 
        prevents begin() and end() from cloning it unnecessarily.
	(_S_construct(_InIterator, _InIterator, const _Alloc&, input_iterator_tag),
	_S_construct(_InIterator, _InIterator, const _Alloc&, forward_iterator_tag),
	_S_construct(size_type, _CharT, const _Alloc&)): Change to use _M_refdata()
	in place of _M_refcopy().
	(_M_mutate): Check for the empty string and treat it as shared.
        This is necessary here because _M_mutate is sometimes called with
        all-zero arguments; in all other uses of _M_is_shared, the test comes
        out right anyhow.
diff -urN libstdc++-v3-orig/include/bits/basic_string.h libstdc++-v3/include/bits/basic_string.h
--- libstdc++-v3-orig/include/bits/basic_string.h	2003-06-10 23:59:29.000000000 +0200
+++ libstdc++-v3/include/bits/basic_string.h	2003-06-12 13:51:36.000000000 +0200
@@ -1,6 +1,6 @@
 // Components for manipulating sequences of characters -*- C++ -*-
 
-// Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002
+// Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
 // Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
@@ -140,7 +140,15 @@
       //   4. All fields==0 is an empty string, given the extra storage
       //      beyond-the-end for a null terminator; thus, the shared
       //      empty string representation needs no constructor.
-      struct _Rep
+
+      struct _Rep_base
+      {
+	size_type 		_M_length;
+	size_type 		_M_capacity;
+	_Atomic_word		_M_references;
+      };
+
+      struct _Rep : _Rep_base
       {
 	// Types:
 	typedef typename _Alloc::template rebind<char>::other _Raw_bytes_alloc;
@@ -157,29 +165,33 @@
 	// npos = sizeof(_Rep) + (m * sizeof(_CharT)) + sizeof(_CharT)
 	// Solving for m:
 	// m = ((npos - sizeof(_Rep))/sizeof(CharT)) - 1
-	// In addition, this implementation quarters this ammount.
+	// In addition, this implementation quarters this amount.
 	static const size_type 	_S_max_size;
 	static const _CharT 	_S_terminal;
 
-	size_type 		_M_length;
-	size_type 		_M_capacity;
-	_Atomic_word		_M_references;
-
+	// The following storage is init'd to 0 by the linker, resulting
+        // (carefully) in an empty string with one reference.
+        static size_type _S_empty_rep_storage[];
+
+        static _Rep& 
+        _S_empty_rep()
+        { return *reinterpret_cast<_Rep*>(&_S_empty_rep_storage); }
+ 
         bool
 	_M_is_leaked() const
-        { return _M_references < 0; }
+        { return this->_M_references < 0; }
 
         bool
 	_M_is_shared() const
-        { return _M_references > 0; }
+        { return this->_M_references > 0; }
 
         void
 	_M_set_leaked()
-        { _M_references = -1; }
+        { this->_M_references = -1; }
 
         void
 	_M_set_sharable()
-        { _M_references = 0; }
+        { this->_M_references = 0; }
 
 	_CharT*
 	_M_refdata() throw()
@@ -203,8 +215,9 @@
 	void
 	_M_dispose(const _Alloc& __a)
 	{
-	  if (__exchange_and_add(&_M_references, -1) <= 0)
-	    _M_destroy(__a);
+	  if (__builtin_expect(this != &_S_empty_rep(), false))
+	    if (__exchange_and_add(&this->_M_references, -1) <= 0)
+	      _M_destroy(__a);
 	}  // XXX MT
 
 	void
@@ -213,7 +226,8 @@
 	_CharT*
 	_M_refcopy() throw()
 	{
-	  __atomic_add(&_M_references, 1);
+	  if (__builtin_expect(this != &_S_empty_rep(), false))
+            __atomic_add(&this->_M_references, 1);
 	  return _M_refdata();
 	}  // XXX MT
 
@@ -240,10 +254,6 @@
       // Data Members (private):
       mutable _Alloc_hider 	_M_dataplus;
 
-      // The following storage is init'd to 0 by the linker, resulting
-      // (carefully) in an empty string with one reference.
-      static size_type _S_empty_rep_storage[(sizeof(_Rep) + sizeof(_CharT) + sizeof(size_type) - 1)/sizeof(size_type)];
-
       _CharT*
       _M_data() const
       { return  _M_dataplus._M_p; }
@@ -322,7 +332,7 @@
 
       static _Rep&
       _S_empty_rep()
-      { return *reinterpret_cast<_Rep*>(&_S_empty_rep_storage); }
+      { return _Rep::_S_empty_rep(); }
 
     public:
       // Construct/copy/destroy:
@@ -859,7 +869,7 @@
   template<typename _CharT, typename _Traits, typename _Alloc>
     inline basic_string<_CharT, _Traits, _Alloc>::
     basic_string()
-    : _M_dataplus(_S_empty_rep()._M_refcopy(), _Alloc()) { }
+    : _M_dataplus(_S_empty_rep()._M_refdata(), _Alloc()) { }
 
   // operator+
   template<typename _CharT, typename _Traits, typename _Alloc>
diff -urN libstdc++-v3-orig/include/bits/basic_string.tcc libstdc++-v3/include/bits/basic_string.tcc
--- libstdc++-v3-orig/include/bits/basic_string.tcc	2003-06-10 23:59:29.000000000 +0200
+++ libstdc++-v3/include/bits/basic_string.tcc	2003-06-12 20:36:01.000000000 +0200
@@ -48,7 +48,7 @@
   template<typename _CharT, typename _Traits, typename _Alloc>
     const typename basic_string<_CharT, _Traits, _Alloc>::size_type 
     basic_string<_CharT, _Traits, _Alloc>::
-    _Rep::_S_max_size = (((npos - sizeof(_Rep))/sizeof(_CharT)) - 1) / 4;
+    _Rep::_S_max_size = (((npos - sizeof(_Rep_base))/sizeof(_CharT)) - 1) / 4;
 
   template<typename _CharT, typename _Traits, typename _Alloc>
     const _CharT 
@@ -63,8 +63,9 @@
   // at static init time (before static ctors are run).
   template<typename _CharT, typename _Traits, typename _Alloc>
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
-    basic_string<_CharT, _Traits, _Alloc>::_S_empty_rep_storage[
-    (sizeof(_Rep) + sizeof(_CharT) + sizeof(size_type) - 1)/sizeof(size_type)];
+    basic_string<_CharT, _Traits, _Alloc>::_Rep::_S_empty_rep_storage[
+    (sizeof(_Rep_base) + sizeof(_CharT) + sizeof(size_type) - 1) /
+      sizeof(size_type)];
 
   // NB: This is the special case for Input Iterators, used in
   // istreambuf_iterators, etc.
@@ -78,7 +79,7 @@
 		   input_iterator_tag)
       {
 	if (__beg == __end && __a == _Alloc())
-	  return _S_empty_rep()._M_refcopy();
+	  return _S_empty_rep()._M_refdata();
 	// Avoid reallocation for common case.
 	_CharT __buf[100];
 	size_type __i = 0;
@@ -138,7 +139,7 @@
 		   forward_iterator_tag)
       {
 	if (__beg == __end && __a == _Alloc())
-	  return _S_empty_rep()._M_refcopy();
+	  return _S_empty_rep()._M_refdata();
 
 	// NB: Not required, but considered best practice.
 	if (__builtin_expect(__beg == _InIterator(), 0))
@@ -167,7 +168,7 @@
     _S_construct(size_type __n, _CharT __c, const _Alloc& __a)
     {
       if (__n == 0 && __a == _Alloc())
-	return _S_empty_rep()._M_refcopy();
+	return _S_empty_rep()._M_refdata();
 
       // Check for out_of_range and length_error exceptions.
       _Rep* __r = _Rep::_S_create(__n, __a);
@@ -242,7 +243,8 @@
 
   template<typename _CharT, typename _Traits, typename _Alloc>
     basic_string<_CharT, _Traits, _Alloc>&
-    basic_string<_CharT, _Traits, _Alloc>::assign(const basic_string& __str)
+    basic_string<_CharT, _Traits, _Alloc>::
+    assign(const basic_string& __str)
     {
       if (_M_rep() != __str._M_rep())
 	{
@@ -371,7 +373,10 @@
     basic_string<_CharT, _Traits, _Alloc>::_Rep::
     _M_destroy(const _Alloc& __a) throw ()
     {
-      const size_type __size = sizeof(_Rep) + (_M_capacity + 1) * sizeof(_CharT);
+      if (this == &_S_empty_rep())
+        return;
+      const size_type __size = sizeof(_Rep_base) +
+	                       (this->_M_capacity + 1) * sizeof(_CharT);
       _Raw_bytes_alloc(__a).deallocate(reinterpret_cast<char*>(this), __size);
     }
 
@@ -379,6 +384,8 @@
     void
     basic_string<_CharT, _Traits, _Alloc>::_M_leak_hard()
     {
+      if (_M_rep() == &_S_empty_rep())
+        return;
       if (_M_rep()->_M_is_shared()) 
 	_M_mutate(0, 0, 0);
       _M_rep()->_M_set_leaked();
@@ -400,7 +407,8 @@
       const _CharT*        __src = _M_data()  + __pos + __len1;
       const size_type __how_much = __old_size - __pos - __len1;
       
-      if (_M_rep()->_M_is_shared() || __new_size > capacity())
+      if (_M_rep() == &_S_empty_rep()
+	  || _M_rep()->_M_is_shared() || __new_size > capacity())
 	{
 	  // Must reallocate.
 	  allocator_type __a = get_allocator();
@@ -434,7 +442,7 @@
 	    }
 	  _M_rep()->_M_dispose(__a);
 	  _M_data(__r->_M_refdata());
-      }
+	}
       else if (__how_much && __len1 != __len2)
 	{
 	  // Work in-place
@@ -443,7 +451,7 @@
       _M_rep()->_M_set_sharable();
       _M_rep()->_M_length = __new_size;
       _M_data()[__new_size] = _Rep::_S_terminal; // grrr. (per 21.3.4)
-    // You cannot leave those LWG people alone for a second.
+      // You cannot leave those LWG people alone for a second.
     }
   
   template<typename _CharT, typename _Traits, typename _Alloc>
@@ -505,7 +513,7 @@
       // NB: Need an array of char_type[__capacity], plus a
       // terminating null char_type() element, plus enough for the
       // _Rep data structure. Whew. Seemingly so needy, yet so elemental.
-      size_t __size = (__capacity + 1) * sizeof(_CharT) + sizeof(_Rep);
+      size_t __size = (__capacity + 1) * sizeof(_CharT) + sizeof(_Rep_base);
 
       // The standard places no restriction on allocating more memory
       // than is strictly needed within this layer at the moment or as
@@ -538,7 +546,7 @@
 	    (__pagesize - ((__size + __malloc_header_size) % __pagesize))
 	    % __pagesize;
 	  __capacity += __extra / sizeof(_CharT);
-	  __size = (__capacity + 1) * sizeof(_CharT) + sizeof(_Rep);
+	  __size = (__capacity + 1) * sizeof(_CharT) + sizeof(_Rep_base);
 	}
       else if (__size > __subpagesize)
 	{
@@ -546,7 +554,7 @@
 	    (__subpagesize - ((__size + __malloc_header_size) % __subpagesize))
 	    % __subpagesize;
 	  __capacity += __extra / sizeof(_CharT);
-	  __size = (__capacity + 1) * sizeof(_CharT) + sizeof(_Rep);
+	  __size = (__capacity + 1) * sizeof(_CharT) + sizeof(_Rep_base);
 	}
 
       // NB: Might throw, but no worries about a leak, mate: _Rep()
@@ -565,33 +573,37 @@
     _M_clone(const _Alloc& __alloc, size_type __res)
     {
       // Requested capacity of the clone.
-      const size_type __requested_cap = _M_length + __res;
+      const size_type __requested_cap = this->_M_length + __res;
       // See above (_S_create) for the meaning and value of these constants.
       const size_type __pagesize = 4096;
       const size_type __malloc_header_size = 4 * sizeof (void*);
       // The biggest string which fits in a memory page.
       const size_type __page_capacity =
-        (__pagesize - __malloc_header_size - sizeof(_Rep) - sizeof(_CharT))
+        (__pagesize - __malloc_header_size - sizeof(_Rep_base) - sizeof(_CharT))
         / sizeof(_CharT);
       _Rep* __r;
-      if (__requested_cap > _M_capacity && __requested_cap > __page_capacity)
+      if (__requested_cap > this->_M_capacity
+	  && __requested_cap > __page_capacity)
         // Growing exponentially.
-        __r = _Rep::_S_create(__requested_cap > 2*_M_capacity ?
-                              __requested_cap : 2*_M_capacity, __alloc);
+        __r = _Rep::_S_create(__requested_cap > 2*this->_M_capacity ?
+                              __requested_cap : 2*this->_M_capacity, __alloc);
       else
         __r = _Rep::_S_create(__requested_cap, __alloc);
       
-      if (_M_length)
+      if (this->_M_length)
 	{
 	  try 
-	    { traits_type::copy(__r->_M_refdata(), _M_refdata(), _M_length); }
+	    {
+	      traits_type::copy(__r->_M_refdata(), _M_refdata(),
+				this->_M_length);
+	    }
 	  catch(...)  
 	    { 
 	      __r->_M_destroy(__alloc); 
 	      __throw_exception_again;
 	    }
 	}
-      __r->_M_length = _M_length;
+      __r->_M_length = this->_M_length;
       return __r->_M_refdata();
     }
   

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]