This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch (preview)] libstdc++/24469


Hi,

the patch below is what I'm currently testing: it seems OK from the correctness point of view on x86/x86_64/ia64. Likewise, the performance testsuite seems OK: the numbers are very, very close to the current ones on an x86 machine that I can 100% control, with more variance on some multiway x86_64 machines, but still OK, I think. I'm attaching the outputs and, in any case, I will double check everything.

Anyway, the basic idea is due to Richard (and as such will be properly acknowledged ;-) ): use two different _M_used counters, one of which is not involved in concurrent accesses, and merge the two numbers at deallocation time.

My "interpretation" of it boils down to *extending* the memory pointed to by the very same _M_used instead, to avoid changing the class layout (see _M_initialize). Then, any thread can increase its own normal counter stored in the first half of _M_used (no atomics needed); every thread can also decrease its own _M_used counter, *but* if a thread wants to decrease the counter of another thread, it can't do that directly: it has to *increase* instead the corresponding atomic counter stored in the second half of _M_used (I call this second part of _M_used the reclaimed part). Eventually — that is, at the beginning of _M_reclaim_block — all the info above is merged and updated.

Note that _M_reserve_block - where we are only increasing the counters in the first half of _M_used, exactly as we are currently doing - is completely unchanged; this is important for binary compatibility because those operations are inlined (_M_adjust_freelist).

I think everything is fine... Otherwise, please stop me before it's too late! :)

Paolo.

///////////////////
Index: include/ext/mt_allocator.h
===================================================================
--- include/ext/mt_allocator.h	(revision 116606)
+++ include/ext/mt_allocator.h	(working copy)
@@ -298,8 +298,14 @@
 
 	// An "array" of counters used to keep track of the amount of
 	// blocks that are on the freelist/used for each thread id.
-	// Memory to these "arrays" is allocated in _S_initialize() for
-	// _S_max_threads + global pool 0.
+	// - Note that the second half of the allocated _M_used "array"
+	//   actually hosts (atomic) counters of reclaimed blocks:  at
+	//   deallocation time (in _M_reclaim_block) those numbers are
+	//   subtracted from the first ones to obtain the actual size
+	//   of the "working set" of the given thread, used by the
+	//   removal heuristics.
+	// - Memory to these "arrays" is allocated in _S_initialize()
+	//   for _S_max_threads + global pool 0.
 	size_t*				_M_free;
 	size_t*			        _M_used;
 	
Index: src/mt_allocator.cc
===================================================================
--- src/mt_allocator.cc	(revision 116604)
+++ src/mt_allocator.cc	(working copy)
@@ -34,6 +34,7 @@
 #include <bits/c++config.h>
 #include <bits/concurrence.h>
 #include <ext/mt_allocator.h>
+#include <cstring>
 
 namespace
 {
@@ -263,17 +264,35 @@
 	// number of records is "high enough".
 	const size_t __thread_id = _M_get_thread_id();
 	const _Tune& __options = _M_get_options();	
-	const unsigned long __limit = 100 * (_M_bin_size - __which)
-		                      * __options._M_freelist_headroom;
+	const size_t __limit = (100 * (_M_bin_size - __which)
+				* __options._M_freelist_headroom);
 
-	unsigned long __remove = __bin._M_free[__thread_id];
+	size_t __remove = __bin._M_free[__thread_id];
 	__remove *= __options._M_freelist_headroom;
-	if (__remove >= __bin._M_used[__thread_id])
-	  __remove -= __bin._M_used[__thread_id];
+
+	// NB: We assume that reads of _Atomic_words are atomic.
+	const size_t __max_threads = __options._M_max_threads + 1;
+	_Atomic_word* const __reclaimed_base =
+	  reinterpret_cast<_Atomic_word*>(__bin._M_used + __max_threads);
+	const _Atomic_word __reclaimed = __reclaimed_base[__thread_id];
+
+	// NB: We delay the actual update of _M_used[__thread_id] to
+	// the body of the conditional below, to spare an atomic. 
+	// Note that when we actually enter it, __reclaimed is <= the
+	// actual current __reclaimed_base[__thread_id], thus safe,
+	// because, between the read above and the atomic, the other
+	// threads can only have increased the latter (in the else 30
+	// lines below).
+	const size_t __used = __bin._M_used[__thread_id] - __reclaimed;
+	if (__remove >= __used)
+	  __remove -= __used;
 	else
 	  __remove = 0;
 	if (__remove > __limit && __remove > __bin._M_free[__thread_id])
 	  {
+	    __bin._M_used[__thread_id] -= __reclaimed;
+	    __atomic_add(&__reclaimed_base[__thread_id], -__reclaimed);
+
 	    _Block_record* __first = __bin._M_first[__thread_id];
 	    _Block_record* __tmp = __first;
 	    __remove /= __options._M_freelist_headroom;
@@ -292,8 +311,11 @@
 
 	// Return this block to our list and update counters and
 	// owner id as needed.
-	--__bin._M_used[__block->_M_thread_id];
-	
+	if (__block->_M_thread_id == __thread_id)
+	  --__bin._M_used[__thread_id];
+	else
+	  __atomic_add(&__reclaimed_base[__block->_M_thread_id], 1);
+
 	__block->_M_next = __bin._M_first[__thread_id];
 	__bin._M_first[__thread_id] = __block;
 	
@@ -533,14 +555,19 @@
 	  {
 	    _Bin_record& __bin = _M_bin[__n];
 	    __v = ::operator new(sizeof(_Block_record*) * __max_threads);
+	    std::memset(__v, 0, sizeof(_Block_record*) * __max_threads);    
 	    __bin._M_first = static_cast<_Block_record**>(__v);
 
 	    __bin._M_address = NULL;
 
 	    __v = ::operator new(sizeof(size_t) * __max_threads);
+	    std::memset(__v, 0, sizeof(size_t) * __max_threads);	    	    
 	    __bin._M_free = static_cast<size_t*>(__v);
-	      
-	    __v = ::operator new(sizeof(size_t) * __max_threads);
+
+	    __v = ::operator new(sizeof(size_t) * __max_threads
+				 + sizeof(_Atomic_word) * __max_threads);
+	    std::memset(__v, 0, (sizeof(size_t) * __max_threads
+				 + sizeof(_Atomic_word) * __max_threads));
 	    __bin._M_used = static_cast<size_t*>(__v);
 	      
 	    __v = ::operator new(sizeof(__gthread_mutex_t));
@@ -555,12 +582,6 @@
 #else
 	    { __GTHREAD_MUTEX_INIT_FUNCTION(__bin._M_mutex); }
 #endif
-	    for (size_t __threadn = 0; __threadn < __max_threads; ++__threadn)
-	      {
-		__bin._M_first[__threadn] = NULL;
-		__bin._M_free[__threadn] = 0;
-		__bin._M_used[__threadn] = 0;
-	      }
 	  }
       }
     else
@@ -729,16 +750,21 @@
 	  {
 	    _Bin_record& __bin = _M_bin[__n];
 	    __v = ::operator new(sizeof(_Block_record*) * __max_threads);
+	    std::memset(__v, 0, sizeof(_Block_record*) * __max_threads);
 	    __bin._M_first = static_cast<_Block_record**>(__v);
 
 	    __bin._M_address = NULL;
 
 	    __v = ::operator new(sizeof(size_t) * __max_threads);
+	    std::memset(__v, 0, sizeof(size_t) * __max_threads);
 	    __bin._M_free = static_cast<size_t*>(__v);
 	      
-	    __v = ::operator new(sizeof(size_t) * __max_threads);
+	    __v = ::operator new(sizeof(size_t) * __max_threads + 
+				 sizeof(_Atomic_word) * __max_threads);
+	    std::memset(__v, 0, (sizeof(size_t) * __max_threads
+				 + sizeof(_Atomic_word) * __max_threads));
 	    __bin._M_used = static_cast<size_t*>(__v);
-	      
+
 	    __v = ::operator new(sizeof(__gthread_mutex_t));
 	    __bin._M_mutex = static_cast<__gthread_mutex_t*>(__v);
 	      
@@ -751,12 +777,6 @@
 #else
 	    { __GTHREAD_MUTEX_INIT_FUNCTION(__bin._M_mutex); }
 #endif
-	    for (size_t __threadn = 0; __threadn < __max_threads; ++__threadn)
-	      {
-		__bin._M_first[__threadn] = NULL;
-		__bin._M_free[__threadn] = 0;
-		__bin._M_used[__threadn] = 0;
-	      }
 	  }
       }
     else

Attachment: libstdc++-performance.sum-baseline-x86.bz2
Description: Unix tar archive

Attachment: libstdc++-performance.sum-patched-x86.bz2
Description: Unix tar archive

Attachment: libstdc++-performance.sum-baseline-x86_64.bz2
Description: Unix tar archive

Attachment: libstdc++-performance.sum-patched-x86_64.bz2
Description: Unix tar archive


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]