This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.



Re: std::unordered_map and the % operator


I took some time to run a benchmark of using libdivide (or an equivalent technique) in the std unordered containers.

To do so I had to patch the _Hashtable type so that I could inject into the implementation a libdivide-based denominator used for the modulus computation. The denominator type you will find in the patch can be constructed from and assigned a std::size_t, and is also convertible to std::size_t, so nothing changes for the _Hashtable type.
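
For readers who don't know libdivide: it trades a hardware division for a one-time precomputation followed by multiplications and shifts. A minimal sketch of the idea, assuming only that the single-header libdivide.h is on the include path (the names here are illustrative, not part of the patch):

#include <cstdint>
#include <cstdio>
#include "libdivide.h"

int main()
{
  // The bucket count is the divisor; it changes rarely, so we can
  // afford to precompute the multiply/shift constants once.
  const std::uint64_t bucket_count = 1543;
  libdivide::divider<std::uint64_t> fast(bucket_count);

  std::uint64_t hash_code = 0x9e3779b97f4a7c15ull;
  std::uint64_t q = hash_code / fast;              // no hardware div here
  std::uint64_t r = hash_code - q * bucket_count;  // the bucket index

  std::printf("%llu %% %llu = %llu\n",
              (unsigned long long)hash_code,
              (unsigned long long)bucket_count,
              (unsigned long long)r);
  return 0;
}

The denominator type attached below wraps this same divide-then-subtract pattern behind operator%.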

The results are indeed interesting. In main.cc you will find the code I ran to produce the following figures:

main.cc set<int> libdivide 50000000 insertions 594r 451u 142s -2102431760mem 0pf
main.cc std::unordered_set<int> 50000000 insertions 619r 565u 53s -2102431728mem 0pf

The first number is the real time, which is better; it is the sum of the second number, the user time, which is much better, and of the third, the system time, which is worse. I guess the modulo operation was done in user time and the libdivide operation is done in system time. I hope Ryan will be able to elaborate on it; I will help him reproduce it on his side.

I discovered that this technique is already used in gcc, within the compiler's own hash table. In src/gcc/hash-table.c there is a list of prime numbers, each associated with the constants needed to compute the modulo quickly. If we were to adopt this technique we could do the same and pre-compute what we need.
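
Not the exact scheme hash-table.c uses, but as an illustration of the same "one precomputed constant per prime" idea (all names below are made up), here is a sketch where n % prime needs only multiplications:

#include <cstdint>
#include <cstdio>

// Hypothetical table entry: each prime is stored together with a
// precomputed constant, so the modulo at lookup time needs no division.
struct prime_ent
{
  std::uint32_t prime;
  std::uint64_t magic;   // ceil(2^64 / prime), computed once
};

static prime_ent
make_entry(std::uint32_t p)
{ return { p, UINT64_MAX / p + 1 }; }

// n % e.prime for any 32-bit n, via two multiplications
// (uses the __int128 extension available in GCC and Clang).
static std::uint32_t
fast_mod(std::uint32_t n, const prime_ent& e)
{
  std::uint64_t lowbits = e.magic * n;
  return (std::uint32_t)(((unsigned __int128)lowbits * e.prime) >> 64);
}

int main()
{
  prime_ent e = make_entry(1543);
  std::printf("%u %u\n", fast_mod(123456789u, e), 123456789u % 1543u);
  return 0;
}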

François


On 16/03/2014 20:40, Jonathan Wakely wrote:
> On 16 March 2014 19:31, Ryan Lewis wrote:
>> I'm not sure I see why a local_iterator would need to store the extra
>> struct, but I'll take your word for it.
> Because the local_iterator contains the hash function. If you want to
> add data to the hash function, you increase its size, and the size of
> anything that contains it.
>
> You can easily verify that by adding a member to the
> _Mod_range_hashing struct, which is the one that would need to be
> replaced to avoid the modulus operation, and checking
> sizeof(unordered_map<int,int>::local_iterator).
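
For what it's worth, a minimal way to observe the size effect Jonathan describes, assuming a libstdc++ where the local iterator must embed the hashing functors when hash codes are not cached (the padded_hash name below is made up for the test):

#include <cstdio>
#include <functional>
#include <unordered_map>

// Hypothetical stateful hasher: hashes like std::hash<int>, but carries
// a payload so the functor is no longer an empty class.
struct padded_hash
{
  std::size_t payload[4] = {};
  std::size_t operator()(int x) const noexcept { return std::hash<int>()(x); }
};

int main()
{
  using plain  = std::unordered_map<int, int>;
  using padded = std::unordered_map<int, int, padded_hash>;

  std::printf("local_iterator, std::hash<int>: %zu bytes\n",
              sizeof(plain::local_iterator));
  std::printf("local_iterator, padded_hash:    %zu bytes\n",
              sizeof(padded::local_iterator));
  return 0;
}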


Index: include/bits/hashtable_policy.h
===================================================================
--- include/bits/hashtable_policy.h	(revision 208775)
+++ include/bits/hashtable_policy.h	(working copy)
@@ -1071,6 +1071,7 @@
     protected:
       typedef void* 					__hash_code;
       typedef _Hash_node<_Value, false>			__node_type;
+      using __bkt_count_t = typename _Hash::second_argument_type;
 
       // We need the default constructor for the local iterators.
       _Hash_code_base() = default;
@@ -1084,13 +1085,13 @@
       { return 0; }
 
       std::size_t
-      _M_bucket_index(const _Key& __k, __hash_code, std::size_t __n) const
+      _M_bucket_index(const _Key& __k, __hash_code, __bkt_count_t __n) const
       { return _M_ranged_hash()(__k, __n); }
 
       std::size_t
-      _M_bucket_index(const __node_type* __p, std::size_t __n) const
+      _M_bucket_index(const __node_type* __p, __bkt_count_t __n) const
 	noexcept( noexcept(declval<const _Hash&>()(declval<const _Key&>(),
-						   (std::size_t)0)) )
+						   declval<__bkt_count_t>())) )
       { return _M_ranged_hash()(_M_extract()(__p->_M_v()), __n); }
 
       void
@@ -1161,6 +1162,7 @@
     protected:
       typedef std::size_t 				__hash_code;
       typedef _Hash_node<_Value, false>			__node_type;
+      using __bkt_count_t = typename _H2::second_argument_type;
 
       // We need the default constructor for the local iterators.
       _Hash_code_base() = default;
@@ -1175,14 +1177,14 @@
       { return _M_h1()(__k); }
 
       std::size_t
-      _M_bucket_index(const _Key&, __hash_code __c, std::size_t __n) const
+      _M_bucket_index(const _Key&, __hash_code __c, __bkt_count_t __n) const
       { return _M_h2()(__c, __n); }
 
       std::size_t
-      _M_bucket_index(const __node_type* __p, std::size_t __n) const
+      _M_bucket_index(const __node_type* __p, __bkt_count_t __n) const
 	noexcept( noexcept(declval<const _H1&>()(declval<const _Key&>()))
 		  && noexcept(declval<const _H2&>()((__hash_code)0,
-						    (std::size_t)0)) )
+						    declval<__bkt_count_t>())) )
       { return _M_h2()(_M_h1()(_M_extract()(__p->_M_v())), __n); }
 
       void
@@ -1239,6 +1241,7 @@
       using __ebo_extract_key = _Hashtable_ebo_helper<0, _ExtractKey>;
       using __ebo_h1 = _Hashtable_ebo_helper<1, _H1>;
       using __ebo_h2 = _Hashtable_ebo_helper<2, _H2>;
+      using __bkt_count_t = typename _H2::second_argument_type;
 
     public:
       typedef _H1 					hasher;
@@ -1261,14 +1264,13 @@
       { return _M_h1()(__k); }
 
       std::size_t
-      _M_bucket_index(const _Key&, __hash_code __c,
-		      std::size_t __n) const
+      _M_bucket_index(const _Key&, __hash_code __c, __bkt_count_t __n) const
       { return _M_h2()(__c, __n); }
 
       std::size_t
-      _M_bucket_index(const __node_type* __p, std::size_t __n) const
+      _M_bucket_index(const __node_type* __p, __bkt_count_t __n) const
 	noexcept( noexcept(declval<const _H2&>()((__hash_code)0,
-						 (std::size_t)0)) )
+						 declval<__bkt_count_t>())) )
       { return _M_h2()(__p->_M_hash_code, __n); }
 
       void
@@ -1349,11 +1351,12 @@
       using __base_type = _Hashtable_ebo_helper<0, _H2>;
       using __hash_code_base = _Hash_code_base<_Key, _Value, _ExtractKey,
 					       _H1, _H2, _Hash, true>;
+      using __bkt_count_t = typename _H2::second_argument_type;
 
       _Local_iterator_base() = default;
       _Local_iterator_base(const __hash_code_base& __base,
 			   _Hash_node<_Value, true>* __p,
-			   std::size_t __bkt, std::size_t __bkt_count)
+			   std::size_t __bkt, __bkt_count_t __bkt_count)
       : __base_type(__base._M_h2()),
 	_M_cur(__p), _M_bucket(__bkt), _M_bucket_count(__bkt_count) { }
 
@@ -1373,7 +1376,7 @@
 
       _Hash_node<_Value, true>*  _M_cur;
       std::size_t _M_bucket;
-      std::size_t _M_bucket_count;
+      __bkt_count_t _M_bucket_count;
 
     public:
       const void*
@@ -1430,12 +1433,13 @@
     protected:
       using __hash_code_base = _Hash_code_base<_Key, _Value, _ExtractKey,
 					       _H1, _H2, _Hash, false>;
+      using __bkt_count_t = typename __hash_code_base::__bkt_count_t;
 
       _Local_iterator_base() : _M_bucket_count(-1) { }
 
       _Local_iterator_base(const __hash_code_base& __base,
 			   _Hash_node<_Value, false>* __p,
-			   std::size_t __bkt, std::size_t __bkt_count)
+			   std::size_t __bkt, __bkt_count_t __bkt_count)
       : _M_cur(__p), _M_bucket(__bkt), _M_bucket_count(__bkt_count)
       { _M_init(__base); }
 
@@ -1481,7 +1485,7 @@
 
       _Hash_node<_Value, false>*  _M_cur;
       std::size_t _M_bucket;
-      std::size_t _M_bucket_count;
+      __bkt_count_t _M_bucket_count;
 
       void
       _M_init(const __hash_code_base& __base)
@@ -1528,6 +1532,7 @@
       using __base_type = _Local_iterator_base<_Key, _Value, _ExtractKey,
 					       _H1, _H2, _Hash, __cache>;
       using __hash_code_base = typename __base_type::__hash_code_base;
+      using __bkt_count_t = typename _H2::second_argument_type;
     public:
       typedef _Value					value_type;
       typedef typename std::conditional<__constant_iterators,
@@ -1543,7 +1548,7 @@
 
       _Local_iterator(const __hash_code_base& __base,
 		      _Hash_node<_Value, __cache>* __p,
-		      std::size_t __bkt, std::size_t __bkt_count)
+		      std::size_t __bkt, __bkt_count_t __bkt_count)
 	: __base_type(__base, __p, __bkt, __bkt_count)
       { }
 
@@ -1583,6 +1588,7 @@
       using __base_type = _Local_iterator_base<_Key, _Value, _ExtractKey,
 					       _H1, _H2, _Hash, __cache>;
       using __hash_code_base = typename __base_type::__hash_code_base;
+      using __bkt_count_t = typename _H2::second_argument_type;
 
     public:
       typedef _Value					value_type;
@@ -1595,7 +1601,7 @@
 
       _Local_const_iterator(const __hash_code_base& __base,
 			    _Hash_node<_Value, __cache>* __p,
-			    std::size_t __bkt, std::size_t __bkt_count)
+			    std::size_t __bkt, __bkt_count_t __bkt_count)
 	: __base_type(__base, __p, __bkt, __bkt_count)
       { }
 
Index: include/bits/hashtable.h
===================================================================
--- include/bits/hashtable.h	(revision 208775)
+++ include/bits/hashtable.h	(working copy)
@@ -244,6 +244,8 @@
 					    _Equal, _H1, _H2, _Hash,
 					    _RehashPolicy, _Traits>;
 
+      using __bkt_count_t = typename __hash_code_base::__bkt_count_t;
+
       using __reuse_or_alloc_node_type =
 	__detail::_ReuseOrAllocNode<__node_alloc_type>;
 
@@ -265,7 +267,7 @@
       // in methods (erase, swap...) that shall not throw.
       static_assert(noexcept(declval<const __hash_code_base_access&>()
 			     ._M_bucket_index((const __node_type*)nullptr,
-					      (std::size_t)0)),
+					      declval<__bkt_count_t>())),
 		    "Cache the hash code or qualify your functors involved"
 		    " in hash code and bucket index computation with noexcept");
 
@@ -311,7 +313,7 @@
 
     private:
       __bucket_type*		_M_buckets;
-      size_type			_M_bucket_count;
+      __bkt_count_t		_M_bucket_count;
       __node_base		_M_before_begin;
       size_type			_M_element_count;
       _RehashPolicy		_M_rehash_policy;
@@ -861,7 +863,7 @@
 
 	// Reuse allocated buckets and nodes.
 	__bucket_type* __former_buckets = nullptr;
-	std::size_t __former_bucket_count = _M_bucket_count;
+	__bkt_count_t __former_bucket_count = _M_bucket_count;
 	const __rehash_state& __former_state = _M_rehash_policy._M_state();
 	
 	if (_M_bucket_count != __ht._M_bucket_count)
@@ -1013,7 +1015,7 @@
 	{
 	  // Can't move memory, move elements then.
 	  __bucket_type* __former_buckets = nullptr;
-	  size_type __former_bucket_count = _M_bucket_count;
+	  __bkt_count_t __former_bucket_count = _M_bucket_count;
 	  const __rehash_state& __former_state = _M_rehash_policy._M_state();
 
 	  if (_M_bucket_count != __ht._M_bucket_count)
@@ -1944,6 +1946,7 @@
 	       _H1, _H2, _Hash, _RehashPolicy, _Traits>::
     _M_rehash_aux(size_type __n, std::true_type)
     {
+      __bkt_count_t __bkt_count(__n);
       __bucket_type* __new_buckets = this->_M_allocate_buckets(__n);
       __node_type* __p = _M_begin();
       _M_before_begin._M_nxt = nullptr;
@@ -1951,7 +1954,8 @@
       while (__p)
 	{
 	  __node_type* __next = __p->_M_next();
-	  std::size_t __bkt = __hash_code_base::_M_bucket_index(__p, __n);
+	  std::size_t __bkt =
+	    __hash_code_base::_M_bucket_index(__p, __bkt_count);
 	  if (!__new_buckets[__bkt])
 	    {
 	      __p->_M_nxt = _M_before_begin._M_nxt;
@@ -1971,7 +1975,7 @@
 
       if (__builtin_expect(_M_bucket_count != 0, true))
 	_M_deallocate_buckets();
-      _M_bucket_count = __n;
+      _M_bucket_count = __bkt_count;
       _M_buckets = __new_buckets;
     }
 
@@ -1986,6 +1990,7 @@
 	       _H1, _H2, _Hash, _RehashPolicy, _Traits>::
     _M_rehash_aux(size_type __n, std::false_type)
     {
+      __bkt_count_t __bkt_count(__n);
       __bucket_type* __new_buckets = this->_M_allocate_buckets(__n);
 
       __node_type* __p = _M_begin();
@@ -1998,7 +2003,7 @@
       while (__p)
 	{
 	  __node_type* __next = __p->_M_next();
-	  std::size_t __bkt = __hash_code_base::_M_bucket_index(__p, __n);
+	  std::size_t __bkt = __hash_code_base::_M_bucket_index(__p, __bkt_count);
 
 	  if (__prev_p && __prev_bkt == __bkt)
 	    {
@@ -2025,7 +2030,7 @@
 		    {
 		      std::size_t __next_bkt
 			= __hash_code_base::_M_bucket_index(__prev_p->_M_next(),
-							    __n);
+							    __bkt_count);
 		      if (__next_bkt != __prev_bkt)
 			__new_buckets[__next_bkt] = __prev_p;
 		    }
@@ -2055,14 +2060,15 @@
       if (__check_bucket && __prev_p->_M_nxt)
 	{
 	  std::size_t __next_bkt
-	    = __hash_code_base::_M_bucket_index(__prev_p->_M_next(), __n);
+	    = __hash_code_base::_M_bucket_index(__prev_p->_M_next(),
+						__bkt_count);
 	  if (__next_bkt != __prev_bkt)
 	    __new_buckets[__next_bkt] = __prev_p;
 	}
 
       if (__builtin_expect(_M_bucket_count != 0, true))
 	_M_deallocate_buckets();
-      _M_bucket_count = __n;
+      _M_bucket_count = __bkt_count;
       _M_buckets = __new_buckets;
     }
 
#include <unordered_set>
#include "testsuite_performance.h"
#include "libdivide.h"

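// Drop-in replacement for the bucket count: behaves like a std::size_t
// for _Hashtable, but carries a libdivide::divider so that operator%
// below avoids a hardware division.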
struct denominator
{
  denominator() = default;
  denominator(std::size_t n)
    : _denom(n), _div(n)
  { }
  denominator(const denominator&) = default;

  operator std::size_t() const noexcept { return _denom; }

  denominator&
  operator=(std::size_t n)
  {
    _denom = n;
    _div = libdivide::divider<std::size_t>(n);
    return *this;
  }

  denominator&
  operator=(const denominator&) = default;

  std::size_t _denom;
  libdivide::divider<std::size_t> _div;
};

std::size_t
operator/(std::size_t nbr, const denominator& denom)
{ return nbr / denom._div; }

std::size_t
operator%(std::size_t nbr, const denominator& denom)
{
  std::size_t q = nbr / denom._div;
  return nbr - q * denom._denom;
}

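// Range-hashing functor used as H2: maps a hash code into
// [0, bucket count) with the libdivide-backed operator% above
// instead of a plain '%'.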
struct libdivide_range_hashing
{
  typedef std::size_t first_argument_type;
  typedef denominator second_argument_type;
  typedef std::size_t result_type;

  result_type
  operator()(first_argument_type num,
	     const second_argument_type& denom) const noexcept
  { return num % denom; }
};

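// __uset: std::unordered_set's underlying _Hashtable, instantiated with
// libdivide_range_hashing in place of the default __detail::_Mod_range_hashing.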
using namespace std;

template<typename _Value,
         typename _Hash = hash<_Value>,
         typename _Pred = std::equal_to<_Value>,
         typename _Alloc = std::allocator<_Value>,
         typename _Tr = __uset_traits<__cache_default<_Value, _Hash>::value>>
  using __uset = _Hashtable<_Value, _Value, _Alloc,
                            __detail::_Identity, _Pred, _Hash,
                            libdivide_range_hashing,
                            __detail::_Default_ranged_hash,
                            __detail::_Prime_rehash_policy, _Tr>;

template<typename _Cont>
  void test(const char* desc)
  {
    using namespace __gnu_test;
    const int sz = 50000000;

    time_counter time;
    resource_counter resource;

    _Cont c;

    start_counters(time, resource);

    for (int i = 0; i != sz; ++i)
      c.insert(i);

    stop_counters(time, resource);
  
    ostringstream ostr;
    ostr << desc << ' ' << c.size() << " insertions";
    report_performance(__FILE__, ostr.str().c_str(), time, resource);
  }

int
main()
{
  test<__uset<int>>("set<int> libdivide");
  test<std::unordered_set<int>>("std::unordered_set<int>");
  return 0;
}
