This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: tr1::hashtable::operator[]


Paolo Carlini wrote:

Joe Buck wrote:

There is wasted work because of the two lookups, but perhaps there's
a way to further improve the situation by exposing the common part
of find() and insert(), namely the hash computation.  If there were
a way to do the find(), and somehow reuse the hash computed by the
find() to do the insert() more quickly, then I think Peter's approach
would be an even bigger win.

Definitely. That is exactly my point.

... and the below is a preview of my implementation of your (Joe's and Peter's) very helpful suggestions. As you can see, in a nutshell the idea is implementing insert with hint and calling it from operator[] (as per our std::map). In this case the hint is "special", we are not passing a regular iterator, because we have to convey somehow the info that the element is not present yet and in which bucket it will be inserted (assuming no rehashing occurs).


As-is the patch (*) already passes regtesting and a few additional simple tests, it would be nice if Peter could benchmark it, while I refine it to its final (hoepfully cleaner) shape...

Thanks,
Paolo.

(*) It includes trivial clean-ups, otherwise would be much smaller.

//////////////////
Index: include/tr1/hashtable
===================================================================
--- include/tr1/hashtable	(revision 113737)
+++ include/tr1/hashtable	(working copy)
@@ -680,8 +680,9 @@
       operator[](const K& k)
       {
 	Hashtable* h = static_cast<Hashtable*>(this);
-	typename Hashtable::iterator it = 
-	  h->insert(std::make_pair(k, mapped_type())).first;
+	typename Hashtable::iterator it = h->m_find(k);
+	if (!it.m_cur_node)
+	  it = h->insert(it, std::make_pair(k, mapped_type()));
 	return it->second;
       }
     };
@@ -1032,6 +1033,9 @@
 						 cache_hash_code>
                                                           const_iterator;
 
+      template<typename K, typename Pair, typename Hashtable>
+        friend struct Internal::map_base;
+
     private:
       typedef Internal::hash_node<Value, cache_hash_code> node;
       typedef typename Allocator::template rebind<node>::other
@@ -1188,10 +1192,18 @@
 
     public:				// lookup
       iterator
-      find(const key_type&);
+      find(const key_type& k)
+      {
+	iterator i = m_find(k);
+	return i.m_cur_node ? i : this->end();
+      }
 
       const_iterator
-      find(const key_type& k) const;
+      find(const key_type& k) const
+      {
+	const_iterator i = const_iterator(m_find(k));
+	return i.m_cur_node ? i : this->end();
+      }
 
       size_type
       count(const key_type& k) const;
@@ -1202,7 +1214,7 @@
       std::pair<const_iterator, const_iterator>
       equal_range(const key_type& k) const;
 
-    private:			// Insert and erase helper functions
+    private:			// Find, insert and erase helper functions
       // ??? This dispatching is a workaround for the fact that we don't
       // have partial specialization of member templates; it would be
       // better to just specialize insert on unique_keys.  There may be a
@@ -1217,34 +1229,46 @@
                                    >::type
         Insert_Conv_Type;
 
+      iterator
+      m_find(const key_type& k) const;
+
       node*
-      find_node(node* p, const key_type& k,
-		typename hashtable::hash_code_t c) const;
+      m_find_node(node* p, const key_type& k,
+		  typename hashtable::hash_code_t c) const;
 
       std::pair<iterator, bool>
-      insert(const value_type&, std::tr1::true_type);
+      m_insert(const_iterator, const value_type&, std::tr1::true_type);
   
       iterator
-      insert(const value_type&, std::tr1::false_type);
+      m_insert(const_iterator, const value_type&, std::tr1::false_type);
 
       void
-      erase_node(node*, node**);
+      m_erase_node(node*, node**);
 
     public:				// Insert and erase
       Insert_Return_Type
       insert(const value_type& v) 
-      { 
-	return this->insert(v, std::tr1::integral_constant<bool,
-			    unique_keys>());
+      {
+	return m_insert(this->end(), v, std::tr1::integral_constant<bool,
+			unique_keys>());
       }
 
       iterator
-      insert(iterator, const value_type& v)
-      { return iterator(Insert_Conv_Type()(this->insert(v))); }
-      
+      insert(iterator it, const value_type& v)
+      {
+	return iterator(Insert_Conv_Type()
+			(m_insert(it, v, std::tr1::integral_constant<bool,
+				  unique_keys>())));
+      }
+
       const_iterator
-      insert(const_iterator, const value_type& v)
-      { return const_iterator(Insert_Conv_Type()(this->insert(v))); }
+      insert(const_iterator it, const value_type& v)
+      { 
+	return
+	  const_iterator(Insert_Conv_Type()
+			 (m_insert(it, v, std::tr1::integral_constant<bool,
+				   unique_keys>())));
+      }
 
       template<typename InIter>
         void
@@ -1531,32 +1555,18 @@
 	   bool c, bool ci, bool u>
     typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    find(const key_type& k)
+    m_find(const key_type& k) const
     {
       typename hashtable::hash_code_t code = this->m_hash_code(k);
       std::size_t n = this->bucket_index(k, code, this->bucket_count());
-      node* p = find_node(m_buckets[n], k, code);
-      return p ? iterator(p, m_buckets + n) : this->end();
+      node* p = m_find_node(m_buckets[n], k, code);
+      return iterator(p, m_buckets + n);
     }
-  
+   
   template<typename K, typename V, 
 	   typename A, typename Ex, typename Eq,
 	   typename H1, typename H2, typename H, typename RP,
 	   bool c, bool ci, bool u>
-    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::const_iterator
-    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    find(const key_type& k) const
-    {
-      typename hashtable::hash_code_t code = this->m_hash_code(k);
-      std::size_t n = this->bucket_index(k, code, this->bucket_count());
-      node* p = find_node(m_buckets[n], k, code);
-      return p ? const_iterator(p, m_buckets + n) : this->end();
-    }
-  
-  template<typename K, typename V, 
-	   typename A, typename Ex, typename Eq,
-	   typename H1, typename H2, typename H, typename RP,
-	   bool c, bool ci, bool u>
     typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::size_type
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
     count(const key_type& k) const
@@ -1584,7 +1594,7 @@
       typename hashtable::hash_code_t code = this->m_hash_code(k);
       std::size_t n = this->bucket_index(k, code, this->bucket_count());
       node** head = m_buckets + n;
-      node* p = find_node(*head, k, code);
+      node* p = m_find_node(*head, k, code);
       
       if (p)
 	{
@@ -1617,7 +1627,7 @@
       typename hashtable::hash_code_t code = this->m_hash_code(k);
       std::size_t n = this->bucket_index(k, code, this->bucket_count());
       node** head = m_buckets + n;
-      node* p = find_node(*head, k, code);
+      node* p = m_find_node(*head, k, code);
 
       if (p)
 	{
@@ -1644,8 +1654,8 @@
 	   bool c, bool ci, bool u>
     typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::node* 
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    find_node(node* p, const key_type& k,
-	      typename hashtable::hash_code_t code) const
+    m_find_node(node* p, const key_type& k,
+		typename hashtable::hash_code_t code) const
     {
       for (; p; p = p->m_next)
 	if (this->compare(k, code, p))
@@ -1661,15 +1671,27 @@
     std::pair<typename hashtable<K, V, A, Ex, Eq, H1,
 				 H2, H, RP, c, ci, u>::iterator, bool>
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    insert(const value_type& v, std::tr1::true_type)
+    m_insert(const_iterator it, const value_type& v, std::tr1::true_type)
     {
       const key_type& k = this->m_extract(v);
       typename hashtable::hash_code_t code = this->m_hash_code(k);
-      std::size_t n = this->bucket_index(k, code, m_bucket_count);
-      
-      if (node* p = find_node(m_buckets[n], k, code))
-	return std::make_pair(iterator(p, m_buckets + n), false);
 
+      size_type n;
+
+      if (!it.m_cur_node)
+	n = it.m_cur_bucket - m_buckets;
+      else
+	{
+	  if (it != this->end() && this->compare(k, code, it.m_cur_node))
+	    return std::make_pair(iterator(it.m_cur_node, it.m_cur_bucket),
+				  false);
+
+	  n = this->bucket_index(k, code, m_bucket_count);
+
+	  if (node* p = m_find_node(m_buckets[n], k, code))
+	    return std::make_pair(iterator(p, m_buckets + n), false);
+	}
+
       std::pair<bool, std::size_t> do_rehash
 	= m_rehash_policy.need_rehash(m_bucket_count, m_element_count, 1);
 
@@ -1705,19 +1727,33 @@
 	   bool c, bool ci, bool u>
     typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    insert(const value_type& v, std::tr1::false_type)
+    m_insert(const_iterator it, const value_type& v, std::tr1::false_type)
     {
       std::pair<bool, std::size_t> do_rehash
 	= m_rehash_policy.need_rehash(m_bucket_count, m_element_count, 1);
       if (do_rehash.first)
 	m_rehash(do_rehash.second);
-
+ 
       const key_type& k = this->m_extract(v);
       typename hashtable::hash_code_t code = this->m_hash_code(k);
-      std::size_t n = this->bucket_index(k, code, m_bucket_count);
-      
+ 
+      size_type n;
+      node* prev;
+
+      if (!do_rehash.first
+	  && it != this->end() && this->compare(k, code, it.m_cur_node))
+	{
+	  n = it.m_cur_bucket - m_buckets;
+	  prev = it.m_cur_node;
+	}
+      else
+	{
+	  n = this->bucket_index(k, code, m_bucket_count);
+	  prev = m_find_node(m_buckets[n], k, code);
+	}
+
       node* new_node = m_allocate_node(v);
-      node* prev = find_node(m_buckets[n], k, code);
+
       if (prev)
 	{
 	  new_node->m_next = prev->m_next;
@@ -1741,7 +1777,7 @@
 	   bool c, bool ci, bool u>
     void
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    erase_node(node* p, node** b)
+    m_erase_node(node* p, node** b)
     {
       node* cur = *b;
       if (cur == p)
@@ -1786,25 +1822,25 @@
 	   bool c, bool ci, bool u>
     typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    erase(iterator i)
+    erase(iterator it)
     {
-      iterator result = i;
+      iterator result = it;
       ++result;
-      erase_node(i.m_cur_node, i.m_cur_bucket);
+      m_erase_node(it.m_cur_node, it.m_cur_bucket);
       return result;
     }
-  
+
   template<typename K, typename V, 
 	   typename A, typename Ex, typename Eq,
 	   typename H1, typename H2, typename H, typename RP,
 	   bool c, bool ci, bool u>
     typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::const_iterator
     hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
-    erase(const_iterator i)
+    erase(const_iterator it)
     {
-      const_iterator result = i;
+      const_iterator result = it;
       ++result;
-      erase_node(i.m_cur_node, i.m_cur_bucket);
+      m_erase_node(it.m_cur_node, it.m_cur_bucket);
       return result;
     }
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]