This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Allow lazy construction of hash_{table,set,map}


Hi!

Already in the PR71446 patch I used ugly and slow code to avoid allocating
memory in a hash_set all the time, even when it will be used only rarely and
in PR89767 I've reached it again.

While e.g. a vec is POD that even doesn't have a constructor and auto_vec
has quite a cheap ctor, hash_set/hash_map/hash_table actually allocate the
elements array right away.  I find that overkill for the cases where the
usual case is that the hash table will not be used and it handles only
something that happens rarely.  My PR71446 patch uses a pointer to hash_set
and performs new/delete if it is needed.

The following (so far untested) patch allows to construct
hash_{table,set,map} with no memory allocation, with the limitation that
only destruction, or size/elements methods can be used safely on that.
In order to switch such hash_* into normal use, there is a create method
(with optional number of initial elements), which is then called once and
afterwards it acts as any other hash_*.

Does this look reasonable, or do you have other proposals?
Or is this thing simply not worth bothering with and I should just
unconditionally use hash_set with the default 13 elements construction?

2019-03-20  Jakub Jelinek  <jakub@redhat.com>

	* hash-table.h (empty_hash): New enum.
	(hash_table::hash_table): New ctor with empty_hash first argument.
	(hash_table::create): New method.
	* hash-set.h (hash_set::hash_set): New ctor with empty_hash first
	argument.
	(hash_set::create_ggc): Formatting fix.
	(hash_set::create, hash_set::size): New methods.
	* hash-map.h (hash_map::hash_map): New ctor with empty_hash first
	argument.
	(hash_map::empty): Formatting fix.
	(hash_map::create, hash_map::size): New methods.

--- gcc/hash-table.h.jj	2019-03-14 23:46:28.593616750 +0100
+++ gcc/hash-table.h	2019-03-20 17:47:16.925027249 +0100
@@ -167,6 +167,24 @@ along with GCC; see the file COPYING3.
    See hash_table for details.  The interface is very similar to libiberty's
    htab_t.
 
+   If a hash table is used only in some rare cases, it is possible
+   to construct the hash_table lazily before first use.  This is done
+   through:
+
+      hash_table <some_type_hasher> some_type_hash_table (EMPTY_HASH);
+
+   On such an object, only size () or elements () method can be safely
+   called and in order to make it usable,
+
+      some_type_hash_table.create ();
+
+   needs to be called, possibly guarded like:
+
+      if (some_type_hash_table.size () == 0)
+	some_type_hash_table.create ();
+
+   This way, no memory is allocated during construction.
+
 
    EASY DESCRIPTORS FOR POINTERS
 
@@ -340,6 +358,8 @@ hash_table_mod2 (hashval_t hash, unsigne
 
 class mem_usage;
 
+enum empty_hash { EMPTY_HASH };
+
 /* User-facing hash table type.
 
    The table stores elements of type Descriptor::value_type and uses
@@ -365,6 +385,10 @@ public:
 		       bool gather_mem_stats = GATHER_STATISTICS,
 		       mem_alloc_origin origin = HASH_TABLE_ORIGIN
 		       CXX_MEM_STAT_INFO);
+  explicit hash_table (empty_hash, bool ggc = false,
+		       bool gather_mem_stats = GATHER_STATISTICS,
+		       mem_alloc_origin origin = HASH_TABLE_ORIGIN
+		       CXX_MEM_STAT_INFO);
   explicit hash_table (const hash_table &, bool ggc = false,
 		       bool gather_mem_stats = GATHER_STATISTICS,
 		       mem_alloc_origin origin = HASH_TABLE_ORIGIN
@@ -381,6 +405,10 @@ public:
     return table;
   }
 
+  /* If hash_table has been constructed with EMPTY_HASH, this method
+     needs to be called before using it (except for size method).  */
+  void create (size_t n);
+
   /* Current size (in entries) of the hash table.  */
   size_t size () const { return m_size; }
 
@@ -581,11 +609,38 @@ hash_table<Descriptor, Allocator>::hash_
 
   if (m_gather_mem_stats)
     hash_table_usage ().register_descriptor (this, origin, ggc
-					  FINAL_PASS_MEM_STAT);
+					     FINAL_PASS_MEM_STAT);
 
   m_entries = alloc_entries (size PASS_MEM_STAT);
   m_size = size;
   m_size_prime_index = size_prime_index;
+}
+
+template<typename Descriptor, template<typename Type> class Allocator>
+hash_table<Descriptor, Allocator>::hash_table (empty_hash, bool ggc, bool
+					       gather_mem_stats,
+					       mem_alloc_origin origin
+					       MEM_STAT_DECL) :
+  m_entries (NULL), m_size (0), m_n_elements (0), m_n_deleted (0),
+  m_searches (0), m_collisions (0), m_size_prime_index (INT_MAX),
+  m_ggc (ggc), m_gather_mem_stats (gather_mem_stats)
+{
+  if (m_gather_mem_stats)
+    hash_table_usage ().register_descriptor (this, origin, ggc
+					     FINAL_PASS_MEM_STAT);
+}
+
+template<typename Descriptor, template<typename Type> class Allocator>
+void
+hash_table<Descriptor, Allocator>::create (size_t size)
+{
+  gcc_checking_assert (m_size == 0);
+
+  unsigned int size_prime_index = hash_table_higher_prime_index (size);
+  size = prime_tab[size_prime_index].prime;
+  m_entries = alloc_entries (size PASS_MEM_STAT);
+  m_size = size;
+  m_size_prime_index = size_prime_index;
 }
 
 template<typename Descriptor, template<typename Type> class Allocator>
--- gcc/hash-set.h.jj	2019-01-01 12:37:15.826996789 +0100
+++ gcc/hash-set.h	2019-03-20 17:31:45.045119708 +0100
@@ -29,15 +29,22 @@ public:
   explicit hash_set (size_t n = 13, bool ggc = false CXX_MEM_STAT_INFO)
     : m_table (n, ggc, GATHER_STATISTICS, HASH_SET_ORIGIN PASS_MEM_STAT) {}
 
+  explicit hash_set (empty_hash, bool ggc = false CXX_MEM_STAT_INFO)
+    : m_table (EMPTY_HASH, ggc, GATHER_STATISTICS,
+	       HASH_SET_ORIGIN PASS_MEM_STAT) {}
+
   /* Create a hash_set in gc memory with space for at least n elements.  */
 
   static hash_set *
-    create_ggc (size_t n)
-      {
-	hash_set *set = ggc_alloc<hash_set> ();
-	new (set) hash_set (n, true);
-	return set;
-      }
+  create_ggc (size_t n)
+    {
+      hash_set *set = ggc_alloc<hash_set> ();
+      new (set) hash_set (n, true);
+      return set;
+    }
+
+  /* Create for hash_sets constructed with EMPTY_HASH.  */
+  void create (size_t n = 13) { m_table.create (n); }
 
   /* If key k isn't already in the map add it to the map, and
      return false.  Otherwise return true.  */
@@ -80,6 +87,10 @@ public:
 
   size_t elements () const { return m_table.elements (); }
 
+  /* Return the size of the underlying hash table.  */
+
+  size_t size () const { return m_table.size (); }
+
   /* Clear the hash table.  */
 
   void empty () { m_table.empty (); }
--- gcc/hash-map.h.jj	2019-01-01 12:37:21.032911374 +0100
+++ gcc/hash-map.h	2019-03-20 17:34:43.373232085 +0100
@@ -122,6 +122,12 @@ public:
 		     CXX_MEM_STAT_INFO)
     : m_table (n, ggc, gather_mem_stats, HASH_MAP_ORIGIN PASS_MEM_STAT) {}
 
+  explicit hash_map (empty_hash, bool ggc = false,
+		     bool gather_mem_stats = GATHER_STATISTICS
+		     CXX_MEM_STAT_INFO)
+    : m_table (EMPTY_HASH, ggc, gather_mem_stats,
+	       HASH_MAP_ORIGIN PASS_MEM_STAT) {}
+
   explicit hash_map (const hash_map &h, bool ggc = false,
 		     bool gather_mem_stats = GATHER_STATISTICS
 		     CXX_MEM_STAT_INFO)
@@ -138,6 +144,9 @@ public:
       return map;
     }
 
+  /* Create for hash_maps constructed with EMPTY_HASH.  */
+  void create (size_t n = 13) { m_table.create (n); }
+
   /* If key k isn't already in the map add key k with value v to the map, and
      return false.  Otherwise set the value of the entry for key k to be v and
      return true.  */
@@ -209,7 +218,9 @@ public:
 
   size_t elements () const { return m_table.elements (); }
 
-  void empty () { m_table.empty(); }
+  size_t size () const { return m_table.size (); }
+
+  void empty () { m_table.empty (); }
 
   class iterator
   {


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]