[gcc r12-2698] libstdc++: Reduce header dependencies in <regex>

Jonathan Wakely redi@gcc.gnu.org
Tue Aug 3 15:33:46 GMT 2021


https://gcc.gnu.org/g:e9f64fff64d83f5fcaa9ff17f1688490f75bdcb7

commit r12-2698-ge9f64fff64d83f5fcaa9ff17f1688490f75bdcb7
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Mon Aug 2 18:34:19 2021 +0100

    libstdc++: Reduce header dependencies in <regex>
    
    This reduces the size of <regex> a little. This is one of the largest
    and slowest headers in the library.
    
    By using <bits/stl_algobase.h> and <bits/stl_algo.h> instead of
    <algorithm> we don't need to parse all the parallel algorithms and
    std::ranges:: algorithms that are not needed by <regex>. Similarly, by
    using <bits/stl_tree.h> and <bits/stl_map.h> instead of <map> we don't
    need to parse the definition of std::multimap.
    
    The _State_info type is not movable or copyable, so it doesn't need to
    use std::unique_ptr<bool[]> to manage its array of bool; we can just
    delete[] the array in the destructor. It would use a lot less space if
    we used a bitset instead, but that would be an ABI break. We could do it
    for the versioned namespace, but this patch doesn't do so. For future
    reference, using vector<bool> would work, but would increase
    sizeof(_State_info) by two pointers, because it's three times as large
    as unique_ptr<bool[]>. We can't use std::bitset because the length isn't
    constant. We want a bitset with a non-constant but fixed length.
    
    Signed-off-by: Jonathan Wakely <jwakely@redhat.com>
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/regex_executor.h (_State_info): Replace
            unique_ptr<bool[]> with array of bool.
            * include/bits/regex_executor.tcc: Likewise.
            * include/bits/regex_scanner.tcc: Replace std::strchr with
            __builtin_strchr.
            * include/std/regex: Replace standard headers with smaller
            internal ones.
            * testsuite/28_regex/traits/char/lookup_classname.cc: Include
            <string.h> for strlen.
            * testsuite/28_regex/traits/char/lookup_collatename.cc:
            Likewise.

Diff:
---
 libstdc++-v3/include/bits/regex_executor.h             |  7 ++++++-
 libstdc++-v3/include/bits/regex_executor.tcc           |  2 +-
 libstdc++-v3/include/bits/regex_scanner.tcc            |  4 ++--
 libstdc++-v3/include/std/regex                         | 18 +++++++++---------
 .../testsuite/28_regex/traits/char/lookup_classname.cc |  1 +
 .../28_regex/traits/char/lookup_collatename.cc         |  1 +
 6 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 4a641eeee6c..014b4e83064 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -195,6 +195,11 @@ namespace __detail
 	  : _M_visited_states(new bool[__n]()), _M_start(__start)
 	  { }
 
+	  ~_State_info() { delete[] _M_visited_states; }
+
+	  _State_info(const _State_info&) = delete;
+	  _State_info& operator=(const _State_info&) = delete;
+
 	  bool _M_visited(_StateIdT __i)
 	  {
 	    if (_M_visited_states[__i])
@@ -212,7 +217,7 @@ namespace __detail
 	  // Saves states that need to be considered for the next character.
 	  vector<pair<_StateIdT, _ResultsVec>>	_M_match_queue;
 	  // Indicates which states are already visited.
-	  unique_ptr<bool[]>			_M_visited_states;
+	  bool*     _M_visited_states;
 	  // To record current solution.
 	  _StateIdT _M_start;
 	};
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index 405d1c4d0d1..3cefeda48a3 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -122,7 +122,7 @@ namespace __detail
 	  _M_has_sol = false;
 	  if (_M_states._M_match_queue.empty())
 	    break;
-	  std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false);
+	  std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
 	  auto __old_queue = std::move(_M_states._M_match_queue);
 	  for (auto& __task : __old_queue)
 	    {
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index a3512083f0e..a9d6a613648 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -98,7 +98,7 @@ namespace __detail
     {
       auto __c = *_M_current++;
 
-      if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
+      if (__builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
 	{
 	  _M_token = _S_token_ord_char;
 	  _M_value.assign(1, __c);
@@ -394,7 +394,7 @@ namespace __detail
 			    "Unexpected end of regex when escaping.");
 
       auto __c = *_M_current;
-      auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
+      auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
 
       if (__pos != nullptr && *__pos != '\0')
 	{
diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex
index e623a6ed498..04fb8b2d971 100644
--- a/libstdc++-v3/include/std/regex
+++ b/libstdc++-v3/include/std/regex
@@ -35,26 +35,26 @@
 # include <bits/c++0x_warning.h>
 #else
 
-#include <algorithm>
 #include <bitset>
-#ifdef _GLIBCXX_DEBUG
-# include <iosfwd>
-#endif
-#include <iterator>
 #include <locale>
-#include <memory>
 #include <sstream>
 #include <stack>
 #include <stdexcept>
 #include <string>
-#include <vector>
-#include <map>
-#include <cstring>
 
 #include <ext/aligned_buffer.h>
 #include <ext/numeric_traits.h>
+#include <bits/shared_ptr.h>
 #include <bits/std_function.h>
+#include <bits/stl_algobase.h> // std::copy, std::fill_n
+#include <bits/stl_algo.h>     // std::sort, std::unique
+#include <bits/stl_iterator_base_types.h> // std::iterator_traits
 #include <bits/stl_pair.h>
+#include <bits/stl_tree.h>
+#include <bits/stl_map.h>
+#include <bits/stl_vector.h>
+#include <bits/stl_bvector.h>
+#include <bits/vector.tcc>
 #include <bits/regex_constants.h>
 #include <bits/regex_error.h>
 #include <bits/regex_automaton.h>
diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc
index ffaed97e2a7..6c2baf21b56 100644
--- a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc
+++ b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc
@@ -27,6 +27,7 @@
 
 #include <regex>
 #include <forward_list>
+#include <string.h>
 #include <testsuite_hooks.h>
 
 void
diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc
index 3780c40729d..35447d49f7c 100644
--- a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc
+++ b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc
@@ -27,6 +27,7 @@
 
 #include <regex>
 #include <forward_list>
+#include <string.h>
 #include <testsuite_hooks.h>
 
 void


More information about the Libstdc++-cvs mailing list