This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Implement P0966 std::string::reserve should not shrink


On 22/01/19 08:34 +0000, Andrew Luo wrote:
Please see the attached patch.  Still a work in progress, but mostly done.

For reference:
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0966r1.html

Thanks for the patch, Andrew. We'd need a changelog entry and
copyright assignment to use this, see
https://gcc.gnu.org/contribute.html

All patches requesting review need to be CC'd to the gcc-patches list
(and libstdc++ ones need to be sent to the libstdc++ list too).

I've added some comments inline below, and attached a proof-of-concept
showing how I'd rather implement this change.


Index: libstdc++-v3/config/abi/pre/gnu.ver
===================================================================
--- libstdc++-v3/config/abi/pre/gnu.ver	(revision 268105)
+++ libstdc++-v3/config/abi/pre/gnu.ver	(working copy)
@@ -2238,6 +2238,14 @@
    # _Sp_make_shared_tag::_S_eq
    _ZNSt19_Sp_make_shared_tag5_S_eqERKSt9type_info;

+    # basic_string::_M_request_capacity
+    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE19_M_request_capacityEm;

You'd need to export the Copy-On-Write versions of _M_request_capacity
too. It looks like the tests haven't been run for the COW string, as
I'd expect them to fail with an undefined reference to
_M_request_capacity, and because char/18654.cc relies on the shrink
behaviour to pass the tests. That's easy to fix though, just note the
actual capacity before the shrink request, and verify it hasn't
changed:

--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/18654.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/18654.cc
@@ -47,12 +47,14 @@ void test01()
    {
      string str(i, 'x');
      str.reserve(3 * i);
+      const size_type cap = str.capacity();
+      VERIFY( cap >= 3 * i );

      str.reserve(2 * i);
-      VERIFY( str.capacity() == 2 * i );
+      VERIFY( str.capacity() == cap );

      str.reserve();
-      VERIFY( str.capacity() == i );
+      VERIFY( str.capacity() == cap );
    }
}


+    # basic_string::reserve() (C++2a)
+    _ZNSs7reserveEv;
+    _ZNSbIwSt11char_traitsIwESaIwEE7reserveEv;
+    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE7reserveEv;
} GLIBCXX_3.4.25;


There are existing patterns which already match these new reserve()
overloads, so they would need to be altered to not match the new ones:

@@ -224,7 +224,10 @@ GLIBCXX_3.4 {
    _ZNSs6assignE[PRcjmvy]*;
    _ZNSs6insertE[PRcjmvy]*;
    _ZNSs6insertEN9__gnu_cxx17__normal_iteratorIPcSsEE[PRcjmvy]*;
-    _ZNSs[67][j-z]*E[PRcjmvy]*;
+    _ZNSs6rbeginEv;
+    _ZNSs6resizeE[jmy]*;
+    _ZNSs7replaceE[jmy]*;
+    _ZNSs7reserveE[jmy];
    _ZNSs7[a-z]*EES2_[NPRjmy]*;
    _ZNSs7[a-z]*EES2_S[12]*;
    _ZNSs12_Alloc_hiderC*;
@@ -1740,7 +1743,7 @@ GLIBCXX_3.4.21 {
    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE6rbegin*;
    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE6resize*;
    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE7replace*;
-    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE7reserve*;
+    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE7reserveE[jmy];
    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE8pop_back*;
    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE9push_back*;
    _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE[7-9]_[MS]_*;


# Symbols in the support library (libsupc++) have their own tag.
Index: libstdc++-v3/include/bits/basic_string.h
===================================================================
--- libstdc++-v3/include/bits/basic_string.h	(revision 268105)
+++ libstdc++-v3/include/bits/basic_string.h	(working copy)
@@ -420,6 +420,9 @@
      void
      _M_erase(size_type __pos, size_type __n);

+      void
+      _M_request_capacity(size_type __requested_capacity);

I think it would be better to not add this new function. Currently we
have a single reserve(size_type) function which handles the growing
case, and the shrinking case. If the post-P0966 world means you have
to call separate functions for those cases (reserve to grow, and
shrink_to_fit to shrink) then it's suboptimal to use a single
implementation. No callers of reserve want the shrinking path, and no
callers of shrink_to_fit want the growing path.

See the attached patch for my preferred solution. It makes the
definition of reserve(size_type) smaller, and hopefully faster, and
more likely to be inlined.

This has another minor advantage, which is that shrink_to_fit() can now work when -fno-exceptions is used, if the string fits in the SSO
buffer (previously it always did nothing for -fno-exceptions just
because it was simplest to implement as a call to reserve(0)).

    public:
      // Construct/copy/destroy:
      // NB: We overload ctors in some cases instead of using default
@@ -1014,8 +1017,21 @@
       *  data.
       */
      void
+#if __cplusplus > 201703L
+      reserve(size_type __res_arg);
+#else
      reserve(size_type __res_arg = 0);
+#endif

+#if __cplusplus > 201703L
+	  /**
+	   *  Deprecated function, added for compatibility
+	   */

We want to use the [[deprecated]] attribute, not a comment. And there
doesn't seem to be any point in only conditionally making the split
from one reserve function into two overloaded functions. Since you've
changed the behaviour of reserve(0) unconditionally, why not just
declare the reserve() member unconditionally too, instead of limiting
it to C++17? That means we can issue a deprecation warning for all
modes.

+      void
+      reserve()
+      { }

It's also unfortunate to have to add new symbols to the shared library
for a deprecated function that will be removed at some point.

I'm not sure what to do about that. For now my patch doesn't remove
the default argument, but we could do this:

@@ -1014,7 +1032,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       *  data.
       */
      void
-      reserve(size_type __res_arg = 0);
+      reserve(size_type __res_arg);
+
+      __attribute__((__always_inline__))
+      __attribute__((__deprecated__("use shrink_to_fit() or reserve(0)")))
+      void reserve() { }

      /**
       *  Erases the string, making it empty.
@@ -3979,7 +4006,11 @@ _GLIBCXX_END_NAMESPACE_CXX11
       *  data.
       */
      void
-      reserve(size_type __res_arg = 0);
+      reserve(size_type __res_arg);
+
+      __attribute__((__always_inline__))
+      __attribute__((__deprecated__("use shrink_to_fit() or reserve(0)")))
+      void reserve() { this->reserve(0); }

      /**
       *  Erases the string, making it empty.

The changes to the gnu.ver linker script shown above are still needed
here, to prevent the new overloads being exported with the wrong
symbol versions. But the always_inline attributes mean we don't need
to export new symbols.

For the COW string it still calls reserve(0) to un-share the string.

But there's a high level problem with this change: it makes it awkward
to do shrink_to_fit in C++98 mode. There's no shrink_to_fit() member
for C++98, and you're removing the reserve(0) behaviour. Usually we'd
implement resolutions for LWG issues in all relevant -std modes, but
in this case I'm not sure we want to do that. However, because
std::string and std::wstring are explicitly instantiated in the
library, it's not easy to provide different behaviour for -std=c++98
and other modes.

Maybe the answer is for the new zero-argument reserve() overload to do
shrink-to-fit in C++98 mode. By marking it always_inline we can give
it different behaviour in C++98 and later modes, because the explicit
instantiation in the library won't be used for that function.

@@ -951,16 +963,11 @@
    basic_string<_CharT, _Traits, _Alloc>::
    reserve(size_type __res)
    {
-      if (__res != this->capacity() || _M_rep()->_M_is_shared())
-        {
-	  // Make sure we don't shrink below the current size
-	  if (__res < this->size())
-	    __res = this->size();
-	  const allocator_type __a = get_allocator();
-	  _CharT* __tmp = _M_rep()->_M_clone(__a, __res - this->size());
-	  _M_rep()->_M_dispose(__a);
-	  _M_data(__tmp);
-        }
+      // P0966 reserve should not shrink
+      if (__res <= capacity())
+	return;

You're removing the property that calling reserve on a COW string
guarantees it is no longer shared. I think we need to keep that (as my
patch does).

Given that this change (either your version, or mine) alters the
semantics of std::string in all -std modes, I don't think we can make
this change now. It will have to wait for GCC 10, so that the changes
have more time on trunk.

commit 3dfeb74032e145315bf460e0674232fe8a35bae8
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Tue Jan 22 13:12:11 2019 +0000

    Adjust tests for basic_string:;reserve(size_type) change (P0966R1)
    
    2019-01-22  Andrew Luo  <andrewluotechnologies@outlook.com>
    
            * testsuite/21_strings/basic_string/capacity/1.cc: Adjust for new
            semantics of basic_string::reserve(size_type).
            * testsuite/21_strings/basic_string/capacity/char/1.cc: Likewise.
            * testsuite/21_strings/basic_string/capacity/char/18654.cc: Likewise.
            * testsuite/21_strings/basic_string/capacity/wchar_t/1.cc: Likewise.
            * testsuite/21_strings/basic_string/capacity/wchar_t/18654.cc:
            Likewise.

diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/1.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/1.cc
index 2b77c4b07ab..5911cf391ca 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/1.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/1.cc
@@ -138,11 +138,13 @@ void test01()
   VERIFY( sz04 >= 100 );
   str02.reserve();
   sz03 = str02.capacity();
-#if _GLIBCXX_USE_CXX11_ABI
-  VERIFY( sz03 < 100);
-#else
-  VERIFY( sz03 == 0 );
-#endif
+  VERIFY( sz03 == sz04 );
+
+  // P0966: reserve should not shrink
+  str02.reserve(100);
+  sz03 = str02.capacity();
+  str02.reserve(sz03 - 1);
+  VERIFY( str02.capacity() == sz03 );
 
   sz03 = str02.size() + 5;
   str02.resize(sz03);
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/1.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/1.cc
index b0e364ea612..38a8d0bdc67 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/1.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/1.cc
@@ -35,11 +35,13 @@ void test01()
   VERIFY( sz02 >= 100 );
   str01.reserve();
   sz01 = str01.capacity();
-#if _GLIBCXX_USE_CXX11_ABI
-  VERIFY( sz01 < 100);
-#else
-  VERIFY( sz01 == 0 );
-#endif
+  VERIFY( sz01 == sz02 );
+
+  // P0966: reserve should not shrink
+  str01.reserve(100);
+  sz01 = str01.capacity();
+  str01.reserve(sz01 - 1);
+  VERIFY( str01.capacity() == sz01 );
 
   sz01 = str01.size() + 5;
   str01.resize(sz01);
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/18654.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/18654.cc
index 265e9468e5b..72eb3531cbc 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/18654.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/18654.cc
@@ -47,12 +47,14 @@ void test01()
     {
       string str(i, 'x');
       str.reserve(3 * i);
+      const size_type cap = str.capacity();
+      VERIFY( cap >= 3 * i );
 
       str.reserve(2 * i);
-      VERIFY( str.capacity() == 2 * i );
+      VERIFY( str.capacity() == cap );
 
       str.reserve();
-      VERIFY( str.capacity() == i );
+      VERIFY( str.capacity() == cap );
     }
 }
 
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/1.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/1.cc
index 9f82502557e..6aed54f6e04 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/1.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/1.cc
@@ -35,11 +35,13 @@ void test01()
   VERIFY( sz02 >= 100 );
   str01.reserve();
   sz01 = str01.capacity();
-#if _GLIBCXX_USE_CXX11_ABI
-  VERIFY( sz01 < 100);
-#else
-  VERIFY( sz01 == 0 );
-#endif
+  VERIFY( sz01 == sz02 );
+
+  // P0966: reserve should not shrink
+  str01.reserve(100);
+  sz01 = str01.capacity();
+  str01.reserve(sz01 - 1);
+  VERIFY( str01.capacity() == sz01 );
 
   sz01 = str01.size() + 5;
   str01.resize(sz01);
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/18654.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/18654.cc
index 54eafa0e633..3f3b61d8137 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/18654.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/wchar_t/18654.cc
@@ -47,12 +47,14 @@ void test01()
     {
       wstring str(i, L'x');
       str.reserve(3 * i);
+      const size_type cap = str.capacity();
+      VERIFY( cap >= 3 * i );
 
       str.reserve(2 * i);
-      VERIFY( str.capacity() == 2 * i );
+      VERIFY( str.capacity() == cap );
 
       str.reserve();
-      VERIFY( str.capacity() == i );
+      VERIFY( str.capacity() == cap );
     }
 }
 

commit 510aff029f55e56448606f6ab8630636c205fccb
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Tue Jan 22 13:03:03 2019 +0000

    Do not reduce capacity in std::basic_string::reserve (P0966R1)
    
            * doc/xml/manual/intro.xml: Document P0966R1 / LWG 2968 change.
            * include/bits/basic_string.h [_GLIBCXX_USE_CXX11_ABI] (shrink_to_fit):
            Copy capacity-reducing implementation from reserve(size_type).
            [!_GLIBCXX_USE_CXX11_ABI] (shrink_to_fit): Likewise.
            [_GLIBCXX_USE_CXX11_ABI] (reserve): Improve comment.
            * include/bits/basic_string.tcc [_GLIBCXX_USE_CXX11_ABI] (reserve):
            Remove branch that reduces capacity.
            [!_GLIBCXX_USE_CXX11_ABI] (reserve): Likewise.

diff --git a/libstdc++-v3/doc/xml/manual/intro.xml b/libstdc++-v3/doc/xml/manual/intro.xml
index 28210cb0862..75b62131f69 100644
--- a/libstdc++-v3/doc/xml/manual/intro.xml
+++ b/libstdc++-v3/doc/xml/manual/intro.xml
@@ -1175,6 +1175,15 @@ requirements of the license of GCC.
     <listitem><para>Add noexcept.
     </para></listitem></varlistentry>
 
+    <varlistentry xml:id="manual.bugs.dr2968"><term><link xmlns:xlink="http://www.w3.org/1999/xlink"; xlink:href="&DR;#2968">2968</link>:
+       <emphasis>Inconsistencies between <code>basic_string reserve</code>
+         and <code>vector/unordered_map/unordered_set reserve</code> functions
+       </emphasis>
+    </term>
+    <listitem><para>Change <code>basic_string::reserve</code> to keep excess
+      capacity when called with a value smaller than the current capacity.
+    </para></listitem></varlistentry>
+
     <varlistentry xml:id="manual.bugs.dr2993"><term><link xmlns:xlink="http://www.w3.org/1999/xlink"; xlink:href="&DR;#2993">2993</link>:
        <emphasis><code>reference_wrapper&lt;T&gt;</code> conversion from <code>T&amp;&amp;</code>
        </emphasis>
diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index 0a6dd3cbc71..1e3f50bf738 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -973,17 +973,35 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       void
       shrink_to_fit() noexcept
       {
-#if __cpp_exceptions
-	if (capacity() > size())
+	if (!_M_is_local())
 	  {
-	    try
-	      { reserve(0); }
-	    catch(...)
-	      { }
+	    const auto __capacity = capacity();
+	    const auto __size = size();
+	    if (__size <= size_type(_S_local_capacity))
+	      {
+		this->_S_copy(_M_local_data(), _M_data(), __size + 1);
+		_M_destroy(__capacity);
+		_M_data(_M_local_data());
+	      }
+#if __cpp_exceptions
+	    else if (__capacity > __size)
+	      {
+		try
+		  {
+		    size_type __newcap = __size;
+		    pointer __tmp = _M_create(__newcap, __capacity);
+		    this->_S_copy(__tmp, _M_data(), __size + 1);
+		    _M_dispose();
+		    _M_data(__tmp);
+		    _M_capacity(__newcap);
+		  }
+		catch(...)
+		  { }
+	      }
+#endif
 	  }
-#endif
       }
-#endif
+#endif // C++11
 
       /**
        *  Returns the total number of characters that the %string can hold
@@ -1012,6 +1030,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  required, the user can reserve the memory in %advance, and thus
        *  prevent a possible reallocation of memory and copying of %string
        *  data.
+       *
+       *  The disadvantage is that reserving an exact capacity will override
+       *  the usual exponential growth that provides amortized constant time
+       *  for appending to a string. If the string needs to grow past the
+       *  capacity that was reserved, it might result in more reallocations
+       *  than would have happened if the string had been allowed to manage
+       *  its own capacity.
        */
       void
       reserve(size_type __res_arg = 0);
@@ -3945,13 +3970,18 @@ _GLIBCXX_END_NAMESPACE_CXX11
 	if (capacity() > size())
 	  {
 	    try
-	      { reserve(0); }
+	      {
+		const allocator_type __a = get_allocator();
+		_CharT* __tmp = _M_rep()->_M_clone(__a);
+		_M_rep()->_M_dispose(__a);
+		_M_data(__tmp);
+	      }
 	    catch(...)
 	      { }
 	  }
 #endif
       }
-#endif
+#endif // C++11
 
       /**
        *  Returns the total number of characters that the %string can hold
diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
index 314b8fe207f..889f7a0df5f 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -280,29 +280,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     basic_string<_CharT, _Traits, _Alloc>::
     reserve(size_type __res)
     {
-      // Make sure we don't shrink below the current size.
-      if (__res < length())
-	__res = length();
-
       const size_type __capacity = capacity();
-      if (__res != __capacity)
-	{
-	  if (__res > __capacity
-	      || __res > size_type(_S_local_capacity))
-	    {
-	      pointer __tmp = _M_create(__res, __capacity);
-	      this->_S_copy(__tmp, _M_data(), length() + 1);
-	      _M_dispose();
-	      _M_data(__tmp);
-	      _M_capacity(__res);
-	    }
-	  else if (!_M_is_local())
-	    {
-	      this->_S_copy(_M_local_data(), _M_data(), length() + 1);
-	      _M_destroy(__capacity);
-	      _M_data(_M_local_data());
-	    }
-	}
+
+      // _GLIBCXX_RESOLVE_LIB_DEFECTS
+      // 2968. Inconsistencies between basic_string reserve and
+      // vector/unordered_map/unordered_set reserve functions
+      if (__res <= __capacity)
+	return;
+
+      pointer __tmp = _M_create(__res, __capacity);
+      this->_S_copy(__tmp, _M_data(), length() + 1);
+      _M_dispose();
+      _M_data(__tmp);
+      _M_capacity(__res);
     }
 
   template<typename _CharT, typename _Traits, typename _Alloc>
@@ -951,16 +941,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     basic_string<_CharT, _Traits, _Alloc>::
     reserve(size_type __res)
     {
-      if (__res != this->capacity() || _M_rep()->_M_is_shared())
-        {
-	  // Make sure we don't shrink below the current size
-	  if (__res < this->size())
-	    __res = this->size();
-	  const allocator_type __a = get_allocator();
-	  _CharT* __tmp = _M_rep()->_M_clone(__a, __res - this->size());
-	  _M_rep()->_M_dispose(__a);
-	  _M_data(__tmp);
-        }
+      const size_type __capacity = capacity();
+
+      // _GLIBCXX_RESOLVE_LIB_DEFECTS
+      // 2968. Inconsistencies between basic_string reserve and
+      // vector/unordered_map/unordered_set reserve functions
+      if (__res <= __capacity)
+	{
+	  if (_M_rep()->_M_is_shared())
+	    __res = capacity(); // unshare, but preserve excess capacity
+	  else
+	    return;
+	}
+
+      const allocator_type __a = get_allocator();
+      _CharT* __tmp = _M_rep()->_M_clone(__a, __res - size());
+      _M_rep()->_M_dispose(__a);
+      _M_data(__tmp);
     }
 
   template<typename _CharT, typename _Traits, typename _Alloc>

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]