[v3] Print ints directly to streambuf

Jerry Quinn jlquinn@optonline.net
Sat Feb 22 05:40:00 GMT 2003


Nathan Myers worked with me on putting this together -

When conditions are right and we can easily figure out how long the string
form of a number is going to be, we can render the number directly into the
ostreambuf buffer and save copying costs.

To support this, we do a few things.  First, compute the length of the number's
string form.  Next, provide a routine that hands back the streambuf's own
buffer when it has enough space for this number.  Finally, in xsputn() (reached
through sputn()), detect that the streambuf is being handed its own buffer and
just update positioning.

This is worth ~4% (from 7.5s down to 7.2s) on the same test from PR 8761 I've
used for the other benchmarking.

2003-02-22  Jerry Quinn  <jlquinn@optonline.net>

        * include/bits/locale_facets.h: Use climits, streambuf_iterator.h.
	(__bypass): New.
	(__num_base::_Threshold_type, __num_base::struct __format_length,
	__num_base::_S_lengths_initialized, __num_base::_S_lengths,
	__num_base::_S_init_format_length_table,
	__num_base::_M_estimate_length): New.
	* include/bits/locale_facets.tcc (num_put::_M_convert_int): Use
	__bypass and _M_estimate_length.
	* include/bits/streambuf.tcc (basic_streambuf::xsputn): Only update
	endpointers if using own buffer.
	* include/bits/streambuf_iterator.h (ostreambuf_iterator::_M_bypass):
	New.
	* include/std/std_streambuf.h (basic_streambuf::_M_bypass): New.
	* src/locale.cc (__num_base::_S_lengths_initialized): Define.
	(__num_base::_S_init_format_length_table): Define.

Index: include/bits/locale_facets.h
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/bits/locale_facets.h,v
retrieving revision 1.56
diff -u -r1.56 locale_facets.h
--- include/bits/locale_facets.h	18 Feb 2003 05:45:06 -0000	1.56
+++ include/bits/locale_facets.h	22 Feb 2003 05:23:58 -0000
@@ -44,9 +44,11 @@
 
 #include <ctime>	// For struct tm
 #include <cwctype>	// For wctype_t
+#include <climits>	// For CHAR_BIT
 #include <iosfwd>
 #include <bits/ios_base.h>  // For ios_base, ios_base::iostate
 #include <streambuf>
+#include <bits/streambuf_iterator.h>
 
 namespace std
 {
@@ -161,6 +163,34 @@
       return __s;
     }
 
+  // This template permits specializing facet output code for
+  // ostreambuf_iterator.  For ostreambuf_iterator, we can access
+  // the internal buffer, and avoid having to copy results.  We would really
+  // like to be able to specialize for ostreambuf_iterator<_CharT>, but
+  // there's no guarantee that the user hasn't specialized it and not provided
+  // _M_bypass.
+
+  // This is the unspecialized form of the template.
+  template<typename _CharT, typename _OutIter>
+    inline
+    _CharT*
+    __bypass(_OutIter, _CharT* __buf, int)  // Iterator and length are unused.
+    { return __buf; }  // Generic iterators keep the caller-supplied buffer.
+
+  template<>  // char streams: let the iterator choose the buffer to use.
+    inline
+    char*
+    __bypass<char>(ostreambuf_iterator<char> __s, char* __buf, int __len)
+    { return __s._M_bypass(__buf, __len); }
+
+#ifdef _GLIBCPP_USE_WCHAR_T
+  template<>  // Same forwarding for wchar_t streams.
+    inline
+    wchar_t*
+    __bypass<wchar_t>(ostreambuf_iterator<wchar_t> __s, wchar_t* __buf, int __len)
+    { return __s._M_bypass(__buf, __len); }
+#endif
+
 
   // 22.2.1.1  Template class ctype
   // Include host and configuration specific ctype enums for ctype_base.
@@ -577,6 +607,87 @@
     // Construct and return valid scanf format for integer types.
     static void
     _S_format_int(const ios_base& __io, char* __fptr, char __mod, char __modl);
+
+
+    // The following code is used to compute the length of an integer so we
+    // can allocate a buffer of exactly the right size to hold it.  With this
+    // information, we can safely render a number directly into the output
+    // buffer, rather than into a temporary that must be copied.
+
+    // Table for computing size of a number; index is octal digit count - 1
+#ifdef _GLIBCPP_USE_LONG_LONG
+    typedef unsigned long long _Threshold_type;
+#else
+    typedef unsigned long _Threshold_type;
+#endif
+    struct __format_length { _Threshold_type __threshold; int __digits; };
+    // This length should probably be in a static const, but I can't set the
+    // length of an array in a struct this way
+    static bool _S_lengths_initialized;
+    static __format_length _S_lengths[(sizeof(__num_base::_Threshold_type) * CHAR_BIT + 2) / 3];
+    
+    // Construct the format length table (defined in src/locale.cc)
+    static void
+    _S_init_format_length_table();
+
+    inline size_t  // Unsigned overload: forwards with __neg == false.
+    _M_estimate_length(unsigned long __v, ios_base& __io) const { return _M_estimate_length(__v, __io, false); }
+    inline size_t  // Signed overload: passes the magnitude plus a sign flag.
+    _M_estimate_length(long __v, ios_base& __io) const
+    {
+      if (__v < 0)  // Negate after the cast; -__v overflows for LONG_MIN.
+	return _M_estimate_length(0UL - static_cast<unsigned long>(__v), __io, true);
+      return _M_estimate_length(static_cast<unsigned long>(__v), __io, false);
+    }
+
+#ifdef _GLIBCPP_USE_LONG_LONG
+    inline size_t  // Unsigned overload: forwards with __neg == false.
+    _M_estimate_length(unsigned long long __v, ios_base& __io) const { return _M_estimate_length(__v, __io, false); }
+    inline size_t  // Signed overload: passes the magnitude plus a sign flag.
+    _M_estimate_length(long long __v, ios_base& __io) const
+    {
+      if (__v < 0)  // Negate after the cast; -__v overflows for LLONG_MIN.
+	return _M_estimate_length(0ULL - static_cast<unsigned long long>(__v), __io, true);
+      return _M_estimate_length(static_cast<unsigned long long>(__v), __io, false);
+    }
+#endif
+
+    template<typename _ValueT>  // Chars needed to format the magnitude __v.
+    inline size_t               // __neg: a leading '-' is written as well.
+    _M_estimate_length(_ValueT __v, const ios_base& __io, bool __neg) const
+    {
+      if (__builtin_expect((__io.flags() & ios_base::basefield) == ios_base::hex, false))
+	{
+	  size_t  __log16 = 1;      // Hex digits; shift a copy so __v survives.
+	  for (_ValueT __i = __v; (__i >>= 4) != 0; )
+	    { ++__log16; }
+	  if (__v && __io.flags() & ios_base::showbase)
+	    __log16 += 2;           // Leading '0x'
+	  return __log16;
+	}
+
+      size_t  __log8 = 0;           // Octal digit count minus one.
+      _ValueT __i = __v;
+      while ((__i >>= 3) != 0)
+	{ ++__log8; }
+      if (__builtin_expect((__io.flags() & ios_base::basefield) == ios_base::oct, false))
+	{
+	  if (__v && __io.flags() & ios_base::showbase)
+	    return __log8 + 2;	// All digits plus leading '0'
+	  return __log8 + 1;	// __log8 shifts were needed => __log8 + 1 digits
+	}
+
+      size_t  __digits = _S_lengths[__log8].__digits;
+      if (static_cast<_Threshold_type>(__v) >= _S_lengths[__log8].__threshold)
+	{ ++__digits; }             // Compared at full width: no truncation.
+      if (__neg || __io.flags() & ios_base::showpos)
+	++__digits;
+      return __digits;
+    }
+
+
+    // Constructor: builds the length table unless the flag says it exists.
+    __num_base() { if (!_S_lengths_initialized) _S_init_format_length_table(); }
   };
 
 
Index: include/bits/locale_facets.tcc
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/bits/locale_facets.tcc,v
retrieving revision 1.92
diff -u -r1.92 locale_facets.tcc
--- include/bits/locale_facets.tcc	21 Feb 2003 08:34:18 -0000	1.92
+++ include/bits/locale_facets.tcc	22 Feb 2003 05:23:59 -0000
@@ -773,9 +773,11 @@
 	_CharT* __lit = __lc._M_literals;
 
 	// Long enough to hold hex, dec, and octal representations.
-	int __ilen = 4 * sizeof(_ValueT);
+	int __ilen = _M_estimate_length(__v, __io);
 	_CharT* __cs = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) 
 							     * __ilen));
+	__cs = __bypass(__s, __cs, __ilen);
+
 	// [22.2.2.2.2] Stage 1, numeric conversion to character.
 	// Result is returned right-justified in the buffer.
 	int __len;
Index: include/bits/streambuf.tcc
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/bits/streambuf.tcc,v
retrieving revision 1.25
diff -u -r1.25 streambuf.tcc
--- include/bits/streambuf.tcc	13 Feb 2003 21:39:03 -0000	1.25
+++ include/bits/streambuf.tcc	22 Feb 2003 05:23:59 -0000
@@ -159,6 +159,13 @@
     basic_streambuf<_CharT, _Traits>::
     xsputn(const char_type* __s, streamsize __n)
     {
+      // Direct buffer writing
+      if (__s == _M_out_cur)
+	{
+	  _M_out_cur_move(__n);
+	  return __n;
+	}
+
       streamsize __ret = 0;
       while (__ret < __n)
 	{
Index: include/bits/streambuf_iterator.h
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/bits/streambuf_iterator.h,v
retrieving revision 1.9
diff -u -r1.9 streambuf_iterator.h
--- include/bits/streambuf_iterator.h	6 Feb 2003 05:58:10 -0000	1.9
+++ include/bits/streambuf_iterator.h	22 Feb 2003 05:23:59 -0000
@@ -205,6 +205,10 @@
 	this->_M_sbuf->sputn(__ws, __len);
 	return *this;
       }
+
+      _CharT* 
+      _M_bypass(_CharT* __ws, streamsize __len)  // Forwards to the streambuf.
+      { return this->_M_sbuf->_M_bypass(__ws, __len); }
     };
 } // namespace std
 #endif
Index: include/std/std_streambuf.h
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/std/std_streambuf.h,v
retrieving revision 1.13
diff -u -r1.13 std_streambuf.h
--- include/std/std_streambuf.h	23 Jan 2003 22:53:35 -0000	1.13
+++ include/std/std_streambuf.h	22 Feb 2003 05:24:00 -0000
@@ -917,6 +917,23 @@
       __streambuf_type& 
       operator=(const __streambuf_type&) { return *this; };
 #endif
+
+    public:
+      // Support efficient direct access to the underlying buffer.  When the
+      // buffer has enough space to write to, return it to the calling
+      // function.  The function can assemble the output directly into the
+      // streambuf buffer, saving a redundant copy.  xsputn() checks to see if
+      // it is being passed its own buffer and bypasses normal processing in
+      // this case.
+
+      inline _CharT*
+      _M_bypass(_CharT* __buf, int __n)
+        {
+	  if (this->_M_out_buf_size() >= __n)
+	    return this->pptr();  // Enough room: hand out the put pointer.
+	  else
+	    return __buf;         // Otherwise keep the caller's buffer.
+	}
     };
 } // namespace std
 
Index: src/locale.cc
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/src/locale.cc,v
retrieving revision 1.78
diff -u -r1.78 locale.cc
--- src/locale.cc	17 Feb 2003 21:42:05 -0000	1.78
+++ src/locale.cc	22 Feb 2003 05:24:00 -0000
@@ -563,5 +563,35 @@
       *__fptr++ = __mod;
     *__fptr = '\0';
   }
+
+  bool __num_base::_S_lengths_initialized = false;  // Set by the table builder.
+  __num_base::__format_length __num_base::_S_lengths[(sizeof(__num_base::_Threshold_type) * CHAR_BIT + 2) / 3];
+
+  void
+  __num_base::_S_init_format_length_table()
+  {
+    // Entry [k] serves values with k + 1 octal digits: __digits is the decimal
+    // width at the low end, __threshold the value where one more is needed.
+    const size_t __format_lengths_size
+      = (sizeof(__num_base::_Threshold_type) * CHAR_BIT + 2) / 3;
+
+    _Threshold_type __bench = 0;
+    _Threshold_type __dectuple = 10;
+    size_t __digits = 1;
+    for (size_t __log8 = 0; __log8 < __format_lengths_size; ++__log8)
+      {
+	_S_lengths[__log8].__threshold = __dectuple;
+	_S_lengths[__log8].__digits = __digits;
+	__bench = (__bench << 3) | 7;	// Largest value with __log8 + 1 octal digits
+	if (__bench > __dectuple)
+	  {
+	    __dectuple *= 10;
+	    ++__digits;
+	  }
+      }
+    // Set the flag only after the table is filled, so a reader that sees it
+    // set does not find zeroed entries.  Racing writers store the same values.
+    _S_lengths_initialized = true;
+  }
 } // namespace std
 



More information about the Libstdc++ mailing list