This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch] libstdc++/22515


Hi,

the below fixes this performance issue with operator>> of chars (both
string and array). The solution - as usual, see the current getline and
ignore - involves using a "bulky" algorithm, working inside the
streambuf. On the performance testcase (200 chars per line), the timings
change:

string&
=======

mainline
--------
10.840u 0.890s 0:11.78 99.5%     0+0k 0+0io 231pf+0w

mainline + patch
----------------
 3.640u 0.870s 0:04.52 99.7%     0+0k 0+0io 232pf+0w


char*
=====

mainline
--------
10.010u 0.680s 0:10.73 99.6%     0+0k 0+0io 226pf+0w

mainline + patch
----------------
 3.540u 0.870s 0:04.42 99.7%     0+0k 0+0io 227pf+0w

The improvement is of course smaller for shorter input lines but always
easily measurable down to 20-30 chars and never becoming a regression,
even for very short inputs (< 5-10 chars).

A special feature of the standard requirements for these functions
(compared to getline and ignore, f.i.) is the use of ctype::is,
completely different for char (fast, inline, no virtual function calls)
vs wchar_t. Ultimately, that's why in my (rather extensive) experiments
during the last days, adding specializations for wchar_t too is not
profitable: only for long inputs a small improvement could be measured
and the performance actually became worse for shorter inputs (wrt the
generic solution currently in place, see istream.tcc).

An interesting project for after 4.1 branches, will be commonizing some
of the code of the optimized istream functions.

Tested x86-linux.

Paolo.

//////////////
2005-07-24  Paolo Carlini  <pcarlini@suse.de>

	PR libstdc++/22515
	* include/bits/basic_string.h: Declare the specialization
	operator>>(basic_istream<char>&, basic_string<char>&).
	* include/std/std_istream.h: Declare the specialization
	operator>>(basic_istream<char>&, char*).
	* include/std/std_streambuf.h (basic_streambuf): Add friend
	declarations for the above.
	* src/istream.cc: Define the above.
	* testsuite/27_io/basic_istream/extractors_character/char/4.cc: New.
	* testsuite/27_io/basic_istream/extractors_character/wchar_t/4.cc:
	Likewise.
	* testsuite/performance/27_io/ifstream_extract_chars.cc: Likewise.
diff -urN libstdc++-v3-orig/include/bits/basic_string.h libstdc++-v3/include/bits/basic_string.h
--- libstdc++-v3-orig/include/bits/basic_string.h	2005-06-29 14:18:08.000000000 +0200
+++ libstdc++-v3/include/bits/basic_string.h	2005-07-22 12:32:48.000000000 +0200
@@ -2373,6 +2373,10 @@
     operator>>(basic_istream<_CharT, _Traits>& __is,
 	       basic_string<_CharT, _Traits, _Alloc>& __str);
 
+  template<>
+    basic_istream<char>&
+    operator>>(basic_istream<char>& __is, basic_string<char>& __str);
+
   /**
    *  @brief  Write string to a stream.
    *  @param os  Output stream.
diff -urN libstdc++-v3-orig/include/std/std_istream.h libstdc++-v3/include/std/std_istream.h
--- libstdc++-v3-orig/include/std/std_istream.h	2004-11-24 05:11:15.000000000 +0100
+++ libstdc++-v3/include/std/std_istream.h	2005-07-22 14:38:45.000000000 +0200
@@ -1,6 +1,6 @@
 // Input streams -*- C++ -*-
 
-// Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003, 2004
+// Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005
 // Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
@@ -723,7 +723,12 @@
   template<typename _CharT, typename _Traits>
     basic_istream<_CharT, _Traits>&
     operator>>(basic_istream<_CharT, _Traits>& __in, _CharT* __s);
-  
+
+  // Explicit specialization declaration, defined in src/istream.cc.
+  template<>
+    basic_istream<char>&
+    operator>>(basic_istream<char>& __in, char* __s);
+
   template<class _Traits>
     basic_istream<char, _Traits>&
     operator>>(basic_istream<char, _Traits>& __in, unsigned char* __s)
diff -urN libstdc++-v3-orig/include/std/std_streambuf.h libstdc++-v3/include/std/std_streambuf.h
--- libstdc++-v3-orig/include/std/std_streambuf.h	2004-11-24 05:11:21.000000000 +0100
+++ libstdc++-v3/include/std/std_streambuf.h	2005-07-22 12:32:48.000000000 +0200
@@ -1,6 +1,6 @@
 // Stream buffer classes -*- C++ -*-
 
-// Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
+// Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
 // Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
@@ -154,6 +154,15 @@
       __copy_streambufs<>(__streambuf_type* __sbin,
 			  __streambuf_type* __sbout);
 
+      template<typename _CharT2, typename _Traits2>
+        friend basic_istream<_CharT2, _Traits2>&
+        operator>>(basic_istream<_CharT2, _Traits2>&, _CharT2*);
+
+      template<typename _CharT2, typename _Traits2, typename _Alloc>
+        friend basic_istream<_CharT2, _Traits2>&
+        operator>>(basic_istream<_CharT2, _Traits2>&,
+		   basic_string<_CharT2, _Traits2, _Alloc>&);
+
       template<typename _CharT2, typename _Traits2, typename _Alloc>
         friend basic_istream<_CharT2, _Traits2>&
         getline(basic_istream<_CharT2, _Traits2>&,
diff -urN libstdc++-v3-orig/src/istream.cc libstdc++-v3/src/istream.cc
--- libstdc++-v3-orig/src/istream.cc	2005-06-17 09:33:55.000000000 +0200
+++ libstdc++-v3/src/istream.cc	2005-07-23 00:09:42.000000000 +0200
@@ -185,6 +185,157 @@
 
   template<>
     basic_istream<char>&
+    operator>>(basic_istream<char>& __in, char* __s)
+    {
+      typedef basic_istream<char>       	__istream_type;
+      typedef __istream_type::int_type		__int_type;
+      typedef __istream_type::char_type		__char_type;
+      typedef __istream_type::traits_type	__traits_type;
+      typedef __istream_type::__streambuf_type  __streambuf_type;
+      typedef __istream_type::__ctype_type	__ctype_type;
+
+      streamsize __extracted = 0;
+      ios_base::iostate __err = ios_base::iostate(ios_base::goodbit);
+      __istream_type::sentry __cerb(__in, false);
+      if (__cerb)
+	{
+	  try
+	    {
+	      // Figure out how many characters to extract.
+	      streamsize __num = __in.width();
+	      if (__num <= 0)
+		__num = numeric_limits<streamsize>::max();
+
+	      const __ctype_type& __ct = use_facet<__ctype_type>(__in.getloc());
+
+	      const __int_type __eof = __traits_type::eof();
+	      __streambuf_type* __sb = __in.rdbuf();
+	      __int_type __c = __sb->sgetc();
+
+	      while (__extracted < __num - 1
+		     && !__traits_type::eq_int_type(__c, __eof)
+		     && !__ct.is(ctype_base::space,
+				 __traits_type::to_char_type(__c)))
+		{
+		  streamsize __size = std::min(streamsize(__sb->egptr()
+							  - __sb->gptr()),
+					       streamsize(__num - __extracted
+							  - 1));
+		  if (__size > 1)
+		    {
+		      __size = (__ct.scan_is(ctype_base::space,
+					     __sb->gptr() + 1,
+					     __sb->gptr() + __size)
+				- __sb->gptr());
+		      __traits_type::copy(__s, __sb->gptr(), __size);
+		      __s += __size;
+		      __sb->gbump(__size);
+		      __extracted += __size;
+		      __c = __sb->sgetc();
+		    }
+		  else
+		    {
+		      *__s++ = __traits_type::to_char_type(__c);
+		      ++__extracted;
+		      __c = __sb->snextc();
+		    }
+		}
+
+	      if (__traits_type::eq_int_type(__c, __eof))
+		__err |= ios_base::eofbit;
+
+	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
+	      // 68.  Extractors for char* should store null at end
+	      *__s = __char_type();
+	      __in.width(0);
+	    }
+	  catch(...)
+	    { __in._M_setstate(ios_base::badbit); }
+	}
+      if (!__extracted)
+	__err |= ios_base::failbit;
+      if (__err)
+	__in.setstate(__err);
+      return __in;
+    }
+
+  template<>
+    basic_istream<char>&
+    operator>>(basic_istream<char>& __in, basic_string<char>& __str)
+    {
+      typedef basic_istream<char>       	__istream_type;
+      typedef __istream_type::int_type		__int_type;
+      typedef __istream_type::char_type		__char_type;
+      typedef __istream_type::traits_type	__traits_type;
+      typedef __istream_type::__streambuf_type  __streambuf_type;
+      typedef __istream_type::__ctype_type	__ctype_type;
+      typedef basic_string<char>        	__string_type;
+      typedef __string_type::size_type		__size_type;
+
+      __size_type __extracted = 0;
+      ios_base::iostate __err = ios_base::iostate(ios_base::goodbit);
+      __istream_type::sentry __cerb(__in, false);
+      if (__cerb)
+	{
+	  try
+	    {
+	      __str.erase();
+	      const streamsize __w = __in.width();
+	      const __size_type __n = __w > 0 ? static_cast<__size_type>(__w)
+		                              : __str.max_size();
+	      const __ctype_type& __ct = use_facet<__ctype_type>(__in.getloc());
+	      const __int_type __eof = __traits_type::eof();
+	      __streambuf_type* __sb = __in.rdbuf();
+	      __int_type __c = __sb->sgetc();
+
+	      while (__extracted < __n
+		     && !__traits_type::eq_int_type(__c, __eof)
+		     && !__ct.is(ctype_base::space,
+				 __traits_type::to_char_type(__c)))
+		{
+		  streamsize __size = std::min(streamsize(__sb->egptr()
+							  - __sb->gptr()),
+					       streamsize(__n - __extracted));
+		  if (__size > 1)
+		    {
+		      __size = (__ct.scan_is(ctype_base::space,
+					     __sb->gptr() + 1,
+					     __sb->gptr() + __size)
+				- __sb->gptr());
+		      __str.append(__sb->gptr(), __size);
+		      __sb->gbump(__size);
+		      __extracted += __size;
+		      __c = __sb->sgetc();
+		    }
+		  else
+		    {
+		      __str += __traits_type::to_char_type(__c);
+		      ++__extracted;
+		      __c = __sb->snextc();
+		    }		  
+		}
+
+	      if (__traits_type::eq_int_type(__c, __eof))
+		__err |= ios_base::eofbit;
+	      __in.width(0);
+	    }
+	  catch(...)
+	    {
+	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
+	      // 91. Description of operator>> and getline() for string<>
+	      // might cause endless loop
+	      __in._M_setstate(ios_base::badbit);
+	    }
+	}
+      if (!__extracted)
+	__err |= ios_base::failbit;
+      if (__err)
+	__in.setstate(__err);
+      return __in;
+    }
+
+  template<>
+    basic_istream<char>&
     getline(basic_istream<char>& __in, basic_string<char>& __str,
 	    char __delim)
     {
diff -urN libstdc++-v3-orig/testsuite/27_io/basic_istream/extractors_character/char/4.cc libstdc++-v3/testsuite/27_io/basic_istream/extractors_character/char/4.cc
--- libstdc++-v3-orig/testsuite/27_io/basic_istream/extractors_character/char/4.cc	1970-01-01 01:00:00.000000000 +0100
+++ libstdc++-v3/testsuite/27_io/basic_istream/extractors_character/char/4.cc	2005-07-22 14:32:58.000000000 +0200
@@ -0,0 +1,90 @@
+// 2005-07-22  Paolo Carlini  <pcarlini@suse.de>
+
+// Copyright (C) 2005 Free Software Foundation
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// 27.6.1.2.3 basic_istream::operator>>
+
+#include <istream>
+#include <string>
+#include <fstream>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+string prepare(string::size_type len, unsigned nchunks)
+{
+  string ret;
+  for (unsigned i = 0; i < nchunks; ++i)
+    {
+      for (string::size_type j = 0; j < len; ++j)
+	ret.push_back('a' + rand() % 26);
+      len *= 2;
+      ret.push_back(' ');
+    }
+  return ret;
+}
+
+void check(istream& stream, const string& str, unsigned nchunks)
+{
+  bool test __attribute__((unused)) = true;
+
+  char* chunk = new char[str.size()];
+  memset(chunk, 'X', str.size());
+
+  string::size_type index = 0, index_new = 0;
+  unsigned n = 0;
+
+  while (stream >> chunk)
+    {
+      index_new = str.find(' ', index);
+      VERIFY( !str.compare(index, index_new - index, chunk) );
+      index = index_new + 1;
+      ++n;
+      memset(chunk, 'X', str.size());
+    }
+  VERIFY( stream.eof() );
+  VERIFY( n == nchunks );
+
+  delete[] chunk;
+}
+
+// istream& operator>>(istream&, charT*)
+void test01()
+{
+  const char filename[] = "inserters_extractors-4.txt";
+
+  const unsigned nchunks = 10;
+  const string data = prepare(666, nchunks);
+
+  ofstream ofstrm;
+  ofstrm.open(filename);
+  ofstrm.write(data.data(), data.size());
+  ofstrm.close();
+
+  ifstream ifstrm;
+  ifstrm.open(filename);
+  check(ifstrm, data, nchunks);
+  ifstrm.close();
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
diff -urN libstdc++-v3-orig/testsuite/27_io/basic_istream/extractors_character/wchar_t/4.cc libstdc++-v3/testsuite/27_io/basic_istream/extractors_character/wchar_t/4.cc
--- libstdc++-v3-orig/testsuite/27_io/basic_istream/extractors_character/wchar_t/4.cc	1970-01-01 01:00:00.000000000 +0100
+++ libstdc++-v3/testsuite/27_io/basic_istream/extractors_character/wchar_t/4.cc	2005-07-22 14:33:18.000000000 +0200
@@ -0,0 +1,90 @@
+// 2005-07-22  Paolo Carlini  <pcarlini@suse.de>
+
+// Copyright (C) 2005 Free Software Foundation
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// 27.6.1.2.3 basic_istream::operator>>
+
+#include <istream>
+#include <string>
+#include <fstream>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+wstring prepare(wstring::size_type len, unsigned nchunks)
+{
+  wstring ret;
+  for (unsigned i = 0; i < nchunks; ++i)
+    {
+      for (wstring::size_type j = 0; j < len; ++j)
+	ret.push_back(L'a' + rand() % 26);
+      len *= 2;
+      ret.push_back(L' ');
+    }
+  return ret;
+}
+
+void check(wistream& stream, const wstring& str, unsigned nchunks)
+{
+  bool test __attribute__((unused)) = true;
+
+  wchar_t* chunk = new wchar_t[str.size()];
+  wmemset(chunk, L'X', str.size());
+
+  wstring::size_type index = 0, index_new = 0;
+  unsigned n = 0;
+
+  while (stream >> chunk)
+    {
+      index_new = str.find(' ', index);
+      VERIFY( !str.compare(index, index_new - index, chunk) );
+      index = index_new + 1;
+      ++n;
+      wmemset(chunk, L'X', str.size());
+    }
+  VERIFY( stream.eof() );
+  VERIFY( n == nchunks );
+
+  delete[] chunk;
+}
+
+// istream& operator>>(istream&, charT*)
+void test01()
+{
+  const char filename[] = "inserters_extractors-4.txt";
+
+  const unsigned nchunks = 10;
+  const wstring data = prepare(666, nchunks);
+
+  wofstream ofstrm;
+  ofstrm.open(filename);
+  ofstrm.write(data.data(), data.size());
+  ofstrm.close();
+
+  wifstream ifstrm;
+  ifstrm.open(filename);
+  check(ifstrm, data, nchunks);
+  ifstrm.close();
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
diff -urN libstdc++-v3-orig/testsuite/performance/27_io/ifstream_extract_chars.cc libstdc++-v3/testsuite/performance/27_io/ifstream_extract_chars.cc
--- libstdc++-v3-orig/testsuite/performance/27_io/ifstream_extract_chars.cc	1970-01-01 01:00:00.000000000 +0100
+++ libstdc++-v3/testsuite/performance/27_io/ifstream_extract_chars.cc	2005-07-22 12:32:48.000000000 +0200
@@ -0,0 +1,91 @@
+// Copyright (C) 2005 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction.  Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License.  This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+#include <cstdio>
+#include <fstream>
+#include <string>
+#include <testsuite_performance.h>
+
+// libstdc++/22515
+int main() 
+{
+  using namespace std;
+  using namespace __gnu_test;
+
+  time_counter time;
+  resource_counter resource;
+
+  const char filename[] = "tmp_perf_chars.txt";
+  const unsigned lines = 200000;
+  const unsigned line_length = 200;
+
+  char* line = new char[line_length + 2];
+
+  // Construct data.
+  {
+    memset(line, 'x', line_length);
+    line[line_length] = '\n';
+    line[line_length + 1] = '\0';
+    
+    ofstream out(filename);
+    for (unsigned i = 0; i < lines; ++i)
+      out << line;
+  }
+  
+  // operator>>(basic_istream<char>& __in, basic_string<char>& __str)
+  {
+    start_counters(time, resource);
+    for (int iter = 0; iter < 25; ++iter)
+      {
+	ifstream file(filename);
+	string string_line;
+	
+	while (file >> string_line);
+      }
+    stop_counters(time, resource);
+    report_performance(__FILE__, "string&", time, resource);
+    clear_counters(time, resource);
+  }
+  
+  // operator>>(basic_istream<char>& __in, char* __s)
+  {
+    start_counters(time, resource);
+    for (int iter = 0; iter < 25; ++iter)
+      {
+	ifstream file(filename);
+	
+	while (file >> line);
+      }
+    stop_counters(time, resource);
+    report_performance(__FILE__, "char*", time, resource);
+    clear_counters(time, resource);
+  }
+
+  delete[] line;
+  unlink(filename);
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]