This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

patch: basic_string.tcc


Hi,
	This patch is basically an optimization for string::rfind(), and
string::find_first_of(). It attempts to use different algorithms for
either functions to speed up the operations of the functions in various
situations. What I have done for find_first_of() is that there is a
threshold(Paolo pointed out that the function will perform badly when
the string passed as a parameter is >> *this, or when the string is too
large, and the table build-up time is >> the normal case) of 128. Only
if the string passed is <= 128 then will the optimized algorithm kick
in. Else, the normal algorithm will be used.

Also, I have one doubt: There is a line such as this:

if (sizeof(char) <= 8 && (some other run-time condition))
{

}
else
{

}

Now, suppose that sizeof(char) > 8, will the optimizer eliminate the if
block completely?

Also, attached are test-cases used for measuring the timing of either of
the functions before and after applying the patch. The file output.txt
contains the timings reported by the above 2 tests before and after the
application of the patch.

Tested x86-Linux.

However, these 2 seemingly unrelated tests are failing on the current
cvs version:

FAIL: 21_strings/basic_string/inserters_extractors/pod/10081-in.cc (test
for excess errors)
WARNING: 21_strings/basic_string/inserters_extractors/pod/10081-in.cc
compilation failed to produce executable
FAIL: 21_strings/basic_string/inserters_extractors/pod/10081-out.cc
(test for excess errors)
WARNING: 21_strings/basic_string/inserters_extractors/pod/10081-out.cc
compilation failed to produce executable




-- 
	-Dhruv Matani.
http://www.geocities.com/dhruvbird/

template<typename Signature>
  class CustomSignature : public Signature
{ };

Attachment: ChangeLog
Description: Text document

*** basic_string.tcc	2004-06-12 13:40:22.000000000 +0530
--- /home/dhruv/projects/new_libstdc++-v3/basic_string.tcc	2004-07-20 15:59:40.000000000 +0530
*************** namespace std
*** 721,740 ****
      basic_string<_CharT, _Traits, _Alloc>::
      rfind(const _CharT* __s, size_type __pos, size_type __n) const
      {
!       __glibcxx_requires_string_len(__s, __n);
!       const size_type __size = this->size();
!       if (__n <= __size)
  	{
- 	  __pos = std::min(size_type(__size - __n), __pos);
- 	  const _CharT* __data = _M_data();
  	  do
  	    {
! 	      if (traits_type::compare(__data + __pos, __s, __n) == 0)
! 		return __pos;
  	    }
- 	  while (__pos-- > 0);
  	}
!       return npos;
      }
  
    template<typename _CharT, typename _Traits, typename _Alloc>
--- 721,763 ----
      basic_string<_CharT, _Traits, _Alloc>::
      rfind(const _CharT* __s, size_type __pos, size_type __n) const
      {
!       typedef basic_string<_CharT, _Traits, _Alloc> _This_type;
! 
!       if (!__n)
! 	return std::min(this->size() - __n, __pos);
!       if (this->size() < __n)
! 	return npos;
!       // Now, we are sure that s.size() != 0!
! 
!       const typename _This_type::const_pointer __data = this->data();
!       const typename _This_type::const_pointer __last = __s + __n;
!       typename _This_type::const_pointer __start = __data 
! 	+ std::min(this->size() - __n, __pos) + 1;
! 
!     restart_loop_:
! 
!       while(__start != __data)
  	{
  	  do
  	    {
! 	      --__start;
! 	    } while(__start != __data && *__start != *__s);
! 
! 	  if (*__start == *__s)
! 	    {
! 	      typename _This_type::const_pointer __tmp = __s + 1;
! 	      typename _This_type::const_pointer __src_p = __start + 1;
! 	      while(__tmp != __last)
! 		{
! 		  if (*__tmp != *__src_p)
! 		    goto restart_loop_;
! 		  ++__tmp;
! 		  ++__src_p;
! 		}
! 	      return (__start - __data);
  	    }
  	}
!       return _This_type::npos;
      }
  
    template<typename _CharT, typename _Traits, typename _Alloc>
*************** namespace std
*** 759,772 ****
      basic_string<_CharT, _Traits, _Alloc>::
      find_first_of(const _CharT* __s, size_type __pos, size_type __n) const
      {
        __glibcxx_requires_string_len(__s, __n);
!       for (; __n && __pos < this->size(); ++__pos)
  	{
! 	  const _CharT* __p = traits_type::find(__s, __n, _M_data()[__pos]);
! 	  if (__p)
! 	    return __pos;
  	}
-       return npos;
      }
  
    template<typename _CharT, typename _Traits, typename _Alloc>
--- 782,821 ----
      basic_string<_CharT, _Traits, _Alloc>::
      find_first_of(const _CharT* __s, size_type __pos, size_type __n) const
      {
+       typedef basic_string<_CharT, _Traits, _Alloc> _This_type;
        __glibcxx_requires_string_len(__s, __n);
!       const unsigned int __s_threshold = 128;
! 
!       // Eliminate ME!
!       if (sizeof(char) <= 8 && __n <= __s_threshold 
! 	  && this->size() - __pos >= __n)
  	{
! 	  char __table[1 << 8] = { 0 };
! 	  typename _This_type::size_type __i;
!       
! 	  for (__i = 0; __i < __n; ++__i)
! 	    __table[static_cast<unsigned int>(__s[__i])] = 1;
! 
! 	  typename _This_type::size_type __sz = this->size();
! 	  typename _This_type::const_pointer __src = this->data();
! 	  for (__i = __pos; __i < __sz; ++__i)
! 	    {
! 	      // Is the entry present?
! 	      if (__table[static_cast<unsigned int>(__src[__i])] == 1)
! 		return __i;
! 	    }
! 	  return _This_type::npos;
! 	}
!       else
! 	{
! 	  for (; __n && __pos < this->size(); ++__pos)
! 	    {
! 	      const _CharT* __p = traits_type::find(__s, __n, _M_data()[__pos]);
! 	      if (__p)
! 		return __pos;
! 	    }
! 	  return npos;
  	}
      }
  
    template<typename _CharT, typename _Traits, typename _Alloc>
#include <iostream>
#include <string>
#include <cassert>
#include <time.h>

std::string s_str[18] = { "iostream", "include", "int", "float", "double", 
		   "assert", "FILE", "for", "if", "i", "<", "++i", 
			"string" , "main", "namespace", "fopen", "fclose" };

using namespace std;

struct Timer {
  clock_t begin, end;
  void start() { begin = clock(); }
  void stop() { end = clock(); }
  double operator()() { return static_cast<double>
			  (end-begin)/CLOCKS_PER_SEC; }
};

// string::size_type
// ptr_rr_find(const char* f, string const& s, string::size_type _pos, string::size_type _n)
// {
//   if (s.size() < _n || !_n)
//     return 0;
//   // Now, we are sure that s.size() != 0!

//   const string::const_pointer _data = s.data();
//   const string::const_pointer last = f + _n;
//   string::const_pointer start = _data + std::min(s.size() - _n, _pos) + 1;

//  restart_loop_:

//   while(start != _data)
//     {
//       do
// 	{
// 	  --start;
// 	} while(start != _data && *start != *f);

//       if (*start == *f)
// 	{
// 	  string::const_pointer _tmp = f + 1;
// 	  string::const_pointer _src_ptr = start + 1;
// 	  while(_tmp != last)
// 	    {
// 	      if (*_tmp != *_src_ptr)
// 		goto restart_loop_;
// 	      ++_tmp;
// 	      ++_src_ptr;
// 	    }
// 	  return (start - _data);
// 	}
//     }
//   return string::npos;
// }

int main()
{
  FILE* pf = fopen(__FILE__, "r");
  assert(pf);
  string src_file;
  char ch = getc(pf);
  Timer t;

  while(!feof(pf))
    {
      src_file.push_back(ch);
      ch = getc(pf);
    }

  fclose(pf);

  int Max = sizeof(s_str) / sizeof(string);
  char temp[2];
  *temp = 3;
  s_str[17] = temp;

  int Loop = 10000;
  t.start();
  while (Loop--)
    for (int i = 0; i < Max; ++i)
      //      cout<<endl<<
      src_file.rfind(s_str[i]);
  t.stop();
  cout<<"Time taken: "<<t()<<" seconds."<<endl;

//   Loop = 10000;
//   t.start();
//   while (Loop--)
//     for (int i = 0; i < Max; ++i)
//       //      cout<<endl<<
// 	ptr_rr_find(s_str[i].data(), src_file, src_file.size() - 1, s_str[i].size());
//   t.stop();
//   cout<<"Time taken: "<<t()<<" seconds."<<endl;
}
#include <iostream>
#include <string>
#include <time.h>

using namespace std;

struct Timer {
  clock_t begin, end;
  void start() { begin = clock(); }
  void stop() { end = clock(); }
  double operator()() { return static_cast<double>
			  (end-begin)/CLOCKS_PER_SEC; }
};

// string::size_type
// ffirst_of(string const& src, string const& f)
// {
// #if __CHAR_BIT__ <= 8
//   if (f.size() <= F_THRESHOLD && src.size() >= f.size())
//     {
//       char table[1 << __CHAR_BIT__] = { 0 };
//       string::size_type sz = f.size();
//       string::size_type i;
      
//       for (i = 0; i < sz; ++i)
// 	table[(unsigned int)f[i]] = 1;
      
//       sz = src.size();
      
//       for (i = 0; i < sz; ++i)
// 	{
// 	  //Is the entry present?
// 	  if (table[(unsigned int)src[i]] == 1)
// 	    return i;
// 	}
//       return string::npos;
//     }
// #else
// #error Use Traditional Algorithm
// #endif
// }



void
test_str(string const& src, string const& f)
{
  Timer t;
  const int Max = 300000;
  int sz = 0;

  t.start();
  for (int i = 0; i < Max; ++i)
    sz = src.find_first_of(f);
  t.stop();

  cout<<sz<<" Time taken == "<<t()<<" seconds."<<endl;

//   t.start();
//   for (int i = 0; i < Max; ++i)
//     sz = ffirst_of(src, f);
//   t.stop();

//   cout<<sz<<" Time taken == "<<t()<<" seconds."<<endl;
//   cout<<endl;
}


int main()
{
  string src = "some huge string not containing the character after the characher x early on in the "
    "string : x -> d";
  string search_for = "dhruv";

  test_str(src, search_for);

  search_for = "d";
  test_str(src, search_for);

  search_for = "df";
  test_str(src, search_for);

  search_for = "dx";
  test_str(src, search_for);

  search_for = "dxe";
  test_str(src, search_for);

  search_for = "z";
  test_str(src, search_for);
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]