This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch] Use std::search in string::find


Hi,
	You might also want to patch rfind, while you are at it. Included is a
function rr_find, that attempts to emulate rfind, and also the timings
for that function relative to string::rfind.




On Sat, 2004-06-12 at 01:46, Paolo Carlini wrote:
> Hi,
> 
> the below is what I have implemented and tested on x86 and x86-64.
> 
> Interestingly for the new performance testcase the stripped code
> size does *not* change. In mainline the total time changes as
> follows on P4-2400 (always -O2):
> 
> current mainline x86
> --------------------
> 22.370u 0.000s 0:22.46 99.5%    0+0k 0+0io 166pf+0w
> 
> patched mainline x86
> --------------------
>  9.090u 0.000s 0:09.13 99.5%    0+0k 0+0io 168pf+0w
> 
> And as follows on x86-64-2GHz:
> 
> current mainline x86-64
> -----------------------
>  5.216u 0.000s 0:05.21 100.0%    0+0k 0+0io 0pf+0w
> 
> patched mainline x86-64
> -----------------------
>  4.743u 0.000s 0:04.74 100.0%    0+0k 0+0io 0pf+0w
> 
> In general, in these days often happens that on x86 current 3.4
> gives better results than mainline. Indeed, this happens here
> too for unpatched 3.4:
> 
> current 3.4 x86
> ---------------
> 14.920u 0.010s 0:15.03 99.3%    0+0k 0+0io 162pf+0w
> 
> patched 3.4 x86
> ---------------
>  9.450u 0.010s 0:09.51 99.4%    0+0k 0+0io 164pf+0w
> 
> All in all, the numbers speak in favor of this approach, and,
> in case we really want to change the algorithm, we can just
> improve the one provided generically in std::search.
> 
> I'll wait 'til tomorrow in case of comments.
> 

-- 
        -Dhruv Matani.
http://www.geocities.com/dhruvbird/

Proud to be a Vegetarian.
http://www.vegetarianstarterkit.com/
http://www.vegkids.com/vegkids/index.html

#include <iostream>
#include <string>
#include <cstdlib>
#include <vector>
#include <algorithm>
#include <time.h>

using namespace std;

struct Timer {
  clock_t begin, end;
  void start() { begin = clock(); }
  void stop() { end = clock(); }
  double operator()() { return static_cast<double>
			  (end-begin)/CLOCKS_PER_SEC; }
};

string::size_type
s_find(const string& data, const string& src)
{
  string::size_type max = src.size();
  string::size_type inner_max = data.size();
  for (string::size_type i = 0; i < max; ++i)
    {
      string::size_type j;
      for (j = 0; j < inner_max; ++j)
	{
	  if (src[i + j] != data[j])
	    break;
	}
      if (j == inner_max)
	return i;
    }
  return string::npos;
}

string::size_type
s_search(const string& data, const string& src)
{
  string::const_iterator i = 
    std::search(src.begin(), src.end(), 
		data.begin(), data.end());
  return (i - src.begin());
}



template <class _Str>
typename _Str::size_type str_find (const _Str& search_for, const _Str& src)
{
  typename _Str::size_type src_size = src.size();
  typename _Str::size_type data_size = search_for.size();
  typename _Str::size_type next_start = 0;
  register typename _Str::size_type i = 0;

  while (i < src_size)
    {
      register typename _Str::size_type j;
      for (j = 0; j < data_size; ++j)
	{
	  if (src[i] != search_for[j])
	    {
	      if (next_start)
		{
		  i = next_start;
		  next_start = 0;
		}
	      else
		if (search_for[0] != src[i])
		  ++i;
	      break;
	    }
	  else
	    {
	      if (j != 0 && next_start == 0 && src[i] == search_for[0])
		next_start = i;
	      ++i;
	    }
	}
      if (j == data_size)
	return (i - data_size);
    }
  return _Str::npos;
}

string::size_type
rr_find(string const& f, string const& s)
{
  string::size_type src_sz = s.size();
  string::size_type vt_sz = f.size();
  if (src_sz < vt_sz)
    return 0;

  string::size_type diff = src_sz - vt_sz;
  ++diff;
  while(diff)
    {
      string::size_type tdiff = diff;
      do
	{
	  --tdiff;
	} while (s[tdiff] != f[0] && tdiff != 0);
      diff = tdiff;
      if (s[diff] == f[0])
	{
	  string::size_type i;
	  //Search remaining string.
	  for (i = 0; i < vt_sz; ++i)
	    if (s[i + diff] != f[i])
	      break;
	  if (i == vt_sz && s[diff + i - 1] == f[i - 1])
	    return diff;
	}
    }
  return string::npos;
}



void
test_pair(string const& s, string const& f)
{
  cout<<"Searching for: "<<f<<"*** in: "<<s<<endl;
  Timer t;
  string::size_type sz = 0;

  t.start ();
  for (int i = 0; i < 1000000; ++i)
    sz = rr_find(f, s);
  t.stop ();

  cout<<"Time taken: "<<t()<<" seconds."<<endl;
  cout<<sz<<endl;

  t.start ();
  for (int i = 0; i < 1000000; ++i)
    sz = s.rfind (f);
  t.stop ();

  cout<<"Time taken: "<<t()<<" seconds."<<endl;
  cout<<sz<<endl;

//   t.start ();
//   for (int i = 0; i < 1000000; ++i)
//     sz = str_find(f, s);
//   t.stop ();

//   cout<<"Time taken: "<<t()<<" seconds."<<endl;
//   cout<<sz<<endl;

//   t.start ();
//   for (int i = 0; i < 1000000; ++i)
//     sz = s_search(f, s);
//   t.stop ();

//   cout<<"Time taken: "<<t()<<" seconds."<<endl;
//   cout<<sz<<endl;
}



int main ()
{
  string s, f;

  s = "aabbaabbaaxd adbffdadgaxaabbbddhatyaaaabbbaabbaabbcsy";
  f = "aabbaabbc";
  test_pair(s, f);

  f = "aabbb";
  test_pair(s, f);

  f = "xd";
  test_pair(s, f);

  s = "dhruv is a very very good boy ;-)";
  f = "very";
  test_pair(s, f);

  f = "bad";
  test_pair(s, f);

  f = "extra irritating";
  test_pair(s, f);

  s = "this is a very long sentence this is a very this is a verty this is a very this is a very long sentenc";
  f = "this is a very long sentence";
  test_pair(s, f);
}

Attachment: str_find_output.txt
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]