This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
| Other format: | [Raw text] | |
Hi,
You might also want to patch rfind, while you are at it. Included is a
function rr_find, that attempts to emulate rfind, and also the timings
for that function relative to string::rfind.
On Sat, 2004-06-12 at 01:46, Paolo Carlini wrote:
> Hi,
>
> the below is what I have implemented and tested on x86 and x86-64.
>
> Interestingly for the new performance testcase the stripped code
> size does *not* change. In mainline the total time changes as
> follows on P4-2400 (always -O2):
>
> current mainline x86
> --------------------
> 22.370u 0.000s 0:22.46 99.5% 0+0k 0+0io 166pf+0w
>
> patched mainline x86
> --------------------
> 9.090u 0.000s 0:09.13 99.5% 0+0k 0+0io 168pf+0w
>
> And as follows on x86-64-2GHz:
>
> current mainline x86-64
> -----------------------
> 5.216u 0.000s 0:05.21 100.0% 0+0k 0+0io 0pf+0w
>
> patched mainline x86-64
> -----------------------
> 4.743u 0.000s 0:04.74 100.0% 0+0k 0+0io 0pf+0w
>
> In general, in these days often happens that on x86 current 3.4
> gives better results than mainline. Indeed, this happens here
> too for unpatched 3.4:
>
> current 3.4 x86
> ---------------
> 14.920u 0.010s 0:15.03 99.3% 0+0k 0+0io 162pf+0w
>
> patched 3.4 x86
> ---------------
> 9.450u 0.010s 0:09.51 99.4% 0+0k 0+0io 164pf+0w
>
> All in all, the numbers speak in favor of this approach, and,
> in case we really want to change the algorithm, we can just
> improve the one provided generically in std::search.
>
> I'll wait 'til tomorrow in case of comments.
>
--
-Dhruv Matani.
http://www.geocities.com/dhruvbird/
Proud to be a Vegetarian.
http://www.vegetarianstarterkit.com/
http://www.vegkids.com/vegkids/index.html
#include <iostream>
#include <string>
#include <cstdlib>
#include <vector>
#include <algorithm>
#include <time.h>
using namespace std;
struct Timer {
clock_t begin, end;
void start() { begin = clock(); }
void stop() { end = clock(); }
double operator()() { return static_cast<double>
(end-begin)/CLOCKS_PER_SEC; }
};
string::size_type
s_find(const string& data, const string& src)
{
string::size_type max = src.size();
string::size_type inner_max = data.size();
for (string::size_type i = 0; i < max; ++i)
{
string::size_type j;
for (j = 0; j < inner_max; ++j)
{
if (src[i + j] != data[j])
break;
}
if (j == inner_max)
return i;
}
return string::npos;
}
string::size_type
s_search(const string& data, const string& src)
{
string::const_iterator i =
std::search(src.begin(), src.end(),
data.begin(), data.end());
return (i - src.begin());
}
template <class _Str>
typename _Str::size_type str_find (const _Str& search_for, const _Str& src)
{
typename _Str::size_type src_size = src.size();
typename _Str::size_type data_size = search_for.size();
typename _Str::size_type next_start = 0;
register typename _Str::size_type i = 0;
while (i < src_size)
{
register typename _Str::size_type j;
for (j = 0; j < data_size; ++j)
{
if (src[i] != search_for[j])
{
if (next_start)
{
i = next_start;
next_start = 0;
}
else
if (search_for[0] != src[i])
++i;
break;
}
else
{
if (j != 0 && next_start == 0 && src[i] == search_for[0])
next_start = i;
++i;
}
}
if (j == data_size)
return (i - data_size);
}
return _Str::npos;
}
string::size_type
rr_find(string const& f, string const& s)
{
string::size_type src_sz = s.size();
string::size_type vt_sz = f.size();
if (src_sz < vt_sz)
return 0;
string::size_type diff = src_sz - vt_sz;
++diff;
while(diff)
{
string::size_type tdiff = diff;
do
{
--tdiff;
} while (s[tdiff] != f[0] && tdiff != 0);
diff = tdiff;
if (s[diff] == f[0])
{
string::size_type i;
//Search remaining string.
for (i = 0; i < vt_sz; ++i)
if (s[i + diff] != f[i])
break;
if (i == vt_sz && s[diff + i - 1] == f[i - 1])
return diff;
}
}
return string::npos;
}
void
test_pair(string const& s, string const& f)
{
cout<<"Searching for: "<<f<<"*** in: "<<s<<endl;
Timer t;
string::size_type sz = 0;
t.start ();
for (int i = 0; i < 1000000; ++i)
sz = rr_find(f, s);
t.stop ();
cout<<"Time taken: "<<t()<<" seconds."<<endl;
cout<<sz<<endl;
t.start ();
for (int i = 0; i < 1000000; ++i)
sz = s.rfind (f);
t.stop ();
cout<<"Time taken: "<<t()<<" seconds."<<endl;
cout<<sz<<endl;
// t.start ();
// for (int i = 0; i < 1000000; ++i)
// sz = str_find(f, s);
// t.stop ();
// cout<<"Time taken: "<<t()<<" seconds."<<endl;
// cout<<sz<<endl;
// t.start ();
// for (int i = 0; i < 1000000; ++i)
// sz = s_search(f, s);
// t.stop ();
// cout<<"Time taken: "<<t()<<" seconds."<<endl;
// cout<<sz<<endl;
}
int main ()
{
string s, f;
s = "aabbaabbaaxd adbffdadgaxaabbbddhatyaaaabbbaabbaabbcsy";
f = "aabbaabbc";
test_pair(s, f);
f = "aabbb";
test_pair(s, f);
f = "xd";
test_pair(s, f);
s = "dhruv is a very very good boy ;-)";
f = "very";
test_pair(s, f);
f = "bad";
test_pair(s, f);
f = "extra irritating";
test_pair(s, f);
s = "this is a very long sentence this is a very this is a verty this is a very this is a very long sentenc";
f = "this is a very long sentence";
test_pair(s, f);
}
Attachment:
str_find_output.txt
Description: Text document
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |