This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
Re: on the speed of std::string::find
Am Freitag, den 01.09.2006, 14:30 +0200 schrieb Paolo Carlini:
> Dennis Lubert wrote:
>
> >
> >
> Likewise good point ;) Yes, in various parts of the library, locale for
> instance, we are putting to good use very convenient gnu extensions,
> therefore the plan makes sense. Well, we have to study the issue a bit
> more and see whether the added complexity is worth the improvement (from
> a maintainance point of view too).
>From a maintainance pov I think its not much more to call memmem instead
of std::search (and of course sort out the different parameters, and
different returns, but its just similar enough).
Attached is a small test that I hacked together. Its not very accurate
and clean etc. but it should show that memmem is ~1.5 times faster in
this case.
Be aware that this program needs to be compiled without any optimization
(on irc I had the report that it even only works when you explicitly set
-O0) otherwise the memmem call will be eliminitaed outside the loop
completely.
greets
Dennis
PS: playing around with the strings can vary the factor a lot it seems.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
extern "C" {
#include <sys/time.h>
}
#include <cstring>
#include <ctime>
#include <iostream>
#include <string>
struct timeval start_tv;
struct timeval stop_tv;
// ok, this way to time stuff isn't very accurate, but for demonstration
// purposes it should work fine here.
void start_clock( void )
{
gettimeofday(&start_tv,0);
}
void stop_clock( void )
{
gettimeofday(&stop_tv,0);
}
double time_clock( void )
{
double s = (stop_tv.tv_sec - start_tv.tv_sec);
s += (double)(stop_tv.tv_usec - start_tv.tv_usec) / 1000000.0;
return s;
}
using namespace std;
const int r = 10000000; // leads to ~2.5 seconds on std::string::find, adapt if its too fast or too slow for your system
double t1 = 0.0;
double t2 = 0.0;
// With this we test the std::string::find function
void test1( const std::string& hay, const std::string& ned )
{
start_clock();
for(int i = 0; i < r; ++i )
{
hay.find(ned);
}
stop_clock();
t1 = time_clock();
cout << "std::string::find : " << t1 << endl;
}
// This is the wrapper function that shall work the same way as
// std::string::find. one day...
std::string::size_type memfind( const std::string& hay, const std::string& ned )
{
char* pos = reinterpret_cast<char*>(memmem( hay.data(), hay.length(), ned.data(), ned.length() ));
if( pos == 0 )
{
return std::string::npos;
}
else
{
return ( hay.data() - pos );
}
}
// With this we test the gnu extension function memmem
// be aware, that with too aggressive optimization, gcc removes the call of the
// memfind function alltogether. If you have a result like e-7 as the time,
// then it was most likely the case.
void test2( const std::string& hay, const std::string& ned )
{
start_clock();
for(int i = 0; i < r; ++i )
{
memfind(hay,ned);
}
stop_clock();
t2 = time_clock();
cout << "GNU memmem : " << t2 << endl;
}
int main(int, char ** argv)
{
std::string foo = "This is a moderate length text string we will go and search in";
// this is the string we want to find.
// I think the length of both string is maybe not totally representative
// but should be common enough to hold as a quite-ok benchmark.
std::string len("length");
test1(foo,len);
test2(foo,len);
cout << "Factor : " << (t1/t2) << endl;
}