This is the mail archive of the gcc-help@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

regex slowdowns with testcase


I am currently working on moving an application from Solaris to Linux.  As
part of that, we are moving from the proprietary Roguewave libraries to
open-source alternatives.  One of the regular expression libraries we are
considering is the native-to-libstdc++ regex.h.  With that in mind, we wrote
a very small program to test out how the various regular expressions worked. 
That file is attached.  Also as part of this move, we are starting to test
our code using gcc 3.2.2, cvs as of two days ago (we are currently using 
2.95.4).  The benchmark results of compiling this code with the two versions 
and then running each binary 10,000 times are below.  As you can see, when 
compiled with 3.2.2, it runs 50% slower.  As our program uses regular 
expressions extensively, and on fairly large strings, I believe that would 
absolutely kill our performance.  I'm hoping that someone else has had 
similar experiences, and would be willing to share what their particular 
solution was, even if that solution was "use Boost.Regex".  Something 
more along the lines of "Idiot, don't do foo on line 32, do bar!" would be 
even more appreciated, as we would really like to use the (gnu) standard.

Many thanks in advance for your help, and have a great day!

Compile lines: 
g++ -O2 std_regex2.cpp -o std_regex2.295 -pedantic
$ g++ --version 
2.95.4
g++-3.2 -O2 std_regex2.cpp -o std_regex2.32 -pedantic
$ g++-3.2 --version
g++-3.2 (GCC) 3.2.2 20021210 (Debian prerelease)
Benchmark machine:
Dual-Athlon 2.2 GHz
1.5Gig of RAM
Kernel 2.4.7

Random sidenote: we get almost identical performance with gcc 3.0.4, so
I do not believe this is a "cvs is unstable"-sort of bug, but rather one inherent
in the 3.x line.

[ dbishop@pelinux4 ] $ let a=0; time while [ $a -lt 10000 ]; do ./std_regex2.295 ; let a=$a+1; done &> /tmp/bench.295

real    0m42.083s
user    0m13.530s
sys     0m11.340s

[ dbishop@pelinux4 ] $ let a=0; time while [ $a -lt 10000 ]; do ./std_regex2.295 ; let a=$a+1; done &> /tmp/bench.295

real    0m40.399s
user    0m16.360s
sys     0m16.480s

[ dbishop@pelinux4 ] $ let a=0; time while [ $a -lt 10000 ]; do ./std_regex2.295 ; let a=$a+1; done &> /tmp/bench.295

real    0m40.426s
user    0m19.100s
sys     0m18.710s

----------------------------------------

[ dbishop@pelinux4 ] $ let a=0; time while [ $a -lt 10000 ]; do ./std_regex2.32 ; let a=$a+1; done &> /tmp/bench.32

real    0m59.983s
user    0m32.760s
sys     0m18.500s

[ dbishop@pelinux4 ] $ let a=0; time while [ $a -lt 10000 ]; do ./std_regex2.32 ; let a=$a+1; done &> /tmp/bench.32

real    0m59.902s
user    0m39.160s
sys     0m19.560s

[ dbishop@pelinux4 ] $ let a=0; time while [ $a -lt 10000 ]; do ./std_regex2.32 ; let a=$a+1; done &> /tmp/bench.32

real    1m1.005s
user    0m41.710s
sys     0m19.660s

-- 
D.A.Bishop

"Outside of a dog, a book is man's best friend. Inside of a dog, it's too dark 
to read."	--Groucho Marx
#include <iostream>
#include <regex.h>

using namespace std;

// This is a test of the standard regular expression library

 
namespace {

// One self-check: a subject string, a POSIX basic regular expression that is
// expected to match somewhere in it, and the line printed when it does not.
struct RegexCase
{
   const char *subject;
   const char *pattern;
   const char *failureMessage;
};

// Outcome of running a single RegexCase.
enum CaseResult { CASE_MATCHED, CASE_NOT_MATCHED, CASE_COMPILE_ERROR };

// Compiles testCase.pattern, searches testCase.subject for it once, and
// releases the compiled expression before returning.
//
// Returns CASE_COMPILE_ERROR if regcomp() rejects the pattern, otherwise
// CASE_MATCHED or CASE_NOT_MATCHED according to regexec().
CaseResult runCase(const RegexCase &testCase)
{
   regex_t compiled;

   // Only a yes/no answer is needed (regexec() is called with nmatch == 0),
   // so REG_NOSUB tells the library up front that it need not report
   // sub-match positions.
   if (regcomp(&compiled, testCase.pattern, REG_NEWLINE | REG_NOSUB) != 0)
      return CASE_COMPILE_ERROR;   // compilation failed: no regfree() here

   const int status = regexec(&compiled, testCase.subject, 0, NULL, 0);
   regfree(&compiled);

   return status == 0 ? CASE_MATCHED : CASE_NOT_MATCHED;
}

} // namespace

// Runs every regular-expression self-check in order.
//
// A check whose pattern does not match its subject prints its failure line to
// standard output; checks that match print nothing.  Returns EOF as soon as a
// pattern fails to compile (later checks are not run), and 0 otherwise --
// including when some checks printed a failure line.
int main(int argc, char **argv)
{
   // The \+, \? and \| operators used below are extensions to POSIX basic
   // regular expressions (supported by GNU libc), not part of the standard.
   static const RegexCase cases[] = {
      { "I contain the letter Z in the middle of the string",
        "Z",
        "simple test...     FAILED" },
      { "I            tabs",
        "[[:space:]]\\+",
        "RXwhite...         FAILED" },
      { "I contain 123 an integer",
        "[[:digit:]]\\+",
        "RXint...           FAILED" },
      { "I contain -1.23 a double",
        "-\\?\\(\\([0-9]\\+\\.[0-9]*\\)\\|\\([0-9]\\+\\)\\|\\(\\.[0-9]\\+\\)\\)\\([eE][---+]\\?[0-9]\\+\\)\\?",
        "RXdouble...        FAILED" },
      { "I contain alpha",
        "[[:alpha:]]\\+",
        "RXalpha...         FAILED" },
      { "i contain lowercase",
        "[[:lower:]]\\+",
        "RXlowercase...     FAILED" },
      { "I CONTAIN UPPERCASE",
        "[[:upper:]]\\+",
        "RXuppercase...     FAILED" },
      { "I contain alphanumeric1",
        "[[:alnum:]]\\+",
        "RXalphanum...      FAILED" },
      { "Aa_01",
        "[[:alpha:]][[:alnum:]]*",
        "RXidentifier...    FAILED" },
      { "1234567.111RZ",
        "^[[:alnum:]]\\{3\\}/\\?[[:alnum:]]\\{2\\}/\\?[[:alnum:]]\\{2\\}[/.]",
        "RXRetestLot...     FAILED" }
   };
   const size_t caseCount = sizeof(cases) / sizeof(cases[0]);

   for (size_t i = 0; i < caseCount; ++i)
   {
      const CaseResult result = runCase(cases[i]);

      if (result == CASE_COMPILE_ERROR)
         return EOF;

      if (result == CASE_NOT_MATCHED)
         std::cout << cases[i].failureMessage << std::endl;
   }

   return 0;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]