This is the mail archive of the
java-patches@gcc.gnu.org
mailing list for the Java project.
[gcjx] Patch: FYI: speed up lexer
- From: Tom Tromey <tromey at redhat dot com>
- To: Java Patch List <java-patches at gcc dot gnu dot org>
- Date: 28 Sep 2005 18:48:49 -0600
- Subject: [gcjx] Patch: FYI: speed up lexer
- Reply-to: tromey at redhat dot com
I'm checking this in on the gcjx branch.
This changes gcjx to lex an entire file up front (when the token_stream
is constructed), instead of lexing tokens on demand. This cut more than
30 seconds off the build time for Classpath here (from 4m28s to 3m51s
in my unscientific test -- sadly, still enormously slower than ecj).
Tom
Index: ChangeLog
from Tom Tromey <tromey@redhat.com>
* source/tstream.hh (token_stream::buffer): Changed type.
(token_stream::buffer_size): Removed.
(token_stream::buffer_end): Likewise.
(token_stream::marks): Likewise.
(token_stream::mark_buffering): Likewise.
(token_stream::peek_buffering): Likewise.
(token_stream::set_mark): Now inline.
(token_stream::reset_to_mark): Likewise.
(token_stream::unset_mark): Removed.
(token_stream::lex_file): Declare.
(token_stream): Updated.
(~token_stream): Updated.
(~marker): Updated.
* source/tstream.cc (set_mark, unset_mark, reset_to_mark):
Removed.
(lex_file): New method.
(get_unfiltered_token): Rewrote.
(peek_token, peek_token1): Updated.
Index: source/tstream.cc
===================================================================
RCS file: /cvs/gcc/gcc/gcjx/source/Attic/tstream.cc,v
retrieving revision 1.1.2.1
diff -u -r1.1.2.1 tstream.cc
--- source/tstream.cc 13 Jan 2005 03:18:37 -0000 1.1.2.1
+++ source/tstream.cc 29 Sep 2005 00:47:55 -0000
@@ -1,6 +1,6 @@
// Implementation of the token stream.
-// Copyright (C) 2004 Free Software Foundation, Inc.
+// Copyright (C) 2004, 2005 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
@@ -22,85 +22,24 @@
#include "typedefs.hh"
#include "source/tstream.hh"
-int
-token_stream::set_mark ()
+void
+token_stream::lex_file ()
{
- if (marks.empty () && read_position > 0)
- {
- // There is no mark at present, but we're still buffering some
- // tokens. Let's move the tokens to the start of the buffer to
- // make some space. FIXME: or we could implement a circular
- // buffer, which means more complex logic here and there.
- // FIXME: or we could just optimistically wait until the buffer
- // is about to overflow.
- for (int i = read_position; i < buffer_end; ++i)
- buffer[i - read_position] = buffer[i];
- buffer_end -= read_position;
- read_position = 0;
- }
- else if (buffer == NULL)
+ token r;
+ do
{
- buffer_size = 64;
- buffer = new token[buffer_size];
+ r = lexer::get_token ();
+ buffer.push_back (r);
}
-
- int result = read_position;
- assert (! marks.empty () || read_position == 0);
- marks.push (result);
- mark_buffering = true;
- return result;
-}
-
-void
-token_stream::unset_mark (int position)
-{
- assert (marks.top () == position);
- marks.pop ();
- mark_buffering = ! marks.empty ();
-}
-
-void
-token_stream::reset_to_mark (int position)
-{
- // Since we require marks to be nested, and since the mark
- // destructor removes it, we know we can only rewind to the most
- // recent mark.
- assert (position == marks.top ());
- read_position = position;
+ while (r != TOKEN_EOF);
}
token
token_stream::get_unfiltered_token ()
{
- // If we're in the buffer, read from it.
- if (read_position < buffer_end)
- return buffer[read_position++];
-
- // Ask the lexer, and buffer the result if needed.
- token r = lexer::get_token ();
- if (mark_buffering || peek_buffering)
- {
- if (buffer == NULL)
- {
- buffer_size = 64;
- buffer = new token[buffer_size];
- }
- else if (buffer_end == buffer_size)
- {
- buffer_size *= 2;
- token *nb = new token[buffer_size];
- for (int i = 0; i < buffer_end; ++i)
- nb[i] = buffer[i];
- delete [] buffer;
- buffer = nb;
- }
-
- buffer[buffer_end] = r;
- ++buffer_end;
- ++read_position;
- }
-
- return r;
+ if (read_position < buffer.size ())
+ ++read_position;
+ return buffer[read_position - 1];
}
token
@@ -161,7 +100,6 @@
token_stream::peek_token ()
{
saver<bool> save_jd (javadoc_is_ok);
- saver<bool> save_peek (peek_buffering, true);
saver<int> save_read (read_position);
token r = get_token ();
return r;
@@ -171,7 +109,6 @@
token_stream::peek_token1 ()
{
saver<bool> save_jd (javadoc_is_ok);
- saver<bool> save_peek (peek_buffering, true);
saver<int> save_read (read_position);
get_token ();
token r = get_token ();
Index: source/tstream.hh
===================================================================
RCS file: /cvs/gcc/gcc/gcjx/source/Attic/tstream.hh,v
retrieving revision 1.1.2.1
diff -u -r1.1.2.1 tstream.hh
--- source/tstream.hh 13 Jan 2005 03:18:37 -0000 1.1.2.1
+++ source/tstream.hh 29 Sep 2005 00:47:55 -0000
@@ -1,6 +1,6 @@
// Token stream.
-// Copyright (C) 2004 Free Software Foundation, Inc.
+// Copyright (C) 2004, 2005 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
@@ -32,27 +32,12 @@
// filter the actual tokens according to what the parser requires.
class token_stream : public lexer
{
- // Our buffer. FIXME: size and stuff.. use some STL thing.
- token *buffer;
-
- // Size of buffer.
- int buffer_size;
-
- // Position of next free slot in buffer.
- int buffer_end;
+ // Our buffer.
+ std::deque<token> buffer;
// Position of next unread element in buffer.
int read_position;
- // All the marks we've set.
- std::stack<int> marks;
-
- // True if we're buffering tokens because there is a mark.
- bool mark_buffering;
-
- // True if we're buffering tokens because we're peeking.
- bool peek_buffering;
-
// True if the parser can usefully interpret a javadoc comment as
// the next token. When false, we filter out such comments.
@@ -61,14 +46,17 @@
// Set a mark at the current point. Only called by marker class.
// Returns the position.
- int set_mark ();
-
- // Unset the mark at position. Only called by marker class.
- void unset_mark (int);
+ int set_mark ()
+ {
+ return read_position;
+ }
// Reset the read pointer to a position. Only called by marker
// class.
- void reset_to_mark (int);
+ void reset_to_mark (int where)
+ {
+ read_position = where;
+ }
friend class marker;
@@ -76,24 +64,21 @@
// Return a token before any filtering is applied.
token get_unfiltered_token ();
+ // Lex the file and fill our buffer.
+ void lex_file ();
+
public:
token_stream (ucs2_reader *source, const char *file)
: lexer (source, file),
- buffer (NULL),
- buffer_size (0),
- buffer_end (0),
read_position (0),
- mark_buffering (false),
- peek_buffering (false),
javadoc_is_ok (false)
{
+ lex_file ();
}
~token_stream ()
{
- if (buffer != NULL)
- delete [] buffer;
}
// Indicate that it is ok for the next token to be TOKEN_JAVADOC.
@@ -135,7 +120,6 @@
~marker ()
{
- stream->unset_mark (location);
}
void backtrack ()