This is the mail archive of the java-patches@gcc.gnu.org mailing list for the Java project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gcjx] Patch: FYI: speed up lexer


I'm checking this in on the gcjx branch.

With this patch, gcjx lexes an entire file up front, when the token
stream is constructed, instead of lexing on demand.  In my unscientific
test this cut 37 seconds off the build time for Classpath here (from
4m28s to 3m51s) -- sadly, still enormously slower than ecj.

Tom

Index: ChangeLog
from  Tom Tromey  <tromey@redhat.com>

	* source/tstream.hh (token_stream::buffer): Changed type.
	(token_stream::buffer_size): Removed.
	(token_stream::buffer_end): Likewise.
	(token_stream::marks): Likewise.
	(token_stream::mark_buffering): Likewise.
	(token_stream::peek_buffering): Likewise.
	(token_stream::set_mark): Now inline.
	(token_stream::reset_to_mark): Likewise.
	(token_stream::unset_mark): Removed.
	(token_stream::lex_file): Declare.
	(token_stream): Updated.
	(~token_stream): Updated.
	(~marker): Updated.
	* source/tstream.cc (set_mark, unset_mark, reset_to_mark):
	Removed.
	(lex_file): New method.
	(get_unfiltered_token): Rewrote.
	(peek_token, peek_token1): Updated.

Index: source/tstream.cc
===================================================================
RCS file: /cvs/gcc/gcc/gcjx/source/Attic/tstream.cc,v
retrieving revision 1.1.2.1
diff -u -r1.1.2.1 tstream.cc
--- source/tstream.cc 13 Jan 2005 03:18:37 -0000 1.1.2.1
+++ source/tstream.cc 29 Sep 2005 00:47:55 -0000
@@ -1,6 +1,6 @@
 // Implementation of the token stream.
 
-// Copyright (C) 2004 Free Software Foundation, Inc.
+// Copyright (C) 2004, 2005 Free Software Foundation, Inc.
 //
 // This file is part of GCC.
 //
@@ -22,85 +22,24 @@
 #include "typedefs.hh"
 #include "source/tstream.hh"
 
-int
-token_stream::set_mark ()
+void
+token_stream::lex_file ()
 {
-  if (marks.empty () && read_position > 0)
-    {
-      // There is no mark at present, but we're still buffering some
-      // tokens.  Let's move the tokens to the start of the buffer to
-      // make some space.  FIXME: or we could implement a circular
-      // buffer, which means more complex logic here and there.
-      // FIXME: or we could just optimistically wait until the buffer
-      // is about to overflow.
-      for (int i = read_position; i < buffer_end; ++i)
-	buffer[i - read_position] = buffer[i];
-      buffer_end -= read_position;
-      read_position = 0;
-    }
-  else if (buffer == NULL)
+  token r;
+  do
     {
-      buffer_size = 64;
-      buffer = new token[buffer_size];
+      r = lexer::get_token ();
+      buffer.push_back (r);
     }
-
-  int result = read_position;
-  assert (! marks.empty () || read_position == 0);
-  marks.push (result);
-  mark_buffering = true;
-  return result;
-}
-
-void
-token_stream::unset_mark (int position)
-{
-  assert (marks.top () == position);
-  marks.pop ();
-  mark_buffering = ! marks.empty ();
-}
-
-void
-token_stream::reset_to_mark (int position)
-{
-  // Since we require marks to be nested, and since the mark
-  // destructor removes it, we know we can only rewind to the most
-  // recent mark.
-  assert (position == marks.top ());
-  read_position = position;
+  while (r != TOKEN_EOF);
 }
 
 token
 token_stream::get_unfiltered_token ()
 {
-  // If we're in the buffer, read from it.
-  if (read_position < buffer_end)
-    return buffer[read_position++];
-
-  // Ask the lexer, and buffer the result if needed.
-  token r = lexer::get_token ();
-  if (mark_buffering || peek_buffering)
-    {
-      if (buffer == NULL)
-	{
-	  buffer_size = 64;
-	  buffer = new token[buffer_size];
-	}
-      else if (buffer_end == buffer_size)
-	{
-	  buffer_size *= 2;
-	  token *nb = new token[buffer_size];
-	  for (int i = 0; i < buffer_end; ++i)
-	    nb[i] = buffer[i];
-	  delete [] buffer;
-	  buffer = nb;
-	}
-
-      buffer[buffer_end] = r;
-      ++buffer_end;
-      ++read_position;
-    }
-
-  return r;
+  if (read_position < buffer.size ())
+    ++read_position;
+  return buffer[read_position - 1];
 }
 
 token
@@ -161,7 +100,6 @@
 token_stream::peek_token ()
 {
   saver<bool> save_jd (javadoc_is_ok);
-  saver<bool> save_peek (peek_buffering, true);
   saver<int> save_read (read_position);
   token r = get_token ();
   return r;
@@ -171,7 +109,6 @@
 token_stream::peek_token1 ()
 {
   saver<bool> save_jd (javadoc_is_ok);
-  saver<bool> save_peek (peek_buffering, true);
   saver<int> save_read (read_position);
   get_token ();
   token r = get_token ();
Index: source/tstream.hh
===================================================================
RCS file: /cvs/gcc/gcc/gcjx/source/Attic/tstream.hh,v
retrieving revision 1.1.2.1
diff -u -r1.1.2.1 tstream.hh
--- source/tstream.hh 13 Jan 2005 03:18:37 -0000 1.1.2.1
+++ source/tstream.hh 29 Sep 2005 00:47:55 -0000
@@ -1,6 +1,6 @@
 // Token stream.
 
-// Copyright (C) 2004 Free Software Foundation, Inc.
+// Copyright (C) 2004, 2005 Free Software Foundation, Inc.
 //
 // This file is part of GCC.
 //
@@ -32,27 +32,12 @@
 // filter the actual tokens according to what the parser requires.
 class token_stream : public lexer
 {
-  // Our buffer.  FIXME: size and stuff.. use some STL thing.
-  token *buffer;
-
-  // Size of buffer.
-  int buffer_size;
-
-  // Position of next free slot in buffer.
-  int buffer_end;
+  // Our buffer.
+  std::deque<token> buffer;
 
   // Position of next unread element in buffer.
   int read_position;
 
-  // All the marks we've set.
-  std::stack<int> marks;
-
-  // True if we're buffering tokens because there is a mark.
-  bool mark_buffering;
-
-  // True if we're buffering tokens because we're peeking.
-  bool peek_buffering;
-  
 
   // True if the parser can usefully interpret a javadoc comment as
   // the next token.  When false, we filter out such comments.
@@ -61,14 +46,17 @@
 
   // Set a mark at the current point.  Only called by marker class.
   // Returns the position.
-  int set_mark ();
-
-  // Unset the mark at position.  Only called by marker class.
-  void unset_mark (int);
+  int set_mark ()
+  {
+    return read_position;
+  }
 
   // Reset the read pointer to a position.  Only called by marker
   // class.
-  void reset_to_mark (int);
+  void reset_to_mark (int where)
+  {
+    read_position = where;
+  }
 
   friend class marker;
 
@@ -76,24 +64,21 @@
   // Return a token before any filtering is applied.
   token get_unfiltered_token ();
 
+  // Lex the file and fill our buffer.
+  void lex_file ();
+
 public:
 
   token_stream (ucs2_reader *source, const char *file)
     : lexer (source, file),
-      buffer (NULL),
-      buffer_size (0),
-      buffer_end (0),
       read_position (0),
-      mark_buffering (false),
-      peek_buffering (false),
       javadoc_is_ok (false)
   {
+    lex_file ();
   }
 
   ~token_stream ()
   {
-    if (buffer != NULL)
-      delete [] buffer;
   }
 
   // Indicate that it is ok for the next token to be TOKEN_JAVADOC.
@@ -135,7 +120,6 @@
 
   ~marker ()
   {
-    stream->unset_mark (location);
   }
 
   void backtrack ()


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]