This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

FYI: another 0.5-1.0% speedup in the C++ parser


By moving some operations around I have managed to squeeze another
percentage point or so out of the C++ parser.  The reasoning is that
cp_lexer_peek_token is called 5-10 times as often as
cp_lexer_peek_nth_token or cp_lexer_consume_token, so we want to make
the former as fast as possible.  It was doing three things besides
just returning the current-token pointer:

1) Printing a debugging message if requested - only relevant to
   ENABLE_CHECKING builds, not to release builds.

2) Skipping past CPP_PURGED tokens.  This operation could be moved to
   cp_lexer_consume_token and cp_lexer_peek_nth_token; it is now an
   invariant that lexer->next_token never sits on a CPP_PURGED token.

3) Updating the global source position (input_location and
   in_system_header).  It makes more sense to do this in
   cp_lexer_consume_token anyway, so that's where I put it.

I also made cp_lexer_next_token_is and cp_lexer_next_token_is_not
inline, since these are very simple wrappers around cp_lexer_peek_token.

Unfortunately, (3) changes the source location reported for a lot of
error messages, causing about 150 diagnostic regressions.  I am going
to try to fix all of those by reordering operations in the rest
of the parser.  Obviously I won't check the patch in until they
are all fixed.

Attached are the patch and a spreadsheet showing timing results.

zw
        * parser.c (cp_lexer_set_source_position_from_token): Delete.
        (cp_lexer_skip_purged_tokens): Delete.
        (cp_lexer_peek_token_emit_debug_info): Surround with
        #ifdef ENABLE_CHECKING.  Avoid fprintf.
        (cp_lexer_debug_stream): Define as stdout, not NULL, when
        !ENABLE_CHECKING, to suppress warnings.
        (cp_lexer_print_token, cp_lexer_peek_token_emit_debug_info):
        Define as (void)0, not nothing, when !ENABLE_CHECKING, to
        suppress warnings.
        (cp_lexer_new_main, cp_lexer_new_from_token_array): Assert that
        initial value of lexer->next_token is not a CPP_PURGED token.
        (cp_lexer_peek_token): Just call cp_lexer_peek_token_emit_debug_info
        and then return lexer->next_token.
        (cp_lexer_next_token_is, cp_lexer_next_token_is_not)
        (cp_lexer_next_token_is_keyword): Make inline.
        (cp_lexer_next_token_is): Simplify internally.
        (cp_lexer_peek_nth_token): Print a debugging report similar to
        cp_lexer_peek_token.
        (cp_lexer_consume_token): Advance over purged tokens here.
        Update input_location and in_system_header here.  Avoid
        fprintf in debugging report.
        (cp_lexer_handle_pragma): No need to call
        cp_lexer_set_source_position_from_token.
        (cp_parser_string_literal): Simplify.
        (cp_parser_template_id): Use cp_lexer_peek_token, not
        cp_lexer_peek_nth_token with second argument 1.

===================================================================
Index: cp/parser.c
--- cp/parser.c	21 Sep 2004 20:46:57 -0000	1.251
+++ cp/parser.c	22 Sep 2004 18:01:09 -0000
@@ -133,10 +133,6 @@ static void cp_lexer_get_preprocessor_to
   (cp_lexer *, cp_token *);
 static inline cp_token *cp_lexer_peek_token
   (cp_lexer *);
-static void cp_lexer_peek_token_emit_debug_info
-  (cp_lexer *, cp_token *);
-static void cp_lexer_skip_purged_tokens
-  (cp_lexer *);
 static cp_token *cp_lexer_peek_nth_token
   (cp_lexer *, size_t);
 static inline bool cp_lexer_next_token_is
@@ -159,8 +155,6 @@ static void cp_lexer_commit_tokens
   (cp_lexer *);
 static void cp_lexer_rollback_tokens
   (cp_lexer *);
-static inline void cp_lexer_set_source_position_from_token
-  (cp_lexer *, const cp_token *);
 #ifdef ENABLE_CHECKING
 static void cp_lexer_print_token
   (FILE *, cp_token *);
@@ -170,10 +164,17 @@ static void cp_lexer_start_debugging
   (cp_lexer *) ATTRIBUTE_UNUSED;
 static void cp_lexer_stop_debugging
   (cp_lexer *) ATTRIBUTE_UNUSED;
+static void cp_lexer_peek_token_emit_debug_info
+  (cp_lexer *, cp_token *);
 #else
-#define cp_lexer_debug_stream NULL
-#define cp_lexer_print_token(str, tok)
+/* If we define cp_lexer_debug_stream to NULL it will provoke warnings
+   about passing NULL to functions that require non-NULL arguments
+   (fputs, fprintf).  It will never be used, so all we need is a value
+   of the right type that's guaranteed not to be NULL.  */
+#define cp_lexer_debug_stream stdout
+#define cp_lexer_print_token(str, tok) (void) 0
 #define cp_lexer_debugging_p(lexer) 0
+#define cp_lexer_peek_token_emit_debug_info(lexer, tok) (void) 0
 #endif /* ENABLE_CHECKING */
 
 static cp_token_cache *cp_token_cache_new
@@ -266,6 +267,7 @@ cp_lexer_new_main (void)
      string constant concatenation.  */
   c_lex_return_raw_strings = false;
 
+  gcc_assert (lexer->next_token->type != CPP_PURGED);
   return lexer;
 }
 
@@ -302,6 +304,8 @@ cp_lexer_new_from_token_array (cp_token 
   /* Initially we are not debugging.  */
   lexer->debugging_p = false;
 #endif
+
+  gcc_assert (lexer->next_token->type != CPP_PURGED);
   return lexer;
 }
 
@@ -335,24 +339,6 @@ cp_lexer_debugging_p (cp_lexer *lexer)
 
 #endif /* ENABLE_CHECKING */
 
-/* Set the current source position from the information stored in
-   TOKEN.  */
-
-static inline void
-cp_lexer_set_source_position_from_token (cp_lexer *lexer ATTRIBUTE_UNUSED ,
-                                         const cp_token *token)
-{
-  /* Ideally, the source position information would not be a global
-     variable, but it is.  */
-
-  /* Update the line number and system header flag. */
-  if (token->type != CPP_EOF)
-    {
-      input_location = token->location;
-      in_system_header = token->in_system_header;
-    }
-}
-
 /* TOKEN points into the circular token buffer.  Return a pointer to
    the next token in the buffer.  */
 
@@ -490,22 +476,12 @@ cp_lexer_get_preprocessor_token (cp_lexe
 static inline cp_token *
 cp_lexer_peek_token (cp_lexer *lexer)
 {
-  cp_token *token;
-
-  /* Skip over purged tokens if necessary. */
-  if (lexer->next_token->type == CPP_PURGED)
-    cp_lexer_skip_purged_tokens (lexer);
-
-  token = lexer->next_token;
-
-  /* Provide debugging output.  */
   if (cp_lexer_debugging_p (lexer))
-    cp_lexer_peek_token_emit_debug_info (lexer, token);
-
-  cp_lexer_set_source_position_from_token (lexer, token);
-  return token;
+    cp_lexer_peek_token_emit_debug_info (lexer, lexer->next_token);
+  return lexer->next_token;
 }
 
+#ifdef ENABLE_CHECKING
 /* Emit debug output for cp_lexer_peek_token.  Split out into a
    separate function so that cp_lexer_peek_token can be small and
    inlinable. */
@@ -514,35 +490,23 @@ static void
 cp_lexer_peek_token_emit_debug_info (cp_lexer *lexer ATTRIBUTE_UNUSED,
 				     cp_token *token ATTRIBUTE_UNUSED)
 {
-  fprintf (cp_lexer_debug_stream, "cp_lexer: peeking at token: ");
+  fputs ("cp_lexer: peeking at token: ", cp_lexer_debug_stream);
   cp_lexer_print_token (cp_lexer_debug_stream, token);
-  fprintf (cp_lexer_debug_stream, "\n");
-}
-
-/* Skip all tokens whose type is CPP_PURGED. */
-
-static void cp_lexer_skip_purged_tokens (cp_lexer *lexer)
-{
-  while (lexer->next_token->type == CPP_PURGED)
-    ++lexer->next_token;
+  putc ('\n', cp_lexer_debug_stream);
 }
+#endif
 
 /* Return true if the next token has the indicated TYPE.  */
 
-static bool
+static inline bool
 cp_lexer_next_token_is (cp_lexer* lexer, enum cpp_ttype type)
 {
-  cp_token *token;
-
-  /* Peek at the next token.  */
-  token = cp_lexer_peek_token (lexer);
-  /* Check to see if it has the indicated TYPE.  */
-  return token->type == type;
+  return cp_lexer_peek_token (lexer)->type == type;
 }
 
 /* Return true if the next token does not have the indicated TYPE.  */
 
-static bool
+static inline bool
 cp_lexer_next_token_is_not (cp_lexer* lexer, enum cpp_ttype type)
 {
   return !cp_lexer_next_token_is (lexer, type);
@@ -550,7 +514,7 @@ cp_lexer_next_token_is_not (cp_lexer* le
 
 /* Return true if the next token is the indicated KEYWORD.  */
 
-static bool
+static inline bool
 cp_lexer_next_token_is_keyword (cp_lexer* lexer, enum rid keyword)
 {
   cp_token *token;
@@ -562,7 +526,10 @@ cp_lexer_next_token_is_keyword (cp_lexer
 }
 
 /* Return a pointer to the Nth token in the token stream.  If N is 1,
-   then this is precisely equivalent to cp_lexer_peek_token.  */
+   then this is precisely equivalent to cp_lexer_peek_token (except
+   that it is not inline).  One would like to disallow N == 1, but
+   there is one caller (cp_parser_nth_token_starts_template_id) that
+   passes a variable for N, and it might be 1.  */
 
 static cp_token *
 cp_lexer_peek_nth_token (cp_lexer* lexer, size_t n)
@@ -572,6 +539,10 @@ cp_lexer_peek_nth_token (cp_lexer* lexer
   /* N is 1-based, not zero-based.  */
   gcc_assert (n > 0);
 
+  if (cp_lexer_debugging_p (lexer))
+    fprintf (cp_lexer_debug_stream,
+	     "cp_lexer: peeking ahead %ld at token: ", (long)n);
+
   --n;
   token = lexer->next_token;
   while (n != 0)
@@ -581,31 +552,40 @@ cp_lexer_peek_nth_token (cp_lexer* lexer
 	--n;
     }
 
+  if (cp_lexer_debugging_p (lexer))
+    {
+      cp_lexer_print_token (cp_lexer_debug_stream, token);
+      putc ('\n', cp_lexer_debug_stream);
+    }
+
   return token;
 }
 
-/* Consume the next token.  The pointer returned is valid only until
-   another token is read.  Callers should preserve copy the token
-   explicitly if they will need its value for a longer period of
-   time.  */
+/* Return the next token, and advance the lexer's next_token pointer
+   to point to the next non-purged token.  */
 
 static cp_token *
 cp_lexer_consume_token (cp_lexer* lexer)
 {
-  cp_token *token;
+  cp_token *token = lexer->next_token;
 
-  /* Skip over purged tokens if necessary. */
-  if (lexer->next_token->type == CPP_PURGED)
-    cp_lexer_skip_purged_tokens (lexer);
+  do
+    ++lexer->next_token;
+  while (lexer->next_token->type == CPP_PURGED);
 
-  token = lexer->next_token++;
+  /* Update the line number and system header flag. */
+  if (token->type != CPP_EOF)
+    {
+      input_location = token->location;
+      in_system_header = token->in_system_header;
+    }
 
   /* Provide debugging output.  */
   if (cp_lexer_debugging_p (lexer))
     {
-      fprintf (cp_lexer_debug_stream, "cp_lexer: consuming token: ");
+      fputs ("cp_lexer: consuming token: ", cp_lexer_debug_stream);
       cp_lexer_print_token (cp_lexer_debug_stream, token);
-      fprintf (cp_lexer_debug_stream, "\n");
+      putc ('\n', cp_lexer_debug_stream);
     }
 
   return token;
@@ -658,7 +638,6 @@ cp_lexer_handle_pragma (cp_lexer *lexer)
   s.len = TREE_STRING_LENGTH (token->value);
   s.text = (const unsigned char *) TREE_STRING_POINTER (token->value);
 
-  cp_lexer_set_source_position_from_token (lexer, token);
   cpp_handle_deferred_pragma (parse_in, &s);
 
   /* Clearing token->value here means that we will get an ICE if we
@@ -2446,16 +2425,18 @@ cp_parser_string_literal (cp_parser *par
       return error_mark_node;
     }
 
-  /* Try to avoid the overhead of creating and destroying an obstac
+  /* Try to avoid the overhead of creating and destroying an obstack
      for the common case of just one string.  */
-  if (!cp_parser_is_string_literal (cp_lexer_peek_nth_token (parser->lexer, 2)))
+  if (!cp_parser_is_string_literal
+      (cp_lexer_peek_nth_token (parser->lexer, 2)))
     {
+      cp_lexer_consume_token (parser->lexer);
+
       str.text = (const unsigned char *)TREE_STRING_POINTER (tok->value);
       str.len = TREE_STRING_LENGTH (tok->value);
       count = 1;
       if (tok->type == CPP_WSTRING)
 	wide = true;
-      cp_lexer_consume_token (parser->lexer);
 
       strs = &str;
     }
@@ -2466,6 +2447,7 @@ cp_parser_string_literal (cp_parser *par
 
       do
 	{
+	  cp_lexer_consume_token (parser->lexer);
 	  count++;
 	  str.text = (unsigned char *)TREE_STRING_POINTER (tok->value);
 	  str.len = TREE_STRING_LENGTH (tok->value);
@@ -2474,11 +2456,7 @@ cp_parser_string_literal (cp_parser *par
 
 	  obstack_grow (&str_ob, &str, sizeof (cpp_string));
 
-	  /* We do it this way so that, if we have to issue semantic
-	     errors on this string literal, the source position will
-	     be that of the first token of the string.  */
-	  tok = cp_lexer_peek_nth_token (parser->lexer, 2);
-	  cp_lexer_consume_token (parser->lexer);
+	  tok = cp_lexer_peek_token (parser->lexer);
 	}
       while (cp_parser_is_string_literal (tok));
 
@@ -8371,7 +8349,7 @@ cp_parser_template_id (cp_parser *parser
   /* If we find the sequence `[:' after a template-name, it's probably
      a digraph-typo for `< ::'. Substitute the tokens and check if we can
      parse correctly the argument list.  */
-  next_token = cp_lexer_peek_nth_token (parser->lexer, 1);
+  next_token = cp_lexer_peek_token (parser->lexer);
   next_token_2 = cp_lexer_peek_nth_token (parser->lexer, 2);
   if (next_token->type == CPP_OPEN_SQUARE
       && next_token->flags & DIGRAPH

Attachment: easy-peek-token-timings.gnumeric
Description: application/gnumeric

Attachment: easy-peek-token-timings.html
Description: Binary data


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]