This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
| Other format: | [Raw text] | |
By moving some operations around I have managed to squeeze another percentage point or so out of the C++ parser.

The logic is: cp_lexer_peek_token is called 5-10x as many times as cp_lexer_peek_nth_token or cp_lexer_consume_token. So we want to make the former as fast as possible. It was doing three things besides just returning the current-token pointer:

1) Printing a debugging message if requested - only relevant to ENABLE_CHECKING builds, not to release builds.

2) Skipping past CPP_PURGED tokens. This operation could be moved to cp_lexer_consume_token and cp_lexer_peek_nth_token; it is now an invariant that lexer->next_token never sits on a CPP_PURGED token.

3) Updating the source_position global. It makes more sense to do this in cp_lexer_consume_token anyway, so that's where I put it.

I also made cp_lexer_next_token_is/is_not be inline, since these are very simple wrappers around cp_lexer_peek_token.

Unfortunately, (3) has the consequence of moving a lot of error messages, causing about 150 diagnostic regressions. I am going to try to fix all of those by reordering operations in the rest of the parser. Obviously I won't check the patch in until they are all fixed.

Attached are the patch and a spreadsheet showing timing results.

zw

* parser.c (cp_lexer_set_source_position_from_token): Delete.
(cp_lexer_skip_purged_tokens): Delete.
(cp_lexer_peek_token_emit_debug_info): Surround with
#ifdef ENABLE_CHECKING. Avoid fprintf.
(cp_lexer_debug_stream): Define as stdout, not NULL, when
!ENABLE_CHECKING, to suppress warnings.
(cp_lexer_print_token, cp_lexer_peek_token_emit_debug_info):
Define as (void)0, not nothing, when !ENABLE_CHECKING, to
suppress warnings.
(cp_lexer_new_main, cp_lexer_new_from_token_array): Assert that
initial value of lexer->next_token is not a CPP_PURGED token.
(cp_lexer_peek_token): Just call cp_lexer_peek_token_emit_debug_info
and then return lexer->next_token.
(cp_lexer_next_token_is, cp_lexer_next_token_is_not): Make inline.
Simplify internally.
(cp_lexer_peek_nth_token): Print a debugging report similar to
cp_lexer_peek_token.
(cp_lexer_consume_token): Advance over purged tokens here.
Update input_location and in_system_header here. Avoid
fprintf in debugging report.
(cp_lexer_handle_pragma): No need to call
cp_lexer_set_source_position_from_token.
(cp_parser_string_literal): Simplify.
(cp_parser_template_id): Use cp_lexer_peek_token, not
cp_lexer_peek_nth_token with second argument 1.
===================================================================
Index: cp/parser.c
--- cp/parser.c 21 Sep 2004 20:46:57 -0000 1.251
+++ cp/parser.c 22 Sep 2004 18:01:09 -0000
@@ -133,10 +133,6 @@ static void cp_lexer_get_preprocessor_to
(cp_lexer *, cp_token *);
static inline cp_token *cp_lexer_peek_token
(cp_lexer *);
-static void cp_lexer_peek_token_emit_debug_info
- (cp_lexer *, cp_token *);
-static void cp_lexer_skip_purged_tokens
- (cp_lexer *);
static cp_token *cp_lexer_peek_nth_token
(cp_lexer *, size_t);
static inline bool cp_lexer_next_token_is
@@ -159,8 +155,6 @@ static void cp_lexer_commit_tokens
(cp_lexer *);
static void cp_lexer_rollback_tokens
(cp_lexer *);
-static inline void cp_lexer_set_source_position_from_token
- (cp_lexer *, const cp_token *);
#ifdef ENABLE_CHECKING
static void cp_lexer_print_token
(FILE *, cp_token *);
@@ -170,10 +164,17 @@ static void cp_lexer_start_debugging
(cp_lexer *) ATTRIBUTE_UNUSED;
static void cp_lexer_stop_debugging
(cp_lexer *) ATTRIBUTE_UNUSED;
+static void cp_lexer_peek_token_emit_debug_info
+ (cp_lexer *, cp_token *);
#else
-#define cp_lexer_debug_stream NULL
-#define cp_lexer_print_token(str, tok)
+/* If we define cp_lexer_debug_stream to NULL it will provoke warnings
+ about passing NULL to functions that require non-NULL arguments
+ (fputs, fprintf). It will never be used, so all we need is a value
+ of the right type that's guaranteed not to be NULL. */
+#define cp_lexer_debug_stream stdout
+#define cp_lexer_print_token(str, tok) (void) 0
#define cp_lexer_debugging_p(lexer) 0
+#define cp_lexer_peek_token_emit_debug_info(lexer, tok) (void) 0
#endif /* ENABLE_CHECKING */
static cp_token_cache *cp_token_cache_new
@@ -266,6 +267,7 @@ cp_lexer_new_main (void)
string constant concatenation. */
c_lex_return_raw_strings = false;
+ gcc_assert (lexer->next_token->type != CPP_PURGED);
return lexer;
}
@@ -302,6 +304,8 @@ cp_lexer_new_from_token_array (cp_token
/* Initially we are not debugging. */
lexer->debugging_p = false;
#endif
+
+ gcc_assert (lexer->next_token->type != CPP_PURGED);
return lexer;
}
@@ -335,24 +339,6 @@ cp_lexer_debugging_p (cp_lexer *lexer)
#endif /* ENABLE_CHECKING */
-/* Set the current source position from the information stored in
- TOKEN. */
-
-static inline void
-cp_lexer_set_source_position_from_token (cp_lexer *lexer ATTRIBUTE_UNUSED ,
- const cp_token *token)
-{
- /* Ideally, the source position information would not be a global
- variable, but it is. */
-
- /* Update the line number and system header flag. */
- if (token->type != CPP_EOF)
- {
- input_location = token->location;
- in_system_header = token->in_system_header;
- }
-}
-
/* TOKEN points into the circular token buffer. Return a pointer to
the next token in the buffer. */
@@ -490,22 +476,12 @@ cp_lexer_get_preprocessor_token (cp_lexe
static inline cp_token *
cp_lexer_peek_token (cp_lexer *lexer)
{
- cp_token *token;
-
- /* Skip over purged tokens if necessary. */
- if (lexer->next_token->type == CPP_PURGED)
- cp_lexer_skip_purged_tokens (lexer);
-
- token = lexer->next_token;
-
- /* Provide debugging output. */
if (cp_lexer_debugging_p (lexer))
- cp_lexer_peek_token_emit_debug_info (lexer, token);
-
- cp_lexer_set_source_position_from_token (lexer, token);
- return token;
+ cp_lexer_peek_token_emit_debug_info (lexer, lexer->next_token);
+ return lexer->next_token;
}
+#ifdef ENABLE_CHECKING
/* Emit debug output for cp_lexer_peek_token. Split out into a
separate function so that cp_lexer_peek_token can be small and
inlinable. */
@@ -514,35 +490,23 @@ static void
cp_lexer_peek_token_emit_debug_info (cp_lexer *lexer ATTRIBUTE_UNUSED,
cp_token *token ATTRIBUTE_UNUSED)
{
- fprintf (cp_lexer_debug_stream, "cp_lexer: peeking at token: ");
+ fputs ("cp_lexer: peeking at token: ", cp_lexer_debug_stream);
cp_lexer_print_token (cp_lexer_debug_stream, token);
- fprintf (cp_lexer_debug_stream, "\n");
-}
-
-/* Skip all tokens whose type is CPP_PURGED. */
-
-static void cp_lexer_skip_purged_tokens (cp_lexer *lexer)
-{
- while (lexer->next_token->type == CPP_PURGED)
- ++lexer->next_token;
+ putc ('\n', cp_lexer_debug_stream);
}
+#endif
/* Return true if the next token has the indicated TYPE. */
-static bool
+static inline bool
cp_lexer_next_token_is (cp_lexer* lexer, enum cpp_ttype type)
{
- cp_token *token;
-
- /* Peek at the next token. */
- token = cp_lexer_peek_token (lexer);
- /* Check to see if it has the indicated TYPE. */
- return token->type == type;
+ return cp_lexer_peek_token (lexer)->type == type;
}
/* Return true if the next token does not have the indicated TYPE. */
-static bool
+static inline bool
cp_lexer_next_token_is_not (cp_lexer* lexer, enum cpp_ttype type)
{
return !cp_lexer_next_token_is (lexer, type);
@@ -550,7 +514,7 @@ cp_lexer_next_token_is_not (cp_lexer* le
/* Return true if the next token is the indicated KEYWORD. */
-static bool
+static inline bool
cp_lexer_next_token_is_keyword (cp_lexer* lexer, enum rid keyword)
{
cp_token *token;
@@ -562,7 +526,10 @@ cp_lexer_next_token_is_keyword (cp_lexer
}
/* Return a pointer to the Nth token in the token stream. If N is 1,
- then this is precisely equivalent to cp_lexer_peek_token. */
+ then this is precisely equivalent to cp_lexer_peek_token (except
+ that it is not inline). One would like to disallow that case, but
+ there is one case (cp_parser_nth_token_starts_template_id) where
+ the caller passes a variable for N and it might be 1. */
static cp_token *
cp_lexer_peek_nth_token (cp_lexer* lexer, size_t n)
@@ -572,6 +539,10 @@ cp_lexer_peek_nth_token (cp_lexer* lexer
/* N is 1-based, not zero-based. */
gcc_assert (n > 0);
+ if (cp_lexer_debugging_p (lexer))
+ fprintf (cp_lexer_debug_stream,
+ "cp_lexer: peeking ahead %ld at token: ", (long)n);
+
--n;
token = lexer->next_token;
while (n != 0)
@@ -581,31 +552,40 @@ cp_lexer_peek_nth_token (cp_lexer* lexer
--n;
}
+ if (cp_lexer_debugging_p (lexer))
+ {
+ cp_lexer_print_token (cp_lexer_debug_stream, token);
+ putc ('\n', cp_lexer_debug_stream);
+ }
+
return token;
}
-/* Consume the next token. The pointer returned is valid only until
- another token is read. Callers should preserve copy the token
- explicitly if they will need its value for a longer period of
- time. */
+/* Return the next token, and advance the lexer's next_token pointer
+ to point to the next non-purged token. */
static cp_token *
cp_lexer_consume_token (cp_lexer* lexer)
{
- cp_token *token;
+ cp_token *token = lexer->next_token;
- /* Skip over purged tokens if necessary. */
- if (lexer->next_token->type == CPP_PURGED)
- cp_lexer_skip_purged_tokens (lexer);
+ do
+ ++lexer->next_token;
+ while (lexer->next_token->type == CPP_PURGED);
- token = lexer->next_token++;
+ /* Update the line number and system header flag. */
+ if (token->type != CPP_EOF)
+ {
+ input_location = token->location;
+ in_system_header = token->in_system_header;
+ }
/* Provide debugging output. */
if (cp_lexer_debugging_p (lexer))
{
- fprintf (cp_lexer_debug_stream, "cp_lexer: consuming token: ");
+ fputs ("cp_lexer: consuming token: ", cp_lexer_debug_stream);
cp_lexer_print_token (cp_lexer_debug_stream, token);
- fprintf (cp_lexer_debug_stream, "\n");
+ putc ('\n', cp_lexer_debug_stream);
}
return token;
@@ -658,7 +638,6 @@ cp_lexer_handle_pragma (cp_lexer *lexer)
s.len = TREE_STRING_LENGTH (token->value);
s.text = (const unsigned char *) TREE_STRING_POINTER (token->value);
- cp_lexer_set_source_position_from_token (lexer, token);
cpp_handle_deferred_pragma (parse_in, &s);
/* Clearing token->value here means that we will get an ICE if we
@@ -2446,16 +2425,18 @@ cp_parser_string_literal (cp_parser *par
return error_mark_node;
}
- /* Try to avoid the overhead of creating and destroying an obstac
+ /* Try to avoid the overhead of creating and destroying an obstack
for the common case of just one string. */
- if (!cp_parser_is_string_literal (cp_lexer_peek_nth_token (parser->lexer, 2)))
+ if (!cp_parser_is_string_literal
+ (cp_lexer_peek_nth_token (parser->lexer, 2)))
{
+ cp_lexer_consume_token (parser->lexer);
+
str.text = (const unsigned char *)TREE_STRING_POINTER (tok->value);
str.len = TREE_STRING_LENGTH (tok->value);
count = 1;
if (tok->type == CPP_WSTRING)
wide = true;
- cp_lexer_consume_token (parser->lexer);
strs = &str;
}
@@ -2466,6 +2447,7 @@ cp_parser_string_literal (cp_parser *par
do
{
+ cp_lexer_consume_token (parser->lexer);
count++;
str.text = (unsigned char *)TREE_STRING_POINTER (tok->value);
str.len = TREE_STRING_LENGTH (tok->value);
@@ -2474,11 +2456,7 @@ cp_parser_string_literal (cp_parser *par
obstack_grow (&str_ob, &str, sizeof (cpp_string));
- /* We do it this way so that, if we have to issue semantic
- errors on this string literal, the source position will
- be that of the first token of the string. */
- tok = cp_lexer_peek_nth_token (parser->lexer, 2);
- cp_lexer_consume_token (parser->lexer);
+ tok = cp_lexer_peek_token (parser->lexer);
}
while (cp_parser_is_string_literal (tok));
@@ -8371,7 +8349,7 @@ cp_parser_template_id (cp_parser *parser
/* If we find the sequence `[:' after a template-name, it's probably
a digraph-typo for `< ::'. Substitute the tokens and check if we can
parse correctly the argument list. */
- next_token = cp_lexer_peek_nth_token (parser->lexer, 1);
+ next_token = cp_lexer_peek_token (parser->lexer);
next_token_2 = cp_lexer_peek_nth_token (parser->lexer, 2);
if (next_token->type == CPP_OPEN_SQUARE
&& next_token->flags & DIGRAPH
Attachment:
easy-peek-token-timings.gnumeric
Description: application/gnumeric
Attachment:
easy-peek-token-timings.html
Description: Binary data
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |