This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [patch] new exec-charset testcase
- From: Alexandre Oliva <aoliva at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: mark at codesourcery dot com, echristo at redhat dot com, zack at codesourcery dot com, aldyh at redhat dot com
- Date: 03 Jun 2004 19:25:41 -0300
- Subject: Re: [patch] new exec-charset testcase
- Organization: Red Hat Global Engineering Services Compiler Team
- References: <4085ECF6.4080404@codesourcery.com>
http://gcc.gnu.org/ml/gcc-patches/2004-04/msg01330.html
Back on April 21, 2004, Mark Mitchell wrote:
Here are two alternative solutions to the problem. I don't quite like
the idea of making the lexer even more context sensitive than it is,
especially given that the parser already knows exactly what's going
on. Consider, for example:
asm ("foo" : "=x" (({ int i; asm ("bar" : "=r" (i) : "r" ("bar"));
i; })));
To handle this in the lexer, you would have to keep track of multiple
separate asm statements, in a stack. That is not a lexer any more; it's
parser stuff.
My first attempt was to get both versions of strings saved when
parsing tentatively, and to arrange for all locations that used host
strings to get to them properly. This was a neat trick, and it also
enabled me to see something else:
the *only* case in which we parse strings tentatively is while
scanning for the closing parenthesis when tentatively parsing a cast
expression.
However, in a case like this:
foo = ( (whatever long sequence of tokens forming an expression) );
when we see the first `(', we still scan and save the entire
`(whatever...))' token stream tentatively, even though it's pretty
obvious upfront that `(whatever)' can't possibly be a type name.
The solution I liked better was to attempt to parse a type name
upfront. Then, in the case of a compound literal, we backtrack like
we did before. The advantage is that now we will no longer scan the
complete expression within parentheses to only then try to tell
whether it begins with a type name. I *think* this should more than
make up for the disadvantage of having to parse the type name twice in
compound literal expressions, since parentheses are far more common in
expressions than type casts, which in turn are far more common than
compound literals.
So the second patch below is the one I'm submitting for review. The
former, probably incomplete, is only for the record. Ok to install
the latter?
Index: gcc/c-pragma.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-pragma.h,v
retrieving revision 1.39
diff -u -p -r1.39 c-pragma.h
--- gcc/c-pragma.h 27 Feb 2004 02:01:06 -0000 1.39
+++ gcc/c-pragma.h 3 Jun 2004 22:08:02 -0000
@@ -58,7 +58,9 @@ extern int c_lex (tree *);
extern int c_lex_with_flags (tree *, unsigned char *);
/* If true, then lex strings into the execution character set.
- Otherwise, lex strings into the host character set. */
-extern bool c_lex_string_translate;
+ If false, lex strings into the host character set.
+ If -true, lex both, and chain them together, such that the latter
+ is the TREE_CHAIN of the former. */
+extern int c_lex_string_translate;
#endif /* GCC_C_PRAGMA_H */
Index: gcc/c-lex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-lex.c,v
retrieving revision 1.221
diff -u -p -r1.221 c-lex.c
--- gcc/c-lex.c 2 Jun 2004 02:09:44 -0000 1.221
+++ gcc/c-lex.c 3 Jun 2004 22:08:03 -0000
@@ -53,7 +53,11 @@ static splay_tree file_info_tree;
int pending_lang_change; /* If we need to switch languages - C++ only */
int c_header_level; /* depth in C headers - C++ only */
-bool c_lex_string_translate = true; /* If we need to translate characters received. */
+/* If we need to translate characters received. This is tri-state:
+ false means use only the untranslated string; true means use only
+ the translated string; -true means chain the unstranslated string
+ to the translated one. */
+int c_lex_string_translate = true;
static tree interpret_integer (const cpp_token *, unsigned int);
static tree interpret_float (const cpp_token *, unsigned int);
@@ -699,6 +703,22 @@ lex_string (const cpp_token *tok, tree *
{
value = build_string (istr.len, (char *)istr.text);
free ((void *)istr.text);
+
+ if (c_lex_string_translate == -true)
+ {
+ if (!cpp_interpret_string_notranslate (parse_in, strs, count,
+ &istr, wide))
+ /* Assume that, if we managed to translate the string
+ above, then the untreanslated parsing will always
+ succeed. */
+ abort ();
+
+ if (TREE_STRING_LENGTH (value) != (int)istr.len
+ || 0 != strncmp (TREE_STRING_POINTER (value), (char *)istr.text,
+ istr.len))
+ TREE_CHAIN (value) = build_string (istr.len, (char *)istr.text);
+ free ((void *)istr.text);
+ }
}
else
{
Index: gcc/cp/parser.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cp/parser.c,v
retrieving revision 1.199
diff -u -p -r1.199 parser.c
--- gcc/cp/parser.c 1 Jun 2004 15:12:13 -0000 1.199
+++ gcc/cp/parser.c 3 Jun 2004 22:08:12 -0000
@@ -7086,6 +7086,10 @@ cp_parser_linkage_specification (cp_pars
/* Assume C++ linkage. */
linkage = get_identifier ("c++");
}
+ /* If the string is chained to another string, take the latter,
+ that's the untranslated string. */
+ else if (TREE_CHAIN (token->value))
+ linkage = get_identifier (TREE_STRING_POINTER (TREE_CHAIN (token->value)));
/* If it's a simple string constant, things are easier. */
else
linkage = get_identifier (TREE_STRING_POINTER (token->value));
@@ -9887,6 +9891,7 @@ cp_parser_asm_definition (cp_parser* par
tree asm_stmt;
bool volatile_p = false;
bool extended_p = false;
+ int c_lex_string_translate_save = c_lex_string_translate;
/* Look for the `asm' keyword. */
cp_parser_require_keyword (parser, RID_ASM, "`asm'");
@@ -9907,6 +9912,9 @@ cp_parser_asm_definition (cp_parser* par
if (!token)
goto finish;
string = token->value;
+ /* Take the untranslated string, if it was tentatively parsed as both. */
+ if (TREE_CHAIN (string))
+ string = TREE_CHAIN (string);
/* If we're allowing GNU extensions, check for the extended assembly
syntax. Unfortunately, the `:' tokens need not be separated by
a space in C, and so, for compatibility, we tolerate that here
@@ -9999,7 +10007,7 @@ cp_parser_asm_definition (cp_parser* par
assemble_asm (string);
finish:
- c_lex_string_translate = true;
+ c_lex_string_translate = c_lex_string_translate_save;
}
/* Declarators [gram.dcl.decl] */
@@ -13441,6 +13449,9 @@ cp_parser_asm_operand_list (cp_parser* p
/* Look for the string-literal. */
token = cp_parser_require (parser, CPP_STRING, "string-literal");
string_literal = token ? token->value : error_mark_node;
+ /* Take the untranslated string, if we got both. */
+ if (TREE_CHAIN (string_literal))
+ string_literal = TREE_CHAIN (string_literal);
c_lex_string_translate = true;
/* Look for the `('. */
cp_parser_require (parser, CPP_OPEN_PAREN, "`('");
@@ -13573,6 +13584,7 @@ static tree
cp_parser_attribute_list (cp_parser* parser)
{
tree attribute_list = NULL_TREE;
+ int c_lex_string_translate_save = c_lex_string_translate;
c_lex_string_translate = false;
while (true)
@@ -13620,7 +13632,7 @@ cp_parser_attribute_list (cp_parser* par
/* Consume the comma and keep going. */
cp_lexer_consume_token (parser->lexer);
}
- c_lex_string_translate = true;
+ c_lex_string_translate = c_lex_string_translate_save;
/* We built up the list in reverse order. */
return nreverse (attribute_list);
@@ -15367,6 +15379,10 @@ cp_parser_parse_tentatively (cp_parser*
access checks are queued up until we are no longer parsing
tentatively. */
push_deferring_access_checks (dk_deferred);
+ /* Keep both translated and untranslated versions of string tokens. */
+ if (!c_lex_string_translate)
+ abort ();
+ c_lex_string_translate = -true;
}
/* Commit to the currently active tentative parse. */
@@ -15377,6 +15393,8 @@ cp_parser_commit_to_tentative_parse (cp_
cp_parser_context *context;
cp_lexer *lexer;
+ c_lex_string_translate = true;
+
/* Mark all of the levels as committed. */
lexer = parser->lexer;
for (context = parser->context; context->next; context = context->next)
@@ -15412,6 +15430,8 @@ cp_parser_parse_definitely (cp_parser* p
bool error_occurred;
cp_parser_context *context;
+ c_lex_string_translate = true;
+
/* Remember whether or not an error occurred, since we are about to
destroy that information. */
error_occurred = cp_parser_error_occurred (parser);
Index: gcc/cp/ChangeLog
from Alexandre Oliva <aoliva@redhat.com>
* parser.c (cp_parser_cast_expression): Try to parse a type
instead of scanning tentatively up to the closing parenthesis.
(cp_parser_asm_operand_list): Remove redundant assignment to
c_lex_string_translate.
Index: gcc/cp/parser.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cp/parser.c,v
retrieving revision 1.199
diff -u -p -r1.199 parser.c
--- gcc/cp/parser.c 1 Jun 2004 15:12:13 -0000 1.199
+++ gcc/cp/parser.c 3 Jun 2004 22:03:44 -0000
@@ -4820,8 +4820,8 @@ cp_parser_cast_expression (cp_parser *pa
{
tree type = NULL_TREE;
tree expr = NULL_TREE;
- bool compound_literal_p;
const char *saved_message;
+ bool saved_in_type_id_in_expr_p;
/* There's no way to know yet whether or not this is a cast.
For example, `(int (3))' is a unary-expression, while `(int)
@@ -4850,29 +4850,25 @@ cp_parser_cast_expression (cp_parser *pa
Save tokens so that we can put them back. */
cp_lexer_save_tokens (parser->lexer);
- /* Skip tokens until the next token is a closing parenthesis.
- If we find the closing `)', and the next token is a `{', then
- we are looking at a compound-literal. */
- compound_literal_p
- = (cp_parser_skip_to_closing_parenthesis (parser, false, false,
- /*consume_paren=*/true)
- && cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE));
- /* Roll back the tokens we skipped. */
- cp_lexer_rollback_tokens (parser->lexer);
- /* If we were looking at a compound-literal, simulate an error
- so that the call to cp_parser_parse_definitely below will
- fail. */
- if (compound_literal_p)
- cp_parser_simulate_error (parser);
- else
+
+ saved_in_type_id_in_expr_p = parser->in_type_id_in_expr_p;
+ parser->in_type_id_in_expr_p = true;
+ /* Look for the type-id. */
+ type = cp_parser_type_id (parser);
+ /* Look for the closing `)'. */
+ cp_parser_require (parser, CPP_CLOSE_PAREN, "`)'");
+ parser->in_type_id_in_expr_p = saved_in_type_id_in_expr_p;
+
+ /* If the next token is a `{', then we are looking at a
+ compound-literal. If we were looking at a compound-literal,
+ simulate an error so that the call to
+ cp_parser_parse_definitely below will fail. */
+ if (cp_parser_error_occurred (parser)
+ || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
{
- bool saved_in_type_id_in_expr_p = parser->in_type_id_in_expr_p;
- parser->in_type_id_in_expr_p = true;
- /* Look for the type-id. */
- type = cp_parser_type_id (parser);
- /* Look for the closing `)'. */
- cp_parser_require (parser, CPP_CLOSE_PAREN, "`)'");
- parser->in_type_id_in_expr_p = saved_in_type_id_in_expr_p;
+ /* Roll back the tokens we skipped. */
+ cp_lexer_rollback_tokens (parser->lexer);
+ cp_parser_simulate_error (parser);
}
/* Restore the saved message. */
@@ -13422,8 +13418,6 @@ cp_parser_asm_operand_list (cp_parser* p
tree name;
cp_token *token;
- c_lex_string_translate = false;
-
if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
{
/* Consume the `[' token. */
--
Alexandre Oliva http://www.ic.unicamp.br/~oliva/
Red Hat Compiler Engineer aoliva@{redhat.com, gcc.gnu.org}
Free Software Evangelist oliva@{lsd.ic.unicamp.br, gnu.org}