This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch] new exec-charset testcase


http://gcc.gnu.org/ml/gcc-patches/2004-04/msg01330.html

Back on April 21, 2004, Mark Mitchell wrote:

Here are two alternative solutions to the problem.  I don't quite like
the idea of making the lexer even more context sensitive than it is,
especially given that the parser already knows exactly what's going
on.  Consider, for example:

  asm ("foo" : "=x" (({ int i; asm ("bar" : "=r" (i) : "r" ("bar"));
                        i; })));

To handle this in the lexer, you would have to keep track of multiple
separate asm statements, in a stack.  That is not a lexer any more;
it's parser stuff.


My first attempt was to try to get both versions of strings saved when
parsing tentatively, and to arrange for all locations that used host
strings to get to them properly.  This was a neat trick, but it
enabled me to see something else:

the *only* case in which we parse strings tentatively is while
scanning for the closing parenthesis when tentatively parsing a cast
expression.

However, in a case like this:

  foo = ( (whatever long sequence of tokens forming an expression) );

when we see the first `(', we still scan and save the entire
`(whatever...))' token stream tentatively, even though it's pretty
obvious upfront that `(whatever)' can't possibly be a type name.

The solution I liked better was to attempt to parse a type name
upfront.  Then, in the case of a compound literal, we backtrack like
we did before.  The advantage is that now we will no longer scan the
complete expression within parentheses to only then try to tell
whether it begins with a type name.  I *think* this should more than
make up for the disadvantage of having to parse the type name twice in
compound literal expressions, since parentheses are far more common in
expressions than type casts, which are in turn far more common than
compound literals.

So the second patch below is the one I'm submitting for review.  The
former, probably incomplete, is only for the record.  Ok to install
the latter?


Index: gcc/c-pragma.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-pragma.h,v
retrieving revision 1.39
diff -u -p -r1.39 c-pragma.h
--- gcc/c-pragma.h 27 Feb 2004 02:01:06 -0000 1.39
+++ gcc/c-pragma.h 3 Jun 2004 22:08:02 -0000
@@ -58,7 +58,9 @@ extern int c_lex (tree *);
 extern int c_lex_with_flags (tree *, unsigned char *);
 
 /* If true, then lex strings into the execution character set.  
-   Otherwise, lex strings into the host character set.  */
-extern bool c_lex_string_translate;
+   If false, lex strings into the host character set.
+   If -true, lex both, and chain them together, such that the latter
+   is the TREE_CHAIN of the former.  */
+extern int c_lex_string_translate;
 
 #endif /* GCC_C_PRAGMA_H */
Index: gcc/c-lex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-lex.c,v
retrieving revision 1.221
diff -u -p -r1.221 c-lex.c
--- gcc/c-lex.c 2 Jun 2004 02:09:44 -0000 1.221
+++ gcc/c-lex.c 3 Jun 2004 22:08:03 -0000
@@ -53,7 +53,11 @@ static splay_tree file_info_tree;
 
 int pending_lang_change; /* If we need to switch languages - C++ only */
 int c_header_level;	 /* depth in C headers - C++ only */
-bool c_lex_string_translate = true; /* If we need to translate characters received.  */
+/* If we need to translate characters received.  This is tri-state:
+   false means use only the untranslated string; true means use only
+   the translated string; -true means chain the untranslated string
+   to the translated one.  */
+int c_lex_string_translate = true;
 
 static tree interpret_integer (const cpp_token *, unsigned int);
 static tree interpret_float (const cpp_token *, unsigned int);
@@ -699,6 +703,22 @@ lex_string (const cpp_token *tok, tree *
     {
       value = build_string (istr.len, (char *)istr.text);
       free ((void *)istr.text);
+
+      if (c_lex_string_translate == -true)
+	{
+	  if (!cpp_interpret_string_notranslate (parse_in, strs, count,
+						 &istr, wide))
+	    /* Assume that, if we managed to translate the string
+	       above, then the untranslated parsing will always
+	       succeed.  */
+	    abort ();
+	  
+	  if (TREE_STRING_LENGTH (value) != (int)istr.len
+	      || 0 != strncmp (TREE_STRING_POINTER (value), (char *)istr.text,
+			       istr.len))
+	    TREE_CHAIN (value) = build_string (istr.len, (char *)istr.text);
+	  free ((void *)istr.text);
+	}
     }
   else
     {
Index: gcc/cp/parser.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cp/parser.c,v
retrieving revision 1.199
diff -u -p -r1.199 parser.c
--- gcc/cp/parser.c 1 Jun 2004 15:12:13 -0000 1.199
+++ gcc/cp/parser.c 3 Jun 2004 22:08:12 -0000
@@ -7086,6 +7086,10 @@ cp_parser_linkage_specification (cp_pars
       /* Assume C++ linkage.  */
       linkage = get_identifier ("c++");
     }
+  /* If the string is chained to another string, take the latter,
+     that's the untranslated string.  */
+  else if (TREE_CHAIN (token->value))
+    linkage = get_identifier (TREE_STRING_POINTER (TREE_CHAIN (token->value)));
   /* If it's a simple string constant, things are easier.  */
   else
     linkage = get_identifier (TREE_STRING_POINTER (token->value));
@@ -9887,6 +9891,7 @@ cp_parser_asm_definition (cp_parser* par
   tree asm_stmt;
   bool volatile_p = false;
   bool extended_p = false;
+  int c_lex_string_translate_save = c_lex_string_translate;
 
   /* Look for the `asm' keyword.  */
   cp_parser_require_keyword (parser, RID_ASM, "`asm'");
@@ -9907,6 +9912,9 @@ cp_parser_asm_definition (cp_parser* par
   if (!token)
     goto finish;
   string = token->value;
+  /* Take the untranslated string, if it was tentatively parsed as both.  */
+  if (TREE_CHAIN (string))
+    string = TREE_CHAIN (string);
   /* If we're allowing GNU extensions, check for the extended assembly
      syntax.  Unfortunately, the `:' tokens need not be separated by
      a space in C, and so, for compatibility, we tolerate that here
@@ -9999,7 +10007,7 @@ cp_parser_asm_definition (cp_parser* par
     assemble_asm (string);
 
  finish:
-  c_lex_string_translate = true;
+  c_lex_string_translate = c_lex_string_translate_save;
 }
 
 /* Declarators [gram.dcl.decl] */
@@ -13441,6 +13449,9 @@ cp_parser_asm_operand_list (cp_parser* p
       /* Look for the string-literal.  */
       token = cp_parser_require (parser, CPP_STRING, "string-literal");
       string_literal = token ? token->value : error_mark_node;
+      /* Take the untranslated string, if we got both.  */
+      if (TREE_CHAIN (string_literal))
+	string_literal = TREE_CHAIN (string_literal);
       c_lex_string_translate = true;
       /* Look for the `('.  */
       cp_parser_require (parser, CPP_OPEN_PAREN, "`('");
@@ -13573,6 +13584,7 @@ static tree
 cp_parser_attribute_list (cp_parser* parser)
 {
   tree attribute_list = NULL_TREE;
+  int c_lex_string_translate_save = c_lex_string_translate;
 
   c_lex_string_translate = false;
   while (true)
@@ -13620,7 +13632,7 @@ cp_parser_attribute_list (cp_parser* par
       /* Consume the comma and keep going.  */
       cp_lexer_consume_token (parser->lexer);
     }
-  c_lex_string_translate = true;
+  c_lex_string_translate = c_lex_string_translate_save;
 
   /* We built up the list in reverse order.  */
   return nreverse (attribute_list);
@@ -15367,6 +15379,10 @@ cp_parser_parse_tentatively (cp_parser* 
      access checks are queued up until we are no longer parsing
      tentatively.  */
   push_deferring_access_checks (dk_deferred);
+  /* Keep both translated and untranslated versions of string tokens.  */
+  if (!c_lex_string_translate)
+    abort ();
+  c_lex_string_translate = -true;
 }
 
 /* Commit to the currently active tentative parse.  */
@@ -15377,6 +15393,8 @@ cp_parser_commit_to_tentative_parse (cp_
   cp_parser_context *context;
   cp_lexer *lexer;
 
+  c_lex_string_translate = true;
+
   /* Mark all of the levels as committed.  */
   lexer = parser->lexer;
   for (context = parser->context; context->next; context = context->next)
@@ -15412,6 +15430,8 @@ cp_parser_parse_definitely (cp_parser* p
   bool error_occurred;
   cp_parser_context *context;
 
+  c_lex_string_translate = true;
+
   /* Remember whether or not an error occurred, since we are about to
      destroy that information.  */
   error_occurred = cp_parser_error_occurred (parser);
Index: gcc/cp/ChangeLog
from  Alexandre Oliva  <aoliva@redhat.com>

	* parser.c (cp_parser_cast_expression): Try to parse a type
	instead of scanning tentatively up to the closing parenthesis.
	(cp_parser_asm_operand_list): Remove redundant assignment to
	c_lex_string_translate.

Index: gcc/cp/parser.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cp/parser.c,v
retrieving revision 1.199
diff -u -p -r1.199 parser.c
--- gcc/cp/parser.c 1 Jun 2004 15:12:13 -0000 1.199
+++ gcc/cp/parser.c 3 Jun 2004 22:03:44 -0000
@@ -4820,8 +4820,8 @@ cp_parser_cast_expression (cp_parser *pa
     {
       tree type = NULL_TREE;
       tree expr = NULL_TREE;
-      bool compound_literal_p;
       const char *saved_message;
+      bool saved_in_type_id_in_expr_p;
 
       /* There's no way to know yet whether or not this is a cast.
 	 For example, `(int (3))' is a unary-expression, while `(int)
@@ -4850,29 +4850,25 @@ cp_parser_cast_expression (cp_parser *pa
 
 	 Save tokens so that we can put them back.  */
       cp_lexer_save_tokens (parser->lexer);
-      /* Skip tokens until the next token is a closing parenthesis.
-	 If we find the closing `)', and the next token is a `{', then
-	 we are looking at a compound-literal.  */
-      compound_literal_p
-	= (cp_parser_skip_to_closing_parenthesis (parser, false, false,
-						  /*consume_paren=*/true)
-	   && cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE));
-      /* Roll back the tokens we skipped.  */
-      cp_lexer_rollback_tokens (parser->lexer);
-      /* If we were looking at a compound-literal, simulate an error
-	 so that the call to cp_parser_parse_definitely below will
-	 fail.  */
-      if (compound_literal_p)
-	cp_parser_simulate_error (parser);
-      else
+
+      saved_in_type_id_in_expr_p = parser->in_type_id_in_expr_p;
+      parser->in_type_id_in_expr_p = true;
+      /* Look for the type-id.  */
+      type = cp_parser_type_id (parser);
+      /* Look for the closing `)'.  */
+      cp_parser_require (parser, CPP_CLOSE_PAREN, "`)'");
+      parser->in_type_id_in_expr_p = saved_in_type_id_in_expr_p;
+
+      /* If the next token is a `{', then we are looking at a
+	 compound-literal.  If we were looking at a compound-literal,
+	 simulate an error so that the call to
+	 cp_parser_parse_definitely below will fail.  */
+      if (cp_parser_error_occurred (parser)
+	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
 	{
-	  bool saved_in_type_id_in_expr_p = parser->in_type_id_in_expr_p;
-	  parser->in_type_id_in_expr_p = true;
-	  /* Look for the type-id.  */
-	  type = cp_parser_type_id (parser);
-	  /* Look for the closing `)'.  */
-	  cp_parser_require (parser, CPP_CLOSE_PAREN, "`)'");
-	  parser->in_type_id_in_expr_p = saved_in_type_id_in_expr_p;
+	  /* Roll back the tokens we skipped.  */
+	  cp_lexer_rollback_tokens (parser->lexer);
+	  cp_parser_simulate_error (parser);
 	}
 
       /* Restore the saved message.  */
@@ -13422,8 +13418,6 @@ cp_parser_asm_operand_list (cp_parser* p
       tree name;
       cp_token *token;
 
-      c_lex_string_translate = false;
-
       if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_SQUARE))
 	{
 	  /* Consume the `[' token.  */
-- 
Alexandre Oliva             http://www.ic.unicamp.br/~oliva/
Red Hat Compiler Engineer   aoliva@{redhat.com, gcc.gnu.org}
Free Software Evangelist  oliva@{lsd.ic.unicamp.br, gnu.org}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]