Move string concatenation out of the C parsers

Neil Booth neil@daikokuya.demon.co.uk
Fri Dec 7 16:47:00 GMT 2001


This moves string concatenation to c-lex.c, where it's only one step
away from a clean move into cpplib, a not unreasonable plan IMO.  I'd
eventually like cpplib to be wholly responsible for translation phases
1-6 of the C standard.  Even concatenating in c-lex.c is an improvement and
simplification over being in the parsers, as this patch demonstrates.

This move isn't possible without altering the parser implementation of
ObjC strings, which happily is actually a minor cleanup.  Leaving it
as it was would require cpplib to be able to back up more than one
token, which is awkward to implement and never going to happen (think
of just-exited macros whose expansions have been popped off the stack,
only for cpplib to immediately enter a new one.  Trust me, you don't
want to go there 8^)).

Note that ObjC string _semantics_ do not change with this patch.

Bootstrapped x86 Linux without regressions.  OK to commit?

Neil.

	* c-lex.c (c_lex): Peek a token ahead for a string to concatenate,
	using combine_strings to do the concatenation.
	* c-parse.in: Replace uses of the string non-terminal with STRING.
	Don't attempt string concatenation.
	(OBJC_STRING): New terminal.
	(string): Remove non-terminal.
	(_yylex): Call combine_strings on function names.  Generate
	OBJC_STRING terminals; don't pass '@' on to yacc.
	* c-typeck.c (simple_asm_stmt): Don't concatenate strings here.
	(build_asm_stmt): Similarly.
cp:
	* parse.y: Replace uses of the string non-terminal with STRING.
	Don't perform string concatenation here.
	(string): Remove non-terminal.
	* semantics.c (finish_asm_stmt): Don't concatenate strings here.

============================================================
Index: gcc/c-lex.c
--- gcc/c-lex.c	2001/12/02 14:38:06	1.159
+++ gcc/c-lex.c	2001/12/08 00:32:59
@@ -762,6 +762,7 @@ c_lex (value)
      tree *value;
 {
   const cpp_token *tok;
+  enum cpp_ttype result;
 
   retry:
   timevar_push (TV_CPP);
@@ -776,7 +777,9 @@ c_lex (value)
   lineno = src_lineno;
 
   *value = NULL_TREE;
-  switch (tok->type)
+  result = tok->type;
+
+  switch (result)
     {
     case CPP_OPEN_BRACE:  indent_level++;  break;
     case CPP_CLOSE_BRACE: indent_level--;  break;
@@ -804,8 +807,48 @@ c_lex (value)
 
     case CPP_STRING:
     case CPP_WSTRING:
-      *value = lex_string ((const char *)tok->val.str.text,
-			   tok->val.str.len, tok->type == CPP_WSTRING);
+      {
+	tree full_str = NULL_TREE;
+
+	do
+	  {
+	    /* Translate escape sequences in this string, then append it.  */
+	    tree str = lex_string ((const char *) tok->val.str.text,
+				   tok->val.str.len,
+				   tok->type == CPP_WSTRING);
+
+	    if (full_str && c_language == clk_c && warn_traditional
+		&& !in_system_header)
+	      {
+		static int last_lineno;
+		static const char *last_input_filename;
+
+		if (lineno != last_lineno || !last_input_filename
+		    || strcmp (last_input_filename, input_filename))
+		  {
+		    warning ("traditional C rejects string concatenation");
+		    last_lineno = lineno;
+		    last_input_filename = input_filename;
+		  }
+	      }
+
+	    full_str = chainon (full_str, str);
+
+	    /* Wide and non-wide give a wide result.  */
+	    if (tok->type == CPP_WSTRING)
+	      result = CPP_WSTRING;
+
+	    /* Look ahead for another string token.  */
+	    do
+	      tok = cpp_get_token (parse_in);
+	    while (tok->type == CPP_PADDING);
+	  }
+	while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
+
+	_cpp_backup_tokens (parse_in, 1);
+
+	*value = combine_strings (full_str);
+      }
       break;
 
       /* These tokens should not be visible outside cpplib.  */
@@ -817,7 +860,7 @@ c_lex (value)
     default: break;
     }
 
-  return tok->type;
+  return result;
 }
 
 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
============================================================
Index: gcc/c-parse.in
--- gcc/c-parse.in	2001/12/04 22:55:37	1.117
+++ gcc/c-parse.in	2001/12/08 00:33:09
@@ -99,9 +99,8 @@ end ifobjc
    yylval is the node for the constant.  */
 %token CONSTANT
 
-/* String constants in raw form.
-   yylval is a STRING_CST node.  */
-%token STRING
+/* String constants as arrays of the appropriate character type.  */
+%token STRING OBJC_STRING
 
 /* "...", used for functions with variable arglists.  */
 %token ELLIPSIS
@@ -151,7 +150,7 @@ end ifobjc
 %type <ttype> BREAK CONTINUE RETURN GOTO ASM_KEYWORD SIZEOF TYPEOF ALIGNOF
 
 %type <ttype> identifier IDENTIFIER TYPENAME CONSTANT expr nonnull_exprlist exprlist
-%type <ttype> expr_no_commas cast_expr unary_expr primary string STRING
+%type <ttype> expr_no_commas cast_expr unary_expr primary STRING
 %type <ttype> declspecs_nosc_nots_nosa_noea declspecs_nosc_nots_nosa_ea
 %type <ttype> declspecs_nosc_nots_sa_noea declspecs_nosc_nots_sa_ea
 %type <ttype> declspecs_nosc_ts_nosa_noea declspecs_nosc_ts_nosa_ea
@@ -204,6 +203,8 @@ end ifobjc
 %type <lineno> save_lineno
 
 ifobjc
+%token OBJC_STRING
+
 /* the Objective-C nonterminals */
 
 %type <ttype> ivar_decl_list ivar_decls ivar_decl ivars ivar_declarator
@@ -214,7 +215,7 @@ ifobjc
 %type <ttype> selectorarg keywordnamelist keywordname objcencodeexpr
 %type <ttype> objc_string non_empty_protocolrefs protocolrefs identifier_list objcprotocolexpr
 
-%type <ttype> CLASSNAME OBJECTNAME
+%type <ttype> CLASSNAME OBJECTNAME OBJC_STRING
 end ifobjc
 
 %{
@@ -618,8 +619,8 @@ primary:
 		  $$ = build_external_ref ($1, yychar == '(');
 		}
 	| CONSTANT
-	| string
-		{ $$ = combine_strings ($1); }
+	| STRING
+		{ $$ = $1; }
 	| VAR_FUNC_NAME
 		{ $$ = fname_decl (C_RID_CODE ($$), $$); }
 	| '(' typename ')' '{' 
@@ -709,37 +710,13 @@ ifobjc
 end ifobjc
 	;
 
-/* Produces a STRING_CST with perhaps more STRING_CSTs chained onto it.  */
-string:
-	  STRING
-	| string STRING
-		{
-ifc
-                  static int last_lineno = 0;
-                  static const char *last_input_filename = 0;
-end ifc
-                  $$ = chainon ($1, $2);
-ifc
-		  if (warn_traditional && !in_system_header
-		      && (lineno != last_lineno || !last_input_filename ||
-			  strcmp (last_input_filename, input_filename)))
-		    {
-		      warning ("traditional C rejects string concatenation");
-		      last_lineno = lineno;
-		      last_input_filename = input_filename;
-		    }
-end ifc
-		}
-	;
-
 ifobjc
-/* Produces an STRING_CST with perhaps more STRING_CSTs chained
-   onto it, which is to be read as an ObjC string object.  */
+/* Chains ObjC string objects together.  */
 objc_string:
-	  '@' STRING
-		{ $$ = $2; }
-	| objc_string '@' STRING
-		{ $$ = chainon ($1, $3); }
+	  OBJC_STRING
+		{ $$ = $1; }
+	| objc_string OBJC_STRING
+		{ $$ = chainon ($1, $2); }
 	;
 end ifobjc
 
@@ -1372,10 +1349,8 @@ notype_initdecls:
 maybeasm:
 	  /* empty */
 		{ $$ = NULL_TREE; }
-	| ASM_KEYWORD '(' string ')'
-		{ if (TREE_CHAIN ($3)) $3 = combine_strings ($3);
-		  $$ = $3;
-		}
+	| ASM_KEYWORD '(' STRING ')'
+		{ $$ = $3; }
 	;
 
 initdcl:
@@ -2433,10 +2408,10 @@ asm_operand:
 	;
 
 asm_clobbers:
-	  string
-		{ $$ = tree_cons (NULL_TREE, combine_strings ($1), NULL_TREE); }
-	| asm_clobbers ',' string
-		{ $$ = tree_cons (NULL_TREE, combine_strings ($3), $1); }
+	  STRING
+		{ $$ = tree_cons (NULL_TREE, $1, NULL_TREE); }
+	| asm_clobbers ',' STRING
+		{ $$ = tree_cons (NULL_TREE, $3, $1); }
 	;
 
 /* This is what appears inside the parens in a function declarator.
@@ -3578,7 +3553,8 @@ end ifobjc
 	       to string constants.  */
 	    const char *name = fname_string (rid_code);
 	  
-	    yylval.ttype = build_string (strlen (name) + 1, name);
+	    yylval.ttype
+	      = combine_strings (build_string (strlen (name) + 1, name));
 	    last_token = CPP_STRING;  /* so yyerror won't choke */
 	    return STRING;
 	  }
@@ -3695,23 +3671,16 @@ _yylex ()
 	 special significance.  */
     case CPP_ATSIGN:
 ifobjc
-      {
-	tree after_at;
-	enum cpp_ttype after_at_type;
+	last_token = c_lex (&yylval.ttype);
 
-	after_at_type = c_lex (&after_at);
+	if (last_token == CPP_NAME
+	    && C_IS_RESERVED_WORD (yylval.ttype)
+	    && OBJC_IS_AT_KEYWORD (C_RID_CODE (yylval.ttype)))
+	  return rid_to_yy [(int) C_RID_CODE (yylval.ttype)];
+	else if (last_token == CPP_STRING || last_token == CPP_WSTRING)
+	  return OBJC_STRING;
 
-	if (after_at_type == CPP_NAME
-	    && C_IS_RESERVED_WORD (after_at)
-	    && OBJC_IS_AT_KEYWORD (C_RID_CODE (after_at)))
-	  {
-	    yylval.ttype = after_at;
-	    last_token = after_at_type;
-	    return rid_to_yy [(int) C_RID_CODE (after_at)];
-	  }
-	_cpp_backup_tokens (parse_in, 1);
-	return '@';
-      }
+	/* Fall through...  */
 end ifobjc
 
       /* These tokens are C++ specific (and will not be generated
============================================================
Index: gcc/c-typeck.c
--- gcc/c-typeck.c	2001/12/05 14:13:56	1.155
+++ gcc/c-typeck.c	2001/12/08 00:33:31
@@ -6832,8 +6832,6 @@ simple_asm_stmt (expr)
     {
       tree stmt;
 
-      if (TREE_CHAIN (expr))
-	expr = combine_strings (expr);
       stmt = add_stmt (build_stmt (ASM_STMT, NULL_TREE, expr,
 				   NULL_TREE, NULL_TREE,
 				   NULL_TREE));
@@ -6858,8 +6856,6 @@ build_asm_stmt (cv_qualifier, string, ou
 {
   tree tail;
 
-  if (TREE_CHAIN (string))
-    string = combine_strings (string);
   if (TREE_CODE (string) != STRING_CST)
     {
       error ("asm template is not a string constant");
============================================================
Index: gcc/cp/parse.y
--- gcc/cp/parse.y	2001/12/03 12:39:46	1.233
+++ gcc/cp/parse.y	2001/12/08 00:33:44
@@ -259,8 +259,7 @@ cp_parse_init ()
    yylval contains an IDENTIFIER_NODE which indicates which one.  */
 %token VAR_FUNC_NAME
 
-/* String constants in raw form.
-   yylval is a STRING_CST node.  */
+/* String constants as arrays of a suitable type.  */
 %token STRING
 
 /* "...", used for functions with variable arglists.  */
@@ -329,7 +328,7 @@ cp_parse_init ()
 %type <ttype> PFUNCNAME maybe_identifier
 %type <ttype> paren_expr_or_null nontrivial_exprlist SELFNAME
 %type <ttype> expr_no_commas expr_no_comma_rangle
-%type <ttype> cast_expr unary_expr primary string STRING
+%type <ttype> cast_expr unary_expr primary STRING
 %type <ttype> reserved_declspecs boolean.literal
 %type <ttype> reserved_typespecquals
 %type <ttype> SCSPEC TYPESPEC CV_QUALIFIER maybe_cv_qualifier
@@ -497,9 +496,8 @@ extdef:
 		{ do_pending_inlines (); }
 	| template_def
 		{ do_pending_inlines (); }
-	| asm_keyword '(' string ')' ';'
-		{ if (TREE_CHAIN ($3)) $3 = combine_strings ($3);
-		  assemble_asm ($3); }
+	| asm_keyword '(' STRING ')' ';'
+		{ assemble_asm ($3); }
 	| extern_lang_string '{' extdefs_opt '}'
 		{ pop_lang_context (); }
 	| extern_lang_string .hush_warning fndef .warning_ok eat_saved_input
@@ -1547,9 +1545,8 @@ primary:
 		}		
 	| CONSTANT
 	| boolean.literal
-	| string
+	| STRING
 		{
-		  $$ = combine_strings ($$);
 		  /* combine_strings doesn't set up TYPE_MAIN_VARIANT of
 		     a const array the way we want, so fix it.  */
 		  if (flag_const_strings)
@@ -1751,13 +1748,6 @@ boolean.literal:
 		{ $$ = boolean_false_node; }
 	;
 
-/* Produces a STRING_CST with perhaps more STRING_CSTs chained onto it.  */
-string:
-	  STRING
-	| string STRING
-		{ $$ = chainon ($$, $2); }
-	;
-
 nodecls:
 	  /* empty */
 		{
@@ -2041,8 +2031,8 @@ nomods_initdecls:
 maybeasm:
 	  /* empty */
 		{ $$ = NULL_TREE; }
-	| asm_keyword '(' string ')'
-		{ if (TREE_CHAIN ($3)) $3 = combine_strings ($3); $$ = $3; }
+	| asm_keyword '(' STRING ')'
+		{ $$ = $3; }
 	;
 
 initdcl:
@@ -3439,27 +3429,27 @@ simple_stmt:
                 { $$ = finish_return_stmt (NULL_TREE); }
 	| RETURN_KEYWORD expr ';'
                 { $$ = finish_return_stmt ($2); }
-	| asm_keyword maybe_cv_qualifier '(' string ')' ';'
+	| asm_keyword maybe_cv_qualifier '(' STRING ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, NULL_TREE, NULL_TREE,
 					NULL_TREE);
 		  ASM_INPUT_P ($$) = 1; }
 	/* This is the case with just output operands.  */
-	| asm_keyword maybe_cv_qualifier '(' string ':' asm_operands ')' ';'
+	| asm_keyword maybe_cv_qualifier '(' STRING ':' asm_operands ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, $6, NULL_TREE, NULL_TREE); }
 	/* This is the case with input operands as well.  */
-	| asm_keyword maybe_cv_qualifier '(' string ':' asm_operands ':'
+	| asm_keyword maybe_cv_qualifier '(' STRING ':' asm_operands ':'
 	  asm_operands ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, $6, $8, NULL_TREE); }
-	| asm_keyword maybe_cv_qualifier '(' string SCOPE asm_operands ')' ';'
+	| asm_keyword maybe_cv_qualifier '(' STRING SCOPE asm_operands ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, NULL_TREE, $6, NULL_TREE); }
 	/* This is the case with clobbered registers as well.  */
-	| asm_keyword maybe_cv_qualifier '(' string ':' asm_operands ':'
+	| asm_keyword maybe_cv_qualifier '(' STRING ':' asm_operands ':'
 	  asm_operands ':' asm_clobbers ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, $6, $8, $10); }
-	| asm_keyword maybe_cv_qualifier '(' string SCOPE asm_operands ':'
+	| asm_keyword maybe_cv_qualifier '(' STRING SCOPE asm_operands ':'
 	  asm_clobbers ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, NULL_TREE, $6, $8); }
-	| asm_keyword maybe_cv_qualifier '(' string ':' asm_operands SCOPE
+	| asm_keyword maybe_cv_qualifier '(' STRING ':' asm_operands SCOPE
 	  asm_clobbers ')' ';'
 		{ $$ = finish_asm_stmt ($2, $4, $6, NULL_TREE, $8); }
 	| GOTO '*' expr ';'
@@ -3614,10 +3604,10 @@ asm_operand:
 	;
 
 asm_clobbers:
-	  string
-		{ $$ = tree_cons (NULL_TREE, combine_strings ($1), NULL_TREE);}
-	| asm_clobbers ',' string
-		{ $$ = tree_cons (NULL_TREE, combine_strings ($3), $1); }
+	  STRING
+		{ $$ = tree_cons (NULL_TREE, $1, NULL_TREE);}
+	| asm_clobbers ',' STRING
+		{ $$ = tree_cons (NULL_TREE, $3, $1); }
 	;
 
 /* This is what appears inside the parens in a function declarator.
============================================================
Index: gcc/cp/semantics.c
--- gcc/cp/semantics.c	2001/12/04 15:10:15	1.232
+++ gcc/cp/semantics.c	2001/12/08 00:33:49
@@ -883,9 +883,6 @@ finish_asm_stmt (cv_qualifier, string, o
   tree r;
   tree t;
 
-  if (TREE_CHAIN (string))
-    string = combine_strings (string);
-
   if (cv_qualifier != NULL_TREE
       && cv_qualifier != ridpointers[(int) RID_VOLATILE])
     {



More information about the Gcc-patches mailing list