cpplib: fixes for ObjC in integrated preprocessor

Zack Weinberg zack@wolery.cumb.org
Thu Aug 3 10:37:00 GMT 2000


In Objective C, '@' begins some keywords and may appear just before a
string constant (@"foo").  In integrated mode, if the @ is passed to
c-lex.c as a separate token, it doesn't work.  This patch causes
cpplib to return the @ glued onto the front of the token that follows
it (an identifier or a string constant).

I will apply this if it survives a bootstrap on i386-linux (with a
standalone preprocessor).

zw

	* cpplex.c (parse_name): Might have to glue a CPP_OTHER token
	before the name.
	(lex_line): Glue @ onto the beginning of identifiers and
	string constants, in Objective-C mode.
	(output_token, spell_token): Handle CPP_OSTRING.
	(can_paste, maybe_paste_with_next): Handle pasting @ onto the
	beginning of a NAME or a STRING, in objc mode.

	* cpplib.c (get_define_node): Do not permit identifiers that
	begin with @ to be #defined.
	* cppmacro.c (CAN_PASTE_AFTER): Add CPP_OTHER.
	* cpplib.h (TTYPE_TABLE): Add CPP_OSTRING.

===================================================================
Index: cpplex.c
--- cpplex.c	2000/08/02 07:08:48	1.85
+++ cpplex.c	2000/08/03 17:33:37
@@ -1008,15 +1008,27 @@ parse_name (pfile, tok, cur, rlimit)
     }
   len = cur - name;
 
-  if (tok->val.node == 0)
+  if (tok->type == CPP_NAME && tok->val.node == 0)
     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
   else
     {
-      unsigned int oldlen = tok->val.node->length;
-      U_CHAR *newname = alloca (oldlen + len);
-      memcpy (newname, tok->val.node->name, oldlen);
+      unsigned int oldlen;
+      U_CHAR *newname;
+
+      if (tok->type == CPP_NAME)
+	oldlen = tok->val.node->length;
+      else
+	oldlen = 1;
+
+      newname = alloca (oldlen + len);
+
+      if (tok->type == CPP_NAME)
+	memcpy (newname, tok->val.node->name, oldlen);
+      else
+	newname[0] = tok->val.aux;
       memcpy (newname + oldlen, name, len);
       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
+      tok->type = CPP_NAME;
     }
 
   return cur;
@@ -1373,9 +1385,17 @@ lex_line (pfile, list)
 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 	case 'Y': case 'Z':
 	  cur--;		     /* Backup character.  */
-	  cur_token->val.node = 0;
-	  cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
 
+	  /* In Objective C, '@' may begin certain keywords.  */
+	  if (CPP_OPTION (pfile, objc) && cur_token[-1].type == CPP_OTHER
+	      && cur_token[-1].val.aux == '@' && IMMED_TOKEN ())
+	    cur_token--;
+	  else
+	    {
+	      cur_token->val.node = 0;
+	      cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
+	    }
+
 	continue_name:
 	  cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
 
@@ -1394,12 +1414,21 @@ lex_line (pfile, list)
 	  break;
 
 	case '\'':
+	  cur_token->type = CPP_CHAR;
+	  if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
+	      && cur_token[-1].val.node == pfile->spec_nodes->n_L)
+	    BACKUP_TOKEN (CPP_WCHAR);
+	  goto do_parse_string;
+	  
 	case '\"':
-	  cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
-	  /* Do we have a wide string?  */
+	  cur_token->type = CPP_STRING;
 	  if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
 	      && cur_token[-1].val.node == pfile->spec_nodes->n_L)
-	    BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
+	    BACKUP_TOKEN (CPP_WSTRING);
+	  else if (CPP_OPTION (pfile, objc)
+		   && cur_token[-1].type == CPP_OTHER && IMMED_TOKEN ()
+		   && cur_token[-1].val.aux == '@')
+	    BACKUP_TOKEN (CPP_OSTRING);
 
 	do_parse_string:
 	  /* Here c is one of ' " or >.  */
@@ -1883,20 +1912,21 @@ output_token (pfile, fp, token, prev, wh
 
     case SPELL_STRING:
       {
-	if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
-	  putc ('L', fp);
-
-	if (token->type == CPP_STRING || token->type == CPP_WSTRING)
-	  putc ('"', fp);
-	if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
-	  putc ('\'', fp);
-
+	int left, right, tag;
+	switch (token->type)
+	  {
+	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
+	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
+	  case CPP_OSTRING:	left = '"';  right = '"';  tag = '@';  break;
+	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
+    	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
+	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
+	  default:		left = '\0'; right = '\0'; tag = '\0'; break;
+	  }
+	if (tag) putc (tag, fp);
+	if (left) putc (left, fp);
 	fwrite (token->val.str.text, 1, token->val.str.len, fp);
-	
-	if (token->type == CPP_STRING || token->type == CPP_WSTRING)
-	  putc ('"', fp);
-	if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
-	  putc ('\'', fp);
+	if (right) putc (right, fp);
       }
       break;
 
@@ -1999,21 +2029,22 @@ spell_token (pfile, token, buffer)
 
     case SPELL_STRING:
       {
-	if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
-	  *buffer++ = 'L';
-
-	if (token->type == CPP_STRING || token->type == CPP_WSTRING)
-	  *buffer++ = '"';
-	if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
-	  *buffer++ = '\'';
-
+	int left, right, tag;
+	switch (token->type)
+	  {
+	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
+	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
+	  case CPP_OSTRING:	left = '"';  right = '"';  tag = '@';  break;
+	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
+    	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
+	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
+	  default:		left = '\0'; right = '\0'; tag = '\0'; break;
+	  }
+	if (tag) *buffer++ = tag;
+	if (left) *buffer++ = left;
 	memcpy (buffer, token->val.str.text, token->val.str.len);
 	buffer += token->val.str.len;
-	
-	if (token->type == CPP_STRING || token->type == CPP_WSTRING)
-	  *buffer++ = '"';
-	if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
-	  *buffer++ = '\'';
+	if (right) *buffer++ = right;
       }
       break;
 
@@ -2700,6 +2731,13 @@ can_paste (pfile, token1, token2, digrap
 	return CPP_NUMBER;
       break;
 
+    case CPP_OTHER:
+      if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
+	{
+	  if (b == CPP_NAME)	return CPP_NAME;
+	  if (b == CPP_STRING)	return CPP_OSTRING;
+	}
+
     default:
       break;
     }
@@ -2789,7 +2827,8 @@ maybe_paste_with_next (pfile, token)
 		  pasted->val.str.len = end - buf;
 		}
 	    }
-	  else if (type == CPP_WCHAR || type == CPP_WSTRING)
+	  else if (type == CPP_WCHAR || type == CPP_WSTRING
+		   || type == CPP_OSTRING)
 	    pasted = duplicate_token (pfile, second);
 	  else
 	    {
===================================================================
Index: cpplib.c
--- cpplib.c	2000/08/02 07:08:48	1.195
+++ cpplib.c	2000/08/03 17:33:38
@@ -242,6 +242,16 @@ get_define_node (pfile)
       return 0;
     }
 
+  /* In Objective C, some keywords begin with '@', but general identifiers
+     do not, and you're not allowed to #define them.  */
+  if (token->val.node->name[0] == '@')
+    {
+      cpp_error_with_line (pfile, token->line, token->col,
+			   "\"%s\" cannot be used as a macro name",
+			   token->val.node->name);
+      return 0;
+    }
+
   /* Check for poisoned identifiers now.  */
   if (token->val.node->type == T_POISON)
     {
===================================================================
Index: cpplib.h
--- cpplib.h	2000/08/02 01:13:44	1.115
+++ cpplib.h	2000/08/03 17:33:38
@@ -123,6 +123,7 @@ typedef struct cpp_hashnode cpp_hashnode
 \
   TK(CPP_STRING,	SPELL_STRING)	/* "string" */			\
   TK(CPP_WSTRING,	SPELL_STRING)	/* L"string" */			\
+  TK(CPP_OSTRING,	SPELL_STRING)	/* @"string" - Objective C */	\
   TK(CPP_HEADER_NAME,	SPELL_STRING)	/* <stdio.h> in #include */	\
 \
   TK(CPP_COMMENT,	SPELL_STRING)	/* Only if output comments.  */ \
===================================================================
Index: cppmacro.c
--- cppmacro.c	2000/08/02 07:08:49	1.6
+++ cppmacro.c	2000/08/03 17:33:38
@@ -55,12 +55,14 @@ static cpp_toklist * alloc_macro PARAMS 
 
 /* These are all the tokens that can have something pasted after them.
    Comma is included in the list only to support the GNU varargs extension
-   (where you write a ## b and a disappears if b is an empty rest argument).  */
+   (where you write a ## b and a disappears if b is an empty rest argument).
+   CPP_OTHER is included because of Objective C's use of '@'.  */
 #define CAN_PASTE_AFTER(type) \
 ((type) <= CPP_LAST_EQ || (type) == CPP_COLON || (type) == CPP_HASH \
  || (type) == CPP_DEREF || (type) == CPP_DOT || (type) == CPP_NAME \
  || (type) == CPP_INT || (type) == CPP_FLOAT || (type) == CPP_NUMBER \
- || (type) == CPP_MACRO_ARG || (type) == CPP_PLACEMARKER || (type) == CPP_COMMA)
+ || (type) == CPP_MACRO_ARG || (type) == CPP_PLACEMARKER \
+ || (type) == CPP_COMMA || (type) == CPP_OTHER)
 
 /* Scans for a given token, returning the parameter number if found,
    or 0 if not found.  Scans from FIRST to TOKEN - 1 or the first


More information about the Gcc-patches mailing list