This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Re: cpplib: lexer cleanup

To: Zack Weinberg <zack at wolery dot cumb dot org>
Subject: Re: cpplib: lexer cleanup
From: Neil Booth <NeilB at earthling dot net>
Date: Tue, 9 May 2000 19:03:31 +0900
Cc: gcc-patches at gcc dot gnu dot org
References: <E12ooKQ-0004rC-00@monkey.rosenet.ne.jp> <20000508103318.E24013@wolery.cumb.org>
Zack Weinberg wrote:-

> Too many changes in the same patch.  Could you split it into:
> 
> - the pure bug fixes
> - folding all the spelling functions
> - the changes to comment handling
> - the assertion changes
> 
> and send each separately, please?

And this is the patch that folds the spelling functions into
spell_token.

If this doesn't handle comments the correct way, Zack, let me know.
I've written it so that feeding the commented output of cpp -C back
into cpp, with the same -traditional setting, reproduces an identical
sequence of tokens and whitespace flags the second time.
This is my understanding of what -C means and what it needs to do.
As before, comments do not appear in the token stream proper.

Neil.

	* cpplex.c (spell_token): New function.
	(spell_string, spell_comment, spell_name): Fold into
	spell_token.
	(I, S): Add macros.
	(E, H): Remove macros.
	(save_comment): Save comment opening characters too.
	(_cpp_lex_line): Pass the comment type character to save_comment.
	(_cpp_output_list): Update to use spell_token.  Tidy up comment
	handling.
	* cpplib.h (I, S): Add macros.
	(E, H): Remove macros.
	(TTYPE_TABLE): Update entries for new speller.
	(SYNTAX_ASSERT): Remove.

Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.38
diff -u -p -r1.38 cpplex.c
--- cpplex.c	2000/05/08 22:22:48	1.38
+++ cpplex.c	2000/05/09 09:29:19
@@ -84,12 +84,6 @@ void _cpp_lex_line PARAMS ((cpp_reader *
 
 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
 
-unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
-				   cpp_token *token));
-unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
-				    cpp_token *token));
-unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
-				 cpp_token *token));
 static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
 					    cpp_toklist *, unsigned char *,
 					    int));
@@ -120,13 +114,17 @@ typedef unsigned int (* speller) PARAMS 
 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 
-#define SPELL_TEXT     0
-#define SPELL_HANDLER  1
-#define SPELL_CHAR     2
-#define SPELL_NONE     3
-
-#define T(e, s) {SPELL_TEXT, s},
-#define H(e, s) {SPELL_HANDLER, (PTR) s},
+/* Order here matters.  Those beyond SPELL_NONE store their spelling
+   in the token list, and its length in the token->val.name.len.  */
+#define SPELL_OPERATOR 0
+#define SPELL_CHAR     1
+#define SPELL_NONE     2
+#define SPELL_IDENT    3
+#define SPELL_STRING   4
+
+#define T(e, s) {SPELL_OPERATOR, s},
+#define I(e, s) {SPELL_IDENT, s},
+#define S(e, s) {SPELL_STRING, s},
 #define C(e, s) {SPELL_CHAR, s},
 #define N(e, s) {SPELL_NONE, s},
 
@@ -137,7 +135,8 @@ static const struct token_spelling
 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 
 #undef T
-#undef H
+#undef I
+#undef S
 #undef C
 #undef N
 
@@ -147,10 +146,10 @@ static const struct token_spelling
 #define BACKUP_DIGRAPH(ttype) do { \
   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 
-/* If there is this many bytes in a buffer, you have enough room to
-   spell the token, including preceding whitespace.  */
-#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type == \
-			       SPELL_HANDLER ? token->val.name.len: 0))
+/* An upper bound on the number of bytes needed to spell a token,
+   including preceding whitespace.  */
+#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
+		               SPELL_NONE ? token->val.name.len: 0))
 
 #endif
 
@@ -2759,10 +2758,12 @@ parse_string2 (pfile, list, name, termin
     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
 			 : "null character preserved"));
 }
+
+/* The character TYPE helps us distinguish comment types: '*' = C
+   style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
+   the stored comment includes the comment start and any terminator.  */
 
-/* The character C helps us distinguish comment types: '*' = C style,
-   '-' = Chill-style and '/' = C++ style.  For code simplicity, the
-   stored comment includes any C-style comment terminator.  */
+#define COMMENT_START_LEN 2
 static void
 save_comment (list, from, len, tok_no, type)
      cpp_toklist *list;
@@ -2772,6 +2773,9 @@ save_comment (list, from, len, tok_no, t
      unsigned int type;
 {
   cpp_token *comment;
+  unsigned char *buffer;
+  
+  len += COMMENT_START_LEN;
 
   if (list->comments_used == list->comments_cap)
     expand_comment_space (list);
@@ -2780,12 +2784,24 @@ save_comment (list, from, len, tok_no, t
     expand_name_space (list, len);
 
   comment = &list->comments[list->comments_used++];
-  comment->type = type;
+  comment->type = CPP_COMMENT;
   comment->aux = tok_no;
   comment->val.name.len = len;
   comment->val.name.offset = list->name_used;
+
+  buffer = list->namebuf + list->name_used;
+  if (type == '*')
+    {
+      *buffer++ = '/';
+      *buffer++ = '*';
+    }
+  else
+    {
+      *buffer++ = type;
+      *buffer++ = type;
+    }
 
-  memcpy (list->namebuf + list->name_used, from, len);
+  memcpy (buffer, from, len - COMMENT_START_LEN);
   list->name_used += len;
 }
 
@@ -2956,8 +2972,7 @@ _cpp_lex_line (pfile, list)
 					     "multi-line comment");
 		      if (!CPP_OPTION (pfile, discard_comments))
 			save_comment (list, cur, buffer->cur - cur,
-				      cur_token - 1 - list->tokens, c == '/'
-				      ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
+				      cur_token - 1 - list->tokens, c);
 		      cur = buffer->cur;
 
 		      /* Back-up to first '-' or '/'.  */
@@ -2988,7 +3003,7 @@ _cpp_lex_line (pfile, list)
 				 "comment end '*/' split across lines");
 		  if (!CPP_OPTION (pfile, discard_comments))
 		    save_comment (list, cur, buffer->cur - cur,
-				 cur_token - 1 - list->tokens, CPP_C_COMMENT);
+				 cur_token - 1 - list->tokens, c);
 		  cur = buffer->cur;
 
 		  cur_token -= 2;
@@ -3277,79 +3293,6 @@ _cpp_lex_line (pfile, list)
 			 "invalid preprocessing directive");
 }
 
-/* Token spelling functions.  Used for output of a preprocessed file,
-   stringizing and token pasting.  They all assume sufficient buffer
-   is allocated, and return exactly how much they used.  */
-
-/* Needs buffer of 3 + len.  */
-unsigned int
-spell_string (buffer, list, token)
-     unsigned char *buffer;
-     cpp_toklist *list;
-     cpp_token *token;
-{
-  unsigned char c, *orig_buff = buffer;
-  size_t len;
-
-  if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
-    *buffer++ = 'L';
-  c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
-  *buffer++ = c;
-
-  len = token->val.name.len;
-  memcpy (buffer, list->namebuf + token->val.name.offset, len);
-  buffer += len;
-  *buffer++ = c;
-  return buffer - orig_buff;
-}
-
-/* Needs buffer of len + 2.  */
-unsigned int
-spell_comment (buffer, list, token)
-     unsigned char *buffer;
-     cpp_toklist *list;
-     cpp_token *token;
-{
-  size_t len;
-
-  if (token->type == CPP_C_COMMENT)
-    {
-      *buffer++ = '/';
-      *buffer++ = '*';
-    }
-  else if (token->type == CPP_CPP_COMMENT)
-    {
-      *buffer++ = '/';
-      *buffer++ = '/';
-    }
-  else 
-    {
-      *buffer++ = '-';
-      *buffer++ = '-';
-    }
-
-  len = token->val.name.len;
-  memcpy (buffer, list->namebuf + token->val.name.offset, len);
-
-  return len + 2;
-}
-
-/* Needs buffer of len.  */
-unsigned int
-spell_name (buffer, list, token)
-     unsigned char *buffer;
-     cpp_toklist *list;
-     cpp_token *token;
-{
-  size_t len;
-
-  len = token->val.name.len;
-  memcpy (buffer, list->namebuf + token->val.name.offset, len);
-  buffer += len;
-
-  return len;
-}
-
 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
    already contain the enough space to hold the token's spelling.  If
    WHITESPACE is true, and the token was preceded by whitespace,
@@ -3372,7 +3315,7 @@ spell_token (pfile, token, list, buffer,
 
   switch (token_spellings[token->type].type)
     {
-    case SPELL_TEXT:
+    case SPELL_OPERATOR:
       {
 	const unsigned char *spelling;
 	unsigned char c;
@@ -3386,13 +3329,27 @@ spell_token (pfile, token, list, buffer,
 	  *buffer++ = c;
       }
       break;
+
+    case SPELL_IDENT:
+      memcpy (buffer, list->namebuf + token->val.name.offset,
+	      token->val.name.len);
+      buffer += token->val.name.len;
+      break;
 
-    case SPELL_HANDLER:
+    case SPELL_STRING:
       {
-	speller s;
+	unsigned char c;
 
-	s = (speller) token_spellings[token->type].speller;
-	buffer += s (buffer, list, token);
+	if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
+	  *buffer++ = 'L';
+	c = '\'';
+	if (token->type == CPP_STRING || token->type == CPP_WSTRING)
+	  c = '"';
+	*buffer++ = c;
+	memcpy (buffer, list->namebuf + token->val.name.offset,
+		token->val.name.len);
+	buffer += token->val.name.len;
+	*buffer++ = c;
       }
       break;
 
@@ -3447,29 +3404,30 @@ _cpp_output_list (pfile, list)
      cpp_reader *pfile;
      cpp_toklist *list;
 {
-  unsigned int comment_no = 0;
-  cpp_token *token, *comment_token = 0;
+  cpp_token *token, *comment, *comment_before = 0;
 
   if (list->comments_used > 0)
-    comment_token = list->tokens + list->comments[0].aux;
+    {
+      comment = &list->comments[0];
+      comment_before = &list->tokens[comment->aux];
+    }
 
   token = &list->tokens[0];
   do
     {
       /* Output comments if -C.  */
-      if (token == comment_token)
+      while (token == comment_before)
 	{
-	  cpp_token *comment = &list->comments[comment_no];
-	  do
-	    {
-	      CPP_RESERVE (pfile, TOKEN_LEN (comment));
-	      pfile->limit += spell_comment (pfile->limit, list, comment);
-	      comment_no++, comment++;
-	      if (comment_no == list->comments_used)
-		break;
-	      comment_token = comment->aux + list->tokens;
-	    }
-	  while (comment_token == token);
+	  /* Make space for the comment, and copy it out.  */
+	  CPP_RESERVE (pfile, TOKEN_LEN (comment));
+	  pfile->limit = spell_token (pfile, comment, list, pfile->limit, 0);
+
+	  /* Stop if no comments left, or no more comments appear
+             before the current token.  */
+	  comment++;
+	  if (comment == list->comments + list->comments_used)
+	    break;
+	  comment_before = &list->tokens[comment->aux];
 	}
 
       CPP_RESERVE (pfile, TOKEN_LEN (token));
Index: cpplib.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplib.h,v
retrieving revision 1.90
diff -u -p -r1.90 cpplib.h
--- cpplib.h	2000/05/08 22:22:48	1.90
+++ cpplib.h	2000/05/09 09:29:20
@@ -109,33 +109,31 @@ typedef struct cpp_name cpp_name;
   T(CPP_MAX,		">?")			\
   C(CPP_OTHER,		0)	/* stray punctuation */ \
 \
-  H(CPP_NAME,		spell_name)	/* word */	\
-  N(CPP_INT,		0)		/* 23 */	\
-  N(CPP_FLOAT,		0)		/* 3.14159 */	\
-  H(CPP_NUMBER,		spell_name)	/* 34_be+ta  */	\
-  H(CPP_CHAR,		spell_string)	/* 'char' */	\
-  H(CPP_WCHAR,		spell_string)	/* L'char' */	\
-  H(CPP_STRING,		spell_string)	/* "string" */	\
-  H(CPP_WSTRING,	spell_string)	/* L"string" */	\
+  I(CPP_NAME,		0)	/* word */	\
+  N(CPP_INT,		0)	/* 23 */	\
+  N(CPP_FLOAT,		0)	/* 3.14159 */	\
+  I(CPP_NUMBER,		0)	/* 34_be+ta  */	\
+  S(CPP_CHAR,		0)	/* 'char' */	\
+  S(CPP_WCHAR,		0)	/* L'char' */	\
+  S(CPP_STRING,		0)	/* "string" */	\
+  S(CPP_WSTRING,	0)	/* L"string" */	\
 \
-  H(CPP_C_COMMENT,	spell_comment)	/* Only if output comments.  */ \
-  H(CPP_CPP_COMMENT,	spell_comment)	/* Only if output comments.  */ \
-  H(CPP_CHILL_COMMENT,	spell_comment)	/* Only if output comments.  */ \
-  N(CPP_MACRO_ARG,      0)              /* Macro argument.  */          \
-  N(CPP_SUBLIST,        0)	        /* Sublist.  */                 \
-  T(CPP_VSPACE,		"\n")		/* End of line.  */		\
-  N(CPP_EOF,		0)		/* End of file.  */		\
-  N(CPP_HEADER_NAME,	0)		/* <stdio.h> in #include */	\
-  N(CPP_ASSERTION,	0)		/* (...) in #assert */		\
+  I(CPP_COMMENT,	0)	/* Only if output comments.  */ \
+  N(CPP_MACRO_ARG,      0)	/* Macro argument.  */          \
+  N(CPP_SUBLIST,        0)	/* Sublist.  */                 \
+  T(CPP_VSPACE,		"\n")	/* End of line.  */		\
+  N(CPP_EOF,		0)	/* End of file.  */		\
+  N(CPP_HEADER_NAME,	0)	/* <stdio.h> in #include */	\
+  N(CPP_ASSERTION,	0)	/* (...) in #assert */		\
 \
   /* Obsolete - will be removed when no code uses them still.  */	\
-  H(CPP_COMMENT,	0)		/* Only if output comments.  */ \
-  N(CPP_HSPACE,		0)		/* Horizontal white space.  */	\
-  N(CPP_DIRECTIVE,	0)		/* #define and the like */	\
-  N(CPP_MACRO,		0)		/* Like a NAME, but expanded.  */
+  N(CPP_HSPACE,		0)	/* Horizontal white space.  */	\
+  N(CPP_DIRECTIVE,	0)	/* #define and the like */	\
+  N(CPP_MACRO,		0)	/* Like a NAME, but expanded.  */
 
 #define T(e, s) e,
-#define H(e, s) e,
+#define I(e, s) e,
+#define S(e, s) e,
 #define C(e, s) e,
 #define N(e, s) e,
 enum cpp_ttype
@@ -144,7 +142,8 @@ enum cpp_ttype
   N_TTYPES
 };
 #undef T
-#undef H
+#undef I
+#undef S
 #undef C
 #undef N
Follow-Ups:
- Re: cpplib: lexer cleanup
  - From: Neil Booth
- Re: cpplib: lexer cleanup
  - From: Zack Weinberg
References:
- cpplib: lexer cleanup
  - From: Neil Booth
- Re: cpplib: lexer cleanup
  - From: Zack Weinberg
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]