This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: cpplib: lexer cleanup
Zack Weinberg wrote:-
> Too many changes in the same patch. Could you split it into:
>
> - the pure bug fixes
> - folding all the spelling functions
> - the changes to comment handling
> - the assertion changes
>
> and send each separately, please?
And this is the patch that folds the spelling functions into
spell_token.
If this doesn't handle comments the correct way, Zack, let me know.
I've written it so that feeding the commented output of cpp -C into
cpp again, while maintaining the same -traditional setting, will
reproduce an identical sequence of tokens and whitespace flags the
second time.
This is my understanding of what -C means and what it needs to do.
Like before, comments do not appear in the token stream proper.
Neil.
* cpplex.c (spell_token): New function.
(spell_string, spell_comment, spell_name): Fold into
spell_token.
(I, S): Add macros.
(E, H): Remove macros.
(save_comment): Save comment opening characters too.
(_cpp_lex_file): Update to use spell_token. Tidy up comment
handling.
* cpplib.h (I, S): Add macros.
(E, H): Remove macros.
(TTYPE_TABLE): Update entries for new speller.
(SYNTAX_ASSERT): Remove.
Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.38
diff -u -p -r1.38 cpplex.c
--- cpplex.c 2000/05/08 22:22:48 1.38
+++ cpplex.c 2000/05/09 09:29:19
@@ -84,12 +84,6 @@ void _cpp_lex_line PARAMS ((cpp_reader *
static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
-unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
- cpp_token *token));
-unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
- cpp_token *token));
-unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
- cpp_token *token));
static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
cpp_toklist *, unsigned char *,
int));
@@ -120,13 +114,17 @@ typedef unsigned int (* speller) PARAMS
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
#define PREV_TOKEN_TYPE (cur_token[-1].type)
-#define SPELL_TEXT 0
-#define SPELL_HANDLER 1
-#define SPELL_CHAR 2
-#define SPELL_NONE 3
-
-#define T(e, s) {SPELL_TEXT, s},
-#define H(e, s) {SPELL_HANDLER, (PTR) s},
+/* Order here matters. Those beyond SPELL_NONE store their spelling
+ in the token list, and its length in token->val.name.len. */
+#define SPELL_OPERATOR 0
+#define SPELL_CHAR 1
+#define SPELL_NONE 2
+#define SPELL_IDENT 3
+#define SPELL_STRING 4
+
+#define T(e, s) {SPELL_OPERATOR, s},
+#define I(e, s) {SPELL_IDENT, s},
+#define S(e, s) {SPELL_STRING, s},
#define C(e, s) {SPELL_CHAR, s},
#define N(e, s) {SPELL_NONE, s},
@@ -137,7 +135,8 @@ static const struct token_spelling
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
#undef T
-#undef H
+#undef I
+#undef S
#undef C
#undef N
@@ -147,10 +146,10 @@ static const struct token_spelling
#define BACKUP_DIGRAPH(ttype) do { \
BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
-/* If there is this many bytes in a buffer, you have enough room to
- spell the token, including preceding whitespace. */
-#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type == \
- SPELL_HANDLER ? token->val.name.len: 0))
+/* An upper bound on the number of bytes needed to spell a token,
+ including preceding whitespace. */
+#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
+ SPELL_NONE ? token->val.name.len: 0))
#endif
@@ -2759,10 +2758,12 @@ parse_string2 (pfile, list, name, termin
cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
: "null character preserved"));
}
+
+/* The character TYPE helps us distinguish comment types: '*' = C
+ style, '-' = Chill-style and '/' = C++ style. For code simplicity,
+ the stored comment includes the comment start and any terminator. */
-/* The character C helps us distinguish comment types: '*' = C style,
- '-' = Chill-style and '/' = C++ style. For code simplicity, the
- stored comment includes any C-style comment terminator. */
+#define COMMENT_START_LEN 2
static void
save_comment (list, from, len, tok_no, type)
cpp_toklist *list;
@@ -2772,6 +2773,9 @@ save_comment (list, from, len, tok_no, t
unsigned int type;
{
cpp_token *comment;
+ unsigned char *buffer;
+
+ len += COMMENT_START_LEN;
if (list->comments_used == list->comments_cap)
expand_comment_space (list);
@@ -2780,12 +2784,24 @@ save_comment (list, from, len, tok_no, t
expand_name_space (list, len);
comment = &list->comments[list->comments_used++];
- comment->type = type;
+ comment->type = CPP_COMMENT;
comment->aux = tok_no;
comment->val.name.len = len;
comment->val.name.offset = list->name_used;
+
+ buffer = list->namebuf + list->name_used;
+ if (type == '*')
+ {
+ *buffer++ = '/';
+ *buffer++ = '*';
+ }
+ else
+ {
+ *buffer++ = type;
+ *buffer++ = type;
+ }
- memcpy (list->namebuf + list->name_used, from, len);
+ memcpy (buffer, from, len - COMMENT_START_LEN);
list->name_used += len;
}
@@ -2956,8 +2972,7 @@ _cpp_lex_line (pfile, list)
"multi-line comment");
if (!CPP_OPTION (pfile, discard_comments))
save_comment (list, cur, buffer->cur - cur,
- cur_token - 1 - list->tokens, c == '/'
- ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
+ cur_token - 1 - list->tokens, c);
cur = buffer->cur;
/* Back-up to first '-' or '/'. */
@@ -2988,7 +3003,7 @@ _cpp_lex_line (pfile, list)
"comment end '*/' split across lines");
if (!CPP_OPTION (pfile, discard_comments))
save_comment (list, cur, buffer->cur - cur,
- cur_token - 1 - list->tokens, CPP_C_COMMENT);
+ cur_token - 1 - list->tokens, c);
cur = buffer->cur;
cur_token -= 2;
@@ -3277,79 +3293,6 @@ _cpp_lex_line (pfile, list)
"invalid preprocessing directive");
}
-/* Token spelling functions. Used for output of a preprocessed file,
- stringizing and token pasting. They all assume sufficient buffer
- is allocated, and return exactly how much they used. */
-
-/* Needs buffer of 3 + len. */
-unsigned int
-spell_string (buffer, list, token)
- unsigned char *buffer;
- cpp_toklist *list;
- cpp_token *token;
-{
- unsigned char c, *orig_buff = buffer;
- size_t len;
-
- if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
- *buffer++ = 'L';
- c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
- *buffer++ = c;
-
- len = token->val.name.len;
- memcpy (buffer, list->namebuf + token->val.name.offset, len);
- buffer += len;
- *buffer++ = c;
- return buffer - orig_buff;
-}
-
-/* Needs buffer of len + 2. */
-unsigned int
-spell_comment (buffer, list, token)
- unsigned char *buffer;
- cpp_toklist *list;
- cpp_token *token;
-{
- size_t len;
-
- if (token->type == CPP_C_COMMENT)
- {
- *buffer++ = '/';
- *buffer++ = '*';
- }
- else if (token->type == CPP_CPP_COMMENT)
- {
- *buffer++ = '/';
- *buffer++ = '/';
- }
- else
- {
- *buffer++ = '-';
- *buffer++ = '-';
- }
-
- len = token->val.name.len;
- memcpy (buffer, list->namebuf + token->val.name.offset, len);
-
- return len + 2;
-}
-
-/* Needs buffer of len. */
-unsigned int
-spell_name (buffer, list, token)
- unsigned char *buffer;
- cpp_toklist *list;
- cpp_token *token;
-{
- size_t len;
-
- len = token->val.name.len;
- memcpy (buffer, list->namebuf + token->val.name.offset, len);
- buffer += len;
-
- return len;
-}
-
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
already contain the enough space to hold the token's spelling. If
WHITESPACE is true, and the token was preceded by whitespace,
@@ -3372,7 +3315,7 @@ spell_token (pfile, token, list, buffer,
switch (token_spellings[token->type].type)
{
- case SPELL_TEXT:
+ case SPELL_OPERATOR:
{
const unsigned char *spelling;
unsigned char c;
@@ -3386,13 +3329,27 @@ spell_token (pfile, token, list, buffer,
*buffer++ = c;
}
break;
+
+ case SPELL_IDENT:
+ memcpy (buffer, list->namebuf + token->val.name.offset,
+ token->val.name.len);
+ buffer += token->val.name.len;
+ break;
- case SPELL_HANDLER:
+ case SPELL_STRING:
{
- speller s;
+ unsigned char c;
- s = (speller) token_spellings[token->type].speller;
- buffer += s (buffer, list, token);
+ if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
+ *buffer++ = 'L';
+ c = '\'';
+ if (token->type == CPP_STRING || token->type == CPP_WSTRING)
+ c = '"';
+ *buffer++ = c;
+ memcpy (buffer, list->namebuf + token->val.name.offset,
+ token->val.name.len);
+ buffer += token->val.name.len;
+ *buffer++ = c;
}
break;
@@ -3447,29 +3404,30 @@ _cpp_output_list (pfile, list)
cpp_reader *pfile;
cpp_toklist *list;
{
- unsigned int comment_no = 0;
- cpp_token *token, *comment_token = 0;
+ cpp_token *token, *comment, *comment_before = 0;
if (list->comments_used > 0)
- comment_token = list->tokens + list->comments[0].aux;
+ {
+ comment = &list->comments[0];
+ comment_before = &list->tokens[comment->aux];
+ }
token = &list->tokens[0];
do
{
/* Output comments if -C. */
- if (token == comment_token)
+ while (token == comment_before)
{
- cpp_token *comment = &list->comments[comment_no];
- do
- {
- CPP_RESERVE (pfile, TOKEN_LEN (comment));
- pfile->limit += spell_comment (pfile->limit, list, comment);
- comment_no++, comment++;
- if (comment_no == list->comments_used)
- break;
- comment_token = comment->aux + list->tokens;
- }
- while (comment_token == token);
+ /* Make space for the comment, and copy it out. */
+ CPP_RESERVE (pfile, TOKEN_LEN (comment));
+ pfile->limit = spell_token (pfile, comment, list, pfile->limit, 0);
+
+ /* Stop if no comments left, or no more comments appear
+ before the current token. */
+ comment++;
+ if (comment == list->comments + list->comments_used)
+ break;
+ comment_before = &list->tokens[comment->aux];
}
CPP_RESERVE (pfile, TOKEN_LEN (token));
Index: cpplib.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplib.h,v
retrieving revision 1.90
diff -u -p -r1.90 cpplib.h
--- cpplib.h 2000/05/08 22:22:48 1.90
+++ cpplib.h 2000/05/09 09:29:20
@@ -109,33 +109,31 @@ typedef struct cpp_name cpp_name;
T(CPP_MAX, ">?") \
C(CPP_OTHER, 0) /* stray punctuation */ \
\
- H(CPP_NAME, spell_name) /* word */ \
- N(CPP_INT, 0) /* 23 */ \
- N(CPP_FLOAT, 0) /* 3.14159 */ \
- H(CPP_NUMBER, spell_name) /* 34_be+ta */ \
- H(CPP_CHAR, spell_string) /* 'char' */ \
- H(CPP_WCHAR, spell_string) /* L'char' */ \
- H(CPP_STRING, spell_string) /* "string" */ \
- H(CPP_WSTRING, spell_string) /* L"string" */ \
+ I(CPP_NAME, 0) /* word */ \
+ N(CPP_INT, 0) /* 23 */ \
+ N(CPP_FLOAT, 0) /* 3.14159 */ \
+ I(CPP_NUMBER, 0) /* 34_be+ta */ \
+ S(CPP_CHAR, 0) /* 'char' */ \
+ S(CPP_WCHAR, 0) /* L'char' */ \
+ S(CPP_STRING, 0) /* "string" */ \
+ S(CPP_WSTRING, 0) /* L"string" */ \
\
- H(CPP_C_COMMENT, spell_comment) /* Only if output comments. */ \
- H(CPP_CPP_COMMENT, spell_comment) /* Only if output comments. */ \
- H(CPP_CHILL_COMMENT, spell_comment) /* Only if output comments. */ \
- N(CPP_MACRO_ARG, 0) /* Macro argument. */ \
- N(CPP_SUBLIST, 0) /* Sublist. */ \
- T(CPP_VSPACE, "\n") /* End of line. */ \
- N(CPP_EOF, 0) /* End of file. */ \
- N(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \
- N(CPP_ASSERTION, 0) /* (...) in #assert */ \
+ I(CPP_COMMENT, 0) /* Only if output comments. */ \
+ N(CPP_MACRO_ARG, 0) /* Macro argument. */ \
+ N(CPP_SUBLIST, 0) /* Sublist. */ \
+ T(CPP_VSPACE, "\n") /* End of line. */ \
+ N(CPP_EOF, 0) /* End of file. */ \
+ N(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \
+ N(CPP_ASSERTION, 0) /* (...) in #assert */ \
\
/* Obsolete - will be removed when no code uses them still. */ \
- H(CPP_COMMENT, 0) /* Only if output comments. */ \
- N(CPP_HSPACE, 0) /* Horizontal white space. */ \
- N(CPP_DIRECTIVE, 0) /* #define and the like */ \
- N(CPP_MACRO, 0) /* Like a NAME, but expanded. */
+ N(CPP_HSPACE, 0) /* Horizontal white space. */ \
+ N(CPP_DIRECTIVE, 0) /* #define and the like */ \
+ N(CPP_MACRO, 0) /* Like a NAME, but expanded. */
#define T(e, s) e,
-#define H(e, s) e,
+#define I(e, s) e,
+#define S(e, s) e,
#define C(e, s) e,
#define N(e, s) e,
enum cpp_ttype
@@ -144,7 +142,8 @@ enum cpp_ttype
N_TTYPES
};
#undef T
-#undef H
+#undef I
+#undef S
#undef C
#undef N