]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
Daily bump.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194
ZW
27o Get use of digraphs in sync with the standard reqd on the command line.
28o -dM and with _cpp_dump_list: too many \n output.
29o Put a printer object in cpp_reader?
30o Check line numbers assigned to all errors.
31o Replace strncmp with memcmp almost everywhere.
32o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
041c3194
ZW
33o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
34 give it a run when we've got some code.
041c3194
ZW
35o Distinguish integers, floats, and 'other' pp-numbers.
36o Store ints and char constants as binary values.
37o New command-line assertion syntax.
041c3194
ZW
38o Work towards functions in cpperror.c taking a message level parameter.
39 If we do this, merge the common code of do_warning and do_error.
40o Comment all functions, and describe macro expansion algorithm.
41o Move as much out of header files as possible.
42o Remove single quote pairs `', and some '', from diagnostics.
43o Correct pastability test for CPP_NAME and CPP_NUMBER.
44
45*/
46
45b966db
ZW
47#include "config.h"
48#include "system.h"
49#include "intl.h"
50#include "cpplib.h"
51#include "cpphash.h"
041c3194 52#include "symcat.h"
45b966db 53
041c3194
ZW
54#define auto_expand_name_space(list) \
55 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
f2d5f0cc
ZW
56static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
57 size_t, FILE *));
041c3194 58static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
1368ee70 59 unsigned int));
041c3194 60static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 61 unsigned int));
f2d5f0cc 62
041c3194
ZW
63static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
64static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
65 unsigned char *));
66static const unsigned char *backslash_start PARAMS ((cpp_reader *,
67 const unsigned char *));
041c3194
ZW
68static int skip_block_comment PARAMS ((cpp_reader *));
69static int skip_line_comment PARAMS ((cpp_reader *));
b8f41010 70static void skip_whitespace PARAMS ((cpp_reader *, int));
bfb9dc7f
ZW
71static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
72 const U_CHAR *, const U_CHAR *));
73static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
041c3194
ZW
74static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
75 unsigned int));
b8f41010 76static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
77static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
78 const unsigned char *,
ad265aa4 79 unsigned int, unsigned int));
041c3194
ZW
80static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
81static int lex_next PARAMS ((cpp_reader *, int));
82static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
83 const cpp_token *));
b8f41010 84
041c3194
ZW
85static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
86static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 87static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194
ZW
88 unsigned char *));
89static void output_token PARAMS ((cpp_reader *, const cpp_token *,
90 const cpp_token *));
b8f41010
NB
91typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
92 cpp_token *));
041c3194
ZW
93static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
94 unsigned int));
95static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
96static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
97 const cpp_token *));
98static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
99static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
100 const cpp_token *));
101static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
102 const cpp_token *, int *));
103static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
104static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
105static cpp_token *get_temp_token PARAMS ((cpp_reader *));
106static void release_temp_tokens PARAMS ((cpp_reader *));
107static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
108static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 109
bfb9dc7f
ZW
110#define INIT_TOKEN_STR(list, token) \
111 do {(token)->val.str.len = 0; \
112 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
d1d9a6bd 113 } while (0)
b8f41010 114
041c3194
ZW
115#define VALID_SIGN(c, prevc) \
116 (((c) == '+' || (c) == '-') && \
117 ((prevc) == 'e' || (prevc) == 'E' \
118 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
119
b8f41010
NB
/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character that has just been read and CUR points at the character
   following it; if that character is the other member of the CR/LF
   pair it is consumed too.  The line number, line base and column
   adjustment of the current buffer are then reset for the new line.
   Assumes next character, if any, is in buffer.  Expects a variable
   named `pfile' to be in scope at the point of use.  Every macro
   argument is parenthesized so that an expression may be passed
   for C.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
b8f41010 131
041c3194 132#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
b8f41010
NB
133#define PREV_TOKEN_TYPE (cur_token[-1].type)
134
f617b8e2
NB
135#define PUSH_TOKEN(ttype) cur_token++->type = ttype
136#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
137#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
138#define BACKUP_DIGRAPH(ttype) do { \
139 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
140
141/* An upper bound on the number of bytes needed to spell a token,
142 including preceding whitespace. */
bfb9dc7f
ZW
143#define TOKEN_SPELL(token) token_spellings[(token)->type].type
144#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
145 ? (token)->val.str.len \
146 : (TOKEN_SPELL(token) == SPELL_IDENT \
147 ? (token)->val.node->length \
148 : 0)))
f617b8e2 149
f617b8e2 150#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
151#define I(e, s) {SPELL_IDENT, s},
152#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
153#define C(e, s) {SPELL_CHAR, s},
154#define N(e, s) {SPELL_NONE, s},
b8f41010 155
041c3194
ZW
156const struct token_spelling
157token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
158
159#undef T
160#undef I
161#undef S
162#undef C
163#undef N
164
165/* For debugging: the internal names of the tokens. */
cf00a885
ZW
166#define T(e, s) U STRINGX(e) + 4,
167#define I(e, s) U STRINGX(e) + 4,
168#define S(e, s) U STRINGX(e) + 4,
169#define C(e, s) U STRINGX(e) + 4,
170#define N(e, s) U STRINGX(e) + 4,
041c3194 171
cf00a885 172const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
b8f41010
NB
173
174#undef T
5d7ee2fa
NB
175#undef I
176#undef S
b8f41010
NB
177#undef C
178#undef N
b8f41010 179
041c3194
ZW
180/* The following table is used by trigraph_ok/trigraph_replace. If we
181 have designated initializers, it can be constant data; otherwise,
182 it is set up at runtime by _cpp_init_input_buffer. */
183
184#if (GCC_VERSION >= 2007)
185#define init_trigraph_map() /* nothing */
186#define TRIGRAPH_MAP \
187__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
188#define END };
189#define s(p, v) [p] = v,
190#else
191#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
192 static void init_trigraph_map PARAMS ((void)) { \
193 unsigned char *x = trigraph_map;
194#define END }
195#define s(p, v) x[p] = v;
196#endif
197
198TRIGRAPH_MAP
199 s('=', '#') s(')', ']') s('!', '|')
200 s('(', '[') s('\'', '^') s('>', '}')
201 s('/', '\\') s('<', '{') s('-', '~')
202END
203
204#undef TRIGRAPH_MAP
205#undef END
206#undef s
207
45b966db
ZW
208/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
209
210void
211_cpp_grow_token_buffer (pfile, n)
212 cpp_reader *pfile;
213 long n;
214{
215 long old_written = CPP_WRITTEN (pfile);
216 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
217 pfile->token_buffer = (U_CHAR *)
218 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
219 CPP_SET_WRITTEN (pfile, old_written);
220}
221
f2d5f0cc
ZW
222/* Deal with the annoying semantics of fwrite. */
223static void
224safe_fwrite (pfile, buf, len, fp)
225 cpp_reader *pfile;
226 const U_CHAR *buf;
227 size_t len;
228 FILE *fp;
229{
230 size_t count;
45b966db 231
f2d5f0cc
ZW
232 while (len)
233 {
234 count = fwrite (buf, 1, len, fp);
235 if (count == 0)
236 goto error;
237 len -= count;
238 buf += count;
239 }
240 return;
241
242 error:
243 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
244}
245
246/* Notify the compiler proper that the current line number has jumped,
247 or the current file name has changed. */
248
249static void
1368ee70 250output_line_command (pfile, print, line)
45b966db 251 cpp_reader *pfile;
f2d5f0cc 252 cpp_printer *print;
1368ee70 253 unsigned int line;
45b966db 254{
041c3194 255 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc
ZW
256 enum { same = 0, enter, leave, rname } change;
257 static const char * const codes[] = { "", " 1", " 2", "" };
258
041c3194
ZW
259 if (line == 0)
260 return;
261
262 /* End the previous line of text. */
263 if (pfile->need_newline)
264 putc ('\n', print->outf);
265 pfile->need_newline = 0;
266
f2d5f0cc
ZW
267 if (CPP_OPTION (pfile, no_line_commands))
268 return;
269
041c3194
ZW
270 /* If ip is null, we've been called from cpp_finish, and they just
271 needed the final flush and trailing newline. */
272 if (!ip)
273 return;
274
fb753f88 275 if (pfile->include_depth == print->last_id)
54bef41d 276 {
fb753f88
JJ
277 /* Determine whether the current filename has changed, and if so,
278 how. 'nominal_fname' values are unique, so they can be compared
279 by comparing pointers. */
280 if (ip->nominal_fname == print->last_fname)
281 change = same;
282 else
0e500c78 283 change = rname;
fb753f88
JJ
284 }
285 else
286 {
287 if (pfile->include_depth > print->last_id)
288 change = enter;
f2d5f0cc 289 else
fb753f88
JJ
290 change = leave;
291 print->last_id = pfile->include_depth;
45b966db 292 }
fb753f88
JJ
293 print->last_fname = ip->nominal_fname;
294
f2d5f0cc
ZW
295 /* If the current file has not changed, we can output a few newlines
296 instead if we want to increase the line number by a small amount.
297 We cannot do this if print->lineno is zero, because that means we
298 haven't output any line commands yet. (The very first line
299 command output is a `same_file' command.) */
041c3194 300 if (change == same && print->lineno > 0
f2d5f0cc 301 && line >= print->lineno && line < print->lineno + 8)
45b966db 302 {
f2d5f0cc 303 while (line > print->lineno)
45b966db 304 {
f2d5f0cc
ZW
305 putc ('\n', print->outf);
306 print->lineno++;
45b966db 307 }
f2d5f0cc 308 return;
45b966db 309 }
f2d5f0cc
ZW
310
311#ifndef NO_IMPLICIT_EXTERN_C
312 if (CPP_OPTION (pfile, cplusplus))
313 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
314 codes[change],
c31a6508
ZW
315 ip->inc->sysp ? " 3" : "",
316 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
317 else
318#endif
319 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
320 codes[change],
c31a6508 321 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
322 print->lineno = line;
323}
324
325/* Write the contents of the token_buffer to the output stream, and
326 clear the token_buffer. Also handles generating line commands and
327 keeping track of file transitions. */
328
329void
041c3194 330cpp_output_tokens (pfile, print, line)
f2d5f0cc
ZW
331 cpp_reader *pfile;
332 cpp_printer *print;
041c3194 333 unsigned int line;
f2d5f0cc 334{
f6fab919
ZW
335 if (CPP_WRITTEN (pfile) - print->written)
336 {
f6fab919
ZW
337 safe_fwrite (pfile, pfile->token_buffer,
338 CPP_WRITTEN (pfile) - print->written, print->outf);
041c3194
ZW
339 pfile->need_newline = 1;
340 if (print->lineno)
341 print->lineno++;
45b966db 342
041c3194 343 CPP_SET_WRITTEN (pfile, print->written);
f2d5f0cc 344 }
041c3194 345 output_line_command (pfile, print, line);
45b966db
ZW
346}
347
c56c2073 348/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
349
350void
351cpp_scan_buffer_nooutput (pfile)
352 cpp_reader *pfile;
353{
f2d5f0cc 354 unsigned int old_written = CPP_WRITTEN (pfile);
041c3194
ZW
355 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
356
f2d5f0cc
ZW
357 for (;;)
358 {
041c3194
ZW
359 /* In no-output mode, we can ignore everything but directives. */
360 const cpp_token *token = cpp_get_token (pfile);
361 if (token->type == CPP_EOF)
362 {
363 cpp_pop_buffer (pfile);
364 if (CPP_BUFFER (pfile) == stop)
365 break;
366 }
367 _cpp_skip_rest_of_line (pfile);
f2d5f0cc
ZW
368 }
369 CPP_SET_WRITTEN (pfile, old_written);
370}
371
c56c2073 372/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
373
374void
375cpp_scan_buffer (pfile, print)
376 cpp_reader *pfile;
377 cpp_printer *print;
378{
c56c2073 379 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 380 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
381
382 for (;;)
383 {
384 token = cpp_get_token (pfile);
041c3194 385 if (token->type == CPP_EOF)
f2d5f0cc 386 {
041c3194
ZW
387 cpp_pop_buffer (pfile);
388 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 389 return;
041c3194
ZW
390 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
391 prev = 0;
392 continue;
393 }
394
395 if (token->flags & BOL)
396 {
397 cpp_output_tokens (pfile, print, pfile->token_list.line);
398 prev = 0;
f2d5f0cc 399 }
041c3194
ZW
400
401 output_token (pfile, token, prev);
402 prev = token;
f2d5f0cc
ZW
403 }
404}
405
041c3194
ZW
406/* Helper routine used by parse_include, which can't see spell_token.
407 Reinterpret the current line as an h-char-sequence (< ... >); we are
408 looking at the first token after the <. */
409const cpp_token *
410_cpp_glue_header_name (pfile)
45b966db
ZW
411 cpp_reader *pfile;
412{
041c3194
ZW
413 unsigned int written = CPP_WRITTEN (pfile);
414 const cpp_token *t;
415 cpp_token *hdr;
416 U_CHAR *buf;
417 size_t len;
418
419 for (;;)
420 {
421 t = cpp_get_token (pfile);
422 if (t->type == CPP_GREATER || t->type == CPP_EOF)
423 break;
424
425 CPP_RESERVE (pfile, TOKEN_LEN (t));
426 if (t->flags & PREV_WHITE)
427 CPP_PUTC_Q (pfile, ' ');
428 pfile->limit = spell_token (pfile, t, pfile->limit);
429 }
430
431 if (t->type == CPP_EOF)
432 cpp_error (pfile, "missing terminating > character");
45b966db 433
041c3194
ZW
434 len = CPP_WRITTEN (pfile) - written;
435 buf = xmalloc (len);
436 memcpy (buf, pfile->token_buffer + written, len);
437 CPP_SET_WRITTEN (pfile, written);
438
439 hdr = get_temp_token (pfile);
440 hdr->type = CPP_HEADER_NAME;
441 hdr->flags = 0;
bfb9dc7f
ZW
442 hdr->val.str.text = buf;
443 hdr->val.str.len = len;
041c3194 444 return hdr;
45b966db
ZW
445}
446
1368ee70
ZW
447/* Token-buffer helper functions. */
448
d1d9a6bd
NB
449/* Expand a token list's string space. It is *vital* that
450 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
451void
452_cpp_expand_name_space (list, len)
1368ee70 453 cpp_toklist *list;
c5a04734
ZW
454 unsigned int len;
455{
f617b8e2 456 const U_CHAR *old_namebuf;
f617b8e2
NB
457
458 old_namebuf = list->namebuf;
c5a04734
ZW
459 list->name_cap += len;
460 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
461
462 /* Fix up token text pointers. */
79f50f2a 463 if (list->namebuf != old_namebuf)
f617b8e2
NB
464 {
465 unsigned int i;
466
467 for (i = 0; i < list->tokens_used; i++)
bfb9dc7f
ZW
468 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
469 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 470 }
1368ee70
ZW
471}
472
041c3194
ZW
473/* If there is not enough room for LEN more characters, expand the
474 list by just enough to have room for LEN characters. */
475void
476_cpp_reserve_name_space (list, len)
477 cpp_toklist *list;
478 unsigned int len;
479{
480 unsigned int room = list->name_cap - list->name_used;
481
482 if (room < len)
483 _cpp_expand_name_space (list, len - room);
484}
485
1368ee70 486/* Expand the number of tokens in a list. */
d1d9a6bd
NB
487void
488_cpp_expand_token_space (list, count)
1368ee70 489 cpp_toklist *list;
d1d9a6bd 490 unsigned int count;
1368ee70 491{
d1d9a6bd
NB
492 unsigned int n;
493
494 list->tokens_cap += count;
495 n = list->tokens_cap;
15dad1d9 496 if (list->flags & LIST_OFFSET)
d1d9a6bd 497 list->tokens--, n++;
1368ee70 498 list->tokens = (cpp_token *)
d1d9a6bd 499 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
500 if (list->flags & LIST_OFFSET)
501 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
502}
503
d1d9a6bd
NB
504/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
505 an extra token in front of the token list, as this allows the lexer
506 to always peek at the previous token without worrying about
507 underflowing the list, and some initial space. Otherwise, no
508 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 509void
d1d9a6bd 510_cpp_init_toklist (list, flags)
1368ee70 511 cpp_toklist *list;
d1d9a6bd 512 int flags;
1368ee70 513{
d1d9a6bd
NB
514 if (flags == NO_DUMMY_TOKEN)
515 {
516 list->tokens_cap = 0;
041c3194 517 list->tokens = 0;
d1d9a6bd 518 list->name_cap = 0;
041c3194 519 list->namebuf = 0;
d1d9a6bd
NB
520 list->flags = 0;
521 }
522 else
523 {
524 /* Initialize token space. Put a dummy token before the start
525 that will fail matches. */
526 list->tokens_cap = 256; /* 4K's worth. */
527 list->tokens = (cpp_token *)
528 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
529 list->tokens[0].type = CPP_EOF;
530 list->tokens++;
531
532 /* Initialize name space. */
533 list->name_cap = 1024;
041c3194 534 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
535 list->flags = LIST_OFFSET;
536 }
15dad1d9 537
15dad1d9
ZW
538 _cpp_clear_toklist (list);
539}
1368ee70 540
15dad1d9
ZW
541/* Clear a token list. */
542void
543_cpp_clear_toklist (list)
544 cpp_toklist *list;
545{
c5a04734
ZW
546 list->tokens_used = 0;
547 list->name_used = 0;
041c3194
ZW
548 list->directive = 0;
549 list->paramc = 0;
550 list->params_len = 0;
15dad1d9
ZW
551 list->flags &= LIST_OFFSET; /* clear all but that one */
552}
553
554/* Free a token list. Does not free the list itself, which may be
555 embedded in a larger structure. */
556void
557_cpp_free_toklist (list)
041c3194 558 const cpp_toklist *list;
15dad1d9 559{
15dad1d9
ZW
560 if (list->flags & LIST_OFFSET)
561 free (list->tokens - 1); /* Backup over dummy token. */
562 else
563 free (list->tokens);
564 free (list->namebuf);
1368ee70
ZW
565}
566
15dad1d9
ZW
567/* Compare two tokens. */
568int
569_cpp_equiv_tokens (a, b)
570 const cpp_token *a, *b;
571{
041c3194
ZW
572 if (a->type == b->type && a->flags == b->flags)
573 switch (token_spellings[a->type].type)
574 {
575 default: /* Keep compiler happy. */
576 case SPELL_OPERATOR:
577 return 1;
578 case SPELL_CHAR:
579 case SPELL_NONE:
580 return a->val.aux == b->val.aux; /* arg_no or character. */
581 case SPELL_IDENT:
bfb9dc7f 582 return a->val.node == b->val.node;
041c3194 583 case SPELL_STRING:
bfb9dc7f
ZW
584 return (a->val.str.len == b->val.str.len
585 && !memcmp (a->val.str.text, b->val.str.text,
586 a->val.str.len));
041c3194 587 }
15dad1d9 588
041c3194 589 return 0;
15dad1d9
ZW
590}
591
592/* Compare two token lists. */
593int
594_cpp_equiv_toklists (a, b)
595 const cpp_toklist *a, *b;
596{
597 unsigned int i;
598
041c3194
ZW
599 if (a->tokens_used != b->tokens_used
600 || a->flags != b->flags
601 || a->paramc != b->paramc)
15dad1d9
ZW
602 return 0;
603
604 for (i = 0; i < a->tokens_used; i++)
605 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
606 return 0;
607 return 1;
608}
609
041c3194 610/* Utility routine:
9e62c811 611
bfb9dc7f
ZW
612 Compares, the token TOKEN to the NUL-terminated string STRING.
613 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 614
041c3194 615int
bfb9dc7f
ZW
616cpp_ideq (token, string)
617 const cpp_token *token;
041c3194
ZW
618 const char *string;
619{
bfb9dc7f 620 if (token->type != CPP_NAME)
041c3194 621 return 0;
bfb9dc7f
ZW
622
623 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 624}
1368ee70 625
041c3194 626/* Lexing algorithm.
45b966db 627
041c3194
ZW
628 The original lexer in cpplib was made up of two passes: a first pass
629 that replaced trigraphs and deleted escaped newlines, and a second
630 pass that tokenized the result of the first pass. Tokenisation was
631 performed by peeking at the next character in the input stream. For
632 example, if the input stream contained "!=", the handler for the !
633 character would peek at the next character, and if it were a '='
634 would skip over it, and return a "!=" token, otherwise it would
635 return just the "!" token.
61474454 636
041c3194
ZW
637 To implement a single-pass lexer, this peeking ahead is unworkable.
638 An arbitrary number of escaped newlines, and trigraphs (in particular
639 ??/ which translates to the escape \), could separate the '!' and '='
640 in the input stream, yet the next token is still a "!=".
61474454 641
041c3194
ZW
642 Suppose instead that we lex by one logical line at a time, producing
643 a token list or stack for each logical line, and when seeing the '!'
644 push a CPP_NOT token on the list. Then if the '!' is part of a
645 longer token ("!=") we know we must see the remainder of the token by
646 the time we reach the end of the logical line. Thus we can have the
647 '=' handler look at the previous token (at the end of the list / top
648 of the stack) and see if it is a "!" token, and if so, instead of
649 pushing a "=" token revise the existing token to be a "!=" token.
61474454 650
041c3194
ZW
651 This works in the presence of escaped newlines, because the '\' would
652 have been pushed on the top of the stack as a CPP_BACKSLASH. The
653 newline ('\n' or '\r') handler looks at the token at the top of the
654 stack to see if it is a CPP_BACKSLASH, and if so discards both.
655 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
656 the '=' handler would never see any intervening escaped newlines.
45b966db 657
041c3194
ZW
658 To make trigraphs work in this context, as in precedence trigraphs
659 are highest and converted before anything else, the '?' handler does
660 lookahead to see if it is a trigraph, and if so skips the trigraph
661 and pushes the token it represents onto the top of the stack. This
662 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 663
041c3194
ZW
664 To the preprocessor, whitespace is only significant to the point of
665 knowing whether whitespace precedes a particular token. For example,
666 the '=' handler needs to know whether there was whitespace between it
667 and a "!" token on the top of the stack, to make the token conversion
668 decision correctly. So each token has a PREV_WHITE flag to
669 indicate this - the standard permits consecutive whitespace to be
670 regarded as a single space. The compiler front ends are not
671 interested in whitespace at all; they just require a token stream.
672 Another place where whitespace is significant to the preprocessor is
673 a #define statement - if there is whitespace between the macro name
674 and an initial "(" token the macro is "object-like", otherwise it is
675 a function-like macro that takes arguments.
676
677 However, all is not rosy. Parsing of identifiers, numbers, comments
678 and strings becomes trickier because of the possibility of raw
679 trigraphs and escaped newlines in the input stream.
680
681 The trigraphs are three consecutive characters beginning with two
682 question marks. A question mark is not valid as part of a number or
683 identifier, so parsing of a number or identifier terminates normally
684 upon reaching it, returning to the mainloop which handles the
685 trigraph just like it would in any other position. Similarly for the
686 backslash of a backslash-newline combination. So we just need the
687 escaped-newline dropper in the mainloop to check if the token on the
688 top of the stack after dropping the escaped newline is a number or
689 identifier, and if so to continue the processing it as if nothing had
690 happened.
691
692 For strings, we replace trigraphs whenever we reach a quote or
693 newline, because there might be a backslash trigraph escaping them.
694 We need to be careful that we start trigraph replacing from where we
695 left off previously, because it is possible for a first scan to leave
696 "fake" trigraphs that a second scan would pick up as real (e.g. the
697 sequence "????/\n=" would find a fake ??= trigraph after removing the
698 escaped newline.)
699
700 For line comments, on reaching a newline we scan the previous
701 character(s) to see if it is escaped, and continue if it is. Block
702 comments ignore everything and just focus on finding the comment
703 termination mark. The only difficult thing, and it is surprisingly
704 tricky, is checking if an asterisk precedes the final slash since
705 they could be separated by escaped newlines. If the preprocessor is
706 invoked with the output comments option, we don't bother removing
707 escaped newlines and replacing trigraphs for output.
708
709 Finally, numbers can begin with a period, which is pushed initially
710 as a CPP_DOT token in its own right. The digit handler checks if the
711 previous token was a CPP_DOT not separated by whitespace, and if so
712 pops it off the stack and pushes a period into the number's buffer
713 before calling the number parser.
45b966db 714
041c3194
ZW
715*/
716
717static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
718 U":>", U"<%", U"%>"};
45b966db 719
041c3194
ZW
720/* Call when a trigraph is encountered. It warns if necessary, and
721 returns true if the trigraph should be honoured. END is the third
722 character of a trigraph in the input stream. */
45b966db 723static int
041c3194 724trigraph_ok (pfile, end)
45b966db 725 cpp_reader *pfile;
041c3194 726 const unsigned char *end;
45b966db 727{
041c3194
ZW
728 int accept = CPP_OPTION (pfile, trigraphs);
729
730 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 731 {
041c3194
ZW
732 unsigned int col = end - 1 - pfile->buffer->line_base;
733 if (accept)
734 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
735 "trigraph ??%c converted to %c",
736 (int) *end, (int) trigraph_map[*end]);
45b966db 737 else
041c3194
ZW
738 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
739 "trigraph ??%c ignored", (int) *end);
45b966db 740 }
041c3194 741 return accept;
45b966db
ZW
742}
743
041c3194
ZW
744/* Scan a string for trigraphs, warning or replacing them inline as
745 appropriate. When parsing a string, we must call this routine
746 before processing a newline character (if trigraphs are enabled),
747 since the newline might be escaped by a preceding backslash
748 trigraph sequence. Returns a pointer to the end of the name after
749 replacement. */
750
751static unsigned char *
752trigraph_replace (pfile, src, limit)
45b966db 753 cpp_reader *pfile;
041c3194
ZW
754 unsigned char *src;
755 unsigned char *limit;
45b966db 756{
041c3194
ZW
757 unsigned char *dest;
758
759 /* Starting with src[1], find two consecutive '?'. The case of no
760 trigraphs is streamlined. */
761
043afb2a 762 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
763 {
764 if (src[0] != '?')
765 continue;
766
767 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
768 if (src[-1] == '?')
769 src--;
770 else if (src + 2 == limit || src[1] != '?')
771 continue;
45b966db 772
041c3194
ZW
773 /* Check if it really is a trigraph. */
774 if (trigraph_map[src[2]] == 0)
775 continue;
45b966db 776
041c3194
ZW
777 dest = src;
778 goto trigraph_found;
779 }
780 return limit;
45b966db 781
041c3194
ZW
782 /* Now we have a trigraph, we need to scan the remaining buffer, and
783 copy-shifting its contents left if replacement is enabled. */
784 for (; src + 2 < limit; dest++, src++)
785 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
786 {
787 trigraph_found:
788 src += 2;
789 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
790 *dest = trigraph_map[*src];
791 }
792
793 /* Copy remaining (at most 2) characters. */
794 while (src < limit)
795 *dest++ = *src++;
796 return dest;
45b966db
ZW
797}
798
041c3194
ZW
799/* If CUR is a backslash or the end of a trigraphed backslash, return
800 a pointer to its beginning, otherwise NULL. We don't read beyond
801 the buffer start, because there is the start of the comment in the
802 buffer. */
803static const unsigned char *
804backslash_start (pfile, cur)
64aaf407 805 cpp_reader *pfile;
041c3194 806 const unsigned char *cur;
64aaf407 807{
041c3194
ZW
808 if (cur[0] == '\\')
809 return cur;
810 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
811 && trigraph_ok (pfile, cur))
812 return cur - 2;
813 return 0;
64aaf407
NB
814}
815
041c3194
ZW
816/* Skip a C-style block comment. This is probably the trickiest
817 handler. We find the end of the comment by seeing if an asterisk
818 is before every '/' we encounter. The nasty complication is that a
819 previous asterisk may be separated by one or more escaped newlines.
820 Returns non-zero if comment terminated by EOF, zero otherwise. */
821static int
822skip_block_comment (pfile)
45b966db
ZW
823 cpp_reader *pfile;
824{
041c3194
ZW
825 cpp_buffer *buffer = pfile->buffer;
826 const unsigned char *char_after_star = 0;
827 register const unsigned char *cur = buffer->cur;
828 int seen_eof = 0;
829
830 /* Inner loop would think the comment has ended if the first comment
831 character is a '/'. Avoid this and keep the inner loop clean by
832 skipping such a character. */
833 if (cur < buffer->rlimit && cur[0] == '/')
834 cur++;
64aaf407 835
041c3194 836 for (; cur < buffer->rlimit; )
45b966db 837 {
041c3194
ZW
838 unsigned char c = *cur++;
839
840 /* People like decorating comments with '*', so check for
841 '/' instead for efficiency. */
842 if (c == '/')
45b966db 843 {
041c3194
ZW
844 if (cur[-2] == '*' || cur - 1 == char_after_star)
845 goto out;
846
847 /* Warn about potential nested comments, but not when
848 the final character inside the comment is a '/'.
849 Don't bother to get it right across escaped newlines. */
850 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
851 && cur[0] == '*' && cur[1] != '/')
45b966db 852 {
041c3194
ZW
853 buffer->cur = cur;
854 cpp_warning (pfile, "'/*' within comment");
45b966db 855 }
45b966db 856 }
91fcd158 857 else if (is_vspace (c))
45b966db 858 {
041c3194
ZW
859 const unsigned char* bslash = backslash_start (pfile, cur - 2);
860
861 handle_newline (cur, buffer->rlimit, c);
862 /* Work correctly if there is an asterisk before an
863 arbirtrarily long sequence of escaped newlines. */
864 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
865 char_after_star = cur;
866 else
867 char_after_star = 0;
45b966db 868 }
45b966db 869 }
041c3194
ZW
870 seen_eof = 1;
871
64aaf407 872 out:
041c3194
ZW
873 buffer->cur = cur;
874 return seen_eof;
45b966db
ZW
875}
876
041c3194
ZW
877/* Skip a C++ or Chill line comment. Handles escaped newlines.
878 Returns non-zero if a multiline comment. */
879static int
880skip_line_comment (pfile)
45b966db
ZW
881 cpp_reader *pfile;
882{
041c3194
ZW
883 cpp_buffer *buffer = pfile->buffer;
884 register const unsigned char *cur = buffer->cur;
885 int multiline = 0;
886
887 for (; cur < buffer->rlimit; )
888 {
889 unsigned char c = *cur++;
890
91fcd158 891 if (is_vspace (c))
45b966db 892 {
041c3194
ZW
893 /* Check for a (trigaph?) backslash escaping the newline. */
894 if (!backslash_start (pfile, cur - 2))
895 goto out;
896 multiline = 1;
897 handle_newline (cur, buffer->rlimit, c);
898 }
899 }
900 cur++;
45b966db 901
041c3194
ZW
902 out:
903 buffer->cur = cur - 1; /* Leave newline for caller. */
904 return multiline;
905}
45b966db 906
041c3194
ZW
907/* Skips whitespace, stopping at next non-whitespace character.
908 Adjusts pfile->col_adjust to account for tabs. This enables tokens
909 to be assigned the correct column. */
910static void
911skip_whitespace (pfile, in_directive)
912 cpp_reader *pfile;
913 int in_directive;
914{
915 cpp_buffer *buffer = pfile->buffer;
91fcd158 916 unsigned short warned = 0;
45b966db 917
91fcd158
NB
918 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
919 while (buffer->cur < buffer->rlimit)
041c3194 920 {
91fcd158 921 unsigned char c = *buffer->cur;
45b966db 922
91fcd158
NB
923 if (!is_nvspace (c))
924 break;
925
926 buffer->cur++;
927 /* Horizontal space always OK. */
928 if (c == ' ')
929 continue;
930 else if (c == '\t')
931 pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
932 - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
933 /* Must be \f \v or \0. */
934 else if (c == '\0')
041c3194 935 {
91fcd158
NB
936 if (!warned)
937 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
938 CPP_BUF_COL (buffer),
939 "embedded null character ignored");
940 warned = 1;
45b966db 941 }
041c3194 942 else if (in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
943 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
944 CPP_BUF_COL (buffer),
945 "%s in preprocessing directive",
946 c == '\f' ? "form feed" : "vertical tab");
45b966db 947 }
041c3194 948}
45b966db 949
041c3194 950/* Parse (append) an identifier. */
bfb9dc7f
ZW
951static inline const U_CHAR *
952parse_name (pfile, tok, cur, rlimit)
45b966db 953 cpp_reader *pfile;
bfb9dc7f
ZW
954 cpp_token *tok;
955 const U_CHAR *cur, *rlimit;
45b966db 956{
bfb9dc7f
ZW
957 const U_CHAR *name = cur;
958 unsigned int len;
041c3194 959
bfb9dc7f 960 while (cur < rlimit)
041c3194 961 {
bfb9dc7f
ZW
962 if (! is_idchar (*cur))
963 break;
e5ec2402
ZW
964 /* $ is not a legal identifier character in the standard, but is
965 commonly accepted as an extension. Don't warn about it in
966 skipped conditional blocks. */
bfb9dc7f 967 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194 968 {
bfb9dc7f 969 CPP_BUFFER (pfile)->cur = cur;
041c3194
ZW
970 cpp_pedwarn (pfile, "'$' character in identifier");
971 }
bfb9dc7f 972 cur++;
041c3194 973 }
bfb9dc7f 974 len = cur - name;
45b966db 975
bfb9dc7f 976 if (tok->val.node)
041c3194 977 {
bfb9dc7f
ZW
978 unsigned int oldlen = tok->val.node->length;
979 U_CHAR *newname = alloca (oldlen + len);
980 memcpy (newname, tok->val.node->name, oldlen);
981 memcpy (newname + oldlen, name, len);
982 len += oldlen;
983 name = newname;
041c3194
ZW
984 }
985
bfb9dc7f
ZW
986 tok->val.node = cpp_lookup (pfile, name, len);
987 return cur;
45b966db
ZW
988}
989
041c3194 990/* Parse (append) a number. */
45b966db 991static void
041c3194 992parse_number (pfile, list, name)
45b966db 993 cpp_reader *pfile;
041c3194 994 cpp_toklist *list;
bfb9dc7f 995 cpp_string *name;
45b966db 996{
041c3194
ZW
997 const unsigned char *name_limit;
998 unsigned char *namebuf;
999 cpp_buffer *buffer = pfile->buffer;
1000 register const unsigned char *cur = buffer->cur;
45b966db 1001
041c3194
ZW
1002 expanded:
1003 name_limit = list->namebuf + list->name_cap;
1004 namebuf = list->namebuf + list->name_used;
45b966db 1005
041c3194
ZW
1006 for (; cur < buffer->rlimit && namebuf < name_limit; )
1007 {
1008 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1009
041c3194
ZW
1010 /* Perhaps we should accept '$' here if we accept it for
1011 identifiers. We know namebuf[-1] is safe, because for c to
1012 be a sign we must have pushed at least one character. */
1013 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1014 goto out;
45b966db 1015
041c3194
ZW
1016 namebuf++;
1017 cur++;
45b966db 1018 }
64aaf407 1019
041c3194
ZW
1020 /* Run out of name space? */
1021 if (cur < buffer->rlimit)
1022 {
1023 list->name_used = namebuf - list->namebuf;
1024 auto_expand_name_space (list);
1025 goto expanded;
1026 }
1027
64aaf407 1028 out:
041c3194
ZW
1029 buffer->cur = cur;
1030 name->len = namebuf - name->text;
1031 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1032}
1033
041c3194 1034/* Places a string terminated by an unescaped TERMINATOR into a
bfb9dc7f 1035 cpp_string, which should be expandable and thus at the top of the
041c3194
ZW
1036 list's stack. Handles embedded trigraphs, if necessary, and
1037 escaped newlines.
45b966db 1038
041c3194
ZW
1039 Can be used for character constants (terminator = '\''), string
1040 constants ('"') and angled headers ('>'). Multi-line strings are
1041 allowed, except for within directives. */
45b966db 1042
041c3194
ZW
1043static void
1044parse_string (pfile, list, token, terminator)
45b966db 1045 cpp_reader *pfile;
041c3194
ZW
1046 cpp_toklist *list;
1047 cpp_token *token;
1048 unsigned int terminator;
45b966db 1049{
041c3194 1050 cpp_buffer *buffer = pfile->buffer;
bfb9dc7f 1051 cpp_string *name = &token->val.str;
041c3194
ZW
1052 register const unsigned char *cur = buffer->cur;
1053 const unsigned char *name_limit;
1054 unsigned char *namebuf;
1055 unsigned int null_count = 0;
1056 unsigned int trigraphed = list->name_used;
45b966db 1057
041c3194
ZW
1058 expanded:
1059 name_limit = list->namebuf + list->name_cap;
1060 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1061
041c3194 1062 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1063 {
041c3194 1064 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1065
041c3194
ZW
1066 if (c == '\0')
1067 null_count++;
91fcd158 1068 else if (c == terminator || is_vspace (c))
45b966db 1069 {
041c3194
ZW
1070 /* Needed for trigraph_replace and multiline string warning. */
1071 buffer->cur = cur;
5eec0563 1072
041c3194
ZW
1073 /* Scan for trigraphs before checking if backslash-escaped. */
1074 if ((CPP_OPTION (pfile, trigraphs)
1075 || CPP_OPTION (pfile, warn_trigraphs))
1076 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1077 {
041c3194
ZW
1078 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1079 namebuf);
1080 /* The test above guarantees trigraphed will be positive. */
1081 trigraphed = namebuf - list->namebuf - 2;
45b966db 1082 }
45b966db 1083
041c3194 1084 namebuf--; /* Drop the newline / terminator from the name. */
91fcd158 1085 if (is_vspace (c))
45b966db 1086 {
041c3194
ZW
1087 /* Drop a backslash newline, and continue. */
1088 if (namebuf[-1] == '\\')
45b966db 1089 {
041c3194
ZW
1090 handle_newline (cur, buffer->rlimit, c);
1091 namebuf--;
1092 continue;
45b966db 1093 }
45b966db 1094
041c3194 1095 cur--;
45b966db 1096
041c3194
ZW
1097 /* In Fortran and assembly language, silently terminate
1098 strings of either variety at end of line. This is a
1099 kludge around not knowing where comments are in these
1100 languages. */
1101 if (CPP_OPTION (pfile, lang_fortran)
1102 || CPP_OPTION (pfile, lang_asm))
1103 goto out;
64aaf407 1104
041c3194
ZW
1105 /* Character constants, headers and asserts may not
1106 extend over multiple lines. In Standard C, neither
1107 may strings. We accept multiline strings as an
1108 extension. (Even in directives - otherwise, glibc's
1109 longlong.h breaks.) */
1110 if (terminator != '"')
1111 goto unterminated;
1112
1113 cur++; /* Move forwards again. */
45b966db 1114
041c3194
ZW
1115 if (pfile->multiline_string_line == 0)
1116 {
1117 pfile->multiline_string_line = token->line;
1118 pfile->multiline_string_column = token->col;
1119 if (CPP_PEDANTIC (pfile))
1120 cpp_pedwarn (pfile, "multi-line string constant");
1121 }
1122
1123 *namebuf++ = '\n';
1124 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1125 }
1126 else
1127 {
041c3194 1128 unsigned char *temp;
45b966db 1129
041c3194
ZW
1130 /* An odd number of consecutive backslashes represents
1131 an escaped terminator. */
1132 temp = namebuf - 1;
1133 while (temp >= name->text && *temp == '\\')
1134 temp--;
45b966db 1135
041c3194
ZW
1136 if ((namebuf - temp) & 1)
1137 goto out;
1138 namebuf++;
1139 }
45b966db 1140 }
ff2b53ef 1141 }
45b966db 1142
041c3194
ZW
1143 /* Run out of name space? */
1144 if (cur < buffer->rlimit)
45b966db 1145 {
041c3194
ZW
1146 list->name_used = namebuf - list->namebuf;
1147 auto_expand_name_space (list);
1148 goto expanded;
1149 }
45b966db 1150
041c3194
ZW
1151 /* We may not have trigraph-replaced the input for this code path,
1152 but as the input is in error by being unterminated we don't
1153 bother. Prevent warnings about no newlines at EOF. */
91fcd158 1154 if (is_vspace (cur[-1]))
041c3194 1155 cur--;
45b966db 1156
041c3194
ZW
1157 unterminated:
1158 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1159
041c3194
ZW
1160 if (terminator == '\"' && pfile->multiline_string_line != list->line
1161 && pfile->multiline_string_line != 0)
1162 {
1163 cpp_error_with_line (pfile, pfile->multiline_string_line,
1164 pfile->multiline_string_column,
1165 "possible start of unterminated string literal");
1166 pfile->multiline_string_line = 0;
45b966db 1167 }
041c3194
ZW
1168
1169 out:
1170 buffer->cur = cur;
1171 name->len = namebuf - name->text;
1172 list->name_used = namebuf - list->namebuf;
45b966db 1173
041c3194
ZW
1174 if (null_count > 0)
1175 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1176 : "null character preserved"));
45b966db
ZW
1177}
1178
041c3194
ZW
1179/* The character TYPE helps us distinguish comment types: '*' = C
1180 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1181 the stored comment includes the comment start and any terminator. */
1182
1183#define COMMENT_START_LEN 2
9e62c811 1184static void
041c3194
ZW
1185save_comment (list, token, from, len, type)
1186 cpp_toklist *list;
1187 cpp_token *token;
1188 const unsigned char *from;
9e62c811 1189 unsigned int len;
041c3194 1190 unsigned int type;
9e62c811 1191{
041c3194
ZW
1192 unsigned char *buffer;
1193
1194 len += COMMENT_START_LEN;
9e62c811 1195
041c3194
ZW
1196 if (list->name_used + len > list->name_cap)
1197 _cpp_expand_name_space (list, len);
9e62c811 1198
bfb9dc7f 1199 INIT_TOKEN_STR (list, token);
041c3194 1200 token->type = CPP_COMMENT;
bfb9dc7f 1201 token->val.str.len = len;
45b966db 1202
041c3194
ZW
1203 buffer = list->namebuf + list->name_used;
1204 list->name_used += len;
45b966db 1205
041c3194
ZW
1206 /* Copy the comment. */
1207 if (type == '*')
45b966db 1208 {
041c3194
ZW
1209 *buffer++ = '/';
1210 *buffer++ = '*';
45b966db 1211 }
041c3194 1212 else
ea4a453b 1213 {
041c3194
ZW
1214 *buffer++ = type;
1215 *buffer++ = type;
ea4a453b 1216 }
041c3194 1217 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1218}
1219
041c3194
ZW
1220/*
1221 * The tokenizer's main loop. Returns a token list, representing a
1222 * logical line in the input file. On EOF after some tokens have
1223 * been processed, we return immediately. Then in next call, or if
1224 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1225 * token is placed in the list.
1226 *
1227 * Implementation relies almost entirely on lookback, rather than
1228 * looking forwards. This means that tokenization requires just
1229 * a single pass of the file, even in the presence of trigraphs and
1230 * escaped newlines, providing significant performance benefits.
1231 * Trigraph overhead is negligible if they are disabled, and low
1232 * even when enabled.
1233 */
1234
/* Helper macros for lex_line.  Both refer to lex_line's locals by
   name (list, cur_token, first_token), so they are only usable where
   those names are in scope.
   KNOWN_DIRECTIVE: list->directive has been set for this line.
   MIGHT_BE_DIRECTIVE: the token being lexed is the second token of
   this line and the first one is a CPP_HASH.  */
91fcd158 1235#define KNOWN_DIRECTIVE() (list->directive != 0)
041c3194
ZW
1236#define MIGHT_BE_DIRECTIVE() \
1237(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1238
041c3194
ZW
/* Lex one logical line from pfile->buffer into LIST, appending tokens
   from index list->tokens_used onwards.

   Aborts if LIST does not have LIST_OFFSET set.  On entry records the
   file name and line in LIST, resets pfile->col_adjust and sets
   pfile->in_lex_line; the latter is cleared again before returning.

   Lexing stops at the first newline seen once at least one token has
   been produced for this call, or when the buffer is exhausted.  The
   token array is grown by 256 entries whenever it fills up.  If the
   buffer ends with no token produced and pfile->done_initializing is
   set, a single CPP_EOF token is appended.

   Multi-character operators are formed by looking back at the
   previous token (PREV_TOKEN_TYPE, IMMED_TOKEN) and revising it,
   rather than by looking ahead in the input.

   On exit buffer->cur and list->tokens_used are updated, and the
   first token of the line is flagged BOL or PREV_WHITE.  */
1239static void
1240lex_line (pfile, list)
45b966db 1241 cpp_reader *pfile;
041c3194 1242 cpp_toklist *list;
45b966db 1243{
041c3194
ZW
1244 cpp_token *cur_token, *token_limit, *first;
1245 cpp_buffer *buffer = pfile->buffer;
1246 const unsigned char *cur = buffer->cur;
1247 unsigned char flags = 0;
1248 unsigned int first_token = list->tokens_used;
45b966db 1249
041c3194
ZW
/* The look-back code below indexes cur_token[-1] and cur_token[-2];
   presumably LIST_OFFSET is what makes that safe for the first
   token -- TODO confirm against the list allocator.  */
1250 if (!(list->flags & LIST_OFFSET))
1251 (abort) ();
1252
1253 list->file = buffer->nominal_fname;
1254 list->line = CPP_BUF_LINE (buffer);
1255 pfile->col_adjust = 0;
1256 pfile->in_lex_line = 1;
1257 if (cur == buffer->buf)
1258 list->flags |= BEG_OF_FILE;
1259
/* Re-entered after the token array has been expanded, since the
   array pointers are recomputed from LIST here.  */
1260 expanded:
1261 token_limit = list->tokens + list->tokens_cap;
1262 cur_token = list->tokens + list->tokens_used;
45b966db 1263
041c3194 1264 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1265 {
041c3194 1266 unsigned char c;
45b966db 1267
91fcd158
NB
1268 /* Optimize non-vertical whitespace skipping; most tokens are
1269 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1270 c = *cur;
1271 if (is_nvspace (c))
45b966db 1272 {
91fcd158
NB
1273 buffer->cur = cur;
1274 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1275 && cur_token > &list->tokens[first_token]));
041c3194 1276 cur = buffer->cur;
ff2b53ef 1277
041c3194
ZW
1278 flags = PREV_WHITE;
1279 if (cur == buffer->rlimit)
1280 break;
91fcd158 1281 c = *cur;
45b966db 1282 }
91fcd158 1283 cur++;
45b966db 1284
041c3194
ZW
1285 /* Initialize current token. CPP_EOF will not be fixed up by
1286 expand_name_space. */
1287 list->tokens_used = cur_token - list->tokens + 1;
1288 cur_token->type = CPP_EOF;
1289 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1290 cur_token->line = CPP_BUF_LINE (buffer);
1291 cur_token->flags = flags;
1292 flags = 0;
46d07497 1293
041c3194
ZW
1294 switch (c)
1295 {
1296 case '0': case '1': case '2': case '3': case '4':
1297 case '5': case '6': case '7': case '8': case '9':
1298 {
1299 int prev_dot;
46d07497 1300
041c3194
ZW
1301 cur--; /* Backup character. */
1302 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1303 if (prev_dot)
1304 cur_token--;
bfb9dc7f 1305 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1306 /* Prepend an immediately previous CPP_DOT token. */
1307 if (prev_dot)
1308 {
1309 if (list->name_cap == list->name_used)
1310 auto_expand_name_space (list);
46d07497 1311
bfb9dc7f 1312 cur_token->val.str.len = 1;
041c3194
ZW
1313 list->namebuf[list->name_used++] = '.';
1314 }
46d07497 1315
041c3194
ZW
/* Also the re-entry point for a number that was interrupted by an
   escaped newline (see the newline case).  */
1316 continue_number:
1317 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1318 buffer->cur = cur;
bfb9dc7f 1319 parse_number (pfile, list, &cur_token->val.str);
041c3194
ZW
1320 cur = buffer->cur;
1321 }
1322 /* Check for # 123 form of #line. */
1323 if (MIGHT_BE_DIRECTIVE ())
1324 list->directive = _cpp_check_linemarker (pfile, cur_token,
1325 !(cur_token[-1].flags
1326 & PREV_WHITE));
1327 cur_token++;
1328 break;
46d07497 1329
041c3194
ZW
1330 letter:
1331 case '_':
1332 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1333 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1334 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1335 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1336 case 'y': case 'z':
1337 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1338 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1339 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1340 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1341 case 'Y': case 'Z':
1342 cur--; /* Backup character. */
bfb9dc7f 1343 cur_token->val.node = 0;
041c3194
ZW
1344 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1345
/* Also the re-entry point for a name that was interrupted by an
   escaped newline; val.node is then already set and parse_name
   appends to it.  */
1346 continue_name:
bfb9dc7f 1347 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
45b966db 1348
041c3194
ZW
/* NOTE(review): this passes the flags of list->tokens[0], whereas the
   line-marker check above uses cur_token[-1].  The two differ
   whenever first_token != 0 -- confirm which is intended.  */
1349 if (MIGHT_BE_DIRECTIVE ())
1350 list->directive = _cpp_check_directive (pfile, cur_token,
1351 !(list->tokens[0].flags
1352 & PREV_WHITE));
1353 cur_token++;
1354 break;
f8f769ea 1355
041c3194
ZW
1356 case '\'':
1357 /* Character constants are not recognized when processing Fortran,
1358 or if -traditional. */
1359 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1360 goto other;
45b966db 1361
041c3194
ZW
1362 /* Fall through. */
1363 case '\"':
1364 /* Traditionally, escaped strings are not strings. */
1365 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1366 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1367 goto other;
04e3ec78 1368
041c3194
ZW
1369 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1370 /* Do we have a wide string? */
1371 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
bfb9dc7f 1372 && cur_token[-1].val.node == pfile->spec_nodes->n_L
041c3194 1373 && !CPP_TRADITIONAL (pfile))
04e3ec78 1374 {
041c3194 1375 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
04e3ec78 1376 }
45b966db 1377
041c3194
ZW
1378 do_parse_string:
1379 /* Here c is one of ' " or >. */
bfb9dc7f 1380 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1381 buffer->cur = cur;
1382 parse_string (pfile, list, cur_token, c);
1383 cur = buffer->cur;
1384 cur_token++;
1385 break;
45b966db 1386
041c3194
ZW
1387 case '/':
1388 cur_token->type = CPP_DIV;
1389 if (IMMED_TOKEN ())
1390 {
1391 if (PREV_TOKEN_TYPE == CPP_DIV)
1392 {
1393 /* We silently allow C++ comments in system headers,
1394 irrespective of conformance mode, because lots of
1395 broken systems do that and trying to clean it up
1396 in fixincludes is a nightmare. */
1397 if (CPP_IN_SYSTEM_HEADER (pfile))
1398 goto do_line_comment;
1399 else if (CPP_OPTION (pfile, cplusplus_comments))
1400 {
1401 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1402 && ! buffer->warned_cplusplus_comments)
1403 {
1404 buffer->cur = cur;
1405 cpp_pedwarn (pfile,
1406 "C++ style comments are not allowed in ISO C89");
1407 cpp_pedwarn (pfile,
1408 "(this will be reported only once per input file)");
1409 buffer->warned_cplusplus_comments = 1;
1410 }
/* Shared with the Chill "--" comment path in the '-' case; c is then
   '-' rather than '/'.  */
1411 do_line_comment:
1412 buffer->cur = cur;
1413#if 0 /* Leave until new lexer in place. */
1414 if (cur[-2] != c)
1415 cpp_warning (pfile,
1416 "comment start split across lines");
1417#endif
1418 if (skip_line_comment (pfile))
1419 cpp_warning (pfile, "multi-line comment");
f8f769ea 1420
041c3194
ZW
1421 /* Back-up to first '-' or '/'. */
1422 cur_token--;
1423 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1424 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1425 || (list->directive->flags & COMMENTS)))
1426 save_comment (list, cur_token++, cur,
1427 buffer->cur - cur, c);
1428 else if (!CPP_OPTION (pfile, traditional))
1429 flags = PREV_WHITE;
1430
1431 cur = buffer->cur;
1432 break;
1433 }
1434 }
1435 }
1436 cur_token++;
1437 break;
1438
1439 case '*':
1440 cur_token->type = CPP_MULT;
1441 if (IMMED_TOKEN ())
45b966db 1442 {
041c3194
ZW
1443 if (PREV_TOKEN_TYPE == CPP_DIV)
1444 {
1445 buffer->cur = cur;
1446#if 0 /* Leave until new lexer in place. */
1447 if (cur[-2] != '/')
1448 cpp_warning (pfile,
1449 "comment start '/*' split across lines");
1450#endif
1451 if (skip_block_comment (pfile))
1452 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1453 "unterminated comment");
1454#if 0 /* Leave until new lexer in place. */
1455 else if (buffer->cur[-2] != '*')
1456 cpp_warning (pfile,
1457 "comment end '*/' split across lines");
1458#endif
1459 /* Back up to opening '/'. */
1460 cur_token--;
1461 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1462 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1463 || (list->directive->flags & COMMENTS)))
1464 save_comment (list, cur_token++, cur,
1465 buffer->cur - cur, c);
1466 else if (!CPP_OPTION (pfile, traditional))
1467 flags = PREV_WHITE;
1468
1469 cur = buffer->cur;
1470 break;
1471 }
1472 else if (CPP_OPTION (pfile, cplusplus))
1473 {
1474 /* In C++, there are .* and ->* operators. */
1475 if (PREV_TOKEN_TYPE == CPP_DEREF)
1476 BACKUP_TOKEN (CPP_DEREF_STAR);
1477 else if (PREV_TOKEN_TYPE == CPP_DOT)
1478 BACKUP_TOKEN (CPP_DOT_STAR);
1479 }
45b966db 1480 }
041c3194 1481 cur_token++;
f8f769ea 1482 break;
45b966db 1483
041c3194
ZW
1484 case '\n':
1485 case '\r':
1486 handle_newline (cur, buffer->rlimit, c);
1487 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1488 {
041c3194 1489 if (IMMED_TOKEN ())
45b966db 1490 {
041c3194
ZW
1491 /* Remove the escaped newline. Then continue to process
1492 any interrupted name or number. */
1493 cur_token--;
1494 /* Backslash-newline may not be immediately followed by
1495 EOF (C99 5.1.1.2). */
1496 if (cur >= buffer->rlimit)
1497 {
1498 cpp_pedwarn (pfile, "backslash-newline at end of file");
1499 break;
1500 }
1501 if (IMMED_TOKEN ())
1502 {
1503 cur_token--;
1504 if (cur_token->type == CPP_NAME)
1505 goto continue_name;
1506 else if (cur_token->type == CPP_NUMBER)
1507 goto continue_number;
1508 cur_token++;
1509 }
1510 /* Remember whitespace setting. */
1511 flags = cur_token->flags;
f8f769ea 1512 break;
45b966db 1513 }
041c3194
ZW
1514 else
1515 {
1516 buffer->cur = cur;
1517 cpp_warning (pfile,
1518 "backslash and newline separated by space");
1519 }
1520 }
1521 else if (MIGHT_BE_DIRECTIVE ())
1522 {
1523 /* "Null directive." C99 6.10.7: A preprocessing
1524 directive of the form # <new-line> has no effect.
1525
1526 But it is still a directive, and therefore disappears
1527 from the output. */
1528 cur_token--;
1529 if (cur_token->flags & PREV_WHITE)
45b966db 1530 {
041c3194
ZW
1531 if (CPP_WTRADITIONAL (pfile))
1532 cpp_warning (pfile,
1533 "K+R C ignores #\\n with the # indented");
1534 if (CPP_TRADITIONAL (pfile))
1535 cur_token++;
45b966db 1536 }
f8f769ea 1537 }
45b966db 1538
041c3194
ZW
1539 /* Skip vertical space until we have at least one token to
1540 return. */
1541 if (cur_token != &list->tokens[first_token])
1542 goto out;
1543 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1544 break;
04e3ec78 1545
041c3194
ZW
1546 case '-':
1547 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1548 {
1549 if (CPP_OPTION (pfile, chill))
1550 goto do_line_comment;
1551 REVISE_TOKEN (CPP_MINUS_MINUS);
1552 }
1553 else
1554 PUSH_TOKEN (CPP_MINUS);
1555 break;
45b966db 1556
041c3194
ZW
1557 make_hash:
1558 case '#':
1559 /* The digraph flag checking ensures that ## and %:%:
1560 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1561 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1562 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1563 REVISE_TOKEN (CPP_PASTE);
1564 else
1565 PUSH_TOKEN (CPP_HASH);
1566 break;
04e3ec78 1567
041c3194
ZW
1568 case ':':
1569 cur_token->type = CPP_COLON;
1570 if (IMMED_TOKEN ())
1571 {
1572 if (PREV_TOKEN_TYPE == CPP_COLON
1573 && CPP_OPTION (pfile, cplusplus))
1574 BACKUP_TOKEN (CPP_SCOPE);
1575 /* Digraph: "<:" is a '[' */
1576 else if (PREV_TOKEN_TYPE == CPP_LESS)
1577 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1578 /* Digraph: "%:" is a '#' */
1579 else if (PREV_TOKEN_TYPE == CPP_MOD)
1580 {
1581 (--cur_token)->flags |= DIGRAPH;
1582 goto make_hash;
1583 }
1584 }
1585 cur_token++;
1586 break;
f8f769ea 1587
041c3194
ZW
1588 case '&':
1589 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1590 REVISE_TOKEN (CPP_AND_AND);
1591 else
1592 PUSH_TOKEN (CPP_AND);
f8f769ea 1593 break;
45b966db 1594
041c3194
ZW
1595 make_or:
1596 case '|':
1597 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1598 REVISE_TOKEN (CPP_OR_OR);
1599 else
1600 PUSH_TOKEN (CPP_OR);
1601 break;
45b966db 1602
041c3194
ZW
1603 case '+':
1604 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1605 REVISE_TOKEN (CPP_PLUS_PLUS);
1606 else
1607 PUSH_TOKEN (CPP_PLUS);
1608 break;
45b966db 1609
041c3194
ZW
1610 case '=':
1611 /* This relies on equidistance of "?=" and "?" tokens. */
1612 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1613 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1614 else
1615 PUSH_TOKEN (CPP_EQ);
1616 break;
45b966db 1617
041c3194
ZW
1618 case '>':
1619 cur_token->type = CPP_GREATER;
1620 if (IMMED_TOKEN ())
1621 {
1622 if (PREV_TOKEN_TYPE == CPP_GREATER)
1623 BACKUP_TOKEN (CPP_RSHIFT);
1624 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1625 BACKUP_TOKEN (CPP_DEREF);
1626 /* Digraph: ":>" is a ']' */
1627 else if (PREV_TOKEN_TYPE == CPP_COLON)
1628 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1629 /* Digraph: "%>" is a '}' */
1630 else if (PREV_TOKEN_TYPE == CPP_MOD)
1631 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1632 }
1633 cur_token++;
1634 break;
1635
1636 case '<':
1637 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1638 {
1639 REVISE_TOKEN (CPP_LSHIFT);
1640 break;
1641 }
1642 /* Is this the beginning of a header name? */
91fcd158 1643 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
041c3194
ZW
1644 {
1645 c = '>'; /* Terminator. */
1646 cur_token->type = CPP_HEADER_NAME;
1647 goto do_parse_string;
1648 }
1649 PUSH_TOKEN (CPP_LESS);
1650 break;
45b966db 1651
041c3194
ZW
1652 case '%':
1653 /* Digraph: "<%" is a '{' */
1654 cur_token->type = CPP_MOD;
1655 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1656 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1657 cur_token++;
1658 break;
04e3ec78 1659
041c3194
ZW
1660 case '?':
1661 if (cur + 1 < buffer->rlimit && *cur == '?'
1662 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1663 {
1664 /* Handle trigraph. */
1665 cur++;
1666 switch (*cur++)
1667 {
1668 case '(': goto make_open_square;
1669 case ')': goto make_close_square;
1670 case '<': goto make_open_brace;
1671 case '>': goto make_close_brace;
1672 case '=': goto make_hash;
1673 case '!': goto make_or;
1674 case '-': goto make_complement;
1675 case '/': goto make_backslash;
1676 case '\'': goto make_xor;
1677 }
1678 }
1679 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1680 {
1681 /* GNU C++ defines <? and >? operators. */
1682 if (PREV_TOKEN_TYPE == CPP_LESS)
1683 {
1684 REVISE_TOKEN (CPP_MIN);
1685 break;
1686 }
1687 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1688 {
1689 REVISE_TOKEN (CPP_MAX);
1690 break;
1691 }
1692 }
1693 PUSH_TOKEN (CPP_QUERY);
1694 break;
45b966db 1695
041c3194
ZW
1696 case '.':
1697 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1698 && IMMED_TOKEN ()
1699 && !(cur_token[-1].flags & PREV_WHITE))
1700 {
1701 cur_token -= 2;
1702 PUSH_TOKEN (CPP_ELLIPSIS);
1703 }
1704 else
1705 PUSH_TOKEN (CPP_DOT);
1706 break;
c5a04734 1707
041c3194
ZW
/* The make_* labels are also the targets of the trigraph switch in
   the '?' case.  */
1708 make_complement:
1709 case '~': PUSH_TOKEN (CPP_COMPL); break;
1710 make_xor:
1711 case '^': PUSH_TOKEN (CPP_XOR); break;
1712 make_open_brace:
1713 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1714 make_close_brace:
1715 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1716 make_open_square:
1717 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1718 make_close_square:
1719 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1720 make_backslash:
1721 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1722 case '!': PUSH_TOKEN (CPP_NOT); break;
1723 case ',': PUSH_TOKEN (CPP_COMMA); break;
1724 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1725 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1726 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1727
041c3194
ZW
1728 case '$':
1729 if (CPP_OPTION (pfile, dollars_in_ident))
1730 goto letter;
1731 /* Fall through */
1732 other:
1733 default:
1734 cur_token->val.aux = c;
1735 PUSH_TOKEN (CPP_OTHER);
1736 break;
1737 }
1738 }
6d2c2047 1739
041c3194
ZW
1740 /* Run out of token space? */
1741 if (cur_token == token_limit)
1742 {
1743 list->tokens_used = cur_token - list->tokens;
1744 _cpp_expand_token_space (list, 256);
1745 goto expanded;
1746 }
6d2c2047 1747
041c3194
ZW
/* Reached only when the buffer is exhausted.  If nothing was lexed
   for this line, hand back a lone CPP_EOF (once initialization is
   done), complaining first if the buffer did not end in a newline.  */
1748 cur_token->flags = flags;
1749 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1750 {
91fcd158 1751 if (cur > buffer->buf && !is_vspace (cur[-1]))
041c3194
ZW
1752 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1753 CPP_BUF_COLUMN (buffer, cur),
1754 "no newline at end of file");
1755 cur_token++->type = CPP_EOF;
1756 }
6d2c2047 1757
041c3194
ZW
1758 out:
1759 /* All tokens are allocated, so the memory location is fixed. */
1760 first = &list->tokens[first_token];
1761
1762 /* Don't complain about the null directive, nor directives in
1763 assembly source: we don't know where the comments are, and # may
1764 introduce assembler pseudo-ops. Don't complain about invalid
1765 directives in skipped conditional groups (6.10 p4). */
1766 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1767 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1768 {
1769 if (first[1].type == CPP_NAME)
1770 cpp_error (pfile, "invalid preprocessing directive #%.*s",
bfb9dc7f 1771 (int) first[1].val.node->length, first[1].val.node->name);
041c3194
ZW
1772 else
1773 cpp_error (pfile, "invalid preprocessing directive");
1774 }
1775
91fcd158
NB
1776 /* Put EOF at end of known directives. This covers "directives do
1777 not extend beyond the end of the line (description 6.10 part 2)". */
1778 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
041c3194
ZW
1779 {
1780 pfile->first_directive_token = first;
1781 cur_token++->type = CPP_EOF;
1782 }
1783
91fcd158
NB
1784 /* Directives, known or not, always start a new line. */
1785 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
041c3194 1786 first->flags |= BOL;
6d2c2047 1787 else
041c3194
ZW
1788 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1789 up the invocation of a function-like macro, new line is
1790 considered a normal white-space character. */
1791 first->flags |= PREV_WHITE;
1792
/* Publish the final read position and token count.  */
1793 buffer->cur = cur;
1794 list->tokens_used = cur_token - list->tokens;
1795 pfile->in_lex_line = 0;
6d2c2047
ZW
1796}
1797
041c3194
ZW
1798/* Write the spelling of a token TOKEN, with any appropriate
1799 whitespace before it, to the token_buffer. PREV is the previous
1800 token, which is used to determine if we need to shove in an extra
1801 space in order to avoid accidental token paste. */
1802static void
1803output_token (pfile, token, prev)
1804 cpp_reader *pfile;
1805 const cpp_token *token, *prev;
1806{
1807 int dummy;
c5a04734 1808
041c3194
ZW
1809 if (token->col && (token->flags & BOL))
1810 {
1811 /* Supply enough whitespace to put this token in its original
1812 column. Don't bother trying to reconstruct tabs; we can't
1813 get it right in general, and nothing ought to care. (Yes,
1814 some things do care; the fault lies with them.) */
1815 unsigned char *buffer;
1816 unsigned int spaces = token->col - 1;
1817
1818 CPP_RESERVE (pfile, token->col);
1819 buffer = pfile->limit;
1820
1821 while (spaces--)
1822 *buffer++ = ' ';
1823 pfile->limit = buffer;
1824 }
1825 else if (token->flags & PREV_WHITE)
1826 CPP_PUTC (pfile, ' ');
1827 /* Check for and prevent accidental token pasting, in ANSI mode. */
d6d5f795 1828
041c3194
ZW
1829 else if (!CPP_TRADITIONAL (pfile) && prev)
1830 {
1831 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1832 CPP_PUTC (pfile, ' ');
1833 /* can_paste catches most of the accidental paste cases, but not all.
1834 Consider a + ++b - if there is not a space between the + and ++, it
1835 will be misparsed as a++ + b. */
1836 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1837 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1838 CPP_PUTC (pfile, ' ');
1839 }
d6d5f795 1840
041c3194
ZW
1841 CPP_RESERVE (pfile, TOKEN_LEN (token));
1842 pfile->limit = spell_token (pfile, token, pfile->limit);
1843}
d6d5f795 1844
041c3194 1845/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1846 already contain the enough space to hold the token's spelling.
1847 Returns a pointer to the character after the last character
1848 written. */
d6d5f795 1849
041c3194
ZW
1850static unsigned char *
1851spell_token (pfile, token, buffer)
1852 cpp_reader *pfile; /* Would be nice to be rid of this... */
1853 const cpp_token *token;
1854 unsigned char *buffer;
1855{
1856 switch (token_spellings[token->type].type)
1857 {
1858 case SPELL_OPERATOR:
1859 {
1860 const unsigned char *spelling;
1861 unsigned char c;
d6d5f795 1862
041c3194
ZW
1863 if (token->flags & DIGRAPH)
1864 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1865 else
1866 spelling = token_spellings[token->type].spelling;
1867
1868 while ((c = *spelling++) != '\0')
1869 *buffer++ = c;
1870 }
1871 break;
d6d5f795 1872
041c3194 1873 case SPELL_IDENT:
bfb9dc7f
ZW
1874 memcpy (buffer, token->val.node->name, token->val.node->length);
1875 buffer += token->val.node->length;
041c3194 1876 break;
d6d5f795 1877
041c3194
ZW
1878 case SPELL_STRING:
1879 {
041c3194
ZW
1880 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1881 *buffer++ = 'L';
bfb9dc7f 1882
041c3194 1883 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
bfb9dc7f
ZW
1884 *buffer++ = '"';
1885 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1886 *buffer++ = '\'';
1887
1888 memcpy (buffer, token->val.str.text, token->val.str.len);
1889 buffer += token->val.str.len;
1890
1891 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1892 *buffer++ = '"';
1893 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1894 *buffer++ = '\'';
041c3194
ZW
1895 }
1896 break;
d6d5f795 1897
041c3194
ZW
1898 case SPELL_CHAR:
1899 *buffer++ = token->val.aux;
1900 break;
d6d5f795 1901
041c3194
ZW
1902 case SPELL_NONE:
1903 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1904 break;
1905 }
d6d5f795 1906
041c3194
ZW
1907 return buffer;
1908}
d6d5f795 1909
cf00a885
ZW
1910/* Return the spelling of a token known to be an operator.
1911 Does not distinguish digraphs from their counterparts. */
1912const unsigned char *
1913_cpp_spell_operator (type)
1914 enum cpp_ttype type;
1915{
1916 if (token_spellings[type].type == SPELL_OPERATOR)
1917 return token_spellings[type].spelling;
1918 else
1919 return token_names[type];
1920}
1921
1922
041c3194 1923/* Macro expansion algorithm. TODO. */
d6d5f795 1924
7de9cc38
KG
1925static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1926static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
c5a04734 1927
041c3194
ZW
1928#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1929#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1930
1931/* Flags for cpp_context. */
1932#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1933#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1934#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1935#define CONTEXT_ARG (1 << 3) /* If an argument context. */
1936
1937#define ASSIGN_FLAGS_AND_POS(d, s) \
1938 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1939 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1940 } while (0)
1941
1942/* f is flags, just consisting of PREV_WHITE | BOL. */
1943#define MODIFY_FLAGS_AND_POS(d, s, f) \
1944 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1945 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1946 } while (0)
1947
1948typedef struct cpp_context cpp_context;
1949struct cpp_context
1950{
1951 union
1952 {
1953 const cpp_toklist *list; /* Used for macro contexts only. */
1954 const cpp_token **arg; /* Used for arg contexts only. */
1955 } u;
1956
1957 /* Pushed token to be returned by next call to cpp_get_token. */
1958 const cpp_token *pushed_token;
1959
1960 struct macro_args *args; /* 0 for arguments and object-like macros. */
1961 unsigned short posn; /* Current posn, index into u. */
1962 unsigned short count; /* No. of tokens in u. */
1963 unsigned short level;
1964 unsigned char flags;
1965};
1966
1967typedef struct macro_args macro_args;
1968struct macro_args
1969{
1970 unsigned int *ends;
1971 const cpp_token **tokens;
1972 unsigned int capacity;
1973 unsigned int used;
1974 unsigned short level;
1975};
1976
1977static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1978static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1979 macro_args *, unsigned int *));
1980static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1981static void save_token PARAMS ((macro_args *, const cpp_token *));
1982static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
1983 const cpp_token *));
1984static int do_pop_context PARAMS ((cpp_reader *));
1985static const cpp_token *pop_context PARAMS ((cpp_reader *));
1986static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
1987 cpp_hashnode *,
1988 const cpp_token *));
1989static void free_macro_args PARAMS ((macro_args *));
1990
1991/* Free the storage allocated for macro arguments. */
1992static void
1993free_macro_args (args)
1994 macro_args *args;
c5a04734 1995{
041c3194
ZW
1996 if (args->tokens)
1997 free (args->tokens);
1998 free (args->ends);
1999 free (args);
c5a04734
ZW
2000}
2001
041c3194
ZW
2002/* Determines if a macro has been already used (and is therefore
2003 disabled). */
c5a04734 2004static int
041c3194 2005is_macro_disabled (pfile, expansion, token)
c5a04734 2006 cpp_reader *pfile;
041c3194
ZW
2007 const cpp_toklist *expansion;
2008 const cpp_token *token;
c5a04734 2009{
041c3194
ZW
2010 cpp_context *context = CURRENT_CONTEXT (pfile);
2011
cf00a885
ZW
2012 /* Don't expand anything if this file has already been preprocessed. */
2013 if (CPP_OPTION (pfile, preprocessed))
2014 return 1;
2015
041c3194
ZW
2016 /* Arguments on either side of ## are inserted in place without
2017 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2018 occurs during a later rescan pass. The effect is that we expand
2019 iff we would as part of the macro's expansion list, so we should
2020 drop to the macro's context. */
2021 if (IS_ARG_CONTEXT (context))
c5a04734 2022 {
041c3194
ZW
2023 if (token->flags & PASTED)
2024 context--;
2025 else if (!(context->flags & CONTEXT_RAW))
2026 return 1;
2027 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2028 context--;
c5a04734 2029 }
c5a04734 2030
041c3194
ZW
2031 /* Have we already used this macro? */
2032 while (context->level > 0)
c5a04734 2033 {
041c3194
ZW
2034 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2035 return 1;
2036 /* Raw argument tokens are judged based on the token list they
2037 came from. */
2038 if (context->flags & CONTEXT_RAW)
2039 context = pfile->contexts + context->level;
2040 else
2041 context--;
2042 }
c5a04734 2043
041c3194
ZW
2044 /* Function-like macros may be disabled if the '(' is not in the
2045 current context. We check this without disrupting the context
2046 stack. */
2047 if (expansion->paramc >= 0)
2048 {
2049 const cpp_token *next;
2050 unsigned int prev_nme;
c5a04734 2051
041c3194
ZW
2052 context = CURRENT_CONTEXT (pfile);
2053 /* Drop down any contexts we're at the end of: the '(' may
2054 appear in lower macro expansions, or in the rest of the file. */
2055 while (context->posn == context->count && context > pfile->contexts)
2056 {
2057 context--;
2058 /* If we matched, we are disabled, as we appear in the
2059 expansion of each macro we meet. */
2060 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2061 return 1;
2062 }
c5a04734 2063
041c3194
ZW
2064 prev_nme = pfile->no_expand_level;
2065 pfile->no_expand_level = context - pfile->contexts;
2066 next = cpp_get_token (pfile);
2067 restore_macro_expansion (pfile, prev_nme);
2068 if (next->type != CPP_OPEN_PAREN)
2069 {
2070 _cpp_push_token (pfile, next);
2071 if (CPP_OPTION (pfile, warn_traditional))
2072 cpp_warning (pfile,
2073 "function macro %.*s must be used with arguments in traditional C",
bfb9dc7f 2074 (int) token->val.node->length, token->val.node->name);
041c3194
ZW
2075 return 1;
2076 }
c5a04734 2077 }
c5a04734 2078
041c3194 2079 return 0;
c5a04734
ZW
2080}
2081
041c3194
ZW
2082/* Add a token to the set of tokens forming the arguments to the macro
2083 being parsed in parse_args. */
2084static void
2085save_token (args, token)
2086 macro_args *args;
2087 const cpp_token *token;
c5a04734 2088{
041c3194
ZW
2089 if (args->used == args->capacity)
2090 {
2091 args->capacity += args->capacity + 100;
2092 args->tokens = (const cpp_token **)
2093 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2094 }
2095 args->tokens[args->used++] = token;
c5a04734
ZW
2096}
2097
041c3194
ZW
2098/* Take and save raw tokens until we finish one argument. Empty
2099 arguments are saved as a single CPP_PLACEMARKER token. */
2100static const cpp_token *
2101parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2102 cpp_reader *pfile;
041c3194
ZW
2103 int var_args;
2104 unsigned int paren_context;
2105 macro_args *args;
2106 unsigned int *pcount;
c5a04734 2107{
041c3194
ZW
2108 const cpp_token *token;
2109 unsigned int paren = 0, count = 0;
2110 int raw, was_raw = 1;
c5a04734 2111
041c3194 2112 for (count = 0;; count++)
c5a04734 2113 {
041c3194 2114 token = cpp_get_token (pfile);
c5a04734 2115
041c3194 2116 switch (token->type)
c5a04734 2117 {
041c3194
ZW
2118 default:
2119 break;
c5a04734 2120
041c3194
ZW
2121 case CPP_OPEN_PAREN:
2122 paren++;
2123 break;
2124
2125 case CPP_CLOSE_PAREN:
2126 if (paren-- != 0)
2127 break;
2128 goto out;
2129
2130 case CPP_COMMA:
2131 /* Commas are not terminators within parantheses or var_args. */
2132 if (paren || var_args)
2133 break;
2134 goto out;
2135
2136 case CPP_EOF: /* Error reported by caller. */
2137 goto out;
c5a04734 2138 }
c5a04734 2139
041c3194
ZW
2140 raw = pfile->cur_context <= paren_context;
2141 if (raw != was_raw)
2142 {
2143 was_raw = raw;
2144 save_token (args, 0);
2145 count++;
c5a04734 2146 }
041c3194 2147 save_token (args, token);
c5a04734 2148 }
c5a04734
ZW
2149
2150 out:
041c3194
ZW
2151 if (count == 0)
2152 {
2153 /* Duplicate the placemarker. Then we can set its flags and
2154 position and safely be using more than one. */
2155 save_token (args, duplicate_token (pfile, &placemarker_token));
2156 count++;
2157 }
2158
2159 *pcount = count;
2160 return token;
c5a04734
ZW
2161}
2162
041c3194
ZW
/* True if the argument starting at offset O of arglist A is empty:
   its first entry, or the entry after it when the first is a null
   marker, is a PLACEMARKER token.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2170
2171/* Parse the arguments making up a macro invocation. Nested arguments
2172 are automatically macro expanded, but immediate macros are not
2173 expanded; this enables e.g. operator # to work correctly. Returns
2174 non-zero on error. */
c5a04734 2175static int
041c3194 2176parse_args (pfile, hp, args)
c5a04734 2177 cpp_reader *pfile;
041c3194
ZW
2178 cpp_hashnode *hp;
2179 macro_args *args;
c5a04734 2180{
041c3194
ZW
2181 const cpp_token *token;
2182 const cpp_toklist *macro;
2183 unsigned int total = 0;
2184 unsigned int paren_context = pfile->cur_context;
2185 int argc = 0;
2186
2187 macro = hp->value.expansion;
2188 do
c5a04734 2189 {
041c3194 2190 unsigned int count;
c5a04734 2191
041c3194
ZW
2192 token = parse_arg (pfile, (argc + 1 == macro->paramc
2193 && (macro->flags & VAR_ARGS)),
2194 paren_context, args, &count);
2195 if (argc < macro->paramc)
c5a04734 2196 {
041c3194
ZW
2197 total += count;
2198 args->ends[argc] = total;
c5a04734 2199 }
041c3194 2200 argc++;
c5a04734 2201 }
041c3194 2202 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2203
041c3194
ZW
2204 if (token->type == CPP_EOF)
2205 {
2206 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2207 hp->length, hp->name);
2208 return 1;
2209 }
2210 else if (argc < macro->paramc)
2211 {
2212 /* A rest argument is allowed to not appear in the invocation at all.
2213 e.g. #define debug(format, args...) ...
2214 debug("string");
2215 This is exactly the same as if the rest argument had received no
563dd08a 2216 tokens - debug("string",); This extension is deprecated. */
041c3194 2217
563dd08a 2218 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
041c3194
ZW
2219 {
2220 /* Duplicate the placemarker. Then we can set its flags and
2221 position and safely be using more than one. */
2222 save_token (args, duplicate_token (pfile, &placemarker_token));
2223 args->ends[argc] = total + 1;
2224 return 0;
2225 }
2226 else
2227 {
2228 cpp_error (pfile,
2229 "insufficient arguments in invocation of macro \"%.*s\"",
2230 hp->length, hp->name);
2231 return 1;
2232 }
2233 }
2234 /* An empty argument to an empty function-like macro is fine. */
2235 else if (argc > macro->paramc
2236 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2237 {
2238 cpp_error (pfile,
2239 "too many arguments in invocation of macro \"%.*s\"",
2240 hp->length, hp->name);
2241 return 1;
2242 }
c5a04734 2243
041c3194
ZW
2244 return 0;
2245}
2246
2247/* Adds backslashes before all backslashes and double quotes appearing
2248 in strings. Non-printable characters are converted to octal. */
2249static U_CHAR *
2250quote_string (dest, src, len)
2251 U_CHAR *dest;
2252 const U_CHAR *src;
2253 unsigned int len;
2254{
2255 while (len--)
c5a04734 2256 {
041c3194 2257 U_CHAR c = *src++;
c5a04734 2258
041c3194 2259 if (c == '\\' || c == '"')
6ab3e7dd 2260 {
041c3194
ZW
2261 *dest++ = '\\';
2262 *dest++ = c;
2263 }
2264 else
2265 {
2266 if (ISPRINT (c))
2267 *dest++ = c;
2268 else
2269 {
2270 sprintf ((char *) dest, "\\%03o", c);
2271 dest += 4;
2272 }
6ab3e7dd 2273 }
c5a04734 2274 }
c5a04734 2275
041c3194 2276 return dest;
c5a04734
ZW
2277}
2278
041c3194
ZW
2279/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2280 CPP_STRING token containing TEXT in quoted form. */
2281static cpp_token *
2282make_string_token (token, text, len)
2283 cpp_token *token;
2284 const U_CHAR *text;
2285 unsigned int len;
2286{
2287 U_CHAR *buf;
2288
2289 buf = (U_CHAR *) xmalloc (len * 4);
2290 token->type = CPP_STRING;
2291 token->flags = 0;
bfb9dc7f
ZW
2292 token->val.str.text = buf;
2293 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2294 return token;
2295}
2296
2297/* Allocates and converts a temporary token to a CPP_NUMBER token,
2298 evaluating to NUMBER. */
2299static cpp_token *
2300alloc_number_token (pfile, number)
c5a04734 2301 cpp_reader *pfile;
041c3194 2302 int number;
c5a04734 2303{
041c3194
ZW
2304 cpp_token *result;
2305 char *buf;
2306
2307 result = get_temp_token (pfile);
2308 buf = xmalloc (20);
2309 sprintf (buf, "%d", number);
2310
2311 result->type = CPP_NUMBER;
2312 result->flags = 0;
bfb9dc7f
ZW
2313 result->val.str.text = (U_CHAR *) buf;
2314 result->val.str.len = strlen (buf);
041c3194
ZW
2315 return result;
2316}
c5a04734 2317
041c3194
ZW
2318/* Returns a temporary token from the temporary token store of PFILE. */
2319static cpp_token *
2320get_temp_token (pfile)
2321 cpp_reader *pfile;
2322{
2323 if (pfile->temp_used == pfile->temp_alloced)
2324 {
2325 if (pfile->temp_used == pfile->temp_cap)
2326 {
2327 pfile->temp_cap += pfile->temp_cap + 20;
2328 pfile->temp_tokens = (cpp_token **) xrealloc
2329 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2330 }
2331 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2332 (sizeof (cpp_token));
2333 }
c5a04734 2334
041c3194
ZW
2335 return pfile->temp_tokens[pfile->temp_used++];
2336}
2337
2338/* Release (not free) for re-use the temporary tokens of PFILE. */
2339static void
2340release_temp_tokens (pfile)
2341 cpp_reader *pfile;
2342{
2343 while (pfile->temp_used)
c5a04734 2344 {
041c3194 2345 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2346
bfb9dc7f 2347 if (token_spellings[token->type].type == SPELL_STRING)
c5a04734 2348 {
bfb9dc7f
ZW
2349 free ((char *) token->val.str.text);
2350 token->val.str.text = 0;
c5a04734
ZW
2351 }
2352 }
041c3194 2353}
c5a04734 2354
041c3194
ZW
2355/* Free all of PFILE's dynamically-allocated temporary tokens. */
2356void
2357_cpp_free_temp_tokens (pfile)
2358 cpp_reader *pfile;
2359{
2360 if (pfile->temp_tokens)
c5a04734 2361 {
041c3194
ZW
2362 /* It is possible, though unlikely (looking for '(' of a funlike
2363 macro into EOF), that we haven't released the tokens yet. */
2364 release_temp_tokens (pfile);
2365 while (pfile->temp_alloced)
2366 free (pfile->temp_tokens[--pfile->temp_alloced]);
2367 free (pfile->temp_tokens);
c5a04734
ZW
2368 }
2369
041c3194
ZW
2370 if (pfile->date)
2371 {
bfb9dc7f 2372 free ((char *) pfile->date->val.str.text);
041c3194 2373 free (pfile->date);
bfb9dc7f 2374 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2375 free (pfile->time);
2376 }
c5a04734
ZW
2377}
2378
041c3194
ZW
2379/* Copy TOKEN into a temporary token from PFILE's store. */
2380static cpp_token *
2381duplicate_token (pfile, token)
2382 cpp_reader *pfile;
2383 const cpp_token *token;
2384{
2385 cpp_token *result = get_temp_token (pfile);
c5a04734 2386
041c3194 2387 *result = *token;
bfb9dc7f 2388 if (token_spellings[token->type].type == SPELL_STRING)
041c3194 2389 {
bfb9dc7f
ZW
2390 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2391 memcpy (buff, token->val.str.text, token->val.str.len);
2392 result->val.str.text = buff;
041c3194
ZW
2393 }
2394 return result;
2395}
c5a04734 2396
041c3194
ZW
2397/* Determine whether two tokens can be pasted together, and if so,
2398 what the resulting token is. Returns CPP_EOF if the tokens cannot
2399 be pasted, or the appropriate type for the merged token if they
2400 can. */
2401static enum cpp_ttype
2402can_paste (pfile, token1, token2, digraph)
2403 cpp_reader * pfile;
2404 const cpp_token *token1, *token2;
2405 int* digraph;
c5a04734 2406{
041c3194
ZW
2407 enum cpp_ttype a = token1->type, b = token2->type;
2408 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2409
041c3194
ZW
2410 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2411 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2412
041c3194 2413 switch (a)
c5a04734 2414 {
041c3194
ZW
2415 case CPP_GREATER:
2416 if (b == a) return CPP_RSHIFT;
2417 if (b == CPP_QUERY && cxx) return CPP_MAX;
2418 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2419 break;
2420 case CPP_LESS:
2421 if (b == a) return CPP_LSHIFT;
2422 if (b == CPP_QUERY && cxx) return CPP_MIN;
2423 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2424 if (b == CPP_COLON)
2425 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2426 if (b == CPP_MOD)
2427 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2428 break;
c5a04734 2429
041c3194
ZW
2430 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2431 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2432 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2433
041c3194
ZW
2434 case CPP_MINUS:
2435 if (b == a) return CPP_MINUS_MINUS;
2436 if (b == CPP_GREATER) return CPP_DEREF;
2437 break;
2438 case CPP_COLON:
2439 if (b == a && cxx) return CPP_SCOPE;
2440 if (b == CPP_GREATER)
2441 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2442 break;
2443
2444 case CPP_MOD:
2445 if (b == CPP_GREATER)
2446 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2447 if (b == CPP_COLON)
2448 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2449 break;
2450 case CPP_DEREF:
2451 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2452 break;
2453 case CPP_DOT:
2454 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2455 if (b == CPP_NUMBER) return CPP_NUMBER;
2456 break;
2457
2458 case CPP_HASH:
2459 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2460 /* %:%: digraph */
2461 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2462 break;
2463
2464 case CPP_NAME:
2465 if (b == CPP_NAME) return CPP_NAME;
2466 if (b == CPP_NUMBER
bfb9dc7f 2467 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2468 if (b == CPP_CHAR
bfb9dc7f 2469 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2470 if (b == CPP_STRING
bfb9dc7f 2471 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2472 break;
2473
2474 case CPP_NUMBER:
2475 if (b == CPP_NUMBER) return CPP_NUMBER;
2476 if (b == CPP_NAME) return CPP_NUMBER;
2477 if (b == CPP_DOT) return CPP_NUMBER;
2478 /* Numbers cannot have length zero, so this is safe. */
2479 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2480 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2481 return CPP_NUMBER;
2482 break;
2483
2484 default:
2485 break;
c5a04734
ZW
2486 }
2487
041c3194
ZW
2488 return CPP_EOF;
2489}
2490
2491/* Check if TOKEN is to be ##-pasted with the token after it. */
2492static const cpp_token *
2493maybe_paste_with_next (pfile, token)
2494 cpp_reader *pfile;
2495 const cpp_token *token;
2496{
2497 cpp_token *pasted;
2498 const cpp_token *second;
2499 cpp_context *context = CURRENT_CONTEXT (pfile);
2500
2501 /* Is this token on the LHS of ## ? */
2502 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2503 && !(token->flags & PASTE_LEFT))
2504 return token;
2505
2506 /* Prevent recursion, and possibly pushing back more than one token. */
2507 if (pfile->paste_level)
2508 return token;
2509
2510 /* Suppress macro expansion for next token, but don't conflict with
2511 the other method of suppression. If it is an argument, macro
2512 expansion within the argument will still occur. */
2513 pfile->paste_level = pfile->cur_context;
2514 second = cpp_get_token (pfile);
2515 pfile->paste_level = 0;
2516
563dd08a
NB
2517 /* Ignore placemarker argument tokens (cannot be from an empty macro
2518 since macros are not expanded). */
041c3194
ZW
2519 if (token->type == CPP_PLACEMARKER)
2520 pasted = duplicate_token (pfile, second);
2521 else if (second->type == CPP_PLACEMARKER)
c5a04734 2522 {
563dd08a 2523 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
041c3194
ZW
2524 /* GCC has special extended semantics for a ## b where b is a
2525 varargs parameter: a disappears if b consists of no tokens.
2526 This extension is deprecated. */
563dd08a
NB
2527 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2528 && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2529 == (unsigned) mac_context->u.list->paramc))
041c3194
ZW
2530 {
2531 cpp_warning (pfile, "deprecated GNU ## extension used");
2532 pasted = duplicate_token (pfile, second);
2533 }
2534 else
2535 pasted = duplicate_token (pfile, token);
c5a04734 2536 }
041c3194
ZW
2537 else
2538 {
2539 int digraph = 0;
2540 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2541
041c3194
ZW
2542 if (type == CPP_EOF)
2543 {
2544 if (CPP_OPTION (pfile, warn_paste))
2545 cpp_warning (pfile,
2546 "pasting would not give a valid preprocessing token");
2547 _cpp_push_token (pfile, second);
2548 return token;
2549 }
c5a04734 2550
041c3194
ZW
2551 if (type == CPP_NAME || type == CPP_NUMBER)
2552 {
2553 /* Join spellings. */
bfb9dc7f 2554 U_CHAR *buf, *end;
c5a04734 2555
041c3194 2556 pasted = get_temp_token (pfile);
bfb9dc7f
ZW
2557 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2558 end = spell_token (pfile, token, buf);
2559 end = spell_token (pfile, second, end);
2560 *end = '\0';
041c3194 2561
bfb9dc7f
ZW
2562 if (type == CPP_NAME)
2563 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2564 else
2565 {
2566 pasted->val.str.text = uxstrdup (buf);
2567 pasted->val.str.len = end - buf;
2568 }
041c3194
ZW
2569 }
2570 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2571 pasted = duplicate_token (pfile, second);
2572 else
2573 {
2574 pasted = get_temp_token (pfile);
2575 pasted->val.integer = 0;
2576 }
2577
2578 pasted->type = type;
bfb9dc7f 2579 pasted->flags = digraph ? DIGRAPH : 0;
041c3194
ZW
2580 }
2581
2582 /* The pasted token gets the whitespace flags and position of the
2583 first token, the PASTE_LEFT flag of the second token, plus the
2584 PASTED flag to indicate it is the result of a paste. However, we
2585 want to preserve the DIGRAPH flag. */
2586 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2587 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2588 | (second->flags & PASTE_LEFT) | PASTED);
2589 pasted->col = token->col;
2590 pasted->line = token->line;
2591
2592 return maybe_paste_with_next (pfile, pasted);
2593}
2594
2595/* Convert a token sequence to a single string token according to the
2596 rules of the ISO C #-operator. */
2597#define INIT_SIZE 200
2598static cpp_token *
2599stringify_arg (pfile, token)
c5a04734 2600 cpp_reader *pfile;
041c3194 2601 const cpp_token *token;
c5a04734 2602{
041c3194
ZW
2603 cpp_token *result;
2604 unsigned char *main_buf;
2605 unsigned int prev_value, backslash_count = 0;
2606 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2607
041c3194
ZW
2608 prev_value = prevent_macro_expansion (pfile);
2609 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2610
041c3194
ZW
2611 result = get_temp_token (pfile);
2612 ASSIGN_FLAGS_AND_POS (result, token);
2613
2614 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2615 {
041c3194
ZW
2616 int escape;
2617 unsigned char *buf;
2618 unsigned int len = TOKEN_LEN (token);
c5a04734 2619
041c3194
ZW
2620 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2621 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2622 if (escape)
2623 len *= 4 + 1;
2624
2625 if (buf_used + len > buf_cap)
c5a04734 2626 {
041c3194
ZW
2627 buf_cap = buf_used + len + INIT_SIZE;
2628 main_buf = xrealloc (main_buf, buf_cap);
2629 }
c5a04734 2630
041c3194
ZW
2631 if (whitespace && (token->flags & PREV_WHITE))
2632 main_buf[buf_used++] = ' ';
c5a04734 2633
041c3194
ZW
2634 if (escape)
2635 buf = (unsigned char *) xmalloc (len);
2636 else
2637 buf = main_buf + buf_used;
2638
2639 len = spell_token (pfile, token, buf) - buf;
2640 if (escape)
2641 {
2642 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2643 free (buf);
2644 }
2645 else
2646 buf_used += len;
c5a04734 2647
041c3194
ZW
2648 whitespace = 1;
2649 if (token->type == CPP_BACKSLASH)
2650 backslash_count++;
2651 else
2652 backslash_count = 0;
2653 }
c5a04734 2654
041c3194
ZW
2655 /* Ignore the final \ of invalid string literals. */
2656 if (backslash_count & 1)
2657 {
2658 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2659 buf_used--;
2660 }
c5a04734 2661
041c3194 2662 result->type = CPP_STRING;
bfb9dc7f
ZW
2663 result->val.str.text = main_buf;
2664 result->val.str.len = buf_used;
041c3194
ZW
2665 restore_macro_expansion (pfile, prev_value);
2666 return result;
2667}
c5a04734 2668
041c3194
ZW
2669/* Allocate more room on the context stack of PFILE. */
2670static void
2671expand_context_stack (pfile)
2672 cpp_reader *pfile;
2673{
2674 pfile->context_cap += pfile->context_cap + 20;
2675 pfile->contexts = (cpp_context *)
2676 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2677}
c5a04734 2678
041c3194
ZW
2679/* Push the context of macro NODE onto the context stack. TOKEN is
2680 the CPP_NAME token invoking the macro. */
2681static const cpp_token *
2682push_macro_context (pfile, node, token)
2683 cpp_reader *pfile;
2684 cpp_hashnode *node;
2685 const cpp_token *token;
2686{
2687 unsigned char orig_flags;
2688 macro_args *args;
2689 cpp_context *context;
c5a04734 2690
041c3194 2691 if (pfile->cur_context > CPP_STACK_MAX)
c5a04734 2692 {
041c3194
ZW
2693 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2694 return token;
c5a04734
ZW
2695 }
2696
041c3194
ZW
2697 /* Token's flags may change when parsing args containing a nested
2698 invocation of this macro. */
2699 orig_flags = token->flags & (PREV_WHITE | BOL);
2700 args = 0;
2701 if (node->value.expansion->paramc >= 0)
c5a04734 2702 {
041c3194
ZW
2703 unsigned int error, prev_nme;
2704
2705 /* Allocate room for the argument contexts, and parse them. */
2706 args = (macro_args *) xmalloc (sizeof (macro_args));
2707 args->ends = (unsigned int *)
2708 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2709 args->tokens = 0;
2710 args->capacity = 0;
2711 args->used = 0;
2712 args->level = pfile->cur_context;
2713
2714 prev_nme = prevent_macro_expansion (pfile);
2715 pfile->args = args;
2716 error = parse_args (pfile, node, args);
2717 pfile->args = 0;
2718 restore_macro_expansion (pfile, prev_nme);
2719 if (error)
2720 {
2721 free_macro_args (args);
2722 return token;
2723 }
c5a04734 2724 }
c5a04734 2725
041c3194
ZW
2726 /* Now push its context. */
2727 pfile->cur_context++;
2728 if (pfile->cur_context == pfile->context_cap)
2729 expand_context_stack (pfile);
2730
2731 context = CURRENT_CONTEXT (pfile);
2732 context->u.list = node->value.expansion;
2733 context->args = args;
2734 context->posn = 0;
2735 context->count = context->u.list->tokens_used;
2736 context->level = pfile->cur_context;
2737 context->flags = 0;
2738 context->pushed_token = 0;
2739
2740 /* Set the flags of the first token. We know there must
2741 be one, empty macros are a single placemarker token. */
2742 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2743
2744 return cpp_get_token (pfile);
c5a04734
ZW
2745}
2746
041c3194
ZW
2747/* Push an argument to the current macro onto the context stack.
2748 TOKEN is the MACRO_ARG token representing the argument expansion. */
2749static const cpp_token *
2750push_arg_context (pfile, token)
2751 cpp_reader *pfile;
2752 const cpp_token *token;
c5a04734 2753{
041c3194
ZW
2754 cpp_context *context;
2755 macro_args *args;
2756
2757 pfile->cur_context++;
2758 if (pfile->cur_context == pfile->context_cap)
2759 expand_context_stack (pfile);
2760
2761 context = CURRENT_CONTEXT (pfile);
2762 args = context[-1].args;
2763
2764 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2765 context->u.arg = args->tokens + context->count;
2766 context->count = args->ends[token->val.aux] - context->count;
2767 context->args = 0;
2768 context->posn = 0;
2769 context->level = args->level;
2770 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2771 context->pushed_token = 0;
2772
2773 /* Set the flags of the first token. There is one. */
2774 {
2775 const cpp_token *first = context->u.arg[0];
2776 if (!first)
2777 first = context->u.arg[1];
c5a04734 2778
041c3194
ZW
2779 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2780 token->flags & (PREV_WHITE | BOL));
2781 }
c5a04734 2782
041c3194
ZW
2783 if (token->flags & STRINGIFY_ARG)
2784 return stringify_arg (pfile, token);
c5a04734 2785
041c3194
ZW
2786 if (token->flags & PASTE_LEFT)
2787 context->flags |= CONTEXT_PASTEL;
2788 if (pfile->paste_level)
2789 context->flags |= CONTEXT_PASTER;
d1d9a6bd 2790
041c3194 2791 return get_raw_token (pfile);
c5a04734
ZW
2792}
2793
041c3194
ZW
2794/* "Unget" a token. It is effectively inserted in the token queue and
2795 will be returned by the next call to get_raw_token. */
c5a04734 2796void
041c3194 2797_cpp_push_token (pfile, token)
c5a04734 2798 cpp_reader *pfile;
041c3194 2799 const cpp_token *token;
c5a04734 2800{
041c3194
ZW
2801 cpp_context *context = CURRENT_CONTEXT (pfile);
2802 if (context->pushed_token)
2803 cpp_ice (pfile, "two tokens pushed in a row");
2804 if (token->type != CPP_EOF)
2805 context->pushed_token = token;
2806 /* Don't push back a directive's CPP_EOF, step back instead. */
2807 else if (pfile->cur_context == 0)
2808 pfile->contexts[0].posn--;
2809}
c5a04734 2810
041c3194
ZW
2811/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2812 introducing the directive. */
2813static void
2814process_directive (pfile, token)
2815 cpp_reader *pfile;
2816 const cpp_token *token;
2817{
2818 const struct directive *d = pfile->token_list.directive;
2819 int prev_nme = 0;
2820
2821 /* Skip over the directive name. */
2822 if (token[1].type == CPP_NAME)
2823 _cpp_get_raw_token (pfile);
2824 else if (token[1].type != CPP_NUMBER)
2825 cpp_ice (pfile, "directive begins with %s?!",
2826 token_names[token[1].type]);
2827
2828 /* Flush pending tokens at this point, in case the directive produces
2829 output. XXX Directive output won't be visible to a direct caller of
2830 cpp_get_token. */
2831 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2832 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2833
2834 if (! (d->flags & EXPAND))
2835 prev_nme = prevent_macro_expansion (pfile);
2836 (void) (*d->handler) (pfile);
2837 if (! (d->flags & EXPAND))
2838 restore_macro_expansion (pfile, prev_nme);
2839 _cpp_skip_rest_of_line (pfile);
2840}
c5a04734 2841
041c3194
ZW
2842/* The external interface to return the next token. All macro
2843 expansion and directive processing is handled internally, the
2844 caller only ever sees the output after preprocessing. */
2845const cpp_token *
2846cpp_get_token (pfile)
2847 cpp_reader *pfile;
2848{
2849 const cpp_token *token;
2850 cpp_hashnode *node;
6ab3e7dd 2851
041c3194
ZW
2852 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
2853 for (;;)
2854 {
2855 token = get_raw_token (pfile);
2856 if (token->flags & BOL && token->type == CPP_HASH
2857 && pfile->token_list.directive)
c5a04734 2858 {
041c3194
ZW
2859 process_directive (pfile, token);
2860 continue;
c5a04734
ZW
2861 }
2862
041c3194
ZW
2863 /* Short circuit EOF. */
2864 if (token->type == CPP_EOF)
2865 return token;
2866
2867 if (pfile->skipping && ! pfile->token_list.directive)
c5a04734 2868 {
041c3194
ZW
2869 _cpp_skip_rest_of_line (pfile);
2870 continue;
2871 }
2872 break;
2873 }
c5a04734 2874
041c3194
ZW
2875 /* If there's a potential control macro and we get here, then that
2876 #ifndef didn't cover the entire file and its argument shouldn't
2877 be taken as a control macro. */
2878 pfile->potential_control_macro = 0;
c5a04734 2879
041c3194 2880 token = maybe_paste_with_next (pfile, token);
c5a04734 2881
041c3194
ZW
2882 if (token->type != CPP_NAME)
2883 return token;
c5a04734 2884
041c3194
ZW
2885 /* Is macro expansion disabled in general? */
2886 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2887 return token;
2888
bfb9dc7f 2889 node = token->val.node;
041c3194
ZW
2890 if (node->type == T_VOID)
2891 return token;
c5a04734 2892
041c3194
ZW
2893 if (node->type == T_MACRO)
2894 {
2895 if (is_macro_disabled (pfile, node->value.expansion, token))
2896 return token;
c5a04734 2897
041c3194
ZW
2898 return push_macro_context (pfile, node, token);
2899 }
2900 else
2901 return special_symbol (pfile, node, token);
2902}
c5a04734 2903
041c3194
ZW
2904/* Returns the next raw token, i.e. without performing macro
2905 expansion. Argument contexts are automatically entered. */
2906static const cpp_token *
2907get_raw_token (pfile)
2908 cpp_reader *pfile;
2909{
2910 const cpp_token *result;
2911 cpp_context *context = CURRENT_CONTEXT (pfile);
c5a04734 2912
041c3194
ZW
2913 if (context->pushed_token)
2914 {
2915 result = context->pushed_token;
2916 context->pushed_token = 0;
2917 }
2918 else if (context->posn == context->count)
2919 result = pop_context (pfile);
2920 else
2921 {
2922 if (IS_ARG_CONTEXT (context))
2923 {
2924 result = context->u.arg[context->posn++];
2925 if (result == 0)
c5a04734 2926 {
041c3194
ZW
2927 context->flags ^= CONTEXT_RAW;
2928 result = context->u.arg[context->posn++];
c5a04734 2929 }
041c3194
ZW
2930 return result; /* Cannot be a CPP_MACRO_ARG */
2931 }
2932 result = &context->u.list->tokens[context->posn++];
2933 }
c5a04734 2934
041c3194
ZW
2935 if (result->type == CPP_MACRO_ARG)
2936 result = push_arg_context (pfile, result);
2937 return result;
2938}
c5a04734 2939
041c3194
ZW
2940/* Internal interface to get the token without macro expanding. */
2941const cpp_token *
2942_cpp_get_raw_token (pfile)
2943 cpp_reader *pfile;
2944{
2945 int prev_nme = prevent_macro_expansion (pfile);
2946 const cpp_token *result = cpp_get_token (pfile);
2947 restore_macro_expansion (pfile, prev_nme);
2948 return result;
2949}
c5a04734 2950
041c3194
ZW
2951/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2952 list should be overwritten, or zero if we need to append
2953 (typically, if we are within the arguments to a macro, or looking
2954 for the '(' to start a function-like macro invocation). */
2955static int
2956lex_next (pfile, clear)
2957 cpp_reader *pfile;
2958 int clear;
2959{
2960 cpp_toklist *list = &pfile->token_list;
2961 const cpp_token *old_list = list->tokens;
2962 unsigned int old_used = list->tokens_used;
c5a04734 2963
91fcd158
NB
2964 /* If we are currently processing a directive, do not advance. 6.10
2965 paragraph 2: A new-line character ends the directive even if it
2966 occurs within what would otherwise be an invocation of a
2967 function-like macro.
2968
2969 It is possible that clear == 1 too; e.g. "#if funlike_macro ("
2970 since parse_args swallowed the directive's EOF. */
2971 if (list->directive)
2972 return 1;
2973
041c3194 2974 if (clear)
c5a04734 2975 {
041c3194
ZW
2976 /* Release all temporary tokens. */
2977 _cpp_clear_toklist (list);
2978 pfile->contexts[0].posn = 0;
2979 if (pfile->temp_used)
2980 release_temp_tokens (pfile);
c5a04734 2981 }
041c3194
ZW
2982
2983 lex_line (pfile, list);
2984 pfile->contexts[0].count = list->tokens_used;
c5a04734 2985
041c3194 2986 if (!clear && pfile->args)
c5a04734 2987 {
041c3194
ZW
2988 /* Fix up argument token pointers. */
2989 if (old_list != list->tokens)
2990 {
2991 unsigned int i;
2992
2993 for (i = 0; i < pfile->args->used; i++)
2994 {
2995 const cpp_token *token = pfile->args->tokens[i];
2996 if (token >= old_list && token < old_list + old_used)
2997 pfile->args->tokens[i] = (const cpp_token *)
2998 ((char *) token + ((char *) list->tokens - (char *) old_list));
2999 }
3000 }
3001
3002 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3003 tokens within the list of arguments that would otherwise act as
3004 preprocessing directives, the behavior is undefined.
3005
3006 This implementation will report a hard error and treat the
3007 'sequence of preprocessing tokens' as part of the macro argument,
3008 not a directive.
3009
3010 Note if pfile->args == 0, we're OK since we're only inside a
3011 macro argument after a '('. */
3012 if (list->directive)
3013 {
3014 cpp_error_with_line (pfile, list->tokens[old_used].line,
3015 list->tokens[old_used].col,
3016 "#%s may not be used inside a macro argument",
3017 list->directive->name);
91fcd158 3018 return 1;
041c3194 3019 }
c5a04734
ZW
3020 }
3021
041c3194 3022 return 0;
c5a04734
ZW
3023}
3024
041c3194
ZW
3025/* Pops a context of the context stack. If we're at the bottom, lexes
3026 the next logical line. Returns 1 if we're at the end of the
3027 argument list to the # operator, or if it is illegal to "overflow"
3028 into the rest of the file (e.g. 6.10.3.1.1). */
3029static int
3030do_pop_context (pfile)
3031 cpp_reader *pfile;
3032{
3033 cpp_context *context;
3fef5b2b 3034
041c3194
ZW
3035 if (pfile->cur_context == 0)
3036 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3037
3038 /* Argument contexts, when parsing args or handling # operator
3039 return CPP_EOF at the end. */
3040 context = CURRENT_CONTEXT (pfile);
3041 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3042 return 1;
3043
3044 /* Free resources when leaving macro contexts. */
3045 if (context->args)
3046 free_macro_args (context->args);
3047
3048 if (pfile->cur_context == pfile->no_expand_level)
3049 pfile->no_expand_level--;
3050 pfile->cur_context--;
3051
3052 return 0;
3053}
3054
3055/* Move down the context stack, and return the next raw token. */
3056static const cpp_token *
3057pop_context (pfile)
3058 cpp_reader *pfile;
3059{
3060 if (do_pop_context (pfile))
3061 return &eof_token;
3062 return get_raw_token (pfile);
3063}
3064
3065/* Turn off macro expansion at the current context level. */
3066static unsigned int
3067prevent_macro_expansion (pfile)
3068 cpp_reader *pfile;
3069{
3070 unsigned int prev_value = pfile->no_expand_level;
3071 pfile->no_expand_level = pfile->cur_context;
3072 return prev_value;
3073}
3074
3075/* Restore macro expansion to its previous state. */
3076static void
3077restore_macro_expansion (pfile, prev_value)
3078 cpp_reader *pfile;
3079 unsigned int prev_value;
3080{
3081 pfile->no_expand_level = prev_value;
3082}
3083
3084/* Used by cpperror.c to obtain the correct line and column to report
3085 in a diagnostic. */
3086unsigned int
3087_cpp_get_line (pfile, pcol)
3088 cpp_reader *pfile;
3089 unsigned int *pcol;
3090{
3091 unsigned int index;
3092 const cpp_token *cur_token;
3093
3094 if (pfile->in_lex_line)
3095 index = pfile->token_list.tokens_used;
3096 else
3097 index = pfile->contexts[0].posn;
3098
3099 cur_token = &pfile->token_list.tokens[index - 1];
3100 if (pcol)
3101 *pcol = cur_token->col;
3102 return cur_token->line;
3103}
3104
3105#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3106static const char * const monthnames[] =
3107{
3108 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3109 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3110};
3111
3112/* Handle builtin macros like __FILE__. */
3113static const cpp_token *
3114special_symbol (pfile, node, token)
3115 cpp_reader *pfile;
3116 cpp_hashnode *node;
d1d9a6bd 3117 const cpp_token *token;
3fef5b2b 3118{
041c3194
ZW
3119 cpp_token *result;
3120 cpp_buffer *ip;
3fef5b2b 3121
041c3194 3122 switch (node->type)
3fef5b2b 3123 {
041c3194
ZW
3124 case T_FILE:
3125 case T_BASE_FILE:
3fef5b2b 3126 {
041c3194 3127 const char *file;
3fef5b2b 3128
041c3194
ZW
3129 ip = CPP_BUFFER (pfile);
3130 if (ip == 0)
3131 file = "";
3fef5b2b 3132 else
041c3194
ZW
3133 {
3134 if (node->type == T_BASE_FILE)
3135 while (CPP_PREV_BUFFER (ip) != NULL)
3136 ip = CPP_PREV_BUFFER (ip);
3137
3138 file = ip->nominal_fname;
3139 }
3140 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3141 strlen (file));
3142 }
3143 break;
3fef5b2b 3144
041c3194
ZW
3145 case T_INCLUDE_LEVEL:
3146 {
3147 int true_indepth = 0;
3148
3149 /* Do not count the primary source file in the include level. */
3150 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3151 while (ip)
3152 {
3153 true_indepth++;
3154 ip = CPP_PREV_BUFFER (ip);
3155 }
3156 result = alloc_number_token (pfile, true_indepth);
3fef5b2b
NB
3157 }
3158 break;
3159
041c3194
ZW
3160 case T_SPECLINE:
3161 /* If __LINE__ is embedded in a macro, it must expand to the
3162 line of the macro's invocation, not its definition.
3163 Otherwise things like assert() will not work properly. */
3164 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3165 break;
3166
041c3194 3167 case T_STDC:
3fef5b2b 3168 {
041c3194 3169 int stdc = 1;
3fef5b2b 3170
041c3194
ZW
3171#ifdef STDC_0_IN_SYSTEM_HEADERS
3172 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3173 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3174 stdc = 0;
3175#endif
3176 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3177 }
3178 break;
3179
041c3194
ZW
3180 case T_DATE:
3181 case T_TIME:
3182 if (pfile->date == 0)
3183 {
3184 /* Allocate __DATE__ and __TIME__ from permanent storage,
3185 and save them in pfile so we don't have to do this again.
3186 We don't generate these strings at init time because
3187 time() and localtime() are very slow on some systems. */
3188 time_t tt = time (NULL);
3189 struct tm *tb = localtime (&tt);
3190
3191 pfile->date = make_string_token
3192 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3193 pfile->time = make_string_token
3194 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3195
bfb9dc7f 3196 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3197 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3198 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3199 tb->tm_hour, tb->tm_min, tb->tm_sec);
3200 }
3201 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3202 break;
3203
041c3194 3204 case T_POISON:
bfb9dc7f 3205 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3206 return token;
3207
3208 default:
3209 cpp_ice (pfile, "invalid special hash type");
3210 return token;
3fef5b2b
NB
3211 }
3212
041c3194
ZW
3213 ASSIGN_FLAGS_AND_POS (result, token);
3214 return result;
3215}
3216#undef DSC
3217
3218/* Dump the original user's spelling of argument index ARG_NO to the
3219 macro whose expansion is LIST. */
3220static void
3221dump_param_spelling (pfile, list, arg_no)
3222 cpp_reader *pfile;
3223 const cpp_toklist *list;
3224 unsigned int arg_no;
3225{
3226 const U_CHAR *param = list->namebuf;
3227
3228 while (arg_no--)
3229 param += ustrlen (param) + 1;
3230 CPP_PUTS (pfile, param, ustrlen (param));
3fef5b2b
NB
3231}
3232
041c3194 3233/* Dump a token list to the output. */
c5a04734 3234void
041c3194
ZW
3235_cpp_dump_list (pfile, list, token, flush)
3236 cpp_reader *pfile;
3237 const cpp_toklist *list;
3238 const cpp_token *token;
3239 int flush;
c5a04734 3240{
041c3194
ZW
3241 const cpp_token *limit = list->tokens + list->tokens_used;
3242 const cpp_token *prev = 0;
c5a04734 3243
041c3194
ZW
3244 /* Avoid the CPP_EOF. */
3245 if (list->directive)
3246 limit--;
c5a04734 3247
041c3194 3248 while (token < limit)
c5a04734 3249 {
041c3194
ZW
3250 if (token->type == CPP_MACRO_ARG)
3251 {
3252 if (token->flags & PREV_WHITE)
3253 CPP_PUTC (pfile, ' ');
3254 if (token->flags & STRINGIFY_ARG)
3255 CPP_PUTC (pfile, '#');
3256 dump_param_spelling (pfile, list, token->val.aux);
3257 }
c5a04734 3258 else
041c3194
ZW
3259 output_token (pfile, token, prev);
3260 if (token->flags & PASTE_LEFT)
3261 CPP_PUTS (pfile, " ##", 3);
3262 prev = token;
3263 token++;
c5a04734 3264 }
041c3194
ZW
3265
3266 if (flush && pfile->printer)
3267 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
c5a04734
ZW
3268}
3269
041c3194
ZW
3270/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3271 if it hasn't happened already. */
3272
3273void
3274_cpp_init_input_buffer (pfile)
3275 cpp_reader *pfile;
3276{
3277 init_trigraph_map ();
3278 pfile->context_cap = 20;
3279 pfile->contexts = (cpp_context *)
3280 xmalloc (pfile->context_cap * sizeof (cpp_context));
3281 pfile->cur_context = 0;
3282 pfile->contexts[0].u.list = &pfile->token_list;
3283
3284 pfile->contexts[0].posn = 0;
3285 pfile->contexts[0].count = 0;
3286 pfile->no_expand_level = UINT_MAX;
3287
3288 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3289}
3290
3291/* Moves to the end of the directive line, popping contexts as
3292 necessary. */
3293void
3294_cpp_skip_rest_of_line (pfile)
3295 cpp_reader *pfile;
3296{
3297 /* Get to base context. Clear parsing args and each contexts flags,
3298 since these can cause pop_context to return without popping. */
3299 pfile->no_expand_level = UINT_MAX;
3300 while (pfile->cur_context != 0)
c5a04734 3301 {
041c3194
ZW
3302 pfile->contexts[pfile->cur_context].flags = 0;
3303 do_pop_context (pfile);
c5a04734 3304 }
041c3194
ZW
3305
3306 pfile->contexts[pfile->cur_context].count = 0;
3307 pfile->contexts[pfile->cur_context].posn = 0;
3308 pfile->token_list.directive = 0;
c5a04734
ZW
3309}
3310
041c3194
ZW
3311/* Directive handler wrapper used by the command line option
3312 processor. */
3313void
3314_cpp_run_directive (pfile, dir, buf, count)
3315 cpp_reader *pfile;
3316 const struct directive *dir;
3317 const char *buf;
3318 size_t count;
3319{
3320 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3321 {
3322 unsigned int prev_lvl = 0;
3323 /* scan the line now, else prevent_macro_expansion won't work */
3324 do_pop_context (pfile);
3325 if (! (dir->flags & EXPAND))
3326 prev_lvl = prevent_macro_expansion (pfile);
3327
3328 (void) (*dir->handler) (pfile);
3329
3330 if (! (dir->flags & EXPAND))
3331 restore_macro_expansion (pfile, prev_lvl);
3332
3333 _cpp_skip_rest_of_line (pfile);
3334 cpp_pop_buffer (pfile);
3335 }
3336}
This page took 6.792302 seconds and 5 git commands to generate.