]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
new tests results from changes
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194
ZW
27o -dM and with _cpp_dump_list: too many \n output.
28o Put a printer object in cpp_reader?
29o Check line numbers assigned to all errors.
30o Replace strncmp with memcmp almost everywhere.
31o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
041c3194
ZW
32o Convert do_ functions to return void. Kaveh thinks it's OK, and said he'll
33 give it a run when we've got some code.
041c3194
ZW
34o Distinguish integers, floats, and 'other' pp-numbers.
35o Store ints and char constants as binary values.
36o New command-line assertion syntax.
041c3194
ZW
37o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39o Comment all functions, and describe macro expansion algorithm.
40o Move as much out of header files as possible.
41o Remove single quote pairs `', and some '', from diagnostics.
42o Correct pastability test for CPP_NAME and CPP_NUMBER.
43
44*/
45
45b966db
ZW
46#include "config.h"
47#include "system.h"
48#include "intl.h"
49#include "cpplib.h"
50#include "cpphash.h"
041c3194 51#include "symcat.h"
45b966db 52
041c3194
ZW
53#define auto_expand_name_space(list) \
54 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
f2d5f0cc
ZW
55static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
56 size_t, FILE *));
041c3194 57static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
1368ee70 58 unsigned int));
041c3194 59static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 60 unsigned int));
f2d5f0cc 61
041c3194
ZW
62static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
63static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
64 unsigned char *));
65static const unsigned char *backslash_start PARAMS ((cpp_reader *,
66 const unsigned char *));
041c3194
ZW
67static int skip_block_comment PARAMS ((cpp_reader *));
68static int skip_line_comment PARAMS ((cpp_reader *));
b8f41010 69static void skip_whitespace PARAMS ((cpp_reader *, int));
bfb9dc7f
ZW
70static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
71 const U_CHAR *, const U_CHAR *));
72static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
041c3194
ZW
73static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
74 unsigned int));
b8f41010 75static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
76static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
77 const unsigned char *,
ad265aa4 78 unsigned int, unsigned int));
041c3194
ZW
79static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
80static int lex_next PARAMS ((cpp_reader *, int));
81static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
82 const cpp_token *));
b8f41010 83
041c3194
ZW
84static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
85static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 86static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194
ZW
87 unsigned char *));
88static void output_token PARAMS ((cpp_reader *, const cpp_token *,
89 const cpp_token *));
b8f41010
NB
90typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
91 cpp_token *));
041c3194
ZW
92static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
93 unsigned int));
94static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
95static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
96 const cpp_token *));
97static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
98static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
99 const cpp_token *));
100static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
101 const cpp_token *, int *));
102static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
103static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
104static cpp_token *get_temp_token PARAMS ((cpp_reader *));
105static void release_temp_tokens PARAMS ((cpp_reader *));
106static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
107static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 108
bfb9dc7f
ZW
109#define INIT_TOKEN_STR(list, token) \
110 do {(token)->val.str.len = 0; \
111 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
d1d9a6bd 112 } while (0)
b8f41010 113
041c3194
ZW
114#define VALID_SIGN(c, prevc) \
115 (((c) == '+' || (c) == '-') && \
116 ((prevc) == 'e' || (prevc) == 'E' \
117 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
118
b8f41010
NB
119/* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
120 character, if any, is in buffer. */
041c3194 121
b8f41010 122#define handle_newline(cur, limit, c) \
041c3194 123 do { \
b8f41010
NB
124 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
125 (cur)++; \
041c3194
ZW
126 pfile->buffer->lineno++; \
127 pfile->buffer->line_base = (cur); \
6ab3e7dd 128 pfile->col_adjust = 0; \
041c3194 129 } while (0)
b8f41010 130
041c3194 131#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
b8f41010
NB
132#define PREV_TOKEN_TYPE (cur_token[-1].type)
133
f617b8e2
NB
134#define PUSH_TOKEN(ttype) cur_token++->type = ttype
135#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
136#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
137#define BACKUP_DIGRAPH(ttype) do { \
138 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
139
140/* An upper bound on the number of bytes needed to spell a token,
141 including preceding whitespace. */
bfb9dc7f
ZW
142#define TOKEN_SPELL(token) token_spellings[(token)->type].type
143#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
144 ? (token)->val.str.len \
145 : (TOKEN_SPELL(token) == SPELL_IDENT \
146 ? (token)->val.node->length \
147 : 0)))
f617b8e2 148
f617b8e2 149#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
150#define I(e, s) {SPELL_IDENT, s},
151#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
152#define C(e, s) {SPELL_CHAR, s},
153#define N(e, s) {SPELL_NONE, s},
b8f41010 154
041c3194
ZW
155const struct token_spelling
156token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
157
158#undef T
159#undef I
160#undef S
161#undef C
162#undef N
163
164/* For debugging: the internal names of the tokens. */
b449f23a
KG
165#define T(e, s) U STRINGX(e),
166#define I(e, s) U STRINGX(e),
167#define S(e, s) U STRINGX(e),
168#define C(e, s) U STRINGX(e),
169#define N(e, s) U STRINGX(e),
041c3194 170
cf00a885 171const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
b8f41010
NB
172
173#undef T
5d7ee2fa
NB
174#undef I
175#undef S
b8f41010
NB
176#undef C
177#undef N
b8f41010 178
041c3194
ZW
179/* The following table is used by trigraph_ok/trigraph_replace. If we
180 have designated initializers, it can be constant data; otherwise,
181 it is set up at runtime by _cpp_init_input_buffer. */
182
183#if (GCC_VERSION >= 2007)
184#define init_trigraph_map() /* nothing */
185#define TRIGRAPH_MAP \
186__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
187#define END };
188#define s(p, v) [p] = v,
189#else
190#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
191 static void init_trigraph_map PARAMS ((void)) { \
192 unsigned char *x = trigraph_map;
193#define END }
194#define s(p, v) x[p] = v;
195#endif
196
197TRIGRAPH_MAP
198 s('=', '#') s(')', ']') s('!', '|')
199 s('(', '[') s('\'', '^') s('>', '}')
200 s('/', '\\') s('<', '{') s('-', '~')
201END
202
203#undef TRIGRAPH_MAP
204#undef END
205#undef s
206
45b966db
ZW
207/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
208
209void
210_cpp_grow_token_buffer (pfile, n)
211 cpp_reader *pfile;
212 long n;
213{
214 long old_written = CPP_WRITTEN (pfile);
215 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
216 pfile->token_buffer = (U_CHAR *)
217 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
218 CPP_SET_WRITTEN (pfile, old_written);
219}
220
f2d5f0cc
ZW
221/* Deal with the annoying semantics of fwrite. */
222static void
223safe_fwrite (pfile, buf, len, fp)
224 cpp_reader *pfile;
225 const U_CHAR *buf;
226 size_t len;
227 FILE *fp;
228{
229 size_t count;
45b966db 230
f2d5f0cc
ZW
231 while (len)
232 {
233 count = fwrite (buf, 1, len, fp);
234 if (count == 0)
235 goto error;
236 len -= count;
237 buf += count;
238 }
239 return;
240
241 error:
242 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
243}
244
245/* Notify the compiler proper that the current line number has jumped,
246 or the current file name has changed. */
247
248static void
1368ee70 249output_line_command (pfile, print, line)
45b966db 250 cpp_reader *pfile;
f2d5f0cc 251 cpp_printer *print;
1368ee70 252 unsigned int line;
45b966db 253{
041c3194 254 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc
ZW
255 enum { same = 0, enter, leave, rname } change;
256 static const char * const codes[] = { "", " 1", " 2", "" };
257
041c3194
ZW
258 if (line == 0)
259 return;
260
261 /* End the previous line of text. */
262 if (pfile->need_newline)
263 putc ('\n', print->outf);
264 pfile->need_newline = 0;
265
f2d5f0cc
ZW
266 if (CPP_OPTION (pfile, no_line_commands))
267 return;
268
041c3194
ZW
269 /* If ip is null, we've been called from cpp_finish, and they just
270 needed the final flush and trailing newline. */
271 if (!ip)
272 return;
273
fb753f88 274 if (pfile->include_depth == print->last_id)
54bef41d 275 {
fb753f88
JJ
276 /* Determine whether the current filename has changed, and if so,
277 how. 'nominal_fname' values are unique, so they can be compared
278 by comparing pointers. */
279 if (ip->nominal_fname == print->last_fname)
280 change = same;
281 else
0e500c78 282 change = rname;
fb753f88
JJ
283 }
284 else
285 {
286 if (pfile->include_depth > print->last_id)
287 change = enter;
f2d5f0cc 288 else
fb753f88
JJ
289 change = leave;
290 print->last_id = pfile->include_depth;
45b966db 291 }
fb753f88
JJ
292 print->last_fname = ip->nominal_fname;
293
f2d5f0cc
ZW
294 /* If the current file has not changed, we can output a few newlines
295 instead if we want to increase the line number by a small amount.
296 We cannot do this if print->lineno is zero, because that means we
297 haven't output any line commands yet. (The very first line
298 command output is a `same_file' command.) */
041c3194 299 if (change == same && print->lineno > 0
f2d5f0cc 300 && line >= print->lineno && line < print->lineno + 8)
45b966db 301 {
f2d5f0cc 302 while (line > print->lineno)
45b966db 303 {
f2d5f0cc
ZW
304 putc ('\n', print->outf);
305 print->lineno++;
45b966db 306 }
f2d5f0cc 307 return;
45b966db 308 }
f2d5f0cc
ZW
309
310#ifndef NO_IMPLICIT_EXTERN_C
311 if (CPP_OPTION (pfile, cplusplus))
312 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
313 codes[change],
c31a6508
ZW
314 ip->inc->sysp ? " 3" : "",
315 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
316 else
317#endif
318 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
319 codes[change],
c31a6508 320 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
321 print->lineno = line;
322}
323
324/* Write the contents of the token_buffer to the output stream, and
325 clear the token_buffer. Also handles generating line commands and
326 keeping track of file transitions. */
327
328void
041c3194 329cpp_output_tokens (pfile, print, line)
f2d5f0cc
ZW
330 cpp_reader *pfile;
331 cpp_printer *print;
041c3194 332 unsigned int line;
f2d5f0cc 333{
f6fab919
ZW
334 if (CPP_WRITTEN (pfile) - print->written)
335 {
f6fab919
ZW
336 safe_fwrite (pfile, pfile->token_buffer,
337 CPP_WRITTEN (pfile) - print->written, print->outf);
041c3194
ZW
338 pfile->need_newline = 1;
339 if (print->lineno)
340 print->lineno++;
45b966db 341
041c3194 342 CPP_SET_WRITTEN (pfile, print->written);
f2d5f0cc 343 }
041c3194 344 output_line_command (pfile, print, line);
45b966db
ZW
345}
346
c56c2073 347/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
348
349void
350cpp_scan_buffer_nooutput (pfile)
351 cpp_reader *pfile;
352{
f2d5f0cc 353 unsigned int old_written = CPP_WRITTEN (pfile);
041c3194
ZW
354 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
355
f2d5f0cc
ZW
356 for (;;)
357 {
041c3194
ZW
358 /* In no-output mode, we can ignore everything but directives. */
359 const cpp_token *token = cpp_get_token (pfile);
360 if (token->type == CPP_EOF)
361 {
362 cpp_pop_buffer (pfile);
363 if (CPP_BUFFER (pfile) == stop)
364 break;
365 }
366 _cpp_skip_rest_of_line (pfile);
f2d5f0cc
ZW
367 }
368 CPP_SET_WRITTEN (pfile, old_written);
369}
370
c56c2073 371/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
372
373void
374cpp_scan_buffer (pfile, print)
375 cpp_reader *pfile;
376 cpp_printer *print;
377{
c56c2073 378 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 379 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
380
381 for (;;)
382 {
383 token = cpp_get_token (pfile);
041c3194 384 if (token->type == CPP_EOF)
f2d5f0cc 385 {
041c3194
ZW
386 cpp_pop_buffer (pfile);
387 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 388 return;
041c3194
ZW
389 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
390 prev = 0;
391 continue;
392 }
393
394 if (token->flags & BOL)
395 {
396 cpp_output_tokens (pfile, print, pfile->token_list.line);
397 prev = 0;
f2d5f0cc 398 }
041c3194
ZW
399
400 output_token (pfile, token, prev);
401 prev = token;
f2d5f0cc
ZW
402 }
403}
404
041c3194
ZW
405/* Helper routine used by parse_include, which can't see spell_token.
406 Reinterpret the current line as an h-char-sequence (< ... >); we are
407 looking at the first token after the <. */
408const cpp_token *
409_cpp_glue_header_name (pfile)
45b966db
ZW
410 cpp_reader *pfile;
411{
041c3194
ZW
412 unsigned int written = CPP_WRITTEN (pfile);
413 const cpp_token *t;
414 cpp_token *hdr;
415 U_CHAR *buf;
416 size_t len;
417
418 for (;;)
419 {
420 t = cpp_get_token (pfile);
421 if (t->type == CPP_GREATER || t->type == CPP_EOF)
422 break;
423
424 CPP_RESERVE (pfile, TOKEN_LEN (t));
425 if (t->flags & PREV_WHITE)
426 CPP_PUTC_Q (pfile, ' ');
427 pfile->limit = spell_token (pfile, t, pfile->limit);
428 }
429
430 if (t->type == CPP_EOF)
431 cpp_error (pfile, "missing terminating > character");
45b966db 432
041c3194
ZW
433 len = CPP_WRITTEN (pfile) - written;
434 buf = xmalloc (len);
435 memcpy (buf, pfile->token_buffer + written, len);
436 CPP_SET_WRITTEN (pfile, written);
437
438 hdr = get_temp_token (pfile);
439 hdr->type = CPP_HEADER_NAME;
440 hdr->flags = 0;
bfb9dc7f
ZW
441 hdr->val.str.text = buf;
442 hdr->val.str.len = len;
041c3194 443 return hdr;
45b966db
ZW
444}
445
1368ee70
ZW
446/* Token-buffer helper functions. */
447
d1d9a6bd
NB
448/* Expand a token list's string space. It is *vital* that
449 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
450void
451_cpp_expand_name_space (list, len)
1368ee70 452 cpp_toklist *list;
c5a04734
ZW
453 unsigned int len;
454{
f617b8e2 455 const U_CHAR *old_namebuf;
f617b8e2
NB
456
457 old_namebuf = list->namebuf;
c5a04734
ZW
458 list->name_cap += len;
459 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
460
461 /* Fix up token text pointers. */
79f50f2a 462 if (list->namebuf != old_namebuf)
f617b8e2
NB
463 {
464 unsigned int i;
465
466 for (i = 0; i < list->tokens_used; i++)
bfb9dc7f
ZW
467 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
468 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 469 }
1368ee70
ZW
470}
471
041c3194
ZW
472/* If there is not enough room for LEN more characters, expand the
473 list by just enough to have room for LEN characters. */
474void
475_cpp_reserve_name_space (list, len)
476 cpp_toklist *list;
477 unsigned int len;
478{
479 unsigned int room = list->name_cap - list->name_used;
480
481 if (room < len)
482 _cpp_expand_name_space (list, len - room);
483}
484
1368ee70 485/* Expand the number of tokens in a list. */
d1d9a6bd
NB
486void
487_cpp_expand_token_space (list, count)
1368ee70 488 cpp_toklist *list;
d1d9a6bd 489 unsigned int count;
1368ee70 490{
d1d9a6bd
NB
491 unsigned int n;
492
493 list->tokens_cap += count;
494 n = list->tokens_cap;
15dad1d9 495 if (list->flags & LIST_OFFSET)
d1d9a6bd 496 list->tokens--, n++;
1368ee70 497 list->tokens = (cpp_token *)
d1d9a6bd 498 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
499 if (list->flags & LIST_OFFSET)
500 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
501}
502
d1d9a6bd
NB
503/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
504 an extra token in front of the token list, as this allows the lexer
505 to always peek at the previous token without worrying about
506 underflowing the list, and some initial space. Otherwise, no
507 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 508void
d1d9a6bd 509_cpp_init_toklist (list, flags)
1368ee70 510 cpp_toklist *list;
d1d9a6bd 511 int flags;
1368ee70 512{
d1d9a6bd
NB
513 if (flags == NO_DUMMY_TOKEN)
514 {
515 list->tokens_cap = 0;
041c3194 516 list->tokens = 0;
d1d9a6bd 517 list->name_cap = 0;
041c3194 518 list->namebuf = 0;
d1d9a6bd
NB
519 list->flags = 0;
520 }
521 else
522 {
523 /* Initialize token space. Put a dummy token before the start
524 that will fail matches. */
525 list->tokens_cap = 256; /* 4K's worth. */
526 list->tokens = (cpp_token *)
527 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
528 list->tokens[0].type = CPP_EOF;
529 list->tokens++;
530
531 /* Initialize name space. */
532 list->name_cap = 1024;
041c3194 533 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
534 list->flags = LIST_OFFSET;
535 }
15dad1d9 536
15dad1d9
ZW
537 _cpp_clear_toklist (list);
538}
1368ee70 539
15dad1d9
ZW
540/* Clear a token list. */
541void
542_cpp_clear_toklist (list)
543 cpp_toklist *list;
544{
c5a04734
ZW
545 list->tokens_used = 0;
546 list->name_used = 0;
041c3194
ZW
547 list->directive = 0;
548 list->paramc = 0;
549 list->params_len = 0;
15dad1d9
ZW
550 list->flags &= LIST_OFFSET; /* clear all but that one */
551}
552
553/* Free a token list. Does not free the list itself, which may be
554 embedded in a larger structure. */
555void
556_cpp_free_toklist (list)
041c3194 557 const cpp_toklist *list;
15dad1d9 558{
15dad1d9
ZW
559 if (list->flags & LIST_OFFSET)
560 free (list->tokens - 1); /* Backup over dummy token. */
561 else
562 free (list->tokens);
563 free (list->namebuf);
1368ee70
ZW
564}
565
15dad1d9
ZW
566/* Compare two tokens. */
567int
568_cpp_equiv_tokens (a, b)
569 const cpp_token *a, *b;
570{
041c3194
ZW
571 if (a->type == b->type && a->flags == b->flags)
572 switch (token_spellings[a->type].type)
573 {
574 default: /* Keep compiler happy. */
575 case SPELL_OPERATOR:
576 return 1;
577 case SPELL_CHAR:
578 case SPELL_NONE:
579 return a->val.aux == b->val.aux; /* arg_no or character. */
580 case SPELL_IDENT:
bfb9dc7f 581 return a->val.node == b->val.node;
041c3194 582 case SPELL_STRING:
bfb9dc7f
ZW
583 return (a->val.str.len == b->val.str.len
584 && !memcmp (a->val.str.text, b->val.str.text,
585 a->val.str.len));
041c3194 586 }
15dad1d9 587
041c3194 588 return 0;
15dad1d9
ZW
589}
590
591/* Compare two token lists. */
592int
593_cpp_equiv_toklists (a, b)
594 const cpp_toklist *a, *b;
595{
596 unsigned int i;
597
041c3194
ZW
598 if (a->tokens_used != b->tokens_used
599 || a->flags != b->flags
600 || a->paramc != b->paramc)
15dad1d9
ZW
601 return 0;
602
603 for (i = 0; i < a->tokens_used; i++)
604 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
605 return 0;
606 return 1;
607}
608
041c3194 609/* Utility routine:
9e62c811 610
bfb9dc7f
ZW
611 Compares, the token TOKEN to the NUL-terminated string STRING.
612 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 613
041c3194 614int
bfb9dc7f
ZW
615cpp_ideq (token, string)
616 const cpp_token *token;
041c3194
ZW
617 const char *string;
618{
bfb9dc7f 619 if (token->type != CPP_NAME)
041c3194 620 return 0;
bfb9dc7f
ZW
621
622 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 623}
1368ee70 624
041c3194 625/* Lexing algorithm.
45b966db 626
041c3194
ZW
627 The original lexer in cpplib was made up of two passes: a first pass
628 that replaced trigraphs and deleted escaped newlines, and a second
629 pass that tokenized the result of the first pass. Tokenisation was
630 performed by peeking at the next character in the input stream. For
631 example, if the input stream contained "!=", the handler for the !
632 character would peek at the next character, and if it were a '='
633 would skip over it, and return a "!=" token, otherwise it would
634 return just the "!" token.
61474454 635
041c3194
ZW
636 To implement a single-pass lexer, this peeking ahead is unworkable.
637 An arbitrary number of escaped newlines, and trigraphs (in particular
638 ??/ which translates to the escape \), could separate the '!' and '='
639 in the input stream, yet the next token is still a "!=".
61474454 640
041c3194
ZW
641 Suppose instead that we lex by one logical line at a time, producing
642 a token list or stack for each logical line, and when seeing the '!'
643 push a CPP_NOT token on the list. Then if the '!' is part of a
644 longer token ("!=") we know we must see the remainder of the token by
645 the time we reach the end of the logical line. Thus we can have the
646 '=' handler look at the previous token (at the end of the list / top
647 of the stack) and see if it is a "!" token, and if so, instead of
648 pushing a "=" token revise the existing token to be a "!=" token.
61474454 649
041c3194
ZW
650 This works in the presence of escaped newlines, because the '\' would
651 have been pushed on the top of the stack as a CPP_BACKSLASH. The
652 newline ('\n' or '\r') handler looks at the token at the top of the
653 stack to see if it is a CPP_BACKSLASH, and if so discards both.
654 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
655 the '=' handler would never see any intervening escaped newlines.
45b966db 656
041c3194
ZW
657 To make trigraphs work in this context (trigraphs have the highest
658 precedence and are converted before anything else), the '?' handler does
659 lookahead to see if it is a trigraph, and if so skips the trigraph
660 and pushes the token it represents onto the top of the stack. This
661 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 662
041c3194
ZW
663 To the preprocessor, whitespace is only significant to the point of
664 knowing whether whitespace precedes a particular token. For example,
665 the '=' handler needs to know whether there was whitespace between it
666 and a "!" token on the top of the stack, to make the token conversion
667 decision correctly. So each token has a PREV_WHITE flag to
668 indicate this - the standard permits consecutive whitespace to be
669 regarded as a single space. The compiler front ends are not
670 interested in whitespace at all; they just require a token stream.
671 Another place where whitespace is significant to the preprocessor is
672 a #define statement - if there is whitespace between the macro name
673 and an initial "(" token the macro is "object-like", otherwise it is
674 a function-like macro that takes arguments.
675
676 However, all is not rosy. Parsing of identifiers, numbers, comments
677 and strings becomes trickier because of the possibility of raw
678 trigraphs and escaped newlines in the input stream.
679
680 The trigraphs are three consecutive characters beginning with two
681 question marks. A question mark is not valid as part of a number or
682 identifier, so parsing of a number or identifier terminates normally
683 upon reaching it, returning to the mainloop which handles the
684 trigraph just like it would in any other position. Similarly for the
685 backslash of a backslash-newline combination. So we just need the
686 escaped-newline dropper in the mainloop to check if the token on the
687 top of the stack after dropping the escaped newline is a number or
688 identifier, and if so to continue the processing it as if nothing had
689 happened.
690
691 For strings, we replace trigraphs whenever we reach a quote or
692 newline, because there might be a backslash trigraph escaping them.
693 We need to be careful that we start trigraph replacing from where we
694 left off previously, because it is possible for a first scan to leave
695 "fake" trigraphs that a second scan would pick up as real (e.g. the
696 sequence "????/\n=" would find a fake ??= trigraph after removing the
697 escaped newline.)
698
699 For line comments, on reaching a newline we scan the previous
700 character(s) to see if it is escaped, and continue if it is. Block
701 comments ignore everything and just focus on finding the comment
702 termination mark. The only difficult thing, and it is surprisingly
703 tricky, is checking if an asterisk precedes the final slash since
704 they could be separated by escaped newlines. If the preprocessor is
705 invoked with the output comments option, we don't bother removing
706 escaped newlines and replacing trigraphs for output.
707
708 Finally, numbers can begin with a period, which is pushed initially
709 as a CPP_DOT token in its own right. The digit handler checks if the
710 previous token was a CPP_DOT not separated by whitespace, and if so
711 pops it off the stack and pushes a period into the number's buffer
712 before calling the number parser.
45b966db 713
041c3194
ZW
714*/
715
716static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
717 U":>", U"<%", U"%>"};
45b966db 718
041c3194
ZW
719/* Call when a trigraph is encountered. It warns if necessary, and
720 returns true if the trigraph should be honoured. END is the third
721 character of a trigraph in the input stream. */
45b966db 722static int
041c3194 723trigraph_ok (pfile, end)
45b966db 724 cpp_reader *pfile;
041c3194 725 const unsigned char *end;
45b966db 726{
041c3194
ZW
727 int accept = CPP_OPTION (pfile, trigraphs);
728
729 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 730 {
041c3194
ZW
731 unsigned int col = end - 1 - pfile->buffer->line_base;
732 if (accept)
733 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
734 "trigraph ??%c converted to %c",
735 (int) *end, (int) trigraph_map[*end]);
45b966db 736 else
041c3194
ZW
737 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
738 "trigraph ??%c ignored", (int) *end);
45b966db 739 }
041c3194 740 return accept;
45b966db
ZW
741}
742
041c3194
ZW
743/* Scan a string for trigraphs, warning or replacing them inline as
744 appropriate. When parsing a string, we must call this routine
745 before processing a newline character (if trigraphs are enabled),
746 since the newline might be escaped by a preceding backslash
747 trigraph sequence. Returns a pointer to the end of the name after
748 replacement. */
749
750static unsigned char *
751trigraph_replace (pfile, src, limit)
45b966db 752 cpp_reader *pfile;
041c3194
ZW
753 unsigned char *src;
754 unsigned char *limit;
45b966db 755{
041c3194
ZW
756 unsigned char *dest;
757
758 /* Starting with src[1], find two consecutive '?'. The case of no
759 trigraphs is streamlined. */
760
043afb2a 761 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
762 {
763 if (src[0] != '?')
764 continue;
765
766 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
767 if (src[-1] == '?')
768 src--;
769 else if (src + 2 == limit || src[1] != '?')
770 continue;
45b966db 771
041c3194
ZW
772 /* Check if it really is a trigraph. */
773 if (trigraph_map[src[2]] == 0)
774 continue;
45b966db 775
041c3194
ZW
776 dest = src;
777 goto trigraph_found;
778 }
779 return limit;
45b966db 780
041c3194
ZW
781 /* Now we have a trigraph, we need to scan the remaining buffer, and
782 copy-shifting its contents left if replacement is enabled. */
783 for (; src + 2 < limit; dest++, src++)
784 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
785 {
786 trigraph_found:
787 src += 2;
788 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
789 *dest = trigraph_map[*src];
790 }
791
792 /* Copy remaining (at most 2) characters. */
793 while (src < limit)
794 *dest++ = *src++;
795 return dest;
45b966db
ZW
796}
797
041c3194
ZW
798/* If CUR is a backslash or the end of a trigraphed backslash, return
799 a pointer to its beginning, otherwise NULL. We don't read beyond
800 the buffer start, because there is the start of the comment in the
801 buffer. */
802static const unsigned char *
803backslash_start (pfile, cur)
64aaf407 804 cpp_reader *pfile;
041c3194 805 const unsigned char *cur;
64aaf407 806{
041c3194
ZW
807 if (cur[0] == '\\')
808 return cur;
809 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
810 && trigraph_ok (pfile, cur))
811 return cur - 2;
812 return 0;
64aaf407
NB
813}
814
041c3194
ZW
815/* Skip a C-style block comment. This is probably the trickiest
816 handler. We find the end of the comment by seeing if an asterisk
817 is before every '/' we encounter. The nasty complication is that a
818 previous asterisk may be separated by one or more escaped newlines.
819 Returns non-zero if comment terminated by EOF, zero otherwise. */
820static int
821skip_block_comment (pfile)
45b966db
ZW
822 cpp_reader *pfile;
823{
041c3194
ZW
824 cpp_buffer *buffer = pfile->buffer;
825 const unsigned char *char_after_star = 0;
826 register const unsigned char *cur = buffer->cur;
827 int seen_eof = 0;
828
829 /* Inner loop would think the comment has ended if the first comment
830 character is a '/'. Avoid this and keep the inner loop clean by
831 skipping such a character. */
832 if (cur < buffer->rlimit && cur[0] == '/')
833 cur++;
64aaf407 834
041c3194 835 for (; cur < buffer->rlimit; )
45b966db 836 {
041c3194
ZW
837 unsigned char c = *cur++;
838
839 /* People like decorating comments with '*', so check for
840 '/' instead for efficiency. */
841 if (c == '/')
45b966db 842 {
041c3194
ZW
843 if (cur[-2] == '*' || cur - 1 == char_after_star)
844 goto out;
845
846 /* Warn about potential nested comments, but not when
847 the final character inside the comment is a '/'.
848 Don't bother to get it right across escaped newlines. */
849 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
850 && cur[0] == '*' && cur[1] != '/')
45b966db 851 {
041c3194
ZW
852 buffer->cur = cur;
853 cpp_warning (pfile, "'/*' within comment");
45b966db 854 }
45b966db 855 }
91fcd158 856 else if (is_vspace (c))
45b966db 857 {
041c3194
ZW
858 const unsigned char* bslash = backslash_start (pfile, cur - 2);
859
860 handle_newline (cur, buffer->rlimit, c);
861 /* Work correctly if there is an asterisk before an
862 arbirtrarily long sequence of escaped newlines. */
863 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
864 char_after_star = cur;
865 else
866 char_after_star = 0;
45b966db 867 }
45b966db 868 }
041c3194
ZW
869 seen_eof = 1;
870
64aaf407 871 out:
041c3194
ZW
872 buffer->cur = cur;
873 return seen_eof;
45b966db
ZW
874}
875
041c3194
ZW
876/* Skip a C++ or Chill line comment. Handles escaped newlines.
877 Returns non-zero if a multiline comment. */
878static int
879skip_line_comment (pfile)
45b966db
ZW
880 cpp_reader *pfile;
881{
041c3194
ZW
882 cpp_buffer *buffer = pfile->buffer;
883 register const unsigned char *cur = buffer->cur;
884 int multiline = 0;
885
886 for (; cur < buffer->rlimit; )
887 {
888 unsigned char c = *cur++;
889
91fcd158 890 if (is_vspace (c))
45b966db 891 {
041c3194
ZW
892 /* Check for a (trigaph?) backslash escaping the newline. */
893 if (!backslash_start (pfile, cur - 2))
894 goto out;
895 multiline = 1;
896 handle_newline (cur, buffer->rlimit, c);
897 }
898 }
899 cur++;
45b966db 900
041c3194
ZW
901 out:
902 buffer->cur = cur - 1; /* Leave newline for caller. */
903 return multiline;
904}
45b966db 905
041c3194
ZW
906/* Skips whitespace, stopping at next non-whitespace character.
907 Adjusts pfile->col_adjust to account for tabs. This enables tokens
908 to be assigned the correct column. */
909static void
910skip_whitespace (pfile, in_directive)
911 cpp_reader *pfile;
912 int in_directive;
913{
914 cpp_buffer *buffer = pfile->buffer;
91fcd158 915 unsigned short warned = 0;
45b966db 916
91fcd158
NB
917 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
918 while (buffer->cur < buffer->rlimit)
041c3194 919 {
91fcd158 920 unsigned char c = *buffer->cur;
45b966db 921
91fcd158
NB
922 if (!is_nvspace (c))
923 break;
924
925 buffer->cur++;
926 /* Horizontal space always OK. */
927 if (c == ' ')
928 continue;
929 else if (c == '\t')
930 pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
931 - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
932 /* Must be \f \v or \0. */
933 else if (c == '\0')
041c3194 934 {
91fcd158
NB
935 if (!warned)
936 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
937 CPP_BUF_COL (buffer),
938 "embedded null character ignored");
939 warned = 1;
45b966db 940 }
041c3194 941 else if (in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
942 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
943 CPP_BUF_COL (buffer),
944 "%s in preprocessing directive",
945 c == '\f' ? "form feed" : "vertical tab");
45b966db 946 }
041c3194 947}
45b966db 948
041c3194 949/* Parse (append) an identifier. */
bfb9dc7f
ZW
950static inline const U_CHAR *
951parse_name (pfile, tok, cur, rlimit)
45b966db 952 cpp_reader *pfile;
bfb9dc7f
ZW
953 cpp_token *tok;
954 const U_CHAR *cur, *rlimit;
45b966db 955{
bfb9dc7f
ZW
956 const U_CHAR *name = cur;
957 unsigned int len;
041c3194 958
bfb9dc7f 959 while (cur < rlimit)
041c3194 960 {
bfb9dc7f
ZW
961 if (! is_idchar (*cur))
962 break;
e5ec2402
ZW
963 /* $ is not a legal identifier character in the standard, but is
964 commonly accepted as an extension. Don't warn about it in
965 skipped conditional blocks. */
bfb9dc7f 966 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194 967 {
bfb9dc7f 968 CPP_BUFFER (pfile)->cur = cur;
041c3194
ZW
969 cpp_pedwarn (pfile, "'$' character in identifier");
970 }
bfb9dc7f 971 cur++;
041c3194 972 }
bfb9dc7f 973 len = cur - name;
45b966db 974
bfb9dc7f 975 if (tok->val.node)
041c3194 976 {
bfb9dc7f
ZW
977 unsigned int oldlen = tok->val.node->length;
978 U_CHAR *newname = alloca (oldlen + len);
979 memcpy (newname, tok->val.node->name, oldlen);
980 memcpy (newname + oldlen, name, len);
981 len += oldlen;
982 name = newname;
041c3194
ZW
983 }
984
bfb9dc7f
ZW
985 tok->val.node = cpp_lookup (pfile, name, len);
986 return cur;
45b966db
ZW
987}
988
041c3194 989/* Parse (append) a number. */
45b966db 990static void
041c3194 991parse_number (pfile, list, name)
45b966db 992 cpp_reader *pfile;
041c3194 993 cpp_toklist *list;
bfb9dc7f 994 cpp_string *name;
45b966db 995{
041c3194
ZW
996 const unsigned char *name_limit;
997 unsigned char *namebuf;
998 cpp_buffer *buffer = pfile->buffer;
999 register const unsigned char *cur = buffer->cur;
45b966db 1000
041c3194
ZW
1001 expanded:
1002 name_limit = list->namebuf + list->name_cap;
1003 namebuf = list->namebuf + list->name_used;
45b966db 1004
041c3194
ZW
1005 for (; cur < buffer->rlimit && namebuf < name_limit; )
1006 {
1007 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1008
041c3194
ZW
1009 /* Perhaps we should accept '$' here if we accept it for
1010 identifiers. We know namebuf[-1] is safe, because for c to
1011 be a sign we must have pushed at least one character. */
1012 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1013 goto out;
45b966db 1014
041c3194
ZW
1015 namebuf++;
1016 cur++;
45b966db 1017 }
64aaf407 1018
041c3194
ZW
1019 /* Run out of name space? */
1020 if (cur < buffer->rlimit)
1021 {
1022 list->name_used = namebuf - list->namebuf;
1023 auto_expand_name_space (list);
1024 goto expanded;
1025 }
1026
64aaf407 1027 out:
041c3194
ZW
1028 buffer->cur = cur;
1029 name->len = namebuf - name->text;
1030 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1031}
1032
041c3194 1033/* Places a string terminated by an unescaped TERMINATOR into a
bfb9dc7f 1034 cpp_string, which should be expandable and thus at the top of the
041c3194
ZW
1035 list's stack. Handles embedded trigraphs, if necessary, and
1036 escaped newlines.
45b966db 1037
041c3194
ZW
1038 Can be used for character constants (terminator = '\''), string
1039 constants ('"') and angled headers ('>'). Multi-line strings are
1040 allowed, except for within directives. */
45b966db 1041
041c3194
ZW
1042static void
1043parse_string (pfile, list, token, terminator)
45b966db 1044 cpp_reader *pfile;
041c3194
ZW
1045 cpp_toklist *list;
1046 cpp_token *token;
1047 unsigned int terminator;
45b966db 1048{
041c3194 1049 cpp_buffer *buffer = pfile->buffer;
bfb9dc7f 1050 cpp_string *name = &token->val.str;
041c3194
ZW
1051 register const unsigned char *cur = buffer->cur;
1052 const unsigned char *name_limit;
1053 unsigned char *namebuf;
1054 unsigned int null_count = 0;
1055 unsigned int trigraphed = list->name_used;
45b966db 1056
041c3194
ZW
1057 expanded:
1058 name_limit = list->namebuf + list->name_cap;
1059 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1060
041c3194 1061 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1062 {
041c3194 1063 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1064
041c3194
ZW
1065 if (c == '\0')
1066 null_count++;
91fcd158 1067 else if (c == terminator || is_vspace (c))
45b966db 1068 {
041c3194
ZW
1069 /* Needed for trigraph_replace and multiline string warning. */
1070 buffer->cur = cur;
5eec0563 1071
041c3194
ZW
1072 /* Scan for trigraphs before checking if backslash-escaped. */
1073 if ((CPP_OPTION (pfile, trigraphs)
1074 || CPP_OPTION (pfile, warn_trigraphs))
1075 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1076 {
041c3194
ZW
1077 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1078 namebuf);
1079 /* The test above guarantees trigraphed will be positive. */
1080 trigraphed = namebuf - list->namebuf - 2;
45b966db 1081 }
45b966db 1082
041c3194 1083 namebuf--; /* Drop the newline / terminator from the name. */
91fcd158 1084 if (is_vspace (c))
45b966db 1085 {
041c3194
ZW
1086 /* Drop a backslash newline, and continue. */
1087 if (namebuf[-1] == '\\')
45b966db 1088 {
041c3194
ZW
1089 handle_newline (cur, buffer->rlimit, c);
1090 namebuf--;
1091 continue;
45b966db 1092 }
45b966db 1093
041c3194 1094 cur--;
45b966db 1095
041c3194
ZW
1096 /* In Fortran and assembly language, silently terminate
1097 strings of either variety at end of line. This is a
1098 kludge around not knowing where comments are in these
1099 languages. */
1100 if (CPP_OPTION (pfile, lang_fortran)
1101 || CPP_OPTION (pfile, lang_asm))
1102 goto out;
64aaf407 1103
041c3194
ZW
1104 /* Character constants, headers and asserts may not
1105 extend over multiple lines. In Standard C, neither
1106 may strings. We accept multiline strings as an
1107 extension. (Even in directives - otherwise, glibc's
1108 longlong.h breaks.) */
1109 if (terminator != '"')
1110 goto unterminated;
1111
1112 cur++; /* Move forwards again. */
45b966db 1113
041c3194
ZW
1114 if (pfile->multiline_string_line == 0)
1115 {
1116 pfile->multiline_string_line = token->line;
1117 pfile->multiline_string_column = token->col;
1118 if (CPP_PEDANTIC (pfile))
1119 cpp_pedwarn (pfile, "multi-line string constant");
1120 }
1121
1122 *namebuf++ = '\n';
1123 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1124 }
1125 else
1126 {
041c3194 1127 unsigned char *temp;
45b966db 1128
041c3194
ZW
1129 /* An odd number of consecutive backslashes represents
1130 an escaped terminator. */
1131 temp = namebuf - 1;
1132 while (temp >= name->text && *temp == '\\')
1133 temp--;
45b966db 1134
041c3194
ZW
1135 if ((namebuf - temp) & 1)
1136 goto out;
1137 namebuf++;
1138 }
45b966db 1139 }
ff2b53ef 1140 }
45b966db 1141
041c3194
ZW
1142 /* Run out of name space? */
1143 if (cur < buffer->rlimit)
45b966db 1144 {
041c3194
ZW
1145 list->name_used = namebuf - list->namebuf;
1146 auto_expand_name_space (list);
1147 goto expanded;
1148 }
45b966db 1149
041c3194
ZW
1150 /* We may not have trigraph-replaced the input for this code path,
1151 but as the input is in error by being unterminated we don't
1152 bother. Prevent warnings about no newlines at EOF. */
91fcd158 1153 if (is_vspace (cur[-1]))
041c3194 1154 cur--;
45b966db 1155
041c3194
ZW
1156 unterminated:
1157 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1158
041c3194
ZW
1159 if (terminator == '\"' && pfile->multiline_string_line != list->line
1160 && pfile->multiline_string_line != 0)
1161 {
1162 cpp_error_with_line (pfile, pfile->multiline_string_line,
1163 pfile->multiline_string_column,
1164 "possible start of unterminated string literal");
1165 pfile->multiline_string_line = 0;
45b966db 1166 }
041c3194
ZW
1167
1168 out:
1169 buffer->cur = cur;
1170 name->len = namebuf - name->text;
1171 list->name_used = namebuf - list->namebuf;
45b966db 1172
041c3194
ZW
1173 if (null_count > 0)
1174 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1175 : "null character preserved"));
45b966db
ZW
1176}
1177
041c3194
ZW
1178/* The character TYPE helps us distinguish comment types: '*' = C
1179 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1180 the stored comment includes the comment start and any terminator. */
1181
1182#define COMMENT_START_LEN 2
9e62c811 1183static void
041c3194
ZW
1184save_comment (list, token, from, len, type)
1185 cpp_toklist *list;
1186 cpp_token *token;
1187 const unsigned char *from;
9e62c811 1188 unsigned int len;
041c3194 1189 unsigned int type;
9e62c811 1190{
041c3194
ZW
1191 unsigned char *buffer;
1192
1193 len += COMMENT_START_LEN;
9e62c811 1194
041c3194
ZW
1195 if (list->name_used + len > list->name_cap)
1196 _cpp_expand_name_space (list, len);
9e62c811 1197
bfb9dc7f 1198 INIT_TOKEN_STR (list, token);
041c3194 1199 token->type = CPP_COMMENT;
bfb9dc7f 1200 token->val.str.len = len;
45b966db 1201
041c3194
ZW
1202 buffer = list->namebuf + list->name_used;
1203 list->name_used += len;
45b966db 1204
041c3194
ZW
1205 /* Copy the comment. */
1206 if (type == '*')
45b966db 1207 {
041c3194
ZW
1208 *buffer++ = '/';
1209 *buffer++ = '*';
45b966db 1210 }
041c3194 1211 else
ea4a453b 1212 {
041c3194
ZW
1213 *buffer++ = type;
1214 *buffer++ = type;
ea4a453b 1215 }
041c3194 1216 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1217}
1218
041c3194
ZW
1219/*
1220 * The tokenizer's main loop. Returns a token list, representing a
1221 * logical line in the input file. On EOF after some tokens have
1222 * been processed, we return immediately. Then in next call, or if
1223 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1224 * token is placed in the list.
1225 *
1226 * Implementation relies almost entirely on lookback, rather than
1227 * looking forwards. This means that tokenization requires just
1228 * a single pass of the file, even in the presence of trigraphs and
1229 * escaped newlines, providing significant performance benefits.
1230 * Trigraph overhead is negligible if they are disabled, and low
1231 * even when enabled.
1232 */
1233
91fcd158 1234#define KNOWN_DIRECTIVE() (list->directive != 0)
041c3194
ZW
1235#define MIGHT_BE_DIRECTIVE() \
1236(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1237
041c3194
ZW
1238static void
1239lex_line (pfile, list)
45b966db 1240 cpp_reader *pfile;
041c3194 1241 cpp_toklist *list;
45b966db 1242{
041c3194
ZW
1243 cpp_token *cur_token, *token_limit, *first;
1244 cpp_buffer *buffer = pfile->buffer;
1245 const unsigned char *cur = buffer->cur;
1246 unsigned char flags = 0;
1247 unsigned int first_token = list->tokens_used;
45b966db 1248
041c3194
ZW
1249 if (!(list->flags & LIST_OFFSET))
1250 (abort) ();
1251
1252 list->file = buffer->nominal_fname;
1253 list->line = CPP_BUF_LINE (buffer);
1254 pfile->col_adjust = 0;
1255 pfile->in_lex_line = 1;
1256 if (cur == buffer->buf)
1257 list->flags |= BEG_OF_FILE;
1258
1259 expanded:
1260 token_limit = list->tokens + list->tokens_cap;
1261 cur_token = list->tokens + list->tokens_used;
45b966db 1262
041c3194 1263 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1264 {
041c3194 1265 unsigned char c;
45b966db 1266
91fcd158
NB
1267 /* Optimize non-vertical whitespace skipping; most tokens are
1268 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1269 c = *cur;
1270 if (is_nvspace (c))
45b966db 1271 {
91fcd158
NB
1272 buffer->cur = cur;
1273 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1274 && cur_token > &list->tokens[first_token]));
041c3194 1275 cur = buffer->cur;
ff2b53ef 1276
041c3194
ZW
1277 flags = PREV_WHITE;
1278 if (cur == buffer->rlimit)
1279 break;
91fcd158 1280 c = *cur;
45b966db 1281 }
91fcd158 1282 cur++;
45b966db 1283
041c3194
ZW
1284 /* Initialize current token. CPP_EOF will not be fixed up by
1285 expand_name_space. */
1286 list->tokens_used = cur_token - list->tokens + 1;
1287 cur_token->type = CPP_EOF;
1288 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1289 cur_token->line = CPP_BUF_LINE (buffer);
1290 cur_token->flags = flags;
1291 flags = 0;
46d07497 1292
041c3194
ZW
1293 switch (c)
1294 {
1295 case '0': case '1': case '2': case '3': case '4':
1296 case '5': case '6': case '7': case '8': case '9':
1297 {
1298 int prev_dot;
46d07497 1299
041c3194
ZW
1300 cur--; /* Backup character. */
1301 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1302 if (prev_dot)
1303 cur_token--;
bfb9dc7f 1304 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1305 /* Prepend an immediately previous CPP_DOT token. */
1306 if (prev_dot)
1307 {
1308 if (list->name_cap == list->name_used)
1309 auto_expand_name_space (list);
46d07497 1310
bfb9dc7f 1311 cur_token->val.str.len = 1;
041c3194
ZW
1312 list->namebuf[list->name_used++] = '.';
1313 }
46d07497 1314
041c3194
ZW
1315 continue_number:
1316 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1317 buffer->cur = cur;
bfb9dc7f 1318 parse_number (pfile, list, &cur_token->val.str);
041c3194
ZW
1319 cur = buffer->cur;
1320 }
1321 /* Check for # 123 form of #line. */
1322 if (MIGHT_BE_DIRECTIVE ())
1323 list->directive = _cpp_check_linemarker (pfile, cur_token,
1324 !(cur_token[-1].flags
1325 & PREV_WHITE));
1326 cur_token++;
1327 break;
46d07497 1328
041c3194
ZW
1329 letter:
1330 case '_':
1331 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1332 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1333 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1334 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1335 case 'y': case 'z':
1336 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1337 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1338 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1339 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1340 case 'Y': case 'Z':
1341 cur--; /* Backup character. */
bfb9dc7f 1342 cur_token->val.node = 0;
041c3194
ZW
1343 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1344
1345 continue_name:
bfb9dc7f 1346 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
45b966db 1347
041c3194
ZW
1348 if (MIGHT_BE_DIRECTIVE ())
1349 list->directive = _cpp_check_directive (pfile, cur_token,
1350 !(list->tokens[0].flags
1351 & PREV_WHITE));
1352 cur_token++;
1353 break;
f8f769ea 1354
041c3194
ZW
1355 case '\'':
1356 /* Character constants are not recognized when processing Fortran,
1357 or if -traditional. */
1358 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1359 goto other;
45b966db 1360
041c3194
ZW
1361 /* Fall through. */
1362 case '\"':
1363 /* Traditionally, escaped strings are not strings. */
1364 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1365 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1366 goto other;
04e3ec78 1367
041c3194
ZW
1368 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1369 /* Do we have a wide string? */
1370 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
bfb9dc7f 1371 && cur_token[-1].val.node == pfile->spec_nodes->n_L
041c3194 1372 && !CPP_TRADITIONAL (pfile))
04e3ec78 1373 {
041c3194 1374 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
04e3ec78 1375 }
45b966db 1376
041c3194
ZW
1377 do_parse_string:
1378 /* Here c is one of ' " or >. */
bfb9dc7f 1379 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1380 buffer->cur = cur;
1381 parse_string (pfile, list, cur_token, c);
1382 cur = buffer->cur;
1383 cur_token++;
1384 break;
45b966db 1385
041c3194
ZW
1386 case '/':
1387 cur_token->type = CPP_DIV;
1388 if (IMMED_TOKEN ())
1389 {
1390 if (PREV_TOKEN_TYPE == CPP_DIV)
1391 {
1392 /* We silently allow C++ comments in system headers,
1393 irrespective of conformance mode, because lots of
1394 broken systems do that and trying to clean it up
1395 in fixincludes is a nightmare. */
1396 if (CPP_IN_SYSTEM_HEADER (pfile))
1397 goto do_line_comment;
1398 else if (CPP_OPTION (pfile, cplusplus_comments))
1399 {
1400 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1401 && ! buffer->warned_cplusplus_comments)
1402 {
1403 buffer->cur = cur;
1404 cpp_pedwarn (pfile,
1405 "C++ style comments are not allowed in ISO C89");
1406 cpp_pedwarn (pfile,
1407 "(this will be reported only once per input file)");
1408 buffer->warned_cplusplus_comments = 1;
1409 }
1410 do_line_comment:
1411 buffer->cur = cur;
1412#if 0 /* Leave until new lexer in place. */
1413 if (cur[-2] != c)
1414 cpp_warning (pfile,
1415 "comment start split across lines");
1416#endif
1417 if (skip_line_comment (pfile))
1418 cpp_warning (pfile, "multi-line comment");
f8f769ea 1419
041c3194
ZW
1420 /* Back-up to first '-' or '/'. */
1421 cur_token--;
1422 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1423 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1424 || (list->directive->flags & COMMENTS)))
1425 save_comment (list, cur_token++, cur,
1426 buffer->cur - cur, c);
1427 else if (!CPP_OPTION (pfile, traditional))
1428 flags = PREV_WHITE;
1429
1430 cur = buffer->cur;
1431 break;
1432 }
1433 }
1434 }
1435 cur_token++;
1436 break;
1437
1438 case '*':
1439 cur_token->type = CPP_MULT;
1440 if (IMMED_TOKEN ())
45b966db 1441 {
041c3194
ZW
1442 if (PREV_TOKEN_TYPE == CPP_DIV)
1443 {
1444 buffer->cur = cur;
1445#if 0 /* Leave until new lexer in place. */
1446 if (cur[-2] != '/')
1447 cpp_warning (pfile,
1448 "comment start '/*' split across lines");
1449#endif
1450 if (skip_block_comment (pfile))
1451 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1452 "unterminated comment");
1453#if 0 /* Leave until new lexer in place. */
1454 else if (buffer->cur[-2] != '*')
1455 cpp_warning (pfile,
1456 "comment end '*/' split across lines");
1457#endif
1458 /* Back up to opening '/'. */
1459 cur_token--;
1460 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1461 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1462 || (list->directive->flags & COMMENTS)))
1463 save_comment (list, cur_token++, cur,
1464 buffer->cur - cur, c);
1465 else if (!CPP_OPTION (pfile, traditional))
1466 flags = PREV_WHITE;
1467
1468 cur = buffer->cur;
1469 break;
1470 }
1471 else if (CPP_OPTION (pfile, cplusplus))
1472 {
1473 /* In C++, there are .* and ->* operators. */
1474 if (PREV_TOKEN_TYPE == CPP_DEREF)
1475 BACKUP_TOKEN (CPP_DEREF_STAR);
1476 else if (PREV_TOKEN_TYPE == CPP_DOT)
1477 BACKUP_TOKEN (CPP_DOT_STAR);
1478 }
45b966db 1479 }
041c3194 1480 cur_token++;
f8f769ea 1481 break;
45b966db 1482
041c3194
ZW
1483 case '\n':
1484 case '\r':
1485 handle_newline (cur, buffer->rlimit, c);
1486 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1487 {
041c3194 1488 if (IMMED_TOKEN ())
45b966db 1489 {
041c3194
ZW
1490 /* Remove the escaped newline. Then continue to process
1491 any interrupted name or number. */
1492 cur_token--;
1493 /* Backslash-newline may not be immediately followed by
1494 EOF (C99 5.1.1.2). */
1495 if (cur >= buffer->rlimit)
1496 {
1497 cpp_pedwarn (pfile, "backslash-newline at end of file");
1498 break;
1499 }
1500 if (IMMED_TOKEN ())
1501 {
1502 cur_token--;
1503 if (cur_token->type == CPP_NAME)
1504 goto continue_name;
1505 else if (cur_token->type == CPP_NUMBER)
1506 goto continue_number;
1507 cur_token++;
1508 }
1509 /* Remember whitespace setting. */
1510 flags = cur_token->flags;
f8f769ea 1511 break;
45b966db 1512 }
041c3194
ZW
1513 else
1514 {
1515 buffer->cur = cur;
1516 cpp_warning (pfile,
1517 "backslash and newline separated by space");
1518 }
1519 }
1520 else if (MIGHT_BE_DIRECTIVE ())
1521 {
1522 /* "Null directive." C99 6.10.7: A preprocessing
1523 directive of the form # <new-line> has no effect.
1524
1525 But it is still a directive, and therefore disappears
1526 from the output. */
1527 cur_token--;
1528 if (cur_token->flags & PREV_WHITE)
45b966db 1529 {
041c3194
ZW
1530 if (CPP_WTRADITIONAL (pfile))
1531 cpp_warning (pfile,
1532 "K+R C ignores #\\n with the # indented");
1533 if (CPP_TRADITIONAL (pfile))
1534 cur_token++;
45b966db 1535 }
f8f769ea 1536 }
45b966db 1537
041c3194
ZW
1538 /* Skip vertical space until we have at least one token to
1539 return. */
1540 if (cur_token != &list->tokens[first_token])
1541 goto out;
1542 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1543 break;
04e3ec78 1544
041c3194
ZW
1545 case '-':
1546 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1547 {
1548 if (CPP_OPTION (pfile, chill))
1549 goto do_line_comment;
1550 REVISE_TOKEN (CPP_MINUS_MINUS);
1551 }
1552 else
1553 PUSH_TOKEN (CPP_MINUS);
1554 break;
45b966db 1555
041c3194
ZW
1556 make_hash:
1557 case '#':
1558 /* The digraph flag checking ensures that ## and %:%:
1559 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1560 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1561 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1562 REVISE_TOKEN (CPP_PASTE);
1563 else
1564 PUSH_TOKEN (CPP_HASH);
1565 break;
04e3ec78 1566
041c3194
ZW
1567 case ':':
1568 cur_token->type = CPP_COLON;
1569 if (IMMED_TOKEN ())
1570 {
1571 if (PREV_TOKEN_TYPE == CPP_COLON
1572 && CPP_OPTION (pfile, cplusplus))
1573 BACKUP_TOKEN (CPP_SCOPE);
9b55f29a 1574 else if (CPP_OPTION (pfile, digraphs))
041c3194 1575 {
9b55f29a
NB
1576 /* Digraph: "<:" is a '[' */
1577 if (PREV_TOKEN_TYPE == CPP_LESS)
1578 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1579 /* Digraph: "%:" is a '#' */
1580 else if (PREV_TOKEN_TYPE == CPP_MOD)
1581 {
1582 (--cur_token)->flags |= DIGRAPH;
1583 goto make_hash;
1584 }
041c3194
ZW
1585 }
1586 }
1587 cur_token++;
1588 break;
f8f769ea 1589
041c3194
ZW
1590 case '&':
1591 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1592 REVISE_TOKEN (CPP_AND_AND);
1593 else
1594 PUSH_TOKEN (CPP_AND);
f8f769ea 1595 break;
45b966db 1596
041c3194
ZW
1597 make_or:
1598 case '|':
1599 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1600 REVISE_TOKEN (CPP_OR_OR);
1601 else
1602 PUSH_TOKEN (CPP_OR);
1603 break;
45b966db 1604
041c3194
ZW
1605 case '+':
1606 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1607 REVISE_TOKEN (CPP_PLUS_PLUS);
1608 else
1609 PUSH_TOKEN (CPP_PLUS);
1610 break;
45b966db 1611
041c3194
ZW
1612 case '=':
1613 /* This relies on equidistance of "?=" and "?" tokens. */
1614 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1615 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1616 else
1617 PUSH_TOKEN (CPP_EQ);
1618 break;
45b966db 1619
041c3194
ZW
1620 case '>':
1621 cur_token->type = CPP_GREATER;
1622 if (IMMED_TOKEN ())
1623 {
1624 if (PREV_TOKEN_TYPE == CPP_GREATER)
1625 BACKUP_TOKEN (CPP_RSHIFT);
1626 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1627 BACKUP_TOKEN (CPP_DEREF);
9b55f29a
NB
1628 else if (CPP_OPTION (pfile, digraphs))
1629 {
1630 /* Digraph: ":>" is a ']' */
1631 if (PREV_TOKEN_TYPE == CPP_COLON)
1632 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1633 /* Digraph: "%>" is a '}' */
1634 else if (PREV_TOKEN_TYPE == CPP_MOD)
1635 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1636 }
041c3194
ZW
1637 }
1638 cur_token++;
1639 break;
1640
1641 case '<':
1642 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1643 {
1644 REVISE_TOKEN (CPP_LSHIFT);
1645 break;
1646 }
1647 /* Is this the beginning of a header name? */
91fcd158 1648 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
041c3194
ZW
1649 {
1650 c = '>'; /* Terminator. */
1651 cur_token->type = CPP_HEADER_NAME;
1652 goto do_parse_string;
1653 }
1654 PUSH_TOKEN (CPP_LESS);
1655 break;
45b966db 1656
041c3194
ZW
1657 case '%':
1658 /* Digraph: "<%" is a '{' */
1659 cur_token->type = CPP_MOD;
9b55f29a
NB
1660 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1661 && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1662 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1663 cur_token++;
1664 break;
04e3ec78 1665
041c3194
ZW
1666 case '?':
1667 if (cur + 1 < buffer->rlimit && *cur == '?'
1668 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1669 {
1670 /* Handle trigraph. */
1671 cur++;
1672 switch (*cur++)
1673 {
1674 case '(': goto make_open_square;
1675 case ')': goto make_close_square;
1676 case '<': goto make_open_brace;
1677 case '>': goto make_close_brace;
1678 case '=': goto make_hash;
1679 case '!': goto make_or;
1680 case '-': goto make_complement;
1681 case '/': goto make_backslash;
1682 case '\'': goto make_xor;
1683 }
1684 }
1685 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1686 {
1687 /* GNU C++ defines <? and >? operators. */
1688 if (PREV_TOKEN_TYPE == CPP_LESS)
1689 {
1690 REVISE_TOKEN (CPP_MIN);
1691 break;
1692 }
1693 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1694 {
1695 REVISE_TOKEN (CPP_MAX);
1696 break;
1697 }
1698 }
1699 PUSH_TOKEN (CPP_QUERY);
1700 break;
45b966db 1701
041c3194
ZW
1702 case '.':
1703 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1704 && IMMED_TOKEN ()
1705 && !(cur_token[-1].flags & PREV_WHITE))
1706 {
1707 cur_token -= 2;
1708 PUSH_TOKEN (CPP_ELLIPSIS);
1709 }
1710 else
1711 PUSH_TOKEN (CPP_DOT);
1712 break;
c5a04734 1713
041c3194
ZW
1714 make_complement:
1715 case '~': PUSH_TOKEN (CPP_COMPL); break;
1716 make_xor:
1717 case '^': PUSH_TOKEN (CPP_XOR); break;
1718 make_open_brace:
1719 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1720 make_close_brace:
1721 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1722 make_open_square:
1723 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1724 make_close_square:
1725 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1726 make_backslash:
1727 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1728 case '!': PUSH_TOKEN (CPP_NOT); break;
1729 case ',': PUSH_TOKEN (CPP_COMMA); break;
1730 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1731 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1732 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1733
041c3194
ZW
1734 case '$':
1735 if (CPP_OPTION (pfile, dollars_in_ident))
1736 goto letter;
1737 /* Fall through */
1738 other:
1739 default:
1740 cur_token->val.aux = c;
1741 PUSH_TOKEN (CPP_OTHER);
1742 break;
1743 }
1744 }
6d2c2047 1745
041c3194
ZW
1746 /* Run out of token space? */
1747 if (cur_token == token_limit)
1748 {
1749 list->tokens_used = cur_token - list->tokens;
1750 _cpp_expand_token_space (list, 256);
1751 goto expanded;
1752 }
6d2c2047 1753
041c3194
ZW
1754 cur_token->flags = flags;
1755 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1756 {
91fcd158 1757 if (cur > buffer->buf && !is_vspace (cur[-1]))
041c3194
ZW
1758 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1759 CPP_BUF_COLUMN (buffer, cur),
1760 "no newline at end of file");
1761 cur_token++->type = CPP_EOF;
1762 }
6d2c2047 1763
041c3194
ZW
1764 out:
1765 /* All tokens are allocated, so the memory location is fixed. */
1766 first = &list->tokens[first_token];
1767
1768 /* Don't complain about the null directive, nor directives in
1769 assembly source: we don't know where the comments are, and # may
1770 introduce assembler pseudo-ops. Don't complain about invalid
1771 directives in skipped conditional groups (6.10 p4). */
1772 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1773 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1774 {
1775 if (first[1].type == CPP_NAME)
1776 cpp_error (pfile, "invalid preprocessing directive #%.*s",
bfb9dc7f 1777 (int) first[1].val.node->length, first[1].val.node->name);
041c3194
ZW
1778 else
1779 cpp_error (pfile, "invalid preprocessing directive");
1780 }
1781
91fcd158
NB
1782 /* Put EOF at end of known directives. This covers "directives do
1783 not extend beyond the end of the line (description 6.10 part 2)". */
1784 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
041c3194
ZW
1785 {
1786 pfile->first_directive_token = first;
1787 cur_token++->type = CPP_EOF;
1788 }
1789
91fcd158
NB
1790 /* Directives, known or not, always start a new line. */
1791 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
041c3194 1792 first->flags |= BOL;
6d2c2047 1793 else
041c3194
ZW
1794 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1795 up the invocation of a function-like macro, new line is
1796 considered a normal white-space character. */
1797 first->flags |= PREV_WHITE;
1798
1799 buffer->cur = cur;
1800 list->tokens_used = cur_token - list->tokens;
1801 pfile->in_lex_line = 0;
6d2c2047
ZW
1802}
1803
041c3194
ZW
1804/* Write the spelling of a token TOKEN, with any appropriate
1805 whitespace before it, to the token_buffer. PREV is the previous
1806 token, which is used to determine if we need to shove in an extra
1807 space in order to avoid accidental token paste. */
1808static void
1809output_token (pfile, token, prev)
1810 cpp_reader *pfile;
1811 const cpp_token *token, *prev;
1812{
1813 int dummy;
c5a04734 1814
041c3194
ZW
1815 if (token->col && (token->flags & BOL))
1816 {
1817 /* Supply enough whitespace to put this token in its original
1818 column. Don't bother trying to reconstruct tabs; we can't
1819 get it right in general, and nothing ought to care. (Yes,
1820 some things do care; the fault lies with them.) */
1821 unsigned char *buffer;
1822 unsigned int spaces = token->col - 1;
1823
1824 CPP_RESERVE (pfile, token->col);
1825 buffer = pfile->limit;
1826
1827 while (spaces--)
1828 *buffer++ = ' ';
1829 pfile->limit = buffer;
1830 }
1831 else if (token->flags & PREV_WHITE)
1832 CPP_PUTC (pfile, ' ');
1833 /* Check for and prevent accidental token pasting, in ANSI mode. */
d6d5f795 1834
041c3194
ZW
1835 else if (!CPP_TRADITIONAL (pfile) && prev)
1836 {
1837 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1838 CPP_PUTC (pfile, ' ');
1839 /* can_paste catches most of the accidental paste cases, but not all.
1840 Consider a + ++b - if there is not a space between the + and ++, it
1841 will be misparsed as a++ + b. */
1842 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1843 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1844 CPP_PUTC (pfile, ' ');
1845 }
d6d5f795 1846
041c3194
ZW
1847 CPP_RESERVE (pfile, TOKEN_LEN (token));
1848 pfile->limit = spell_token (pfile, token, pfile->limit);
1849}
d6d5f795 1850
041c3194 1851/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1852 already contain the enough space to hold the token's spelling.
1853 Returns a pointer to the character after the last character
1854 written. */
d6d5f795 1855
041c3194
ZW
1856static unsigned char *
1857spell_token (pfile, token, buffer)
1858 cpp_reader *pfile; /* Would be nice to be rid of this... */
1859 const cpp_token *token;
1860 unsigned char *buffer;
1861{
1862 switch (token_spellings[token->type].type)
1863 {
1864 case SPELL_OPERATOR:
1865 {
1866 const unsigned char *spelling;
1867 unsigned char c;
d6d5f795 1868
041c3194
ZW
1869 if (token->flags & DIGRAPH)
1870 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1871 else
1872 spelling = token_spellings[token->type].spelling;
1873
1874 while ((c = *spelling++) != '\0')
1875 *buffer++ = c;
1876 }
1877 break;
d6d5f795 1878
041c3194 1879 case SPELL_IDENT:
bfb9dc7f
ZW
1880 memcpy (buffer, token->val.node->name, token->val.node->length);
1881 buffer += token->val.node->length;
041c3194 1882 break;
d6d5f795 1883
041c3194
ZW
1884 case SPELL_STRING:
1885 {
041c3194
ZW
1886 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1887 *buffer++ = 'L';
bfb9dc7f 1888
041c3194 1889 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
bfb9dc7f
ZW
1890 *buffer++ = '"';
1891 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1892 *buffer++ = '\'';
1893
1894 memcpy (buffer, token->val.str.text, token->val.str.len);
1895 buffer += token->val.str.len;
1896
1897 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1898 *buffer++ = '"';
1899 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1900 *buffer++ = '\'';
041c3194
ZW
1901 }
1902 break;
d6d5f795 1903
041c3194
ZW
1904 case SPELL_CHAR:
1905 *buffer++ = token->val.aux;
1906 break;
d6d5f795 1907
041c3194
ZW
1908 case SPELL_NONE:
1909 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1910 break;
1911 }
d6d5f795 1912
041c3194
ZW
1913 return buffer;
1914}
d6d5f795 1915
cf00a885
ZW
1916/* Return the spelling of a token known to be an operator.
1917 Does not distinguish digraphs from their counterparts. */
1918const unsigned char *
1919_cpp_spell_operator (type)
1920 enum cpp_ttype type;
1921{
1922 if (token_spellings[type].type == SPELL_OPERATOR)
1923 return token_spellings[type].spelling;
1924 else
1925 return token_names[type];
1926}
1927
1928
041c3194 1929/* Macro expansion algorithm. TODO. */
d6d5f795 1930
7de9cc38
KG
1931static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1932static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
c5a04734 1933
041c3194
ZW
1934#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1935#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1936
1937/* Flags for cpp_context. */
1938#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1939#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1940#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1941#define CONTEXT_ARG (1 << 3) /* If an argument context. */
1942
1943#define ASSIGN_FLAGS_AND_POS(d, s) \
1944 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1945 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1946 } while (0)
1947
1948/* f is flags, just consisting of PREV_WHITE | BOL. */
1949#define MODIFY_FLAGS_AND_POS(d, s, f) \
1950 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1951 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1952 } while (0)
1953
1954typedef struct cpp_context cpp_context;
1955struct cpp_context
1956{
1957 union
1958 {
1959 const cpp_toklist *list; /* Used for macro contexts only. */
1960 const cpp_token **arg; /* Used for arg contexts only. */
1961 } u;
1962
1963 /* Pushed token to be returned by next call to cpp_get_token. */
1964 const cpp_token *pushed_token;
1965
1966 struct macro_args *args; /* 0 for arguments and object-like macros. */
1967 unsigned short posn; /* Current posn, index into u. */
1968 unsigned short count; /* No. of tokens in u. */
1969 unsigned short level;
1970 unsigned char flags;
1971};
1972
1973typedef struct macro_args macro_args;
1974struct macro_args
1975{
1976 unsigned int *ends;
1977 const cpp_token **tokens;
1978 unsigned int capacity;
1979 unsigned int used;
1980 unsigned short level;
1981};
1982
1983static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1984static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1985 macro_args *, unsigned int *));
1986static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1987static void save_token PARAMS ((macro_args *, const cpp_token *));
1988static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
1989 const cpp_token *));
1990static int do_pop_context PARAMS ((cpp_reader *));
1991static const cpp_token *pop_context PARAMS ((cpp_reader *));
1992static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
1993 cpp_hashnode *,
1994 const cpp_token *));
1995static void free_macro_args PARAMS ((macro_args *));
1996
1997/* Free the storage allocated for macro arguments. */
1998static void
1999free_macro_args (args)
2000 macro_args *args;
c5a04734 2001{
041c3194
ZW
2002 if (args->tokens)
2003 free (args->tokens);
2004 free (args->ends);
2005 free (args);
c5a04734
ZW
2006}
2007
041c3194
ZW
2008/* Determines if a macro has been already used (and is therefore
2009 disabled). */
c5a04734 2010static int
041c3194 2011is_macro_disabled (pfile, expansion, token)
c5a04734 2012 cpp_reader *pfile;
041c3194
ZW
2013 const cpp_toklist *expansion;
2014 const cpp_token *token;
c5a04734 2015{
041c3194
ZW
2016 cpp_context *context = CURRENT_CONTEXT (pfile);
2017
cf00a885
ZW
2018 /* Don't expand anything if this file has already been preprocessed. */
2019 if (CPP_OPTION (pfile, preprocessed))
2020 return 1;
2021
041c3194
ZW
2022 /* Arguments on either side of ## are inserted in place without
2023 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2024 occurs during a later rescan pass. The effect is that we expand
2025 iff we would as part of the macro's expansion list, so we should
2026 drop to the macro's context. */
2027 if (IS_ARG_CONTEXT (context))
c5a04734 2028 {
041c3194
ZW
2029 if (token->flags & PASTED)
2030 context--;
2031 else if (!(context->flags & CONTEXT_RAW))
2032 return 1;
2033 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2034 context--;
c5a04734 2035 }
c5a04734 2036
041c3194
ZW
2037 /* Have we already used this macro? */
2038 while (context->level > 0)
c5a04734 2039 {
041c3194
ZW
2040 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2041 return 1;
2042 /* Raw argument tokens are judged based on the token list they
2043 came from. */
2044 if (context->flags & CONTEXT_RAW)
2045 context = pfile->contexts + context->level;
2046 else
2047 context--;
2048 }
c5a04734 2049
041c3194
ZW
2050 /* Function-like macros may be disabled if the '(' is not in the
2051 current context. We check this without disrupting the context
2052 stack. */
2053 if (expansion->paramc >= 0)
2054 {
2055 const cpp_token *next;
2056 unsigned int prev_nme;
c5a04734 2057
041c3194
ZW
2058 context = CURRENT_CONTEXT (pfile);
2059 /* Drop down any contexts we're at the end of: the '(' may
2060 appear in lower macro expansions, or in the rest of the file. */
2061 while (context->posn == context->count && context > pfile->contexts)
2062 {
2063 context--;
2064 /* If we matched, we are disabled, as we appear in the
2065 expansion of each macro we meet. */
2066 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2067 return 1;
2068 }
c5a04734 2069
041c3194
ZW
2070 prev_nme = pfile->no_expand_level;
2071 pfile->no_expand_level = context - pfile->contexts;
2072 next = cpp_get_token (pfile);
2073 restore_macro_expansion (pfile, prev_nme);
2074 if (next->type != CPP_OPEN_PAREN)
2075 {
2076 _cpp_push_token (pfile, next);
2077 if (CPP_OPTION (pfile, warn_traditional))
2078 cpp_warning (pfile,
2079 "function macro %.*s must be used with arguments in traditional C",
bfb9dc7f 2080 (int) token->val.node->length, token->val.node->name);
041c3194
ZW
2081 return 1;
2082 }
c5a04734 2083 }
c5a04734 2084
041c3194 2085 return 0;
c5a04734
ZW
2086}
2087
041c3194
ZW
2088/* Add a token to the set of tokens forming the arguments to the macro
2089 being parsed in parse_args. */
2090static void
2091save_token (args, token)
2092 macro_args *args;
2093 const cpp_token *token;
c5a04734 2094{
041c3194
ZW
2095 if (args->used == args->capacity)
2096 {
2097 args->capacity += args->capacity + 100;
2098 args->tokens = (const cpp_token **)
2099 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2100 }
2101 args->tokens[args->used++] = token;
c5a04734
ZW
2102}
2103
041c3194
ZW
2104/* Take and save raw tokens until we finish one argument. Empty
2105 arguments are saved as a single CPP_PLACEMARKER token. */
2106static const cpp_token *
2107parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2108 cpp_reader *pfile;
041c3194
ZW
2109 int var_args;
2110 unsigned int paren_context;
2111 macro_args *args;
2112 unsigned int *pcount;
c5a04734 2113{
041c3194
ZW
2114 const cpp_token *token;
2115 unsigned int paren = 0, count = 0;
2116 int raw, was_raw = 1;
c5a04734 2117
041c3194 2118 for (count = 0;; count++)
c5a04734 2119 {
041c3194 2120 token = cpp_get_token (pfile);
c5a04734 2121
041c3194 2122 switch (token->type)
c5a04734 2123 {
041c3194
ZW
2124 default:
2125 break;
c5a04734 2126
041c3194
ZW
2127 case CPP_OPEN_PAREN:
2128 paren++;
2129 break;
2130
2131 case CPP_CLOSE_PAREN:
2132 if (paren-- != 0)
2133 break;
2134 goto out;
2135
2136 case CPP_COMMA:
2137 /* Commas are not terminators within parantheses or var_args. */
2138 if (paren || var_args)
2139 break;
2140 goto out;
2141
2142 case CPP_EOF: /* Error reported by caller. */
2143 goto out;
c5a04734 2144 }
c5a04734 2145
041c3194
ZW
2146 raw = pfile->cur_context <= paren_context;
2147 if (raw != was_raw)
2148 {
2149 was_raw = raw;
2150 save_token (args, 0);
2151 count++;
c5a04734 2152 }
041c3194 2153 save_token (args, token);
c5a04734 2154 }
c5a04734
ZW
2155
2156 out:
041c3194
ZW
2157 if (count == 0)
2158 {
2159 /* Duplicate the placemarker. Then we can set its flags and
2160 position and safely be using more than one. */
2161 save_token (args, duplicate_token (pfile, &placemarker_token));
2162 count++;
2163 }
2164
2165 *pcount = count;
2166 return token;
c5a04734
ZW
2167}
2168
041c3194
ZW
/* True if the argument starting at offset O of arglist A is empty:
   its first token, skipping one leading null marker if there is one,
   is a PLACEMARKER.  */

#define empty_argument(A, O) \
  ((A)->tokens[(A)->tokens[O] ? (O) : (O) + 1]->type == CPP_PLACEMARKER)
2176
2177/* Parse the arguments making up a macro invocation. Nested arguments
2178 are automatically macro expanded, but immediate macros are not
2179 expanded; this enables e.g. operator # to work correctly. Returns
2180 non-zero on error. */
c5a04734 2181static int
041c3194 2182parse_args (pfile, hp, args)
c5a04734 2183 cpp_reader *pfile;
041c3194
ZW
2184 cpp_hashnode *hp;
2185 macro_args *args;
c5a04734 2186{
041c3194
ZW
2187 const cpp_token *token;
2188 const cpp_toklist *macro;
2189 unsigned int total = 0;
2190 unsigned int paren_context = pfile->cur_context;
2191 int argc = 0;
2192
2193 macro = hp->value.expansion;
2194 do
c5a04734 2195 {
041c3194 2196 unsigned int count;
c5a04734 2197
041c3194
ZW
2198 token = parse_arg (pfile, (argc + 1 == macro->paramc
2199 && (macro->flags & VAR_ARGS)),
2200 paren_context, args, &count);
2201 if (argc < macro->paramc)
c5a04734 2202 {
041c3194
ZW
2203 total += count;
2204 args->ends[argc] = total;
c5a04734 2205 }
041c3194 2206 argc++;
c5a04734 2207 }
041c3194 2208 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2209
041c3194
ZW
2210 if (token->type == CPP_EOF)
2211 {
2212 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2213 hp->length, hp->name);
2214 return 1;
2215 }
2216 else if (argc < macro->paramc)
2217 {
2218 /* A rest argument is allowed to not appear in the invocation at all.
2219 e.g. #define debug(format, args...) ...
2220 debug("string");
2221 This is exactly the same as if the rest argument had received no
563dd08a 2222 tokens - debug("string",); This extension is deprecated. */
041c3194 2223
563dd08a 2224 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
041c3194
ZW
2225 {
2226 /* Duplicate the placemarker. Then we can set its flags and
2227 position and safely be using more than one. */
2228 save_token (args, duplicate_token (pfile, &placemarker_token));
2229 args->ends[argc] = total + 1;
2230 return 0;
2231 }
2232 else
2233 {
2234 cpp_error (pfile,
2235 "insufficient arguments in invocation of macro \"%.*s\"",
2236 hp->length, hp->name);
2237 return 1;
2238 }
2239 }
2240 /* An empty argument to an empty function-like macro is fine. */
2241 else if (argc > macro->paramc
2242 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2243 {
2244 cpp_error (pfile,
2245 "too many arguments in invocation of macro \"%.*s\"",
2246 hp->length, hp->name);
2247 return 1;
2248 }
c5a04734 2249
041c3194
ZW
2250 return 0;
2251}
2252
2253/* Adds backslashes before all backslashes and double quotes appearing
2254 in strings. Non-printable characters are converted to octal. */
2255static U_CHAR *
2256quote_string (dest, src, len)
2257 U_CHAR *dest;
2258 const U_CHAR *src;
2259 unsigned int len;
2260{
2261 while (len--)
c5a04734 2262 {
041c3194 2263 U_CHAR c = *src++;
c5a04734 2264
041c3194 2265 if (c == '\\' || c == '"')
6ab3e7dd 2266 {
041c3194
ZW
2267 *dest++ = '\\';
2268 *dest++ = c;
2269 }
2270 else
2271 {
2272 if (ISPRINT (c))
2273 *dest++ = c;
2274 else
2275 {
2276 sprintf ((char *) dest, "\\%03o", c);
2277 dest += 4;
2278 }
6ab3e7dd 2279 }
c5a04734 2280 }
c5a04734 2281
041c3194 2282 return dest;
c5a04734
ZW
2283}
2284
041c3194
ZW
2285/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2286 CPP_STRING token containing TEXT in quoted form. */
2287static cpp_token *
2288make_string_token (token, text, len)
2289 cpp_token *token;
2290 const U_CHAR *text;
2291 unsigned int len;
2292{
2293 U_CHAR *buf;
2294
2295 buf = (U_CHAR *) xmalloc (len * 4);
2296 token->type = CPP_STRING;
2297 token->flags = 0;
bfb9dc7f
ZW
2298 token->val.str.text = buf;
2299 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2300 return token;
2301}
2302
2303/* Allocates and converts a temporary token to a CPP_NUMBER token,
2304 evaluating to NUMBER. */
2305static cpp_token *
2306alloc_number_token (pfile, number)
c5a04734 2307 cpp_reader *pfile;
041c3194 2308 int number;
c5a04734 2309{
041c3194
ZW
2310 cpp_token *result;
2311 char *buf;
2312
2313 result = get_temp_token (pfile);
2314 buf = xmalloc (20);
2315 sprintf (buf, "%d", number);
2316
2317 result->type = CPP_NUMBER;
2318 result->flags = 0;
bfb9dc7f
ZW
2319 result->val.str.text = (U_CHAR *) buf;
2320 result->val.str.len = strlen (buf);
041c3194
ZW
2321 return result;
2322}
c5a04734 2323
041c3194
ZW
2324/* Returns a temporary token from the temporary token store of PFILE. */
2325static cpp_token *
2326get_temp_token (pfile)
2327 cpp_reader *pfile;
2328{
2329 if (pfile->temp_used == pfile->temp_alloced)
2330 {
2331 if (pfile->temp_used == pfile->temp_cap)
2332 {
2333 pfile->temp_cap += pfile->temp_cap + 20;
2334 pfile->temp_tokens = (cpp_token **) xrealloc
2335 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2336 }
2337 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2338 (sizeof (cpp_token));
2339 }
c5a04734 2340
041c3194
ZW
2341 return pfile->temp_tokens[pfile->temp_used++];
2342}
2343
2344/* Release (not free) for re-use the temporary tokens of PFILE. */
2345static void
2346release_temp_tokens (pfile)
2347 cpp_reader *pfile;
2348{
2349 while (pfile->temp_used)
c5a04734 2350 {
041c3194 2351 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2352
bfb9dc7f 2353 if (token_spellings[token->type].type == SPELL_STRING)
c5a04734 2354 {
bfb9dc7f
ZW
2355 free ((char *) token->val.str.text);
2356 token->val.str.text = 0;
c5a04734
ZW
2357 }
2358 }
041c3194 2359}
c5a04734 2360
041c3194
ZW
2361/* Free all of PFILE's dynamically-allocated temporary tokens. */
2362void
2363_cpp_free_temp_tokens (pfile)
2364 cpp_reader *pfile;
2365{
2366 if (pfile->temp_tokens)
c5a04734 2367 {
041c3194
ZW
2368 /* It is possible, though unlikely (looking for '(' of a funlike
2369 macro into EOF), that we haven't released the tokens yet. */
2370 release_temp_tokens (pfile);
2371 while (pfile->temp_alloced)
2372 free (pfile->temp_tokens[--pfile->temp_alloced]);
2373 free (pfile->temp_tokens);
c5a04734
ZW
2374 }
2375
041c3194
ZW
2376 if (pfile->date)
2377 {
bfb9dc7f 2378 free ((char *) pfile->date->val.str.text);
041c3194 2379 free (pfile->date);
bfb9dc7f 2380 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2381 free (pfile->time);
2382 }
c5a04734
ZW
2383}
2384
041c3194
ZW
2385/* Copy TOKEN into a temporary token from PFILE's store. */
2386static cpp_token *
2387duplicate_token (pfile, token)
2388 cpp_reader *pfile;
2389 const cpp_token *token;
2390{
2391 cpp_token *result = get_temp_token (pfile);
c5a04734 2392
041c3194 2393 *result = *token;
bfb9dc7f 2394 if (token_spellings[token->type].type == SPELL_STRING)
041c3194 2395 {
bfb9dc7f
ZW
2396 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2397 memcpy (buff, token->val.str.text, token->val.str.len);
2398 result->val.str.text = buff;
041c3194
ZW
2399 }
2400 return result;
2401}
c5a04734 2402
041c3194
ZW
2403/* Determine whether two tokens can be pasted together, and if so,
2404 what the resulting token is. Returns CPP_EOF if the tokens cannot
2405 be pasted, or the appropriate type for the merged token if they
2406 can. */
2407static enum cpp_ttype
2408can_paste (pfile, token1, token2, digraph)
2409 cpp_reader * pfile;
2410 const cpp_token *token1, *token2;
2411 int* digraph;
c5a04734 2412{
041c3194
ZW
2413 enum cpp_ttype a = token1->type, b = token2->type;
2414 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2415
041c3194
ZW
2416 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2417 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2418
041c3194 2419 switch (a)
c5a04734 2420 {
041c3194
ZW
2421 case CPP_GREATER:
2422 if (b == a) return CPP_RSHIFT;
2423 if (b == CPP_QUERY && cxx) return CPP_MAX;
2424 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2425 break;
2426 case CPP_LESS:
2427 if (b == a) return CPP_LSHIFT;
2428 if (b == CPP_QUERY && cxx) return CPP_MIN;
2429 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
2430 if (CPP_OPTION (pfile, digraphs))
2431 {
2432 if (b == CPP_COLON)
2433 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2434 if (b == CPP_MOD)
2435 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2436 }
041c3194 2437 break;
c5a04734 2438
041c3194
ZW
2439 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2440 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2441 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2442
041c3194
ZW
2443 case CPP_MINUS:
2444 if (b == a) return CPP_MINUS_MINUS;
2445 if (b == CPP_GREATER) return CPP_DEREF;
2446 break;
2447 case CPP_COLON:
2448 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 2449 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
2450 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2451 break;
2452
2453 case CPP_MOD:
9b55f29a
NB
2454 if (CPP_OPTION (pfile, digraphs))
2455 {
2456 if (b == CPP_GREATER)
2457 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2458 if (b == CPP_COLON)
2459 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2460 }
041c3194
ZW
2461 break;
2462 case CPP_DEREF:
2463 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2464 break;
2465 case CPP_DOT:
2466 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2467 if (b == CPP_NUMBER) return CPP_NUMBER;
2468 break;
2469
2470 case CPP_HASH:
2471 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2472 /* %:%: digraph */
2473 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2474 break;
2475
2476 case CPP_NAME:
2477 if (b == CPP_NAME) return CPP_NAME;
2478 if (b == CPP_NUMBER
bfb9dc7f 2479 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2480 if (b == CPP_CHAR
bfb9dc7f 2481 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2482 if (b == CPP_STRING
bfb9dc7f 2483 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2484 break;
2485
2486 case CPP_NUMBER:
2487 if (b == CPP_NUMBER) return CPP_NUMBER;
2488 if (b == CPP_NAME) return CPP_NUMBER;
2489 if (b == CPP_DOT) return CPP_NUMBER;
2490 /* Numbers cannot have length zero, so this is safe. */
2491 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2492 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2493 return CPP_NUMBER;
2494 break;
2495
2496 default:
2497 break;
c5a04734
ZW
2498 }
2499
041c3194
ZW
2500 return CPP_EOF;
2501}
2502
2503/* Check if TOKEN is to be ##-pasted with the token after it. */
2504static const cpp_token *
2505maybe_paste_with_next (pfile, token)
2506 cpp_reader *pfile;
2507 const cpp_token *token;
2508{
2509 cpp_token *pasted;
2510 const cpp_token *second;
2511 cpp_context *context = CURRENT_CONTEXT (pfile);
2512
2513 /* Is this token on the LHS of ## ? */
2514 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2515 && !(token->flags & PASTE_LEFT))
2516 return token;
2517
2518 /* Prevent recursion, and possibly pushing back more than one token. */
2519 if (pfile->paste_level)
2520 return token;
2521
2522 /* Suppress macro expansion for next token, but don't conflict with
2523 the other method of suppression. If it is an argument, macro
2524 expansion within the argument will still occur. */
2525 pfile->paste_level = pfile->cur_context;
2526 second = cpp_get_token (pfile);
2527 pfile->paste_level = 0;
2528
563dd08a
NB
2529 /* Ignore placemarker argument tokens (cannot be from an empty macro
2530 since macros are not expanded). */
041c3194
ZW
2531 if (token->type == CPP_PLACEMARKER)
2532 pasted = duplicate_token (pfile, second);
2533 else if (second->type == CPP_PLACEMARKER)
c5a04734 2534 {
563dd08a 2535 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
041c3194
ZW
2536 /* GCC has special extended semantics for a ## b where b is a
2537 varargs parameter: a disappears if b consists of no tokens.
2538 This extension is deprecated. */
563dd08a
NB
2539 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2540 && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2541 == (unsigned) mac_context->u.list->paramc))
041c3194
ZW
2542 {
2543 cpp_warning (pfile, "deprecated GNU ## extension used");
2544 pasted = duplicate_token (pfile, second);
2545 }
2546 else
2547 pasted = duplicate_token (pfile, token);
c5a04734 2548 }
041c3194
ZW
2549 else
2550 {
2551 int digraph = 0;
2552 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2553
041c3194
ZW
2554 if (type == CPP_EOF)
2555 {
2556 if (CPP_OPTION (pfile, warn_paste))
2557 cpp_warning (pfile,
2558 "pasting would not give a valid preprocessing token");
2559 _cpp_push_token (pfile, second);
2560 return token;
2561 }
c5a04734 2562
041c3194
ZW
2563 if (type == CPP_NAME || type == CPP_NUMBER)
2564 {
2565 /* Join spellings. */
bfb9dc7f 2566 U_CHAR *buf, *end;
c5a04734 2567
041c3194 2568 pasted = get_temp_token (pfile);
bfb9dc7f
ZW
2569 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2570 end = spell_token (pfile, token, buf);
2571 end = spell_token (pfile, second, end);
2572 *end = '\0';
041c3194 2573
bfb9dc7f
ZW
2574 if (type == CPP_NAME)
2575 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2576 else
2577 {
2578 pasted->val.str.text = uxstrdup (buf);
2579 pasted->val.str.len = end - buf;
2580 }
041c3194
ZW
2581 }
2582 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2583 pasted = duplicate_token (pfile, second);
2584 else
2585 {
2586 pasted = get_temp_token (pfile);
2587 pasted->val.integer = 0;
2588 }
2589
2590 pasted->type = type;
bfb9dc7f 2591 pasted->flags = digraph ? DIGRAPH : 0;
041c3194
ZW
2592 }
2593
2594 /* The pasted token gets the whitespace flags and position of the
2595 first token, the PASTE_LEFT flag of the second token, plus the
2596 PASTED flag to indicate it is the result of a paste. However, we
2597 want to preserve the DIGRAPH flag. */
2598 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2599 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2600 | (second->flags & PASTE_LEFT) | PASTED);
2601 pasted->col = token->col;
2602 pasted->line = token->line;
2603
2604 return maybe_paste_with_next (pfile, pasted);
2605}
2606
2607/* Convert a token sequence to a single string token according to the
2608 rules of the ISO C #-operator. */
2609#define INIT_SIZE 200
2610static cpp_token *
2611stringify_arg (pfile, token)
c5a04734 2612 cpp_reader *pfile;
041c3194 2613 const cpp_token *token;
c5a04734 2614{
041c3194
ZW
2615 cpp_token *result;
2616 unsigned char *main_buf;
2617 unsigned int prev_value, backslash_count = 0;
2618 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2619
041c3194
ZW
2620 prev_value = prevent_macro_expansion (pfile);
2621 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2622
041c3194
ZW
2623 result = get_temp_token (pfile);
2624 ASSIGN_FLAGS_AND_POS (result, token);
2625
2626 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2627 {
041c3194
ZW
2628 int escape;
2629 unsigned char *buf;
2630 unsigned int len = TOKEN_LEN (token);
c5a04734 2631
041c3194
ZW
2632 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2633 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2634 if (escape)
2635 len *= 4 + 1;
2636
2637 if (buf_used + len > buf_cap)
c5a04734 2638 {
041c3194
ZW
2639 buf_cap = buf_used + len + INIT_SIZE;
2640 main_buf = xrealloc (main_buf, buf_cap);
2641 }
c5a04734 2642
041c3194
ZW
2643 if (whitespace && (token->flags & PREV_WHITE))
2644 main_buf[buf_used++] = ' ';
c5a04734 2645
041c3194
ZW
2646 if (escape)
2647 buf = (unsigned char *) xmalloc (len);
2648 else
2649 buf = main_buf + buf_used;
2650
2651 len = spell_token (pfile, token, buf) - buf;
2652 if (escape)
2653 {
2654 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2655 free (buf);
2656 }
2657 else
2658 buf_used += len;
c5a04734 2659
041c3194
ZW
2660 whitespace = 1;
2661 if (token->type == CPP_BACKSLASH)
2662 backslash_count++;
2663 else
2664 backslash_count = 0;
2665 }
c5a04734 2666
041c3194
ZW
2667 /* Ignore the final \ of invalid string literals. */
2668 if (backslash_count & 1)
2669 {
2670 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2671 buf_used--;
2672 }
c5a04734 2673
041c3194 2674 result->type = CPP_STRING;
bfb9dc7f
ZW
2675 result->val.str.text = main_buf;
2676 result->val.str.len = buf_used;
041c3194
ZW
2677 restore_macro_expansion (pfile, prev_value);
2678 return result;
2679}
c5a04734 2680
041c3194
ZW
2681/* Allocate more room on the context stack of PFILE. */
2682static void
2683expand_context_stack (pfile)
2684 cpp_reader *pfile;
2685{
2686 pfile->context_cap += pfile->context_cap + 20;
2687 pfile->contexts = (cpp_context *)
2688 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2689}
c5a04734 2690
041c3194
ZW
2691/* Push the context of macro NODE onto the context stack. TOKEN is
2692 the CPP_NAME token invoking the macro. */
2693static const cpp_token *
2694push_macro_context (pfile, node, token)
2695 cpp_reader *pfile;
2696 cpp_hashnode *node;
2697 const cpp_token *token;
2698{
2699 unsigned char orig_flags;
2700 macro_args *args;
2701 cpp_context *context;
c5a04734 2702
041c3194 2703 if (pfile->cur_context > CPP_STACK_MAX)
c5a04734 2704 {
041c3194
ZW
2705 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2706 return token;
c5a04734
ZW
2707 }
2708
041c3194
ZW
2709 /* Token's flags may change when parsing args containing a nested
2710 invocation of this macro. */
2711 orig_flags = token->flags & (PREV_WHITE | BOL);
2712 args = 0;
2713 if (node->value.expansion->paramc >= 0)
c5a04734 2714 {
041c3194
ZW
2715 unsigned int error, prev_nme;
2716
2717 /* Allocate room for the argument contexts, and parse them. */
2718 args = (macro_args *) xmalloc (sizeof (macro_args));
2719 args->ends = (unsigned int *)
2720 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2721 args->tokens = 0;
2722 args->capacity = 0;
2723 args->used = 0;
2724 args->level = pfile->cur_context;
2725
2726 prev_nme = prevent_macro_expansion (pfile);
2727 pfile->args = args;
2728 error = parse_args (pfile, node, args);
2729 pfile->args = 0;
2730 restore_macro_expansion (pfile, prev_nme);
2731 if (error)
2732 {
2733 free_macro_args (args);
2734 return token;
2735 }
c5a04734 2736 }
c5a04734 2737
041c3194
ZW
2738 /* Now push its context. */
2739 pfile->cur_context++;
2740 if (pfile->cur_context == pfile->context_cap)
2741 expand_context_stack (pfile);
2742
2743 context = CURRENT_CONTEXT (pfile);
2744 context->u.list = node->value.expansion;
2745 context->args = args;
2746 context->posn = 0;
2747 context->count = context->u.list->tokens_used;
2748 context->level = pfile->cur_context;
2749 context->flags = 0;
2750 context->pushed_token = 0;
2751
2752 /* Set the flags of the first token. We know there must
2753 be one, empty macros are a single placemarker token. */
2754 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2755
2756 return cpp_get_token (pfile);
c5a04734
ZW
2757}
2758
041c3194
ZW
2759/* Push an argument to the current macro onto the context stack.
2760 TOKEN is the MACRO_ARG token representing the argument expansion. */
2761static const cpp_token *
2762push_arg_context (pfile, token)
2763 cpp_reader *pfile;
2764 const cpp_token *token;
c5a04734 2765{
041c3194
ZW
2766 cpp_context *context;
2767 macro_args *args;
2768
2769 pfile->cur_context++;
2770 if (pfile->cur_context == pfile->context_cap)
2771 expand_context_stack (pfile);
2772
2773 context = CURRENT_CONTEXT (pfile);
2774 args = context[-1].args;
2775
2776 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2777 context->u.arg = args->tokens + context->count;
2778 context->count = args->ends[token->val.aux] - context->count;
2779 context->args = 0;
2780 context->posn = 0;
2781 context->level = args->level;
2782 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2783 context->pushed_token = 0;
2784
2785 /* Set the flags of the first token. There is one. */
2786 {
2787 const cpp_token *first = context->u.arg[0];
2788 if (!first)
2789 first = context->u.arg[1];
c5a04734 2790
041c3194
ZW
2791 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2792 token->flags & (PREV_WHITE | BOL));
2793 }
c5a04734 2794
041c3194
ZW
2795 if (token->flags & STRINGIFY_ARG)
2796 return stringify_arg (pfile, token);
c5a04734 2797
041c3194
ZW
2798 if (token->flags & PASTE_LEFT)
2799 context->flags |= CONTEXT_PASTEL;
2800 if (pfile->paste_level)
2801 context->flags |= CONTEXT_PASTER;
d1d9a6bd 2802
041c3194 2803 return get_raw_token (pfile);
c5a04734
ZW
2804}
2805
041c3194
ZW
2806/* "Unget" a token. It is effectively inserted in the token queue and
2807 will be returned by the next call to get_raw_token. */
c5a04734 2808void
041c3194 2809_cpp_push_token (pfile, token)
c5a04734 2810 cpp_reader *pfile;
041c3194 2811 const cpp_token *token;
c5a04734 2812{
041c3194
ZW
2813 cpp_context *context = CURRENT_CONTEXT (pfile);
2814 if (context->pushed_token)
2815 cpp_ice (pfile, "two tokens pushed in a row");
2816 if (token->type != CPP_EOF)
2817 context->pushed_token = token;
2818 /* Don't push back a directive's CPP_EOF, step back instead. */
2819 else if (pfile->cur_context == 0)
2820 pfile->contexts[0].posn--;
2821}
c5a04734 2822
041c3194
ZW
2823/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2824 introducing the directive. */
2825static void
2826process_directive (pfile, token)
2827 cpp_reader *pfile;
2828 const cpp_token *token;
2829{
2830 const struct directive *d = pfile->token_list.directive;
2831 int prev_nme = 0;
2832
2833 /* Skip over the directive name. */
2834 if (token[1].type == CPP_NAME)
2835 _cpp_get_raw_token (pfile);
2836 else if (token[1].type != CPP_NUMBER)
2837 cpp_ice (pfile, "directive begins with %s?!",
2838 token_names[token[1].type]);
2839
2840 /* Flush pending tokens at this point, in case the directive produces
2841 output. XXX Directive output won't be visible to a direct caller of
2842 cpp_get_token. */
2843 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2844 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2845
2846 if (! (d->flags & EXPAND))
2847 prev_nme = prevent_macro_expansion (pfile);
2848 (void) (*d->handler) (pfile);
2849 if (! (d->flags & EXPAND))
2850 restore_macro_expansion (pfile, prev_nme);
2851 _cpp_skip_rest_of_line (pfile);
2852}
c5a04734 2853
041c3194
ZW
2854/* The external interface to return the next token. All macro
2855 expansion and directive processing is handled internally, the
2856 caller only ever sees the output after preprocessing. */
2857const cpp_token *
2858cpp_get_token (pfile)
2859 cpp_reader *pfile;
2860{
2861 const cpp_token *token;
2862 cpp_hashnode *node;
6ab3e7dd 2863
041c3194
ZW
2864 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
2865 for (;;)
2866 {
2867 token = get_raw_token (pfile);
2868 if (token->flags & BOL && token->type == CPP_HASH
2869 && pfile->token_list.directive)
c5a04734 2870 {
041c3194
ZW
2871 process_directive (pfile, token);
2872 continue;
c5a04734
ZW
2873 }
2874
041c3194
ZW
2875 /* Short circuit EOF. */
2876 if (token->type == CPP_EOF)
2877 return token;
2878
2879 if (pfile->skipping && ! pfile->token_list.directive)
c5a04734 2880 {
041c3194
ZW
2881 _cpp_skip_rest_of_line (pfile);
2882 continue;
2883 }
2884 break;
2885 }
c5a04734 2886
041c3194
ZW
2887 /* If there's a potential control macro and we get here, then that
2888 #ifndef didn't cover the entire file and its argument shouldn't
2889 be taken as a control macro. */
2890 pfile->potential_control_macro = 0;
c5a04734 2891
041c3194 2892 token = maybe_paste_with_next (pfile, token);
c5a04734 2893
041c3194
ZW
2894 if (token->type != CPP_NAME)
2895 return token;
c5a04734 2896
041c3194
ZW
2897 /* Is macro expansion disabled in general? */
2898 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2899 return token;
2900
bfb9dc7f 2901 node = token->val.node;
041c3194
ZW
2902 if (node->type == T_VOID)
2903 return token;
c5a04734 2904
041c3194
ZW
2905 if (node->type == T_MACRO)
2906 {
2907 if (is_macro_disabled (pfile, node->value.expansion, token))
2908 return token;
c5a04734 2909
041c3194
ZW
2910 return push_macro_context (pfile, node, token);
2911 }
2912 else
2913 return special_symbol (pfile, node, token);
2914}
c5a04734 2915
041c3194
ZW
2916/* Returns the next raw token, i.e. without performing macro
2917 expansion. Argument contexts are automatically entered. */
2918static const cpp_token *
2919get_raw_token (pfile)
2920 cpp_reader *pfile;
2921{
2922 const cpp_token *result;
2923 cpp_context *context = CURRENT_CONTEXT (pfile);
c5a04734 2924
041c3194
ZW
2925 if (context->pushed_token)
2926 {
2927 result = context->pushed_token;
2928 context->pushed_token = 0;
2929 }
2930 else if (context->posn == context->count)
2931 result = pop_context (pfile);
2932 else
2933 {
2934 if (IS_ARG_CONTEXT (context))
2935 {
2936 result = context->u.arg[context->posn++];
2937 if (result == 0)
c5a04734 2938 {
041c3194
ZW
2939 context->flags ^= CONTEXT_RAW;
2940 result = context->u.arg[context->posn++];
c5a04734 2941 }
041c3194
ZW
2942 return result; /* Cannot be a CPP_MACRO_ARG */
2943 }
2944 result = &context->u.list->tokens[context->posn++];
2945 }
c5a04734 2946
041c3194
ZW
2947 if (result->type == CPP_MACRO_ARG)
2948 result = push_arg_context (pfile, result);
2949 return result;
2950}
c5a04734 2951
041c3194
ZW
2952/* Internal interface to get the token without macro expanding. */
2953const cpp_token *
2954_cpp_get_raw_token (pfile)
2955 cpp_reader *pfile;
2956{
2957 int prev_nme = prevent_macro_expansion (pfile);
2958 const cpp_token *result = cpp_get_token (pfile);
2959 restore_macro_expansion (pfile, prev_nme);
2960 return result;
2961}
c5a04734 2962
041c3194
ZW
2963/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2964 list should be overwritten, or zero if we need to append
2965 (typically, if we are within the arguments to a macro, or looking
2966 for the '(' to start a function-like macro invocation). */
2967static int
2968lex_next (pfile, clear)
2969 cpp_reader *pfile;
2970 int clear;
2971{
2972 cpp_toklist *list = &pfile->token_list;
2973 const cpp_token *old_list = list->tokens;
2974 unsigned int old_used = list->tokens_used;
c5a04734 2975
91fcd158
NB
2976 /* If we are currently processing a directive, do not advance. 6.10
2977 paragraph 2: A new-line character ends the directive even if it
2978 occurs within what would otherwise be an invocation of a
2979 function-like macro.
2980
2981 It is possible that clear == 1 too; e.g. "#if funlike_macro ("
2982 since parse_args swallowed the directive's EOF. */
2983 if (list->directive)
2984 return 1;
2985
041c3194 2986 if (clear)
c5a04734 2987 {
041c3194
ZW
2988 /* Release all temporary tokens. */
2989 _cpp_clear_toklist (list);
2990 pfile->contexts[0].posn = 0;
2991 if (pfile->temp_used)
2992 release_temp_tokens (pfile);
c5a04734 2993 }
041c3194
ZW
2994
2995 lex_line (pfile, list);
2996 pfile->contexts[0].count = list->tokens_used;
c5a04734 2997
041c3194 2998 if (!clear && pfile->args)
c5a04734 2999 {
041c3194
ZW
3000 /* Fix up argument token pointers. */
3001 if (old_list != list->tokens)
3002 {
3003 unsigned int i;
3004
3005 for (i = 0; i < pfile->args->used; i++)
3006 {
3007 const cpp_token *token = pfile->args->tokens[i];
3008 if (token >= old_list && token < old_list + old_used)
3009 pfile->args->tokens[i] = (const cpp_token *)
3010 ((char *) token + ((char *) list->tokens - (char *) old_list));
3011 }
3012 }
3013
3014 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3015 tokens within the list of arguments that would otherwise act as
3016 preprocessing directives, the behavior is undefined.
3017
3018 This implementation will report a hard error and treat the
3019 'sequence of preprocessing tokens' as part of the macro argument,
3020 not a directive.
3021
3022 Note if pfile->args == 0, we're OK since we're only inside a
3023 macro argument after a '('. */
3024 if (list->directive)
3025 {
3026 cpp_error_with_line (pfile, list->tokens[old_used].line,
3027 list->tokens[old_used].col,
3028 "#%s may not be used inside a macro argument",
3029 list->directive->name);
91fcd158 3030 return 1;
041c3194 3031 }
c5a04734
ZW
3032 }
3033
041c3194 3034 return 0;
c5a04734
ZW
3035}
3036
041c3194
ZW
3037/* Pops a context of the context stack. If we're at the bottom, lexes
3038 the next logical line. Returns 1 if we're at the end of the
3039 argument list to the # operator, or if it is illegal to "overflow"
3040 into the rest of the file (e.g. 6.10.3.1.1). */
3041static int
3042do_pop_context (pfile)
3043 cpp_reader *pfile;
3044{
3045 cpp_context *context;
3fef5b2b 3046
041c3194
ZW
3047 if (pfile->cur_context == 0)
3048 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3049
3050 /* Argument contexts, when parsing args or handling # operator
3051 return CPP_EOF at the end. */
3052 context = CURRENT_CONTEXT (pfile);
3053 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3054 return 1;
3055
3056 /* Free resources when leaving macro contexts. */
3057 if (context->args)
3058 free_macro_args (context->args);
3059
3060 if (pfile->cur_context == pfile->no_expand_level)
3061 pfile->no_expand_level--;
3062 pfile->cur_context--;
3063
3064 return 0;
3065}
3066
3067/* Move down the context stack, and return the next raw token. */
3068static const cpp_token *
3069pop_context (pfile)
3070 cpp_reader *pfile;
3071{
3072 if (do_pop_context (pfile))
3073 return &eof_token;
3074 return get_raw_token (pfile);
3075}
3076
3077/* Turn off macro expansion at the current context level. */
3078static unsigned int
3079prevent_macro_expansion (pfile)
3080 cpp_reader *pfile;
3081{
3082 unsigned int prev_value = pfile->no_expand_level;
3083 pfile->no_expand_level = pfile->cur_context;
3084 return prev_value;
3085}
3086
3087/* Restore macro expansion to its previous state. */
3088static void
3089restore_macro_expansion (pfile, prev_value)
3090 cpp_reader *pfile;
3091 unsigned int prev_value;
3092{
3093 pfile->no_expand_level = prev_value;
3094}
3095
3096/* Used by cpperror.c to obtain the correct line and column to report
3097 in a diagnostic. */
3098unsigned int
3099_cpp_get_line (pfile, pcol)
3100 cpp_reader *pfile;
3101 unsigned int *pcol;
3102{
3103 unsigned int index;
3104 const cpp_token *cur_token;
3105
3106 if (pfile->in_lex_line)
3107 index = pfile->token_list.tokens_used;
3108 else
3109 index = pfile->contexts[0].posn;
3110
3111 cur_token = &pfile->token_list.tokens[index - 1];
3112 if (pcol)
3113 *pcol = cur_token->col;
3114 return cur_token->line;
3115}
3116
3117#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3118static const char * const monthnames[] =
3119{
3120 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3121 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3122};
3123
3124/* Handle builtin macros like __FILE__. */
3125static const cpp_token *
3126special_symbol (pfile, node, token)
3127 cpp_reader *pfile;
3128 cpp_hashnode *node;
d1d9a6bd 3129 const cpp_token *token;
3fef5b2b 3130{
041c3194
ZW
3131 cpp_token *result;
3132 cpp_buffer *ip;
3fef5b2b 3133
041c3194 3134 switch (node->type)
3fef5b2b 3135 {
041c3194
ZW
3136 case T_FILE:
3137 case T_BASE_FILE:
3fef5b2b 3138 {
041c3194 3139 const char *file;
3fef5b2b 3140
041c3194
ZW
3141 ip = CPP_BUFFER (pfile);
3142 if (ip == 0)
3143 file = "";
3fef5b2b 3144 else
041c3194
ZW
3145 {
3146 if (node->type == T_BASE_FILE)
3147 while (CPP_PREV_BUFFER (ip) != NULL)
3148 ip = CPP_PREV_BUFFER (ip);
3149
3150 file = ip->nominal_fname;
3151 }
3152 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3153 strlen (file));
3154 }
3155 break;
3fef5b2b 3156
041c3194
ZW
3157 case T_INCLUDE_LEVEL:
3158 {
3159 int true_indepth = 0;
3160
3161 /* Do not count the primary source file in the include level. */
3162 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3163 while (ip)
3164 {
3165 true_indepth++;
3166 ip = CPP_PREV_BUFFER (ip);
3167 }
3168 result = alloc_number_token (pfile, true_indepth);
3fef5b2b
NB
3169 }
3170 break;
3171
041c3194
ZW
3172 case T_SPECLINE:
3173 /* If __LINE__ is embedded in a macro, it must expand to the
3174 line of the macro's invocation, not its definition.
3175 Otherwise things like assert() will not work properly. */
3176 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3177 break;
3178
041c3194 3179 case T_STDC:
3fef5b2b 3180 {
041c3194 3181 int stdc = 1;
3fef5b2b 3182
041c3194
ZW
3183#ifdef STDC_0_IN_SYSTEM_HEADERS
3184 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3185 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3186 stdc = 0;
3187#endif
3188 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3189 }
3190 break;
3191
041c3194
ZW
3192 case T_DATE:
3193 case T_TIME:
3194 if (pfile->date == 0)
3195 {
3196 /* Allocate __DATE__ and __TIME__ from permanent storage,
3197 and save them in pfile so we don't have to do this again.
3198 We don't generate these strings at init time because
3199 time() and localtime() are very slow on some systems. */
3200 time_t tt = time (NULL);
3201 struct tm *tb = localtime (&tt);
3202
3203 pfile->date = make_string_token
3204 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3205 pfile->time = make_string_token
3206 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3207
bfb9dc7f 3208 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3209 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3210 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3211 tb->tm_hour, tb->tm_min, tb->tm_sec);
3212 }
3213 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3214 break;
3215
041c3194 3216 case T_POISON:
bfb9dc7f 3217 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3218 return token;
3219
3220 default:
3221 cpp_ice (pfile, "invalid special hash type");
3222 return token;
3fef5b2b
NB
3223 }
3224
041c3194
ZW
3225 ASSIGN_FLAGS_AND_POS (result, token);
3226 return result;
3227}
3228#undef DSC
3229
3230/* Dump the original user's spelling of argument index ARG_NO to the
3231 macro whose expansion is LIST. */
3232static void
3233dump_param_spelling (pfile, list, arg_no)
3234 cpp_reader *pfile;
3235 const cpp_toklist *list;
3236 unsigned int arg_no;
3237{
3238 const U_CHAR *param = list->namebuf;
3239
3240 while (arg_no--)
3241 param += ustrlen (param) + 1;
3242 CPP_PUTS (pfile, param, ustrlen (param));
3fef5b2b
NB
3243}
3244
041c3194 3245/* Dump a token list to the output. */
c5a04734 3246void
041c3194
ZW
3247_cpp_dump_list (pfile, list, token, flush)
3248 cpp_reader *pfile;
3249 const cpp_toklist *list;
3250 const cpp_token *token;
3251 int flush;
c5a04734 3252{
041c3194
ZW
3253 const cpp_token *limit = list->tokens + list->tokens_used;
3254 const cpp_token *prev = 0;
c5a04734 3255
041c3194
ZW
3256 /* Avoid the CPP_EOF. */
3257 if (list->directive)
3258 limit--;
c5a04734 3259
041c3194 3260 while (token < limit)
c5a04734 3261 {
041c3194
ZW
3262 if (token->type == CPP_MACRO_ARG)
3263 {
3264 if (token->flags & PREV_WHITE)
3265 CPP_PUTC (pfile, ' ');
3266 if (token->flags & STRINGIFY_ARG)
3267 CPP_PUTC (pfile, '#');
3268 dump_param_spelling (pfile, list, token->val.aux);
3269 }
c5a04734 3270 else
041c3194
ZW
3271 output_token (pfile, token, prev);
3272 if (token->flags & PASTE_LEFT)
3273 CPP_PUTS (pfile, " ##", 3);
3274 prev = token;
3275 token++;
c5a04734 3276 }
041c3194
ZW
3277
3278 if (flush && pfile->printer)
3279 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
c5a04734
ZW
3280}
3281
041c3194
ZW
3282/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3283 if it hasn't happened already. */
3284
3285void
3286_cpp_init_input_buffer (pfile)
3287 cpp_reader *pfile;
3288{
3289 init_trigraph_map ();
3290 pfile->context_cap = 20;
3291 pfile->contexts = (cpp_context *)
3292 xmalloc (pfile->context_cap * sizeof (cpp_context));
3293 pfile->cur_context = 0;
3294 pfile->contexts[0].u.list = &pfile->token_list;
3295
3296 pfile->contexts[0].posn = 0;
3297 pfile->contexts[0].count = 0;
3298 pfile->no_expand_level = UINT_MAX;
3299
3300 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3301}
3302
3303/* Moves to the end of the directive line, popping contexts as
3304 necessary. */
3305void
3306_cpp_skip_rest_of_line (pfile)
3307 cpp_reader *pfile;
3308{
3309 /* Get to base context. Clear parsing args and each contexts flags,
3310 since these can cause pop_context to return without popping. */
3311 pfile->no_expand_level = UINT_MAX;
3312 while (pfile->cur_context != 0)
c5a04734 3313 {
041c3194
ZW
3314 pfile->contexts[pfile->cur_context].flags = 0;
3315 do_pop_context (pfile);
c5a04734 3316 }
041c3194
ZW
3317
3318 pfile->contexts[pfile->cur_context].count = 0;
3319 pfile->contexts[pfile->cur_context].posn = 0;
3320 pfile->token_list.directive = 0;
c5a04734
ZW
3321}
3322
041c3194
ZW
3323/* Directive handler wrapper used by the command line option
3324 processor. */
3325void
3326_cpp_run_directive (pfile, dir, buf, count)
3327 cpp_reader *pfile;
3328 const struct directive *dir;
3329 const char *buf;
3330 size_t count;
3331{
3332 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3333 {
3334 unsigned int prev_lvl = 0;
3335 /* scan the line now, else prevent_macro_expansion won't work */
3336 do_pop_context (pfile);
3337 if (! (dir->flags & EXPAND))
3338 prev_lvl = prevent_macro_expansion (pfile);
3339
3340 (void) (*dir->handler) (pfile);
3341
3342 if (! (dir->flags & EXPAND))
3343 restore_macro_expansion (pfile, prev_lvl);
3344
3345 _cpp_skip_rest_of_line (pfile);
3346 cpp_pop_buffer (pfile);
3347 }
3348}
This page took 0.53052 seconds and 5 git commands to generate.